Index: head/sys/dev/awi/awi.c =================================================================== --- head/sys/dev/awi/awi.c (revision 108465) +++ head/sys/dev/awi/awi.c (revision 108466) @@ -1,2993 +1,2993 @@ /* $NetBSD: awi.c,v 1.26 2000/07/21 04:48:55 onoe Exp $ */ /* $FreeBSD$ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Bill Sommerfeld * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Driver for AMD 802.11 firmware. * Uses am79c930 chip driver to talk to firmware running on the am79c930. * * More-or-less a generic ethernet-like if driver, with 802.11 gorp added. */ /* * todo: * - flush tx queue on resynch. * - clear oactive on "down". * - rewrite copy-into-mbuf code * - mgmt state machine gets stuck retransmitting assoc requests. * - multicast filter. * - fix device reset so it's more likely to work * - show status goo through ifmedia. * * more todo: * - deal with more 802.11 frames. * - send reassoc request * - deal with reassoc response * - send/deal with disassociation * - deal with "full" access points (no room for me). * - power save mode * * later: * - SSID preferences * - need ioctls for poking at the MIBs * - implement ad-hoc mode (including bss creation). * - decide when to do "ad hoc" vs. infrastructure mode (IFF_LINK flags?) * (focus on inf. mode since that will be needed for ietf) * - deal with DH vs. FH versions of the card * - deal with faster cards (2mb/s) * - ?WEP goo (mmm, rc4) (it looks not particularly useful). * - ifmedia revision. * - common 802.11 mibish things. * - common 802.11 media layer. */ /* * Driver for AMD 802.11 PCnetMobile firmware. * Uses am79c930 chip driver to talk to firmware running on the am79c930. * * The initial version of the driver was written by * Bill Sommerfeld . 
* Then the driver module completely rewritten to support cards with DS phy * and to support adhoc mode by Atsushi Onoe */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) && __FreeBSD_version >= 400000 #include #else #include #endif #include #include #ifdef __FreeBSD__ #include #else #include #endif #include #include #include #ifdef INET #include #include #include #include #ifdef __NetBSD__ #include #else #include #endif #endif #if defined(__FreeBSD__) && __FreeBSD_version >= 400000 #define NBPFILTER 1 #elif defined(__FreeBSD__) && __FreeBSD_version >= 300000 #include "bpf.h" #define NBPFILTER NBPF #else #include "bpfilter.h" #endif #if NBPFILTER > 0 #include #include #endif #include #include #ifdef __NetBSD__ #include #endif #ifdef __NetBSD__ #include #include #include #include #endif #ifdef __FreeBSD__ #include #include #include #include #endif static int awi_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); #ifdef IFM_IEEE80211 static int awi_media_rate2opt(struct awi_softc *sc, int rate); static int awi_media_opt2rate(struct awi_softc *sc, int opt); static int awi_media_change(struct ifnet *ifp); static void awi_media_status(struct ifnet *ifp, struct ifmediareq *imr); #endif static void awi_watchdog(struct ifnet *ifp); static void awi_start(struct ifnet *ifp); static void awi_txint(struct awi_softc *sc); static struct mbuf * awi_fix_txhdr(struct awi_softc *sc, struct mbuf *m0); static struct mbuf * awi_fix_rxhdr(struct awi_softc *sc, struct mbuf *m0); static void awi_input(struct awi_softc *sc, struct mbuf *m, u_int32_t rxts, u_int8_t rssi); static void awi_rxint(struct awi_softc *sc); static struct mbuf * awi_devget(struct awi_softc *sc, u_int32_t off, u_int16_t len); static int awi_init_hw(struct awi_softc *sc); static int awi_init_mibs(struct awi_softc *sc); static int awi_init_txrx(struct awi_softc *sc); static void awi_stop_txrx(struct awi_softc *sc); static int 
awi_start_scan(struct awi_softc *sc); static int awi_next_scan(struct awi_softc *sc); static void awi_stop_scan(struct awi_softc *sc); static void awi_recv_beacon(struct awi_softc *sc, struct mbuf *m0, u_int32_t rxts, u_int8_t rssi); static int awi_set_ss(struct awi_softc *sc); static void awi_try_sync(struct awi_softc *sc); static void awi_sync_done(struct awi_softc *sc); static void awi_send_deauth(struct awi_softc *sc); static void awi_send_auth(struct awi_softc *sc, int seq); static void awi_recv_auth(struct awi_softc *sc, struct mbuf *m0); static void awi_send_asreq(struct awi_softc *sc, int reassoc); static void awi_recv_asresp(struct awi_softc *sc, struct mbuf *m0); static int awi_mib(struct awi_softc *sc, u_int8_t cmd, u_int8_t mib); static int awi_cmd_scan(struct awi_softc *sc); static int awi_cmd(struct awi_softc *sc, u_int8_t cmd); static void awi_cmd_done(struct awi_softc *sc); static int awi_next_txd(struct awi_softc *sc, int len, u_int32_t *framep, u_int32_t*ntxdp); static int awi_lock(struct awi_softc *sc); static void awi_unlock(struct awi_softc *sc); static int awi_intr_lock(struct awi_softc *sc); static void awi_intr_unlock(struct awi_softc *sc); static int awi_cmd_wait(struct awi_softc *sc); static void awi_print_essid(u_int8_t *essid); #ifdef AWI_DEBUG static void awi_dump_pkt(struct awi_softc *sc, struct mbuf *m, int rssi); int awi_verbose = 0; int awi_dump = 0; #define AWI_DUMP_MASK(fc0) (1 << (((fc0) & IEEE80211_FC0_SUBTYPE_MASK) >> 4)) int awi_dump_mask = AWI_DUMP_MASK(IEEE80211_FC0_SUBTYPE_BEACON); int awi_dump_hdr = 0; int awi_dump_len = 28; #endif #if NBPFILTER > 0 #define AWI_BPF_NORM 0 #define AWI_BPF_RAW 1 #ifdef __FreeBSD__ #define AWI_BPF_MTAP(sc, m, raw) do { \ if ((sc)->sc_rawbpf == (raw)) \ BPF_MTAP((sc)->sc_ifp, (m)); \ } while (0); #else #define AWI_BPF_MTAP(sc, m, raw) do { \ if ((sc)->sc_ifp->if_bpf && (sc)->sc_rawbpf == (raw)) \ bpf_mtap((sc)->sc_ifp->if_bpf, (m)); \ } while (0); #endif #else #define AWI_BPF_MTAP(sc, m, raw) 
#endif #ifndef llc_snap #define llc_snap llc_un.type_snap #endif #ifdef __FreeBSD__ #if __FreeBSD_version >= 400000 devclass_t awi_devclass; #endif #if __FreeBSD_version < 500043 /* NetBSD compatible functions */ static char * ether_sprintf(u_int8_t *); static char * ether_sprintf(enaddr) u_int8_t *enaddr; { static char strbuf[18]; sprintf(strbuf, "%6D", enaddr, ":"); return strbuf; } #endif #endif int awi_attach(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; int s; int error; #ifdef IFM_IEEE80211 int i; u_int8_t *phy_rates; int mword; struct ifmediareq imr; #endif s = splnet(); /* * Even if we can sleep in initialization state, * all other processes (e.g. ifconfig) have to wait for * completion of attaching interface. */ sc->sc_busy = 1; sc->sc_status = AWI_ST_INIT; TAILQ_INIT(&sc->sc_scan); error = awi_init_hw(sc); if (error) { sc->sc_invalid = 1; splx(s); return error; } error = awi_init_mibs(sc); splx(s); if (error) { sc->sc_invalid = 1; return error; } ifp->if_softc = sc; ifp->if_start = awi_start; ifp->if_ioctl = awi_ioctl; ifp->if_watchdog = awi_watchdog; ifp->if_mtu = ETHERMTU; ifp->if_hdrlen = sizeof(struct ieee80211_frame) + sizeof(struct ether_header); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; #ifdef IFF_NOTRAILERS ifp->if_flags |= IFF_NOTRAILERS; #endif #ifdef __NetBSD__ memcpy(ifp->if_xname, sc->sc_dev.dv_xname, IFNAMSIZ); #endif #ifdef __FreeBSD__ ifp->if_output = ether_output; ifp->if_snd.ifq_maxlen = ifqmaxlen; memcpy(sc->sc_ec.ac_enaddr, sc->sc_mib_addr.aMAC_Address, ETHER_ADDR_LEN); #endif printf("%s: IEEE802.11 %s %dMbps (firmware %s)\n", sc->sc_dev.dv_xname, sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH ? 
"FH" : "DS", sc->sc_tx_rate / 10, sc->sc_banner); printf("%s: address %s\n", sc->sc_dev.dv_xname, ether_sprintf(sc->sc_mib_addr.aMAC_Address)); #ifdef __FreeBSD__ ether_ifattach(ifp, sc->sc_mib_addr.aMAC_Address); #else if_attach(ifp); ether_ifattach(ifp, sc->sc_mib_addr.aMAC_Address); #if NBPFILTER > 0 bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, sizeof(struct ether_header)); #endif #endif #ifdef IFM_IEEE80211 ifmedia_init(&sc->sc_media, 0, awi_media_change, awi_media_status); phy_rates = sc->sc_mib_phy.aSuprt_Data_Rates; for (i = 0; i < phy_rates[1]; i++) { mword = awi_media_rate2opt(sc, AWI_80211_RATE(phy_rates[2 + i])); if (mword == 0) continue; mword |= IFM_IEEE80211; ifmedia_add(&sc->sc_media, mword, 0, NULL); ifmedia_add(&sc->sc_media, mword | IFM_IEEE80211_ADHOC, 0, NULL); if (sc->sc_mib_phy.IEEE_PHY_Type != AWI_PHY_TYPE_FH) ifmedia_add(&sc->sc_media, mword | IFM_IEEE80211_ADHOC | IFM_FLAG0, 0, NULL); } awi_media_status(ifp, &imr); ifmedia_set(&sc->sc_media, imr.ifm_active); #endif /* ready to accept ioctl */ awi_unlock(sc); /* Attach is successful. */ sc->sc_attached = 1; return 0; } #ifdef __NetBSD__ int awi_detach(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; int s; /* Succeed if there is no work to do. 
*/ if (!sc->sc_attached) return (0); s = splnet(); sc->sc_invalid = 1; awi_stop(sc); while (sc->sc_sleep_cnt > 0) { wakeup(sc); (void)tsleep(sc, PWAIT, "awidet", 1); } if (sc->sc_wep_ctx != NULL) free(sc->sc_wep_ctx, M_DEVBUF); #if NBPFILTER > 0 bpfdetach(ifp); #endif #ifdef IFM_IEEE80211 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); #endif ether_ifdetach(ifp); if_detach(ifp); if (sc->sc_enabled) { if (sc->sc_disable) (*sc->sc_disable)(sc); sc->sc_enabled = 0; } splx(s); return 0; } int awi_activate(self, act) struct device *self; enum devact act; { struct awi_softc *sc = (struct awi_softc *)self; int s, error = 0; s = splnet(); switch (act) { case DVACT_ACTIVATE: error = EOPNOTSUPP; break; case DVACT_DEACTIVATE: sc->sc_invalid = 1; if (sc->sc_ifp) if_deactivate(sc->sc_ifp); break; } splx(s); return error; } void awi_power(sc, why) struct awi_softc *sc; int why; { int s; int ocansleep; if (!sc->sc_enabled) return; s = splnet(); ocansleep = sc->sc_cansleep; sc->sc_cansleep = 0; #ifdef needtobefixed /*ONOE*/ if (why == PWR_RESUME) { sc->sc_enabled = 0; awi_init(sc); (void)awi_intr(sc); } else { awi_stop(sc); if (sc->sc_disable) (*sc->sc_disable)(sc); } #endif sc->sc_cansleep = ocansleep; splx(s); } #endif /* __NetBSD__ */ static int awi_ioctl(ifp, cmd, data) struct ifnet *ifp; u_long cmd; caddr_t data; { struct awi_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; struct ieee80211req *ireq = (struct ieee80211req *)data; int s, error; struct ieee80211_nwid nwid; u_int8_t *p; int len; u_int8_t tmpstr[IEEE80211_NWID_LEN*2]; #ifdef __FreeBSD_version #if __FreeBSD_version < 500028 struct proc *mythread = curproc; /* name a white lie */ #else struct thread *mythread = curthread; #endif #endif s = splnet(); /* serialize ioctl */ error = awi_lock(sc); if (error) goto cantlock; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: 
arp_ifinit((void *)ifp, ifa); break; #endif } /* FALLTHROUGH */ case SIOCSIFFLAGS: sc->sc_format_llc = !(ifp->if_flags & IFF_LINK0); if (!(ifp->if_flags & IFF_UP)) { if (sc->sc_enabled) { awi_stop(sc); if (sc->sc_disable) (*sc->sc_disable)(sc); sc->sc_enabled = 0; } break; } error = awi_init(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef __FreeBSD__ error = ENETRESET; /*XXX*/ #else error = (cmd == SIOCADDMULTI) ? ether_addmulti(ifr, &sc->sc_ec) : ether_delmulti(ifr, &sc->sc_ec); #endif /* * Do not rescan BSS. Rather, just reset multicast filter. */ if (error == ENETRESET) { if (sc->sc_enabled) error = awi_init(sc); else error = 0; } break; case SIOCSIFMTU: if (ifr->ifr_mtu > ETHERMTU) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; case SIOCS80211NWID: #ifdef __FreeBSD__ error = suser(mythread); if (error) break; #endif error = copyin(ifr->ifr_data, &nwid, sizeof(nwid)); if (error) break; if (nwid.i_len > IEEE80211_NWID_LEN) { error = EINVAL; break; } if (sc->sc_mib_mac.aDesired_ESS_ID[1] == nwid.i_len && memcmp(&sc->sc_mib_mac.aDesired_ESS_ID[2], nwid.i_nwid, nwid.i_len) == 0) break; memset(sc->sc_mib_mac.aDesired_ESS_ID, 0, AWI_ESS_ID_SIZE); sc->sc_mib_mac.aDesired_ESS_ID[0] = IEEE80211_ELEMID_SSID; sc->sc_mib_mac.aDesired_ESS_ID[1] = nwid.i_len; memcpy(&sc->sc_mib_mac.aDesired_ESS_ID[2], nwid.i_nwid, nwid.i_len); if (sc->sc_enabled) { awi_stop(sc); error = awi_init(sc); } break; case SIOCG80211NWID: if (ifp->if_flags & IFF_RUNNING) p = sc->sc_bss.essid; else p = sc->sc_mib_mac.aDesired_ESS_ID; error = copyout(p + 1, ifr->ifr_data, 1 + IEEE80211_NWID_LEN); break; case SIOCS80211NWKEY: #ifdef __FreeBSD__ error = suser(mythread); if (error) break; #endif error = awi_wep_setnwkey(sc, (struct ieee80211_nwkey *)data); break; case SIOCG80211NWKEY: error = awi_wep_getnwkey(sc, (struct ieee80211_nwkey *)data); break; #ifdef IFM_IEEE80211 case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; #endif #ifdef 
__FreeBSD__ case SIOCG80211: switch(ireq->i_type) { case IEEE80211_IOC_SSID: if (ireq->i_val != -1 && ireq->i_val != 0) { error = EINVAL; break; } if (!sc->sc_mib_local.Network_Mode) p = sc->sc_ownssid; else if (ireq->i_val == -1 && (ifp->if_flags & IFF_RUNNING)) p = sc->sc_bss.essid; else p = sc->sc_mib_mac.aDesired_ESS_ID; len = p[1]; p += 2; if (len > IEEE80211_NWID_LEN) { error = EINVAL; break; } if (len > 0) error = copyout(p, ireq->i_data, len); ireq->i_len = len; break; case IEEE80211_IOC_NUMSSIDS: ireq->i_val = 1; break; case IEEE80211_IOC_WEP: if (sc->sc_wep_algo != NULL) ireq->i_val = IEEE80211_WEP_MIXED; else ireq->i_val = IEEE80211_WEP_OFF; break; case IEEE80211_IOC_WEPKEY: if(ireq->i_val < 0 || ireq->i_val > 3) { error = EINVAL; break; } len = sizeof(tmpstr); error = awi_wep_getkey(sc, ireq->i_val, tmpstr, &len); if(error) break; #ifdef __FreeBSD__ if (!suser(mythread)) bzero(tmpstr, len); #endif ireq->i_len = len; error = copyout(tmpstr, ireq->i_data, len); break; case IEEE80211_IOC_NUMWEPKEYS: ireq->i_val = 4; break; case IEEE80211_IOC_WEPTXKEY: ireq->i_val = sc->sc_wep_defkid; break; case IEEE80211_IOC_AUTHMODE: ireq->i_val = IEEE80211_AUTH_OPEN; break; case IEEE80211_IOC_STATIONNAME: /* not used anywhere */ error = EINVAL; break; case IEEE80211_IOC_CHANNEL: /* XXX: Handle FH cards */ ireq->i_val = sc->sc_bss.chanset; break; case IEEE80211_IOC_POWERSAVE: /* * The powersave mode is not supported by the driver. 
*/ ireq->i_val = IEEE80211_POWERSAVE_NOSUP; break; case IEEE80211_IOC_POWERSAVESLEEP: error = EINVAL; break; default: error = EINVAL; break; } break; case SIOCS80211: error = suser(mythread); if(error) break; switch(ireq->i_type) { case IEEE80211_IOC_SSID: if (ireq->i_val != 0 || ireq->i_len > IEEE80211_NWID_LEN) { error = EINVAL; break; } bzero(tmpstr, AWI_ESS_ID_SIZE); tmpstr[0] = IEEE80211_ELEMID_SSID; tmpstr[1] = ireq->i_len; error = copyin(ireq->i_data, tmpstr+2, ireq->i_len); if(error) break; bcopy(tmpstr, sc->sc_mib_mac.aDesired_ESS_ID, AWI_ESS_ID_SIZE); bcopy(tmpstr, sc->sc_ownssid, AWI_ESS_ID_SIZE); break; case IEEE80211_IOC_WEP: if(ireq->i_val == IEEE80211_WEP_OFF) error = awi_wep_setalgo(sc, 0); else error = awi_wep_setalgo(sc, 1); break; case IEEE80211_IOC_WEPKEY: if(ireq->i_val < 0 || ireq->i_val > 3 || ireq->i_len > 13) { error = EINVAL; break; } error = copyin(ireq->i_data, tmpstr, ireq->i_len); if(error) break; error = awi_wep_setkey(sc, ireq->i_val, tmpstr, ireq->i_len); break; case IEEE80211_IOC_WEPTXKEY: if(ireq->i_val < 0 || ireq->i_val > 3) { error = EINVAL; break; } sc->sc_wep_defkid = ireq->i_val; break; case IEEE80211_IOC_AUTHMODE: if(ireq->i_val != IEEE80211_AUTH_OPEN) error = EINVAL; break; case IEEE80211_IOC_STATIONNAME: error = EPERM; break; case IEEE80211_IOC_CHANNEL: if(ireq->i_val < sc->sc_scan_min || ireq->i_val > sc->sc_scan_max) { error = EINVAL; break; } sc->sc_ownch = ireq->i_val; break; case IEEE80211_IOC_POWERSAVE: if(ireq->i_val != IEEE80211_POWERSAVE_OFF) error = EINVAL; break; case IEEE80211_IOC_POWERSAVESLEEP: error = EINVAL; break; default: error = EINVAL; break; } /* Restart the card so the change takes effect */ if(!error) { if(sc->sc_enabled) { awi_stop(sc); error = awi_init(sc); } } break; #endif /* __FreeBSD__ */ default: error = awi_wicfg(ifp, cmd, data); break; } awi_unlock(sc); cantlock: splx(s); return error; } #ifdef IFM_IEEE80211 static int awi_media_rate2opt(sc, rate) struct awi_softc *sc; int rate; { int 
mword;	/* NOTE(review): declarator continues the "int" at the end of the previous (collapsed) source line */

	mword = 0;
	/*
	 * Translate a firmware data rate (units of 100kbit/s, e.g. 10 = 1Mbps,
	 * cf. the "%dMbps ... sc_tx_rate / 10" attach printf) into the matching
	 * IFM_IEEE80211_* media subtype for this card's PHY.  5.5 and 11 Mbps
	 * exist only on DS PHYs; an unsupported rate yields 0.
	 */
	switch (rate) {
	case 10:
		if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH)
			mword = IFM_IEEE80211_FH1;
		else
			mword = IFM_IEEE80211_DS1;
		break;
	case 20:
		if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH)
			mword = IFM_IEEE80211_FH2;
		else
			mword = IFM_IEEE80211_DS2;
		break;
	case 55:
		if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_DS)
			mword = IFM_IEEE80211_DS5;
		break;
	case 110:
		if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_DS)
			mword = IFM_IEEE80211_DS11;
		break;
	}
	return mword;
}

/*
 * Inverse of awi_media_rate2opt(): translate an ifmedia word into the
 * firmware rate (100kbit/s units).  Returns 0 when the subtype belongs
 * to the other PHY family (FH vs. DS) or is unknown.
 */
static int
awi_media_opt2rate(sc, opt)
	struct awi_softc *sc;
	int opt;
{
	int rate;

	rate = 0;
	/* First reject subtypes that do not match this card's PHY type. */
	switch (IFM_SUBTYPE(opt)) {
	case IFM_IEEE80211_FH1:
	case IFM_IEEE80211_FH2:
		if (sc->sc_mib_phy.IEEE_PHY_Type != AWI_PHY_TYPE_FH)
			return 0;
		break;
	case IFM_IEEE80211_DS1:
	case IFM_IEEE80211_DS2:
	case IFM_IEEE80211_DS5:
	case IFM_IEEE80211_DS11:
		if (sc->sc_mib_phy.IEEE_PHY_Type != AWI_PHY_TYPE_DS)
			return 0;
		break;
	}
	/* Then map the subtype to its rate. */
	switch (IFM_SUBTYPE(opt)) {
	case IFM_IEEE80211_FH1:
	case IFM_IEEE80211_DS1:
		rate = 10;
		break;
	case IFM_IEEE80211_FH2:
	case IFM_IEEE80211_DS2:
		rate = 20;
		break;
	case IFM_IEEE80211_DS5:
		rate = 55;
		break;
	case IFM_IEEE80211_DS11:
		rate = 110;
		break;
	}
	return rate;
}

/*
 * Called from ifmedia_ioctl via awi_ioctl with lock obtained.
 * Validate the requested media word against the PHY's supported rate
 * set, switch between infrastructure and adhoc (optionally no-bssid)
 * mode, and restart the interface if it is up.
 */
static int
awi_media_change(ifp)
	struct ifnet *ifp;
{
	struct awi_softc *sc = ifp->if_softc;
	struct ifmedia_entry *ime;
	u_int8_t *phy_rates;
	int i, rate, error;

	error = 0;
	ime = sc->sc_media.ifm_cur;
	rate = awi_media_opt2rate(sc, ime->ifm_media);
	if (rate == 0)
		return EINVAL;
	if (rate != sc->sc_tx_rate) {
		/* The requested rate must appear in aSuprt_Data_Rates. */
		phy_rates = sc->sc_mib_phy.aSuprt_Data_Rates;
		for (i = 0; i < phy_rates[1]; i++) {
			if (rate == AWI_80211_RATE(phy_rates[2 + i]))
				break;
		}
		if (i == phy_rates[1])
			return EINVAL;
	}
	if (ime->ifm_media & IFM_IEEE80211_ADHOC) {
		sc->sc_mib_local.Network_Mode = 0;
		/* FH adhoc always uses a BSSID; on DS, IFM_FLAG0 selects no-bssid mode. */
		if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH)
			sc->sc_no_bssid = 0;
		else
			sc->sc_no_bssid = (ime->ifm_media & IFM_FLAG0) ?
1 : 0;	/* completes the conditional expression begun on the previous (collapsed) source line */
	} else {
		sc->sc_mib_local.Network_Mode = 1;	/* infrastructure mode */
	}
	/* Restart the interface so the new media selection takes effect. */
	if (sc->sc_enabled) {
		awi_stop(sc);
		error = awi_init(sc);
	}
	return error;
}

/*
 * ifmedia status callback: report link validity/activity, the current
 * TX rate, and the adhoc / no-bssid modifiers.
 */
static void
awi_media_status(ifp, imr)
	struct ifnet *ifp;
	struct ifmediareq *imr;
{
	struct awi_softc *sc = ifp->if_softc;

	imr->ifm_status = IFM_AVALID;
	if (ifp->if_flags & IFF_RUNNING)
		imr->ifm_status |= IFM_ACTIVE;
	imr->ifm_active = IFM_IEEE80211;
	imr->ifm_active |= awi_media_rate2opt(sc, sc->sc_tx_rate);
	if (sc->sc_mib_local.Network_Mode == 0) {
		imr->ifm_active |= IFM_IEEE80211_ADHOC;
		if (sc->sc_no_bssid)
			imr->ifm_active |= IFM_FLAG0;
	}
}
#endif /* IFM_IEEE80211 */

/*
 * Interrupt handler.  Repeatedly reads and acknowledges the two interrupt
 * status registers, dispatching RX, TX, command-done and scan-complete
 * work until no causes remain.  Returns nonzero iff anything was handled.
 */
int
awi_intr(arg)
	void *arg;
{
	struct awi_softc *sc = arg;
	u_int16_t status;
	int error, handled = 0, ocansleep;

	if (!sc->sc_enabled || !sc->sc_enab_intr || sc->sc_invalid)
		return 0;

	/* Keep the chip out of power-down while interrupts are serviced. */
	am79c930_gcr_setbits(&sc->sc_chip,
	    AM79C930_GCR_DISPWDN | AM79C930_GCR_ECINT);
	awi_write_1(sc, AWI_DIS_PWRDN, 1);
	ocansleep = sc->sc_cansleep;
	sc->sc_cansleep = 0;	/* never sleep in interrupt context */
	for (;;) {
		error = awi_intr_lock(sc);
		if (error)
			break;
		status = awi_read_1(sc, AWI_INTSTAT);
		awi_write_1(sc, AWI_INTSTAT, 0);
		awi_write_1(sc, AWI_INTSTAT, 0);
		status |= awi_read_1(sc, AWI_INTSTAT2) << 8;
		awi_write_1(sc, AWI_INTSTAT2, 0);
		DELAY(10);
		awi_intr_unlock(sc);
		/* Ignore spurious command-done when no command is outstanding. */
		if (!sc->sc_cmd_inprog)
			status &= ~AWI_INT_CMD;	/* make sure */
		if (status == 0)
			break;
		handled = 1;
		if (status & AWI_INT_RX)
			awi_rxint(sc);
		if (status & AWI_INT_TX)
			awi_txint(sc);
		if (status & AWI_INT_CMD)
			awi_cmd_done(sc);
		if (status & AWI_INT_SCAN_CMPLT) {
			if (sc->sc_status == AWI_ST_SCAN &&
			    sc->sc_mgt_timer > 0)
				(void)awi_next_scan(sc);
		}
	}
	sc->sc_cansleep = ocansleep;
	am79c930_gcr_clearbits(&sc->sc_chip, AM79C930_GCR_DISPWDN);
	awi_write_1(sc, AWI_DIS_PWRDN, 0);
	return handled;
}

/*
 * Bring the interface up: rebuild the multicast/promiscuous filter from
 * the interface address lists, push the MIBs to the firmware, and start
 * (or resume) BSS scanning unless the interface is already running.
 */
int
awi_init(sc)
	struct awi_softc *sc;
{
	int error, ostatus;
	int n;
	struct ifnet *ifp = sc->sc_ifp;
#ifdef __FreeBSD__
	struct ifmultiaddr *ifma;
#else
	struct ether_multi *enm;
	struct ether_multistep step;
#endif

	/* reinitialize multicast filter */
	n = 0;
	ifp->if_flags |= IFF_ALLMULTI;
sc->sc_mib_local.Accept_All_Multicast_Dis = 0; if (ifp->if_flags & IFF_PROMISC) { sc->sc_mib_mac.aPromiscuous_Enable = 1; goto set_mib; } sc->sc_mib_mac.aPromiscuous_Enable = 0; #ifdef __FreeBSD__ if (ifp->if_amcount != 0) goto set_mib; TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (n == AWI_GROUP_ADDR_SIZE) goto set_mib; memcpy(sc->sc_mib_addr.aGroup_Addresses[n], LLADDR((struct sockaddr_dl *)ifma->ifma_addr), ETHER_ADDR_LEN); n++; } #else ETHER_FIRST_MULTI(step, &sc->sc_ec, enm); while (enm != NULL) { if (n == AWI_GROUP_ADDR_SIZE || memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN) != 0) goto set_mib; memcpy(sc->sc_mib_addr.aGroup_Addresses[n], enm->enm_addrlo, ETHER_ADDR_LEN); n++; ETHER_NEXT_MULTI(step, enm); } #endif for (; n < AWI_GROUP_ADDR_SIZE; n++) memset(sc->sc_mib_addr.aGroup_Addresses[n], 0, ETHER_ADDR_LEN); ifp->if_flags &= ~IFF_ALLMULTI; sc->sc_mib_local.Accept_All_Multicast_Dis = 1; set_mib: #ifdef notdef /* allow non-encrypted frame for receiving. */ sc->sc_mib_mgt.Wep_Required = sc->sc_wep_algo != NULL ? 
1 : 0; #endif if (!sc->sc_enabled) { sc->sc_enabled = 1; if (sc->sc_enable) (*sc->sc_enable)(sc); sc->sc_status = AWI_ST_INIT; error = awi_init_hw(sc); if (error) return error; } ostatus = sc->sc_status; sc->sc_status = AWI_ST_INIT; if ((error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_LOCAL)) != 0 || (error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_ADDR)) != 0 || (error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_MAC)) != 0 || (error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_MGT)) != 0 || (error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_PHY)) != 0) { awi_stop(sc); return error; } if (ifp->if_flags & IFF_RUNNING) sc->sc_status = AWI_ST_RUNNING; else { if (ostatus == AWI_ST_INIT) { error = awi_init_txrx(sc); if (error) return error; } error = awi_start_scan(sc); } return error; } void awi_stop(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; struct awi_bss *bp; struct mbuf *m; sc->sc_status = AWI_ST_INIT; if (!sc->sc_invalid) { (void)awi_cmd_wait(sc); if (sc->sc_mib_local.Network_Mode && sc->sc_status > AWI_ST_AUTH) awi_send_deauth(sc); awi_stop_txrx(sc); } ifp->if_flags &= ~(IFF_RUNNING|IFF_OACTIVE); ifp->if_timer = 0; sc->sc_tx_timer = sc->sc_rx_timer = sc->sc_mgt_timer = 0; for (;;) { _IF_DEQUEUE(&sc->sc_mgtq, m); if (m == NULL) break; m_freem(m); } IF_DRAIN(&ifp->if_snd); while ((bp = TAILQ_FIRST(&sc->sc_scan)) != NULL) { TAILQ_REMOVE(&sc->sc_scan, bp, list); free(bp, M_DEVBUF); } } static void awi_watchdog(ifp) struct ifnet *ifp; { struct awi_softc *sc = ifp->if_softc; int ocansleep; if (sc->sc_invalid) { ifp->if_timer = 0; return; } ocansleep = sc->sc_cansleep; sc->sc_cansleep = 0; if (sc->sc_tx_timer && --sc->sc_tx_timer == 0) { printf("%s: transmit timeout\n", sc->sc_dev.dv_xname); awi_txint(sc); } if (sc->sc_rx_timer && --sc->sc_rx_timer == 0) { if (ifp->if_flags & IFF_DEBUG) { printf("%s: no recent beacons from %s; rescanning\n", sc->sc_dev.dv_xname, ether_sprintf(sc->sc_bss.bssid)); } ifp->if_flags &= ~IFF_RUNNING; awi_start_scan(sc); } if (sc->sc_mgt_timer && 
--sc->sc_mgt_timer == 0) { switch (sc->sc_status) { case AWI_ST_SCAN: awi_stop_scan(sc); break; case AWI_ST_AUTH: case AWI_ST_ASSOC: /* restart scan */ awi_start_scan(sc); break; default: break; } } if (sc->sc_tx_timer == 0 && sc->sc_rx_timer == 0 && sc->sc_mgt_timer == 0) ifp->if_timer = 0; else ifp->if_timer = 1; sc->sc_cansleep = ocansleep; } static void awi_start(ifp) struct ifnet *ifp; { struct awi_softc *sc = ifp->if_softc; struct mbuf *m0, *m; u_int32_t txd, frame, ntxd; u_int8_t rate; int len, sent = 0; for (;;) { txd = sc->sc_txnext; _IF_DEQUEUE(&sc->sc_mgtq, m0); if (m0 != NULL) { if (awi_next_txd(sc, m0->m_pkthdr.len, &frame, &ntxd)) { _IF_PREPEND(&sc->sc_mgtq, m0); ifp->if_flags |= IFF_OACTIVE; break; } } else { if (!(ifp->if_flags & IFF_RUNNING)) break; IF_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; len = m0->m_pkthdr.len + sizeof(struct ieee80211_frame); if (sc->sc_format_llc) len += sizeof(struct llc) - sizeof(struct ether_header); if (sc->sc_wep_algo != NULL) len += IEEE80211_WEP_IVLEN + IEEE80211_WEP_KIDLEN + IEEE80211_WEP_CRCLEN; if (awi_next_txd(sc, len, &frame, &ntxd)) { IF_PREPEND(&ifp->if_snd, m0); ifp->if_flags |= IFF_OACTIVE; break; } AWI_BPF_MTAP(sc, m0, AWI_BPF_NORM); m0 = awi_fix_txhdr(sc, m0); if (sc->sc_wep_algo != NULL && m0 != NULL) m0 = awi_wep_encrypt(sc, m0, 1); if (m0 == NULL) { ifp->if_oerrors++; continue; } ifp->if_opackets++; } #ifdef AWI_DEBUG if (awi_dump) awi_dump_pkt(sc, m0, -1); #endif AWI_BPF_MTAP(sc, m0, AWI_BPF_RAW); len = 0; for (m = m0; m != NULL; m = m->m_next) { awi_write_bytes(sc, frame + len, mtod(m, u_int8_t *), m->m_len); len += m->m_len; } m_freem(m0); rate = sc->sc_tx_rate; /*XXX*/ awi_write_1(sc, ntxd + AWI_TXD_STATE, 0); awi_write_4(sc, txd + AWI_TXD_START, frame); awi_write_4(sc, txd + AWI_TXD_NEXT, ntxd); awi_write_4(sc, txd + AWI_TXD_LENGTH, len); awi_write_1(sc, txd + AWI_TXD_RATE, rate); awi_write_4(sc, txd + AWI_TXD_NDA, 0); awi_write_4(sc, txd + AWI_TXD_NRA, 0); awi_write_1(sc, txd + 
AWI_TXD_STATE, AWI_TXD_ST_OWN);	/* completes the awi_write_1() begun on the previous (collapsed) line: hands the descriptor to the firmware */
		sc->sc_txnext = ntxd;
		sent++;
	}
	if (sent) {
		/* Arm the transmit watchdog (5 ticks) if not already armed. */
		if (sc->sc_tx_timer == 0)
			sc->sc_tx_timer = 5;
		ifp->if_timer = 1;
#ifdef AWI_DEBUG
		if (awi_verbose)
			printf("awi_start: sent %d txdone %d txnext %d txbase %d txend %d\n",
			    sent, sc->sc_txdone, sc->sc_txnext,
			    sc->sc_txbase, sc->sc_txend);
#endif
	}
}

/*
 * Transmit-complete interrupt: reap finished TX descriptors up to
 * sc_txnext, counting errors, then clear the watchdog and OACTIVE and
 * kick awi_start() to queue more frames.
 */
static void
awi_txint(sc)
	struct awi_softc *sc;
{
	struct ifnet *ifp = sc->sc_ifp;
	u_int8_t flags;

	while (sc->sc_txdone != sc->sc_txnext) {
		flags = awi_read_1(sc, sc->sc_txdone + AWI_TXD_STATE);
		/* Stop at the first descriptor still owned by the firmware
		 * or not yet marked done. */
		if ((flags & AWI_TXD_ST_OWN) || !(flags & AWI_TXD_ST_DONE))
			break;
		if (flags & AWI_TXD_ST_ERROR)
			ifp->if_oerrors++;
		sc->sc_txdone =
		    awi_read_4(sc, sc->sc_txdone + AWI_TXD_NEXT) & 0x7fff;
	}
	sc->sc_tx_timer = 0;
	ifp->if_flags &= ~IFF_OACTIVE;
#ifdef AWI_DEBUG
	if (awi_verbose)
		printf("awi_txint: txdone %d txnext %d txbase %d txend %d\n",
		    sc->sc_txdone, sc->sc_txnext, sc->sc_txbase, sc->sc_txend);
#endif
	awi_start(ifp);
}

/*
 * Convert an outbound Ethernet frame into an 802.11 data frame:
 * optionally rewrite the Ethernet header as an LLC/SNAP header
 * (sc_format_llc), then prepend an ieee80211_frame header — ToDS
 * addressing in infrastructure mode, NoDS in adhoc.  Consumes m0;
 * returns NULL on allocation failure.
 */
static struct mbuf *
awi_fix_txhdr(sc, m0)
	struct awi_softc *sc;
	struct mbuf *m0;
{
	struct ether_header eh;
	struct ieee80211_frame *wh;
	struct llc *llc;

	if (m0->m_len < sizeof(eh)) {
		m0 = m_pullup(m0, sizeof(eh));
		if (m0 == NULL)
			return NULL;
	}
	memcpy(&eh, mtod(m0, caddr_t), sizeof(eh));
	if (sc->sc_format_llc) {
		/* Shrink the Ethernet header in place to LLC size and
		 * build a SNAP header carrying the original ether_type. */
		m_adj(m0, sizeof(struct ether_header) - sizeof(struct llc));
		llc = mtod(m0, struct llc *);
		llc->llc_dsap = llc->llc_ssap = LLC_SNAP_LSAP;
		llc->llc_control = LLC_UI;
		llc->llc_snap.org_code[0] = llc->llc_snap.org_code[1] =
		    llc->llc_snap.org_code[2] = 0;
		llc->llc_snap.ether_type = eh.ether_type;
	}
	M_PREPEND(m0, sizeof(struct ieee80211_frame), M_DONTWAIT);
	if (m0 == NULL)
		return NULL;
	wh = mtod(m0, struct ieee80211_frame *);
	wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_DATA;
	LE_WRITE_2(wh->i_dur, 0);
	LE_WRITE_2(wh->i_seq, 0);
	if (sc->sc_mib_local.Network_Mode) {
		/* Infrastructure: addr1 = BSSID, addr2 = SA, addr3 = DA. */
		wh->i_fc[1] = IEEE80211_FC1_DIR_TODS;
		memcpy(wh->i_addr1, sc->sc_bss.bssid, ETHER_ADDR_LEN);
		memcpy(wh->i_addr2, eh.ether_shost, ETHER_ADDR_LEN);
		memcpy(wh->i_addr3,
eh.ether_dhost, ETHER_ADDR_LEN); } else { wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; memcpy(wh->i_addr1, eh.ether_dhost, ETHER_ADDR_LEN); memcpy(wh->i_addr2, eh.ether_shost, ETHER_ADDR_LEN); memcpy(wh->i_addr3, sc->sc_bss.bssid, ETHER_ADDR_LEN); } return m0; } static struct mbuf * awi_fix_rxhdr(sc, m0) struct awi_softc *sc; struct mbuf *m0; { struct ieee80211_frame wh; struct ether_header *eh; struct llc *llc; if (m0->m_len < sizeof(wh)) { m_freem(m0); return NULL; } llc = (struct llc *)(mtod(m0, caddr_t) + sizeof(wh)); if (llc->llc_dsap == LLC_SNAP_LSAP && llc->llc_ssap == LLC_SNAP_LSAP && llc->llc_control == LLC_UI && llc->llc_snap.org_code[0] == 0 && llc->llc_snap.org_code[1] == 0 && llc->llc_snap.org_code[2] == 0) { memcpy(&wh, mtod(m0, caddr_t), sizeof(wh)); m_adj(m0, sizeof(wh) + sizeof(*llc) - sizeof(*eh)); eh = mtod(m0, struct ether_header *); switch (wh.i_fc[1] & IEEE80211_FC1_DIR_MASK) { case IEEE80211_FC1_DIR_NODS: memcpy(eh->ether_dhost, wh.i_addr1, ETHER_ADDR_LEN); memcpy(eh->ether_shost, wh.i_addr2, ETHER_ADDR_LEN); break; case IEEE80211_FC1_DIR_TODS: memcpy(eh->ether_dhost, wh.i_addr3, ETHER_ADDR_LEN); memcpy(eh->ether_shost, wh.i_addr2, ETHER_ADDR_LEN); break; case IEEE80211_FC1_DIR_FROMDS: memcpy(eh->ether_dhost, wh.i_addr1, ETHER_ADDR_LEN); memcpy(eh->ether_shost, wh.i_addr3, ETHER_ADDR_LEN); break; case IEEE80211_FC1_DIR_DSTODS: m_freem(m0); return NULL; } } else { /* assuming ethernet encapsulation, just strip 802.11 header */ m_adj(m0, sizeof(wh)); } if (ALIGN(mtod(m0, caddr_t) + sizeof(struct ether_header)) != (uintptr_t)(mtod(m0, caddr_t) + sizeof(struct ether_header))) { /* XXX: we loose to estimate the type of encapsulation */ struct mbuf *n, *n0, **np; caddr_t newdata; int off; n0 = NULL; np = &n0; off = 0; while (m0->m_pkthdr.len > off) { if (n0 == NULL) { MGETHDR(n, M_DONTWAIT, MT_DATA); if (n == NULL) { m_freem(m0); return NULL; } - M_COPY_PKTHDR(n, m0); + M_MOVE_PKTHDR(n, m0); n->m_len = MHLEN; } else { MGET(n, M_DONTWAIT, MT_DATA); if (n 
== NULL) { m_freem(m0); m_freem(n0); return NULL; } n->m_len = MLEN; } if (m0->m_pkthdr.len - off >= MINCLSIZE) { MCLGET(n, M_DONTWAIT); if (n->m_flags & M_EXT) n->m_len = n->m_ext.ext_size; } if (n0 == NULL) { newdata = (caddr_t) ALIGN(n->m_data + sizeof(struct ether_header)) - sizeof(struct ether_header); n->m_len -= newdata - n->m_data; n->m_data = newdata; } if (n->m_len > m0->m_pkthdr.len - off) n->m_len = m0->m_pkthdr.len - off; m_copydata(m0, off, n->m_len, mtod(n, caddr_t)); off += n->m_len; *np = n; np = &n->m_next; } m_freem(m0); m0 = n0; } return m0; } static void awi_input(sc, m, rxts, rssi) struct awi_softc *sc; struct mbuf *m; u_int32_t rxts; u_int8_t rssi; { struct ifnet *ifp = sc->sc_ifp; struct ieee80211_frame *wh; /* trim CRC here for WEP can find its own CRC at the end of packet. */ m_adj(m, -ETHER_CRC_LEN); AWI_BPF_MTAP(sc, m, AWI_BPF_RAW); wh = mtod(m, struct ieee80211_frame *); if ((wh->i_fc[0] & IEEE80211_FC0_VERSION_MASK) != IEEE80211_FC0_VERSION_0) { printf("%s; receive packet with wrong version: %x\n", sc->sc_dev.dv_xname, wh->i_fc[0]); m_freem(m); ifp->if_ierrors++; return; } if (wh->i_fc[1] & IEEE80211_FC1_WEP) { m = awi_wep_encrypt(sc, m, 0); if (m == NULL) { ifp->if_ierrors++; return; } wh = mtod(m, struct ieee80211_frame *); } #ifdef AWI_DEBUG if (awi_dump) awi_dump_pkt(sc, m, rssi); #endif if ((sc->sc_mib_local.Network_Mode || !sc->sc_no_bssid) && sc->sc_status == AWI_ST_RUNNING) { if (memcmp(wh->i_addr2, sc->sc_bss.bssid, ETHER_ADDR_LEN) == 0) { sc->sc_rx_timer = 10; sc->sc_bss.rssi = rssi; } } switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) { case IEEE80211_FC0_TYPE_DATA: if (sc->sc_mib_local.Network_Mode) { if ((wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) != IEEE80211_FC1_DIR_FROMDS) { m_freem(m); return; } } else { if ((wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) != IEEE80211_FC1_DIR_NODS) { m_freem(m); return; } } m = awi_fix_rxhdr(sc, m); if (m == NULL) { ifp->if_ierrors++; break; } ifp->if_ipackets++; #if !(defined(__FreeBSD__) && 
__FreeBSD_version >= 400000) AWI_BPF_MTAP(sc, m, AWI_BPF_NORM); #endif (*ifp->if_input)(ifp, m); break; case IEEE80211_FC0_TYPE_MGT: if ((wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) != IEEE80211_FC1_DIR_NODS) { m_freem(m); return; } switch (wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_PROBE_RESP: case IEEE80211_FC0_SUBTYPE_BEACON: awi_recv_beacon(sc, m, rxts, rssi); break; case IEEE80211_FC0_SUBTYPE_AUTH: awi_recv_auth(sc, m); break; case IEEE80211_FC0_SUBTYPE_ASSOC_RESP: case IEEE80211_FC0_SUBTYPE_REASSOC_RESP: awi_recv_asresp(sc, m); break; case IEEE80211_FC0_SUBTYPE_DEAUTH: if (sc->sc_mib_local.Network_Mode) awi_send_auth(sc, 1); break; case IEEE80211_FC0_SUBTYPE_DISASSOC: if (sc->sc_mib_local.Network_Mode) awi_send_asreq(sc, 1); break; } m_freem(m); break; case IEEE80211_FC0_TYPE_CTL: default: /* should not come here */ m_freem(m); break; } } static void awi_rxint(sc) struct awi_softc *sc; { u_int8_t state, rate, rssi; u_int16_t len; u_int32_t frame, next, rxts, rxoff; struct mbuf *m; rxoff = sc->sc_rxdoff; for (;;) { state = awi_read_1(sc, rxoff + AWI_RXD_HOST_DESC_STATE); if (state & AWI_RXD_ST_OWN) break; if (!(state & AWI_RXD_ST_CONSUMED)) { if (state & AWI_RXD_ST_RXERROR) sc->sc_ifp->if_ierrors++; else { len = awi_read_2(sc, rxoff + AWI_RXD_LEN); rate = awi_read_1(sc, rxoff + AWI_RXD_RATE); rssi = awi_read_1(sc, rxoff + AWI_RXD_RSSI); frame = awi_read_4(sc, rxoff + AWI_RXD_START_FRAME) & 0x7fff; rxts = awi_read_4(sc, rxoff + AWI_RXD_LOCALTIME); m = awi_devget(sc, frame, len); if (state & AWI_RXD_ST_LF) awi_input(sc, m, rxts, rssi); else sc->sc_rxpend = m; } state |= AWI_RXD_ST_CONSUMED; awi_write_1(sc, rxoff + AWI_RXD_HOST_DESC_STATE, state); } next = awi_read_4(sc, rxoff + AWI_RXD_NEXT); if (next & AWI_RXD_NEXT_LAST) break; /* make sure the next pointer is correct */ if (next != awi_read_4(sc, rxoff + AWI_RXD_NEXT)) break; state |= AWI_RXD_ST_OWN; awi_write_1(sc, rxoff + AWI_RXD_HOST_DESC_STATE, state); rxoff = next & 0x7fff; } 
sc->sc_rxdoff = rxoff; } static struct mbuf * awi_devget(sc, off, len) struct awi_softc *sc; u_int32_t off; u_int16_t len; { struct mbuf *m; struct mbuf *top, **mp; u_int tlen; top = sc->sc_rxpend; mp = ⊤ if (top != NULL) { sc->sc_rxpend = NULL; top->m_pkthdr.len += len; m = top; while (*mp != NULL) { m = *mp; mp = &m->m_next; } if (m->m_flags & M_EXT) tlen = m->m_ext.ext_size; else if (m->m_flags & M_PKTHDR) tlen = MHLEN; else tlen = MLEN; tlen -= m->m_len; if (tlen > len) tlen = len; awi_read_bytes(sc, off, mtod(m, u_int8_t *) + m->m_len, tlen); off += tlen; len -= tlen; } while (len > 0) { if (top == NULL) { MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return NULL; m->m_pkthdr.rcvif = sc->sc_ifp; m->m_pkthdr.len = len; m->m_len = MHLEN; } else { MGET(m, M_DONTWAIT, MT_DATA); if (m == NULL) { m_freem(top); return NULL; } m->m_len = MLEN; } if (len >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if (m->m_flags & M_EXT) m->m_len = m->m_ext.ext_size; } if (top == NULL) { int hdrlen = sizeof(struct ieee80211_frame) + (sc->sc_format_llc ? sizeof(struct llc) : sizeof(struct ether_header)); caddr_t newdata = (caddr_t) ALIGN(m->m_data + hdrlen) - hdrlen; m->m_len -= newdata - m->m_data; m->m_data = newdata; } if (m->m_len > len) m->m_len = len; awi_read_bytes(sc, off, mtod(m, u_int8_t *), m->m_len); off += m->m_len; len -= m->m_len; *mp = m; mp = &m->m_next; } return top; } /* * Initialize hardware and start firmware to accept commands. * Called everytime after power on firmware. */ static int awi_init_hw(sc) struct awi_softc *sc; { u_int8_t status; u_int16_t intmask; int i, error; sc->sc_enab_intr = 0; sc->sc_invalid = 0; /* XXX: really? 
*/ awi_drvstate(sc, AWI_DRV_RESET); /* reset firmware */ am79c930_gcr_setbits(&sc->sc_chip, AM79C930_GCR_CORESET); DELAY(100); awi_write_1(sc, AWI_SELFTEST, 0); awi_write_1(sc, AWI_CMD, 0); awi_write_1(sc, AWI_BANNER, 0); am79c930_gcr_clearbits(&sc->sc_chip, AM79C930_GCR_CORESET); DELAY(100); /* wait for selftest completion */ for (i = 0; ; i++) { if (i >= AWI_SELFTEST_TIMEOUT*hz/1000) { printf("%s: failed to complete selftest (timeout)\n", sc->sc_dev.dv_xname); return ENXIO; } status = awi_read_1(sc, AWI_SELFTEST); if ((status & 0xf0) == 0xf0) break; if (sc->sc_cansleep) { sc->sc_sleep_cnt++; (void)tsleep(sc, PWAIT, "awitst", 1); sc->sc_sleep_cnt--; } else { DELAY(1000*1000/hz); } } if (status != AWI_SELFTEST_PASSED) { printf("%s: failed to complete selftest (code %x)\n", sc->sc_dev.dv_xname, status); return ENXIO; } /* check banner to confirm firmware write it */ awi_read_bytes(sc, AWI_BANNER, sc->sc_banner, AWI_BANNER_LEN); if (memcmp(sc->sc_banner, "PCnetMobile:", 12) != 0) { printf("%s: failed to complete selftest (bad banner)\n", sc->sc_dev.dv_xname); for (i = 0; i < AWI_BANNER_LEN; i++) printf("%s%02x", i ? 
":" : "\t", sc->sc_banner[i]); printf("\n"); return ENXIO; } /* initializing interrupt */ sc->sc_enab_intr = 1; error = awi_intr_lock(sc); if (error) return error; intmask = AWI_INT_GROGGY | AWI_INT_SCAN_CMPLT | AWI_INT_TX | AWI_INT_RX | AWI_INT_CMD; awi_write_1(sc, AWI_INTMASK, ~intmask & 0xff); awi_write_1(sc, AWI_INTMASK2, 0); awi_write_1(sc, AWI_INTSTAT, 0); awi_write_1(sc, AWI_INTSTAT2, 0); awi_intr_unlock(sc); am79c930_gcr_setbits(&sc->sc_chip, AM79C930_GCR_ENECINT); /* issueing interface test command */ error = awi_cmd(sc, AWI_CMD_NOP); if (error) { printf("%s: failed to complete selftest", sc->sc_dev.dv_xname); if (error == ENXIO) printf(" (no hardware)\n"); else if (error != EWOULDBLOCK) printf(" (error %d)\n", error); else if (sc->sc_cansleep) printf(" (lost interrupt)\n"); else printf(" (command timeout)\n"); } return error; } /* * Extract the factory default MIB value from firmware and assign the driver * default value. * Called once at attaching the interface. */ static int awi_init_mibs(sc) struct awi_softc *sc; { int i, error; u_int8_t *rate; if ((error = awi_mib(sc, AWI_CMD_GET_MIB, AWI_MIB_LOCAL)) != 0 || (error = awi_mib(sc, AWI_CMD_GET_MIB, AWI_MIB_ADDR)) != 0 || (error = awi_mib(sc, AWI_CMD_GET_MIB, AWI_MIB_MAC)) != 0 || (error = awi_mib(sc, AWI_CMD_GET_MIB, AWI_MIB_MGT)) != 0 || (error = awi_mib(sc, AWI_CMD_GET_MIB, AWI_MIB_PHY)) != 0) { printf("%s: failed to get default mib value (error %d)\n", sc->sc_dev.dv_xname, error); return error; } rate = sc->sc_mib_phy.aSuprt_Data_Rates; sc->sc_tx_rate = AWI_RATE_1MBIT; for (i = 0; i < rate[1]; i++) { if (AWI_80211_RATE(rate[2 + i]) > sc->sc_tx_rate) sc->sc_tx_rate = AWI_80211_RATE(rate[2 + i]); } awi_init_region(sc); memset(&sc->sc_mib_mac.aDesired_ESS_ID, 0, AWI_ESS_ID_SIZE); sc->sc_mib_mac.aDesired_ESS_ID[0] = IEEE80211_ELEMID_SSID; sc->sc_mib_local.Fragmentation_Dis = 1; sc->sc_mib_local.Accept_All_Multicast_Dis = 1; sc->sc_mib_local.Power_Saving_Mode_Dis = 1; /* allocate buffers */ sc->sc_txbase = 
AWI_BUFFERS; sc->sc_txend = sc->sc_txbase + (AWI_TXD_SIZE + sizeof(struct ieee80211_frame) + sizeof(struct ether_header) + ETHERMTU) * AWI_NTXBUFS; LE_WRITE_4(&sc->sc_mib_local.Tx_Buffer_Offset, sc->sc_txbase); LE_WRITE_4(&sc->sc_mib_local.Tx_Buffer_Size, sc->sc_txend - sc->sc_txbase); LE_WRITE_4(&sc->sc_mib_local.Rx_Buffer_Offset, sc->sc_txend); LE_WRITE_4(&sc->sc_mib_local.Rx_Buffer_Size, AWI_BUFFERS_END - sc->sc_txend); sc->sc_mib_local.Network_Mode = 1; sc->sc_mib_local.Acting_as_AP = 0; return 0; } /* * Start transmitter and receiver of firmware * Called after awi_init_hw() to start operation. */ static int awi_init_txrx(sc) struct awi_softc *sc; { int error; /* start transmitter */ sc->sc_txdone = sc->sc_txnext = sc->sc_txbase; awi_write_4(sc, sc->sc_txbase + AWI_TXD_START, 0); awi_write_4(sc, sc->sc_txbase + AWI_TXD_NEXT, 0); awi_write_4(sc, sc->sc_txbase + AWI_TXD_LENGTH, 0); awi_write_1(sc, sc->sc_txbase + AWI_TXD_RATE, 0); awi_write_4(sc, sc->sc_txbase + AWI_TXD_NDA, 0); awi_write_4(sc, sc->sc_txbase + AWI_TXD_NRA, 0); awi_write_1(sc, sc->sc_txbase + AWI_TXD_STATE, 0); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_TX_DATA, sc->sc_txbase); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_TX_MGT, 0); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_TX_BCAST, 0); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_TX_PS, 0); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_TX_CF, 0); error = awi_cmd(sc, AWI_CMD_INIT_TX); if (error) return error; /* start receiver */ if (sc->sc_rxpend) { m_freem(sc->sc_rxpend); sc->sc_rxpend = NULL; } error = awi_cmd(sc, AWI_CMD_INIT_RX); if (error) return error; sc->sc_rxdoff = awi_read_4(sc, AWI_CMD_PARAMS+AWI_CA_IRX_DATA_DESC); sc->sc_rxmoff = awi_read_4(sc, AWI_CMD_PARAMS+AWI_CA_IRX_PS_DESC); return 0; } static void awi_stop_txrx(sc) struct awi_softc *sc; { if (sc->sc_cmd_inprog) (void)awi_cmd_wait(sc); (void)awi_cmd(sc, AWI_CMD_KILL_RX); (void)awi_cmd_wait(sc); sc->sc_cmd_inprog = AWI_CMD_FLUSH_TX; awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_FTX_DATA, 1); awi_write_1(sc, 
AWI_CMD_PARAMS+AWI_CA_FTX_MGT, 0); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_FTX_BCAST, 0); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_FTX_PS, 0); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_FTX_CF, 0); (void)awi_cmd(sc, AWI_CMD_FLUSH_TX); (void)awi_cmd_wait(sc); } int awi_init_region(sc) struct awi_softc *sc; { if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) { switch (sc->sc_mib_phy.aCurrent_Reg_Domain) { case AWI_REG_DOMAIN_US: case AWI_REG_DOMAIN_CA: case AWI_REG_DOMAIN_EU: sc->sc_scan_min = 0; sc->sc_scan_max = 77; break; case AWI_REG_DOMAIN_ES: sc->sc_scan_min = 0; sc->sc_scan_max = 26; break; case AWI_REG_DOMAIN_FR: sc->sc_scan_min = 0; sc->sc_scan_max = 32; break; case AWI_REG_DOMAIN_JP: sc->sc_scan_min = 6; sc->sc_scan_max = 17; break; default: return EINVAL; } sc->sc_scan_set = sc->sc_scan_cur % 3 + 1; } else { switch (sc->sc_mib_phy.aCurrent_Reg_Domain) { case AWI_REG_DOMAIN_US: case AWI_REG_DOMAIN_CA: sc->sc_scan_min = 1; sc->sc_scan_max = 11; sc->sc_scan_cur = 3; break; case AWI_REG_DOMAIN_EU: sc->sc_scan_min = 1; sc->sc_scan_max = 13; sc->sc_scan_cur = 3; break; case AWI_REG_DOMAIN_ES: sc->sc_scan_min = 10; sc->sc_scan_max = 11; sc->sc_scan_cur = 10; break; case AWI_REG_DOMAIN_FR: sc->sc_scan_min = 10; sc->sc_scan_max = 13; sc->sc_scan_cur = 10; break; case AWI_REG_DOMAIN_JP: sc->sc_scan_min = 14; sc->sc_scan_max = 14; sc->sc_scan_cur = 14; break; default: return EINVAL; } } sc->sc_ownch = sc->sc_scan_cur; return 0; } static int awi_start_scan(sc) struct awi_softc *sc; { int error = 0; struct awi_bss *bp; while ((bp = TAILQ_FIRST(&sc->sc_scan)) != NULL) { TAILQ_REMOVE(&sc->sc_scan, bp, list); free(bp, M_DEVBUF); } if (!sc->sc_mib_local.Network_Mode && sc->sc_no_bssid) { memset(&sc->sc_bss, 0, sizeof(sc->sc_bss)); sc->sc_bss.essid[0] = IEEE80211_ELEMID_SSID; if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) { sc->sc_bss.chanset = sc->sc_ownch % 3 + 1; sc->sc_bss.pattern = sc->sc_ownch; sc->sc_bss.index = 1; sc->sc_bss.dwell_time = 200; /*XXX*/ } else 
sc->sc_bss.chanset = sc->sc_ownch; sc->sc_status = AWI_ST_SETSS; error = awi_set_ss(sc); } else { if (sc->sc_mib_local.Network_Mode) awi_drvstate(sc, AWI_DRV_INFSC); else awi_drvstate(sc, AWI_DRV_ADHSC); sc->sc_start_bss = 0; sc->sc_active_scan = 1; sc->sc_mgt_timer = AWI_ASCAN_WAIT / 1000; sc->sc_ifp->if_timer = 1; sc->sc_status = AWI_ST_SCAN; error = awi_cmd_scan(sc); } return error; } static int awi_next_scan(sc) struct awi_softc *sc; { int error; for (;;) { /* * The pattern parameter for FH phy should be incremented * by 3. But BayStack 650 Access Points apparently always * assign hop pattern set parameter to 1 for any pattern. * So we try all combinations of pattern/set parameters. * Since this causes no error, it may be a bug of * PCnetMobile firmware. */ sc->sc_scan_cur++; if (sc->sc_scan_cur > sc->sc_scan_max) { sc->sc_scan_cur = sc->sc_scan_min; if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) sc->sc_scan_set = sc->sc_scan_set % 3 + 1; } error = awi_cmd_scan(sc); if (error != EINVAL) break; } return error; } static void awi_stop_scan(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; struct awi_bss *bp, *sbp; int fail; bp = TAILQ_FIRST(&sc->sc_scan); if (bp == NULL) { notfound: if (sc->sc_active_scan) { if (ifp->if_flags & IFF_DEBUG) printf("%s: entering passive scan mode\n", sc->sc_dev.dv_xname); sc->sc_active_scan = 0; } sc->sc_mgt_timer = AWI_PSCAN_WAIT / 1000; ifp->if_timer = 1; (void)awi_next_scan(sc); return; } sbp = NULL; if (ifp->if_flags & IFF_DEBUG) printf("%s:\tmacaddr ch/pat sig flag wep essid\n", sc->sc_dev.dv_xname); for (; bp != NULL; bp = TAILQ_NEXT(bp, list)) { if (bp->fails) { /* * The configuration of the access points may change * during my scan. So we retries to associate with * it unless there are any suitable AP. 
*/ if (bp->fails++ < 3) continue; bp->fails = 0; } fail = 0; /* * Since the firmware apparently scans not only the specified * channel of SCAN command but all available channel within * the region, we should filter out unnecessary responses here. */ if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) { if (bp->pattern < sc->sc_scan_min || bp->pattern > sc->sc_scan_max) fail |= 0x01; } else { if (bp->chanset < sc->sc_scan_min || bp->chanset > sc->sc_scan_max) fail |= 0x01; } if (sc->sc_mib_local.Network_Mode) { if (!(bp->capinfo & IEEE80211_CAPINFO_ESS) || (bp->capinfo & IEEE80211_CAPINFO_IBSS)) fail |= 0x02; } else { if ((bp->capinfo & IEEE80211_CAPINFO_ESS) || !(bp->capinfo & IEEE80211_CAPINFO_IBSS)) fail |= 0x02; } if (sc->sc_wep_algo == NULL) { if (bp->capinfo & IEEE80211_CAPINFO_PRIVACY) fail |= 0x04; } else { if (!(bp->capinfo & IEEE80211_CAPINFO_PRIVACY)) fail |= 0x04; } if (sc->sc_mib_mac.aDesired_ESS_ID[1] != 0 && memcmp(&sc->sc_mib_mac.aDesired_ESS_ID, bp->essid, sizeof(bp->essid)) != 0) fail |= 0x08; if (ifp->if_flags & IFF_DEBUG) { printf(" %c %s", fail ? '-' : '+', ether_sprintf(bp->esrc)); if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) printf(" %2d/%d%c", bp->pattern, bp->chanset, fail & 0x01 ? '!' : ' '); else printf(" %4d%c", bp->chanset, fail & 0x01 ? '!' : ' '); printf(" %+4d", bp->rssi); printf(" %4s%c", (bp->capinfo & IEEE80211_CAPINFO_ESS) ? "ess" : (bp->capinfo & IEEE80211_CAPINFO_IBSS) ? "ibss" : "????", fail & 0x02 ? '!' : ' '); printf(" %3s%c ", (bp->capinfo & IEEE80211_CAPINFO_PRIVACY) ? "wep" : "no", fail & 0x04 ? '!' : ' '); awi_print_essid(bp->essid); printf("%s\n", fail & 0x08 ? "!" 
: ""); } if (!fail) { if (sbp == NULL || bp->rssi > sbp->rssi) sbp = bp; } } if (sbp == NULL) goto notfound; sc->sc_bss = *sbp; (void)awi_set_ss(sc); } static void awi_recv_beacon(sc, m0, rxts, rssi) struct awi_softc *sc; struct mbuf *m0; u_int32_t rxts; u_int8_t rssi; { struct ieee80211_frame *wh; struct awi_bss *bp; u_int8_t *frame, *eframe; u_int8_t *tstamp, *bintval, *capinfo, *ssid, *rates, *parms; if (sc->sc_status != AWI_ST_SCAN) return; wh = mtod(m0, struct ieee80211_frame *); frame = (u_int8_t *)&wh[1]; eframe = mtod(m0, u_int8_t *) + m0->m_len; /* * XXX: * timestamp [8] * beacon interval [2] * capability information [2] * ssid [tlv] * supported rates [tlv] * parameter set [tlv] * ... */ if (frame + 12 > eframe) { #ifdef AWI_DEBUG if (awi_verbose) printf("awi_recv_beacon: frame too short \n"); #endif return; } tstamp = frame; frame += 8; bintval = frame; frame += 2; capinfo = frame; frame += 2; ssid = rates = parms = NULL; while (frame < eframe) { switch (*frame) { case IEEE80211_ELEMID_SSID: ssid = frame; break; case IEEE80211_ELEMID_RATES: rates = frame; break; case IEEE80211_ELEMID_FHPARMS: case IEEE80211_ELEMID_DSPARMS: parms = frame; break; } frame += frame[1] + 2; } if (ssid == NULL || rates == NULL || parms == NULL) { #ifdef AWI_DEBUG if (awi_verbose) printf("awi_recv_beacon: ssid=%p, rates=%p, parms=%p\n", ssid, rates, parms); #endif return; } if (ssid[1] > IEEE80211_NWID_LEN) { #ifdef AWI_DEBUG if (awi_verbose) printf("awi_recv_beacon: bad ssid len: %d from %s\n", ssid[1], ether_sprintf(wh->i_addr2)); #endif return; } TAILQ_FOREACH(bp, &sc->sc_scan, list) { if (memcmp(bp->esrc, wh->i_addr2, ETHER_ADDR_LEN) == 0 && memcmp(bp->bssid, wh->i_addr3, ETHER_ADDR_LEN) == 0) break; } if (bp == NULL) { bp = malloc(sizeof(struct awi_bss), M_DEVBUF, M_NOWAIT); if (bp == NULL) return; TAILQ_INSERT_TAIL(&sc->sc_scan, bp, list); memcpy(bp->esrc, wh->i_addr2, ETHER_ADDR_LEN); memcpy(bp->bssid, wh->i_addr3, ETHER_ADDR_LEN); memset(bp->essid, 0, sizeof(bp->essid)); 
memcpy(bp->essid, ssid, 2 + ssid[1]); } bp->rssi = rssi; bp->rxtime = rxts; memcpy(bp->timestamp, tstamp, sizeof(bp->timestamp)); bp->interval = LE_READ_2(bintval); bp->capinfo = LE_READ_2(capinfo); if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) { bp->chanset = parms[4]; bp->pattern = parms[5]; bp->index = parms[6]; bp->dwell_time = LE_READ_2(parms + 2); } else { bp->chanset = parms[2]; bp->pattern = 0; bp->index = 0; bp->dwell_time = 0; } if (sc->sc_mgt_timer == 0) awi_stop_scan(sc); } static int awi_set_ss(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; struct awi_bss *bp; int error; sc->sc_status = AWI_ST_SETSS; bp = &sc->sc_bss; if (ifp->if_flags & IFF_DEBUG) { printf("%s: ch %d pat %d id %d dw %d iv %d bss %s ssid ", sc->sc_dev.dv_xname, bp->chanset, bp->pattern, bp->index, bp->dwell_time, bp->interval, ether_sprintf(bp->bssid)); awi_print_essid(bp->essid); printf("\n"); } memcpy(&sc->sc_mib_mgt.aCurrent_BSS_ID, bp->bssid, ETHER_ADDR_LEN); memcpy(&sc->sc_mib_mgt.aCurrent_ESS_ID, bp->essid, AWI_ESS_ID_SIZE); LE_WRITE_2(&sc->sc_mib_mgt.aBeacon_Period, bp->interval); error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_MGT); return error; } static void awi_try_sync(sc) struct awi_softc *sc; { struct awi_bss *bp; sc->sc_status = AWI_ST_SYNC; bp = &sc->sc_bss; if (sc->sc_cmd_inprog) { if (awi_cmd_wait(sc)) return; } sc->sc_cmd_inprog = AWI_CMD_SYNC; awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_SET, bp->chanset); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_PATTERN, bp->pattern); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_IDX, bp->index); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_STARTBSS, sc->sc_start_bss ? 
1 : 0); awi_write_2(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_DWELL, bp->dwell_time); awi_write_2(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_MBZ, 0); awi_write_bytes(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_TIMESTAMP, bp->timestamp, 8); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_REFTIME, bp->rxtime); (void)awi_cmd(sc, AWI_CMD_SYNC); } static void awi_sync_done(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; if (sc->sc_mib_local.Network_Mode) { awi_drvstate(sc, AWI_DRV_INFSY); awi_send_auth(sc, 1); } else { if (ifp->if_flags & IFF_DEBUG) { printf("%s: synced with", sc->sc_dev.dv_xname); if (sc->sc_no_bssid) printf(" no-bssid"); else { printf(" %s ssid ", ether_sprintf(sc->sc_bss.bssid)); awi_print_essid(sc->sc_bss.essid); } if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) printf(" at chanset %d pattern %d\n", sc->sc_bss.chanset, sc->sc_bss.pattern); else printf(" at channel %d\n", sc->sc_bss.chanset); } awi_drvstate(sc, AWI_DRV_ADHSY); sc->sc_status = AWI_ST_RUNNING; ifp->if_flags |= IFF_RUNNING; awi_start(ifp); } } static void awi_send_deauth(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; struct ieee80211_frame *wh; u_int8_t *deauth; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return; if (ifp->if_flags & IFF_DEBUG) printf("%s: sending deauth to %s\n", sc->sc_dev.dv_xname, ether_sprintf(sc->sc_bss.bssid)); wh = mtod(m, struct ieee80211_frame *); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_AUTH; wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; LE_WRITE_2(wh->i_dur, 0); LE_WRITE_2(wh->i_seq, 0); memcpy(wh->i_addr1, sc->sc_bss.bssid, ETHER_ADDR_LEN); memcpy(wh->i_addr2, sc->sc_mib_addr.aMAC_Address, ETHER_ADDR_LEN); memcpy(wh->i_addr3, sc->sc_bss.bssid, ETHER_ADDR_LEN); deauth = (u_int8_t *)&wh[1]; LE_WRITE_2(deauth, IEEE80211_REASON_AUTH_LEAVE); deauth += 2; m->m_pkthdr.len = m->m_len = deauth - mtod(m, u_int8_t *); _IF_ENQUEUE(&sc->sc_mgtq, m); awi_start(ifp); awi_drvstate(sc, AWI_DRV_INFTOSS); } static void 
awi_send_auth(sc, seq) struct awi_softc *sc; int seq; { struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; struct ieee80211_frame *wh; u_int8_t *auth; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return; sc->sc_status = AWI_ST_AUTH; if (ifp->if_flags & IFF_DEBUG) printf("%s: sending auth to %s\n", sc->sc_dev.dv_xname, ether_sprintf(sc->sc_bss.bssid)); wh = mtod(m, struct ieee80211_frame *); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_AUTH; wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; LE_WRITE_2(wh->i_dur, 0); LE_WRITE_2(wh->i_seq, 0); memcpy(wh->i_addr1, sc->sc_bss.esrc, ETHER_ADDR_LEN); memcpy(wh->i_addr2, sc->sc_mib_addr.aMAC_Address, ETHER_ADDR_LEN); memcpy(wh->i_addr3, sc->sc_bss.bssid, ETHER_ADDR_LEN); auth = (u_int8_t *)&wh[1]; /* algorithm number */ LE_WRITE_2(auth, IEEE80211_AUTH_ALG_OPEN); auth += 2; /* sequence number */ LE_WRITE_2(auth, seq); auth += 2; /* status */ LE_WRITE_2(auth, 0); auth += 2; m->m_pkthdr.len = m->m_len = auth - mtod(m, u_int8_t *); _IF_ENQUEUE(&sc->sc_mgtq, m); awi_start(ifp); sc->sc_mgt_timer = AWI_TRANS_TIMEOUT / 1000; ifp->if_timer = 1; } static void awi_recv_auth(sc, m0) struct awi_softc *sc; struct mbuf *m0; { struct ieee80211_frame *wh; u_int8_t *auth, *eframe; struct awi_bss *bp; u_int16_t status; wh = mtod(m0, struct ieee80211_frame *); auth = (u_int8_t *)&wh[1]; eframe = mtod(m0, u_int8_t *) + m0->m_len; if (sc->sc_ifp->if_flags & IFF_DEBUG) printf("%s: receive auth from %s\n", sc->sc_dev.dv_xname, ether_sprintf(wh->i_addr2)); /* algorithm number */ if (LE_READ_2(auth) != IEEE80211_AUTH_ALG_OPEN) return; auth += 2; if (!sc->sc_mib_local.Network_Mode) { if (sc->sc_status != AWI_ST_RUNNING) return; if (LE_READ_2(auth) == 1) awi_send_auth(sc, 2); return; } if (sc->sc_status != AWI_ST_AUTH) return; /* sequence number */ if (LE_READ_2(auth) != 2) return; auth += 2; /* status */ status = LE_READ_2(auth); if (status != 0) { printf("%s: authentication failed (reason %d)\n", sc->sc_dev.dv_xname, 
status); TAILQ_FOREACH(bp, &sc->sc_scan, list) { if (memcmp(bp->esrc, sc->sc_bss.esrc, ETHER_ADDR_LEN) == 0) { bp->fails++; break; } } return; } sc->sc_mgt_timer = 0; awi_drvstate(sc, AWI_DRV_INFAUTH); awi_send_asreq(sc, 0); } static void awi_send_asreq(sc, reassoc) struct awi_softc *sc; int reassoc; { struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; struct ieee80211_frame *wh; u_int16_t capinfo, lintval; u_int8_t *asreq; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return; sc->sc_status = AWI_ST_ASSOC; if (ifp->if_flags & IFF_DEBUG) printf("%s: sending %sassoc req to %s\n", sc->sc_dev.dv_xname, reassoc ? "re" : "", ether_sprintf(sc->sc_bss.bssid)); wh = mtod(m, struct ieee80211_frame *); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_MGT; if (reassoc) wh->i_fc[0] |= IEEE80211_FC0_SUBTYPE_REASSOC_REQ; else wh->i_fc[0] |= IEEE80211_FC0_SUBTYPE_ASSOC_REQ; wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; LE_WRITE_2(wh->i_dur, 0); LE_WRITE_2(wh->i_seq, 0); memcpy(wh->i_addr1, sc->sc_bss.esrc, ETHER_ADDR_LEN); memcpy(wh->i_addr2, sc->sc_mib_addr.aMAC_Address, ETHER_ADDR_LEN); memcpy(wh->i_addr3, sc->sc_bss.bssid, ETHER_ADDR_LEN); asreq = (u_int8_t *)&wh[1]; /* capability info */ capinfo = IEEE80211_CAPINFO_CF_POLLABLE; if (sc->sc_mib_local.Network_Mode) capinfo |= IEEE80211_CAPINFO_ESS; else capinfo |= IEEE80211_CAPINFO_IBSS; if (sc->sc_wep_algo != NULL) capinfo |= IEEE80211_CAPINFO_PRIVACY; LE_WRITE_2(asreq, capinfo); asreq += 2; /* listen interval */ lintval = LE_READ_2(&sc->sc_mib_mgt.aListen_Interval); LE_WRITE_2(asreq, lintval); asreq += 2; if (reassoc) { /* current AP address */ memcpy(asreq, sc->sc_bss.bssid, ETHER_ADDR_LEN); asreq += ETHER_ADDR_LEN; } /* ssid */ memcpy(asreq, sc->sc_bss.essid, 2 + sc->sc_bss.essid[1]); asreq += 2 + asreq[1]; /* supported rates */ memcpy(asreq, &sc->sc_mib_phy.aSuprt_Data_Rates, 4); asreq += 2 + asreq[1]; m->m_pkthdr.len = m->m_len = asreq - mtod(m, u_int8_t *); _IF_ENQUEUE(&sc->sc_mgtq, m); awi_start(ifp); sc->sc_mgt_timer = 
AWI_TRANS_TIMEOUT / 1000; ifp->if_timer = 1; } static void awi_recv_asresp(sc, m0) struct awi_softc *sc; struct mbuf *m0; { struct ieee80211_frame *wh; u_int8_t *asresp, *eframe; u_int16_t status; u_int8_t rate, *phy_rates; struct awi_bss *bp; int i, j; wh = mtod(m0, struct ieee80211_frame *); asresp = (u_int8_t *)&wh[1]; eframe = mtod(m0, u_int8_t *) + m0->m_len; if (sc->sc_ifp->if_flags & IFF_DEBUG) printf("%s: receive assoc resp from %s\n", sc->sc_dev.dv_xname, ether_sprintf(wh->i_addr2)); if (!sc->sc_mib_local.Network_Mode) return; if (sc->sc_status != AWI_ST_ASSOC) return; /* capability info */ asresp += 2; /* status */ status = LE_READ_2(asresp); if (status != 0) { printf("%s: association failed (reason %d)\n", sc->sc_dev.dv_xname, status); TAILQ_FOREACH(bp, &sc->sc_scan, list) { if (memcmp(bp->esrc, sc->sc_bss.esrc, ETHER_ADDR_LEN) == 0) { bp->fails++; break; } } return; } asresp += 2; /* association id */ asresp += 2; /* supported rates */ rate = AWI_RATE_1MBIT; for (i = 0; i < asresp[1]; i++) { if (AWI_80211_RATE(asresp[2 + i]) <= rate) continue; phy_rates = sc->sc_mib_phy.aSuprt_Data_Rates; for (j = 0; j < phy_rates[1]; j++) { if (AWI_80211_RATE(asresp[2 + i]) == AWI_80211_RATE(phy_rates[2 + j])) rate = AWI_80211_RATE(asresp[2 + i]); } } if (sc->sc_ifp->if_flags & IFF_DEBUG) { printf("%s: associated with %s ssid ", sc->sc_dev.dv_xname, ether_sprintf(sc->sc_bss.bssid)); awi_print_essid(sc->sc_bss.essid); if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) printf(" chanset %d pattern %d\n", sc->sc_bss.chanset, sc->sc_bss.pattern); else printf(" channel %d\n", sc->sc_bss.chanset); } sc->sc_tx_rate = rate; sc->sc_mgt_timer = 0; sc->sc_rx_timer = 10; sc->sc_ifp->if_timer = 1; sc->sc_status = AWI_ST_RUNNING; sc->sc_ifp->if_flags |= IFF_RUNNING; awi_drvstate(sc, AWI_DRV_INFASSOC); awi_start(sc->sc_ifp); } static int awi_mib(sc, cmd, mib) struct awi_softc *sc; u_int8_t cmd; u_int8_t mib; { int error; u_int8_t size, *ptr; switch (mib) { case AWI_MIB_LOCAL: ptr = 
(u_int8_t *)&sc->sc_mib_local; size = sizeof(sc->sc_mib_local); break; case AWI_MIB_ADDR: ptr = (u_int8_t *)&sc->sc_mib_addr; size = sizeof(sc->sc_mib_addr); break; case AWI_MIB_MAC: ptr = (u_int8_t *)&sc->sc_mib_mac; size = sizeof(sc->sc_mib_mac); break; case AWI_MIB_STAT: ptr = (u_int8_t *)&sc->sc_mib_stat; size = sizeof(sc->sc_mib_stat); break; case AWI_MIB_MGT: ptr = (u_int8_t *)&sc->sc_mib_mgt; size = sizeof(sc->sc_mib_mgt); break; case AWI_MIB_PHY: ptr = (u_int8_t *)&sc->sc_mib_phy; size = sizeof(sc->sc_mib_phy); break; default: return EINVAL; } if (sc->sc_cmd_inprog) { error = awi_cmd_wait(sc); if (error) { if (error == EWOULDBLOCK) printf("awi_mib: cmd %d inprog", sc->sc_cmd_inprog); return error; } } sc->sc_cmd_inprog = cmd; if (cmd == AWI_CMD_SET_MIB) awi_write_bytes(sc, AWI_CMD_PARAMS+AWI_CA_MIB_DATA, ptr, size); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_MIB_TYPE, mib); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_MIB_SIZE, size); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_MIB_INDEX, 0); error = awi_cmd(sc, cmd); if (error) return error; if (cmd == AWI_CMD_GET_MIB) { awi_read_bytes(sc, AWI_CMD_PARAMS+AWI_CA_MIB_DATA, ptr, size); #ifdef AWI_DEBUG if (awi_verbose) { int i; printf("awi_mib: #%d:", mib); for (i = 0; i < size; i++) printf(" %02x", ptr[i]); printf("\n"); } #endif } return 0; } static int awi_cmd_scan(sc) struct awi_softc *sc; { int error; u_int8_t scan_mode; if (sc->sc_active_scan) scan_mode = AWI_SCAN_ACTIVE; else scan_mode = AWI_SCAN_PASSIVE; if (sc->sc_mib_mgt.aScan_Mode != scan_mode) { sc->sc_mib_mgt.aScan_Mode = scan_mode; error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_MGT); return error; } if (sc->sc_cmd_inprog) { error = awi_cmd_wait(sc); if (error) return error; } sc->sc_cmd_inprog = AWI_CMD_SCAN; awi_write_2(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_DURATION, sc->sc_active_scan ? 
AWI_ASCAN_DURATION : AWI_PSCAN_DURATION); if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) { awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_SET, sc->sc_scan_set); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_PATTERN, sc->sc_scan_cur); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_IDX, 1); } else { awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_SET, sc->sc_scan_cur); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_PATTERN, 0); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_IDX, 0); } awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_SUSP, 0); return awi_cmd(sc, AWI_CMD_SCAN); } static int awi_cmd(sc, cmd) struct awi_softc *sc; u_int8_t cmd; { u_int8_t status; int error = 0; sc->sc_cmd_inprog = cmd; awi_write_1(sc, AWI_CMD_STATUS, AWI_STAT_IDLE); awi_write_1(sc, AWI_CMD, cmd); if (sc->sc_status != AWI_ST_INIT) return 0; error = awi_cmd_wait(sc); if (error) return error; status = awi_read_1(sc, AWI_CMD_STATUS); awi_write_1(sc, AWI_CMD, 0); switch (status) { case AWI_STAT_OK: break; case AWI_STAT_BADPARM: return EINVAL; default: printf("%s: command %d failed %x\n", sc->sc_dev.dv_xname, cmd, status); return ENXIO; } return 0; } static void awi_cmd_done(sc) struct awi_softc *sc; { u_int8_t cmd, status; status = awi_read_1(sc, AWI_CMD_STATUS); if (status == AWI_STAT_IDLE) return; /* stray interrupt */ cmd = sc->sc_cmd_inprog; sc->sc_cmd_inprog = 0; if (sc->sc_status == AWI_ST_INIT) { wakeup(sc); return; } awi_write_1(sc, AWI_CMD, 0); if (status != AWI_STAT_OK) { printf("%s: command %d failed %x\n", sc->sc_dev.dv_xname, cmd, status); return; } switch (sc->sc_status) { case AWI_ST_SCAN: if (cmd == AWI_CMD_SET_MIB) awi_cmd_scan(sc); /* retry */ break; case AWI_ST_SETSS: awi_try_sync(sc); break; case AWI_ST_SYNC: awi_sync_done(sc); break; default: break; } } static int awi_next_txd(sc, len, framep, ntxdp) struct awi_softc *sc; int len; u_int32_t *framep, *ntxdp; { u_int32_t txd, ntxd, frame; txd = sc->sc_txnext; frame = txd + AWI_TXD_SIZE; if (frame + len > sc->sc_txend) frame = sc->sc_txbase; ntxd = 
frame + len; if (ntxd + AWI_TXD_SIZE > sc->sc_txend) ntxd = sc->sc_txbase; *framep = frame; *ntxdp = ntxd; /* * Determine if there are any room in ring buffer. * --- send wait, === new data, +++ conflict (ENOBUFS) * base........................end * done----txd=====ntxd OK * --txd=====done++++ntxd-- full * --txd=====ntxd done-- OK * ==ntxd done----txd=== OK * ==done++++ntxd----txd=== full * ++ntxd txd=====done++ full */ if (txd < ntxd) { if (txd < sc->sc_txdone && ntxd + AWI_TXD_SIZE > sc->sc_txdone) return ENOBUFS; } else { if (txd < sc->sc_txdone || ntxd + AWI_TXD_SIZE > sc->sc_txdone) return ENOBUFS; } return 0; } static int awi_lock(sc) struct awi_softc *sc; { int error = 0; if (curproc == NULL) { /* * XXX * Though driver ioctl should be called with context, * KAME ipv6 stack calls ioctl in interrupt for now. * We simply abort the request if there are other * ioctl requests in progress. */ if (sc->sc_busy) { return EWOULDBLOCK; if (sc->sc_invalid) return ENXIO; } sc->sc_busy = 1; sc->sc_cansleep = 0; return 0; } while (sc->sc_busy) { if (sc->sc_invalid) return ENXIO; sc->sc_sleep_cnt++; error = tsleep(sc, PWAIT | PCATCH, "awilck", 0); sc->sc_sleep_cnt--; if (error) return error; } sc->sc_busy = 1; sc->sc_cansleep = 1; return 0; } static void awi_unlock(sc) struct awi_softc *sc; { sc->sc_busy = 0; sc->sc_cansleep = 0; if (sc->sc_sleep_cnt) wakeup(sc); } static int awi_intr_lock(sc) struct awi_softc *sc; { u_int8_t status; int i, retry; status = 1; for (retry = 0; retry < 10; retry++) { for (i = 0; i < AWI_LOCKOUT_TIMEOUT*1000/5; i++) { status = awi_read_1(sc, AWI_LOCKOUT_HOST); if (status == 0) break; DELAY(5); } if (status != 0) break; awi_write_1(sc, AWI_LOCKOUT_MAC, 1); status = awi_read_1(sc, AWI_LOCKOUT_HOST); if (status == 0) break; awi_write_1(sc, AWI_LOCKOUT_MAC, 0); } if (status != 0) { printf("%s: failed to lock interrupt\n", sc->sc_dev.dv_xname); return ENXIO; } return 0; } static void awi_intr_unlock(sc) struct awi_softc *sc; { awi_write_1(sc, 
AWI_LOCKOUT_MAC, 0); } static int awi_cmd_wait(sc) struct awi_softc *sc; { int i, error = 0; i = 0; while (sc->sc_cmd_inprog) { if (sc->sc_invalid) return ENXIO; if (awi_read_1(sc, AWI_CMD) != sc->sc_cmd_inprog) { printf("%s: failed to access hardware\n", sc->sc_dev.dv_xname); sc->sc_invalid = 1; return ENXIO; } if (sc->sc_cansleep) { sc->sc_sleep_cnt++; error = tsleep(sc, PWAIT, "awicmd", AWI_CMD_TIMEOUT*hz/1000); sc->sc_sleep_cnt--; } else { if (awi_read_1(sc, AWI_CMD_STATUS) != AWI_STAT_IDLE) { awi_cmd_done(sc); break; } if (i++ >= AWI_CMD_TIMEOUT*1000/10) error = EWOULDBLOCK; else DELAY(10); } if (error) break; } return error; } static void awi_print_essid(essid) u_int8_t *essid; { int i, len; u_int8_t *p; len = essid[1]; if (len > IEEE80211_NWID_LEN) len = IEEE80211_NWID_LEN; /*XXX*/ /* determine printable or not */ for (i = 0, p = essid + 2; i < len; i++, p++) { if (*p < ' ' || *p > 0x7e) break; } if (i == len) { printf("\""); for (i = 0, p = essid + 2; i < len; i++, p++) printf("%c", *p); printf("\""); } else { printf("0x"); for (i = 0, p = essid + 2; i < len; i++, p++) printf("%02x", *p); } } #ifdef AWI_DEBUG static void awi_dump_pkt(sc, m, rssi) struct awi_softc *sc; struct mbuf *m; int rssi; { struct ieee80211_frame *wh; int i, l; wh = mtod(m, struct ieee80211_frame *); if (awi_dump_mask != 0 && ((wh->i_fc[1] & IEEE80211_FC1_DIR_MASK)==IEEE80211_FC1_DIR_NODS) && ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK)==IEEE80211_FC0_TYPE_MGT)) { if ((AWI_DUMP_MASK(wh->i_fc[0]) & awi_dump_mask) != 0) return; } if (awi_dump_mask < 0 && (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK)==IEEE80211_FC0_TYPE_DATA) return; if (rssi < 0) printf("tx: "); else printf("rx: "); switch (wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) { case IEEE80211_FC1_DIR_NODS: printf("NODS %s", ether_sprintf(wh->i_addr2)); printf("->%s", ether_sprintf(wh->i_addr1)); printf("(%s)", ether_sprintf(wh->i_addr3)); break; case IEEE80211_FC1_DIR_TODS: printf("TODS %s", ether_sprintf(wh->i_addr2)); printf("->%s", 
ether_sprintf(wh->i_addr3)); printf("(%s)", ether_sprintf(wh->i_addr1)); break; case IEEE80211_FC1_DIR_FROMDS: printf("FRDS %s", ether_sprintf(wh->i_addr3)); printf("->%s", ether_sprintf(wh->i_addr1)); printf("(%s)", ether_sprintf(wh->i_addr2)); break; case IEEE80211_FC1_DIR_DSTODS: printf("DSDS %s", ether_sprintf((u_int8_t *)&wh[1])); printf("->%s", ether_sprintf(wh->i_addr3)); printf("(%s", ether_sprintf(wh->i_addr2)); printf("->%s)", ether_sprintf(wh->i_addr1)); break; } switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) { case IEEE80211_FC0_TYPE_DATA: printf(" data"); break; case IEEE80211_FC0_TYPE_MGT: switch (wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_PROBE_REQ: printf(" probe_req"); break; case IEEE80211_FC0_SUBTYPE_PROBE_RESP: printf(" probe_resp"); break; case IEEE80211_FC0_SUBTYPE_BEACON: printf(" beacon"); break; case IEEE80211_FC0_SUBTYPE_AUTH: printf(" auth"); break; case IEEE80211_FC0_SUBTYPE_ASSOC_REQ: printf(" assoc_req"); break; case IEEE80211_FC0_SUBTYPE_ASSOC_RESP: printf(" assoc_resp"); break; case IEEE80211_FC0_SUBTYPE_REASSOC_REQ: printf(" reassoc_req"); break; case IEEE80211_FC0_SUBTYPE_REASSOC_RESP: printf(" reassoc_resp"); break; case IEEE80211_FC0_SUBTYPE_DEAUTH: printf(" deauth"); break; case IEEE80211_FC0_SUBTYPE_DISASSOC: printf(" disassoc"); break; default: printf(" mgt#%d", wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK); break; } break; default: printf(" type#%d", wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK); break; } if (wh->i_fc[1] & IEEE80211_FC1_WEP) printf(" WEP"); if (rssi >= 0) printf(" +%d", rssi); printf("\n"); if (awi_dump_len > 0) { l = m->m_len; if (l > awi_dump_len + sizeof(*wh)) l = awi_dump_len + sizeof(*wh); i = sizeof(*wh); if (awi_dump_hdr) i = 0; for (; i < l; i++) { if ((i & 1) == 0) printf(" "); printf("%02x", mtod(m, u_int8_t *)[i]); } printf("\n"); } } #endif Index: head/sys/dev/awi/awi_wep.c =================================================================== --- head/sys/dev/awi/awi_wep.c (revision 
108465) +++ head/sys/dev/awi/awi_wep.c (revision 108466) @@ -1,531 +1,531 @@ /* $NetBSD: awi_wep.c,v 1.4 2000/08/14 11:28:03 onoe Exp $ */ /* $FreeBSD$ */ /* * Copyright (c) 2000 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Atsushi Onoe. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * WEP support framework for the awi driver. * * No actual encryption capability is provided here, but any can be added * to awi_wep_algo table below. * * Note that IEEE802.11 specification states WEP uses RC4 with 40bit key, * which is a proprietary encryption algorithm available under license * from RSA Data Security Inc. Using another algorithm, includes null * encryption provided here, the awi driver cannot be able to communicate * with other stations. */ #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) && __FreeBSD_version >= 400000 #include #else #include #endif #include #include #ifdef __FreeBSD__ #include #include #else #include #endif #include #include #include #include #ifdef __NetBSD__ #include #include #include #include #include #endif #ifdef __FreeBSD__ #include #include #include #include #include static __inline int arc4_ctxlen(void) { return sizeof(struct rc4_state); } static __inline void arc4_setkey(void *ctx, u_int8_t *key, int keylen) { rc4_init(ctx, key, keylen); } static __inline void arc4_encrypt(void *ctx, u_int8_t *dst, u_int8_t *src, int len) { rc4_crypt(ctx, src, dst, len); } #endif static void awi_crc_init(void); static u_int32_t awi_crc_update(u_int32_t crc, u_int8_t *buf, int len); static int awi_null_ctxlen(void); static void awi_null_setkey(void *ctx, u_int8_t *key, int keylen); static void awi_null_copy(void *ctx, u_int8_t *dst, u_int8_t *src, int len); /* 
XXX: the order should be known to wiconfig/user */ static struct awi_wep_algo awi_wep_algo[] = { /* 0: no wep */ { "no" }, /* dummy for no wep */ /* 1: normal wep (arc4) */ { "arc4", arc4_ctxlen, arc4_setkey, arc4_encrypt, arc4_encrypt }, /* 2: debug wep (null) */ { "null", awi_null_ctxlen, awi_null_setkey, awi_null_copy, awi_null_copy }, /* dummy for wep without encryption */ }; int awi_wep_setnwkey(sc, nwkey) struct awi_softc *sc; struct ieee80211_nwkey *nwkey; { int i, len, error; u_int8_t keybuf[AWI_MAX_KEYLEN]; if (nwkey->i_defkid <= 0 || nwkey->i_defkid > IEEE80211_WEP_NKID) return EINVAL; error = 0; for (i = 0; i < IEEE80211_WEP_NKID; i++) { if (nwkey->i_key[i].i_keydat == NULL) continue; len = nwkey->i_key[i].i_keylen; if (len > sizeof(keybuf)) { error = EINVAL; break; } error = copyin(nwkey->i_key[i].i_keydat, keybuf, len); if (error) break; error = awi_wep_setkey(sc, i, keybuf, len); if (error) break; } if (error == 0) { sc->sc_wep_defkid = nwkey->i_defkid - 1; error = awi_wep_setalgo(sc, nwkey->i_wepon); if (error == 0 && sc->sc_enabled) { awi_stop(sc); error = awi_init(sc); } } return error; } int awi_wep_getnwkey(sc, nwkey) struct awi_softc *sc; struct ieee80211_nwkey *nwkey; { int i, len, error, suerr; u_int8_t keybuf[AWI_MAX_KEYLEN]; nwkey->i_wepon = awi_wep_getalgo(sc); nwkey->i_defkid = sc->sc_wep_defkid + 1; /* do not show any keys to non-root user */ #ifdef __FreeBSD__ #if __FreeBSD_version < 500028 suerr = suser(curproc); #else suerr = suser(curthread); #endif #else suerr = suser(curproc->p_ucred, &curproc->p_acflag); #endif error = 0; for (i = 0; i < IEEE80211_WEP_NKID; i++) { if (nwkey->i_key[i].i_keydat == NULL) continue; if (suerr) { error = suerr; break; } len = sizeof(keybuf); error = awi_wep_getkey(sc, i, keybuf, &len); if (error) break; if (nwkey->i_key[i].i_keylen < len) { error = ENOSPC; break; } nwkey->i_key[i].i_keylen = len; error = copyout(keybuf, nwkey->i_key[i].i_keydat, len); if (error) break; } return error; } int 
awi_wep_getalgo(sc) struct awi_softc *sc; { if (sc->sc_wep_algo == NULL) return 0; return sc->sc_wep_algo - awi_wep_algo; } int awi_wep_setalgo(sc, algo) struct awi_softc *sc; int algo; { struct awi_wep_algo *awa; int ctxlen; awi_crc_init(); /* XXX: not belongs here */ if (algo < 0 || algo > sizeof(awi_wep_algo)/sizeof(awi_wep_algo[0])) return EINVAL; awa = &awi_wep_algo[algo]; if (awa->awa_name == NULL) return EINVAL; if (awa->awa_ctxlen == NULL) { awa = NULL; ctxlen = 0; } else ctxlen = awa->awa_ctxlen(); if (sc->sc_wep_ctx != NULL) { free(sc->sc_wep_ctx, M_DEVBUF); sc->sc_wep_ctx = NULL; } if (ctxlen) { sc->sc_wep_ctx = malloc(ctxlen, M_DEVBUF, M_NOWAIT); if (sc->sc_wep_ctx == NULL) return ENOMEM; } sc->sc_wep_algo = awa; return 0; } int awi_wep_setkey(sc, kid, key, keylen) struct awi_softc *sc; int kid; unsigned char *key; int keylen; { if (kid < 0 || kid >= IEEE80211_WEP_NKID) return EINVAL; if (keylen < 0 || keylen + IEEE80211_WEP_IVLEN > AWI_MAX_KEYLEN) return EINVAL; sc->sc_wep_keylen[kid] = keylen; if (keylen > 0) memcpy(sc->sc_wep_key[kid] + IEEE80211_WEP_IVLEN, key, keylen); return 0; } int awi_wep_getkey(sc, kid, key, keylen) struct awi_softc *sc; int kid; unsigned char *key; int *keylen; { if (kid < 0 || kid >= IEEE80211_WEP_NKID) return EINVAL; if (*keylen < sc->sc_wep_keylen[kid]) return ENOSPC; *keylen = sc->sc_wep_keylen[kid]; if (*keylen > 0) memcpy(key, sc->sc_wep_key[kid] + IEEE80211_WEP_IVLEN, *keylen); return 0; } struct mbuf * awi_wep_encrypt(sc, m0, txflag) struct awi_softc *sc; struct mbuf *m0; int txflag; { struct mbuf *m, *n, *n0; struct ieee80211_frame *wh; struct awi_wep_algo *awa; int left, len, moff, noff, keylen, kid; u_int32_t iv, crc; u_int8_t *key, *ivp; void *ctx; u_int8_t crcbuf[IEEE80211_WEP_CRCLEN]; n0 = NULL; awa = sc->sc_wep_algo; if (awa == NULL) goto fail; ctx = sc->sc_wep_ctx; m = m0; left = m->m_pkthdr.len; MGET(n, M_DONTWAIT, m->m_type); n0 = n; if (n == NULL) goto fail; - M_COPY_PKTHDR(n, m); + M_MOVE_PKTHDR(n, m); len 
= IEEE80211_WEP_IVLEN + IEEE80211_WEP_KIDLEN + IEEE80211_WEP_CRCLEN; if (txflag) { n->m_pkthdr.len += len; } else { n->m_pkthdr.len -= len; left -= len; } n->m_len = MHLEN; if (n->m_pkthdr.len >= MINCLSIZE) { MCLGET(n, M_DONTWAIT); if (n->m_flags & M_EXT) n->m_len = n->m_ext.ext_size; } len = sizeof(struct ieee80211_frame); memcpy(mtod(n, caddr_t), mtod(m, caddr_t), len); left -= len; moff = len; noff = len; if (txflag) { kid = sc->sc_wep_defkid; wh = mtod(n, struct ieee80211_frame *); wh->i_fc[1] |= IEEE80211_FC1_WEP; iv = random(); /* * store IV, byte order is not the matter since it's random. * assuming IEEE80211_WEP_IVLEN is 3 */ ivp = mtod(n, u_int8_t *) + noff; ivp[0] = (iv >> 16) & 0xff; ivp[1] = (iv >> 8) & 0xff; ivp[2] = iv & 0xff; ivp[IEEE80211_WEP_IVLEN] = kid << 6; /* pad and keyid */ noff += IEEE80211_WEP_IVLEN + IEEE80211_WEP_KIDLEN; } else { ivp = mtod(m, u_int8_t *) + moff; kid = ivp[IEEE80211_WEP_IVLEN] >> 6; moff += IEEE80211_WEP_IVLEN + IEEE80211_WEP_KIDLEN; } key = sc->sc_wep_key[kid]; keylen = sc->sc_wep_keylen[kid]; /* assuming IEEE80211_WEP_IVLEN is 3 */ key[0] = ivp[0]; key[1] = ivp[1]; key[2] = ivp[2]; awa->awa_setkey(ctx, key, IEEE80211_WEP_IVLEN + keylen); /* encrypt with calculating CRC */ crc = ~0; while (left > 0) { len = m->m_len - moff; if (len == 0) { m = m->m_next; moff = 0; continue; } if (len > n->m_len - noff) { len = n->m_len - noff; if (len == 0) { MGET(n->m_next, M_DONTWAIT, n->m_type); if (n->m_next == NULL) goto fail; n = n->m_next; n->m_len = MLEN; if (left >= MINCLSIZE) { MCLGET(n, M_DONTWAIT); if (n->m_flags & M_EXT) n->m_len = n->m_ext.ext_size; } noff = 0; continue; } } if (len > left) len = left; if (txflag) { awa->awa_encrypt(ctx, mtod(n, caddr_t) + noff, mtod(m, caddr_t) + moff, len); crc = awi_crc_update(crc, mtod(m, caddr_t) + moff, len); } else { awa->awa_decrypt(ctx, mtod(n, caddr_t) + noff, mtod(m, caddr_t) + moff, len); crc = awi_crc_update(crc, mtod(n, caddr_t) + noff, len); } left -= len; moff += len; noff 
+= len;
	}
	crc = ~crc;
	if (txflag) {
		/* tx: append the little-endian CRC (the WEP ICV), encrypted */
		LE_WRITE_4(crcbuf, crc);
		if (n->m_len >= noff + sizeof(crcbuf))
			n->m_len = noff + sizeof(crcbuf);
		else {
			/* no room in the current mbuf; chain one more for the ICV */
			n->m_len = noff;
			MGET(n->m_next, M_DONTWAIT, n->m_type);
			if (n->m_next == NULL)
				goto fail;
			n = n->m_next;
			n->m_len = sizeof(crcbuf);
			noff = 0;
		}
		awa->awa_encrypt(ctx, mtod(n, caddr_t) + noff, crcbuf,
		    sizeof(crcbuf));
	} else {
		/* rx: decrypt the trailing ICV and compare with computed CRC */
		n->m_len = noff;
		for (noff = 0; noff < sizeof(crcbuf); noff += len) {
			len = sizeof(crcbuf) - noff;
			if (len > m->m_len - moff)
				len = m->m_len - moff;
			if (len > 0)
				awa->awa_decrypt(ctx, crcbuf + noff,
				    mtod(m, caddr_t) + moff, len);
			m = m->m_next;
			moff = 0;
		}
		if (crc != LE_READ_4(crcbuf))
			goto fail;	/* ICV mismatch: drop the frame */
	}
	m_freem(m0);
	return n0;

fail:
	/* error path: both the original and any partial copy are released */
	m_freem(m0);
	m_freem(n0);
	return NULL;
}

/*
 * CRC 32 -- routine from RFC 2083
 */

/* Table of CRCs of all 8-bit messages */
static u_int32_t awi_crc_table[256];
static int awi_crc_table_computed = 0;	/* lazily built on first use */

/* Make the table for a fast CRC. */
static void
awi_crc_init()
{
	u_int32_t c;
	int n, k;

	if (awi_crc_table_computed)
		return;
	for (n = 0; n < 256; n++) {
		c = (u_int32_t)n;
		for (k = 0; k < 8; k++) {
			/* 0xedb88320 is the reflected CRC-32 polynomial */
			if (c & 1)
				c = 0xedb88320UL ^ (c >> 1);
			else
				c = c >> 1;
		}
		awi_crc_table[n] = c;
	}
	awi_crc_table_computed = 1;
}

/*
 * Update a running CRC with the bytes buf[0..len-1]--the CRC
 * should be initialized to all 1's, and the transmitted value
 * is the 1's complement of the final running CRC
 */
static u_int32_t
awi_crc_update(crc, buf, len)
	u_int32_t crc;
	u_int8_t *buf;
	int len;
{
	u_int8_t *endbuf;

	for (endbuf = buf + len; buf < endbuf; buf++)
		crc = awi_crc_table[(crc ^ *buf) & 0xff] ^ (crc >> 8);
	return crc;
}

/*
 * Null -- do nothing but copy.
*/ static int awi_null_ctxlen() { return 0; } static void awi_null_setkey(ctx, key, keylen) void *ctx; u_char *key; int keylen; { } static void awi_null_copy(ctx, dst, src, len) void *ctx; u_char *dst; u_char *src; int len; { memcpy(dst, src, len); } Index: head/sys/dev/en/midway.c =================================================================== --- head/sys/dev/en/midway.c (revision 108465) +++ head/sys/dev/en/midway.c (revision 108466) @@ -1,3467 +1,3467 @@ /* $NetBSD: midway.c,v 1.30 1997/09/29 17:40:38 chuck Exp $ */ /* (sync'd to midway.c 1.68) */ /* * * Copyright (c) 1996 Charles D. Cranor and Washington University. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor and * Washington University. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * * m i d w a y . c e n i 1 5 5 d r i v e r * * author: Chuck Cranor * started: spring, 1996 (written from scratch). * * notes from the author: * Extra special thanks go to Werner Almesberger, EPFL LRC. Werner's * ENI driver was especially useful in figuring out how this card works. * I would also like to thank Werner for promptly answering email and being * generally helpful. */ #undef EN_DEBUG #undef EN_DEBUG_RANGE /* check ranges on en_read/en_write's? */ #define EN_MBUF_OPT /* try and put more stuff in mbuf? */ #define EN_DIAG #define EN_STAT #ifndef EN_DMA #define EN_DMA 1 /* use dma? */ #endif #define EN_NOTXDMA 0 /* hook to disable tx dma only */ #define EN_NORXDMA 0 /* hook to disable rx dma only */ #define EN_DDBHOOK 1 /* compile in ddb functions */ #if defined(MIDWAY_ADPONLY) #define EN_ENIDMAFIX 0 /* no ENI cards to worry about */ #else #define EN_ENIDMAFIX 1 /* avoid byte DMA on the ENI card (see below) */ #endif /* * note on EN_ENIDMAFIX: the byte aligner on the ENI version of the card * appears to be broken. it works just fine if there is no load... however * when the card is loaded the data get corrupted. to see this, one only * has to use "telnet" over ATM. do the following command in "telnet": * cat /usr/share/misc/termcap * "telnet" seems to generate lots of 1023 byte mbufs (which make great * use of the byte aligner). watch "netstat -s" for checksum errors. 
* * I further tested this by adding a function that compared the transmit * data on the card's SRAM with the data in the mbuf chain _after_ the * "transmit DMA complete" interrupt. using the "telnet" test I got data * mismatches where the byte-aligned data should have been. using ddb * and en_dumpmem() I verified that the DTQs fed into the card were * absolutely correct. thus, we are forced to concluded that the ENI * hardware is buggy. note that the Adaptec version of the card works * just fine with byte DMA. * * bottom line: we set EN_ENIDMAFIX to 1 to avoid byte DMAs on the ENI * card. */ #if defined(DIAGNOSTIC) && !defined(EN_DIAG) #define EN_DIAG /* link in with master DIAG option */ #endif #ifdef EN_STAT #define EN_COUNT(X) (X)++ #else #define EN_COUNT(X) /* nothing */ #endif #ifdef EN_DEBUG #undef EN_DDBHOOK #define EN_DDBHOOK 1 #define STATIC /* nothing */ #define INLINE /* nothing */ #else /* EN_DEBUG */ #define STATIC static #define INLINE __inline #endif /* EN_DEBUG */ #ifdef __FreeBSD__ #include "opt_inet.h" #include "opt_natm.h" #include "opt_ddb.h" /* enable DDBHOOK when DDB is available */ #undef EN_DDBHOOK #ifdef DDB #define EN_DDBHOOK 1 #endif #endif #include #include #include #if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) #include #endif #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #endif #ifdef NATM #include #endif #if defined(__NetBSD__) || defined(__OpenBSD__) #include #include #include #elif defined(__FreeBSD__) #include #include #include #include #include #include #include /* for vtophys proto */ #ifndef IFF_NOTRAILERS #define IFF_NOTRAILERS 0 #endif #endif /* __FreeBSD__ */ #if defined(__alpha__) /* XXX XXX NEED REAL DMA MAPPING SUPPORT XXX XXX */ #undef vtophys #define vtophys(va) alpha_XXX_dmamap((vm_offset_t)(va)) #endif #ifdef __FreeBSD__ #define NBPF 1 #else #include "bpf.h" #endif #if NBPF > 0 #include #ifdef __FreeBSD__ #define BPFATTACH(ifp, dlt, 
hlen) bpfattach((ifp), (dlt), (hlen)) #else #define BPFATTACH(ifp, dlt, hlen) bpfattach(&(ifp)->if_bpf, (ifp), (dlt), (hlen)) #define BPF_MTAP(ifp, m) bpf_mtap((ifp)->if_bpf, (m)) #endif #endif /* NBPF > 0 */ /* * params */ #ifndef EN_TXHIWAT #define EN_TXHIWAT (64*1024) /* max 64 KB waiting to be DMAd out */ #endif #ifndef EN_MINDMA #define EN_MINDMA 32 /* don't DMA anything less than this (bytes) */ #endif #define RX_NONE 0xffff /* recv VC not in use */ #define EN_OBHDR ATM_PH_DRIVER7 /* TBD in first mbuf ! */ #define EN_OBTRL ATM_PH_DRIVER8 /* PDU trailier in last mbuf ! */ #define ENOTHER_FREE 0x01 /* free rxslot */ #define ENOTHER_DRAIN 0x02 /* almost free (drain DRQ dma) */ #define ENOTHER_RAW 0x04 /* 'raw' access (aka boodi mode) */ #define ENOTHER_SWSL 0x08 /* in software service list */ static int en_dma = EN_DMA; /* use DMA (switch off for dbg) */ #ifndef __FreeBSD__ /* * autoconfig attachments */ struct cfdriver en_cd = { 0, "en", DV_IFNET, }; #endif /* * local structures */ /* * params to en_txlaunch() function */ struct en_launch { u_int32_t tbd1; /* TBD 1 */ u_int32_t tbd2; /* TBD 2 */ u_int32_t pdu1; /* PDU 1 (aal5) */ int nodma; /* don't use DMA */ int need; /* total space we need (pad out if less data) */ int mlen; /* length of mbuf (for dtq) */ struct mbuf *t; /* data */ u_int32_t aal; /* aal code */ u_int32_t atm_vci; /* vci */ u_int8_t atm_flags; /* flags */ }; /* * dma table (index by # of words) * * plan A: use WMAYBE (obsolete) * plan B: avoid WMAYBE */ struct en_dmatab { u_int8_t bcode; /* code */ u_int8_t divshift; /* byte divisor */ }; static struct en_dmatab en_dma_planB[] = { { 0, 0 }, /* 0 */ { MIDDMA_WORD, 2}, /* 1 */ { MIDDMA_2WORD, 3}, /* 2 */ { MIDDMA_WORD, 2}, /* 3 */ { MIDDMA_4WORD, 4}, /* 4 */ { MIDDMA_WORD, 2}, /* 5 */ { MIDDMA_2WORD, 3}, /* 6 */ { MIDDMA_WORD, 2}, /* 7 */ { MIDDMA_8WORD, 5}, /* 8 */ { MIDDMA_WORD, 2}, /* 9 */ { MIDDMA_2WORD, 3}, /* 10 */ { MIDDMA_WORD, 2}, /* 11 */ { MIDDMA_4WORD, 4}, /* 12 */ { MIDDMA_WORD, 
2}, /* 13 */ { MIDDMA_2WORD, 3}, /* 14 */ { MIDDMA_WORD, 2}, /* 15 */ { MIDDMA_16WORD, 6}, /* 16 */ }; static struct en_dmatab *en_dmaplan = en_dma_planB; /* * prototypes */ STATIC INLINE int en_b2sz(int) __attribute__ ((unused)); #ifdef EN_DDBHOOK int en_dump(int,int); int en_dumpmem(int,int,int); #endif STATIC void en_dmaprobe(struct en_softc *); STATIC int en_dmaprobe_doit(struct en_softc *, u_int8_t *, u_int8_t *, int); STATIC INLINE int en_dqneed(struct en_softc *, caddr_t, u_int, u_int) __attribute__ ((unused)); STATIC void en_init(struct en_softc *); STATIC int en_ioctl(struct ifnet *, EN_IOCTL_CMDT, caddr_t); STATIC INLINE int en_k2sz(int) __attribute__ ((unused)); STATIC void en_loadvc(struct en_softc *, int); STATIC int en_mfix(struct en_softc *, struct mbuf **, struct mbuf *); STATIC INLINE struct mbuf *en_mget(struct en_softc *, u_int, u_int *) __attribute__ ((unused)); STATIC INLINE u_int32_t en_read(struct en_softc *, u_int32_t) __attribute__ ((unused)); STATIC int en_rxctl(struct en_softc *, struct atm_pseudoioctl *, int); STATIC void en_txdma(struct en_softc *, int); STATIC void en_txlaunch(struct en_softc *, int, struct en_launch *); STATIC void en_service(struct en_softc *); STATIC void en_start(struct ifnet *); STATIC INLINE int en_sz2b(int) __attribute__ ((unused)); STATIC INLINE void en_write(struct en_softc *, u_int32_t, u_int32_t) __attribute__ ((unused)); /* * macros/inline */ /* * raw read/write macros */ #define EN_READDAT(SC,R) en_read(SC,R) #define EN_WRITEDAT(SC,R,V) en_write(SC,R,V) /* * cooked read/write macros */ #define EN_READ(SC,R) (u_int32_t)ntohl(en_read(SC,R)) #define EN_WRITE(SC,R,V) en_write(SC,R, htonl(V)) #define EN_WRAPADD(START,STOP,CUR,VAL) { \ (CUR) = (CUR) + (VAL); \ if ((CUR) >= (STOP)) \ (CUR) = (START) + ((CUR) - (STOP)); \ } #define WORD_IDX(START, X) (((X) - (START)) / sizeof(u_int32_t)) /* we store sc->dtq and sc->drq data in the following format... 
*/ #define EN_DQ_MK(SLOT,LEN) (((SLOT) << 20)|(LEN)|(0x80000)) /* the 0x80000 ensures we != 0 */ #define EN_DQ_SLOT(X) ((X) >> 20) #define EN_DQ_LEN(X) ((X) & 0x3ffff) /* format of DTQ/DRQ word 1 differs between ENI and ADP */ #if defined(MIDWAY_ENIONLY) #define MID_MK_TXQ(SC,CNT,CHAN,END,BCODE) \ EN_WRITE((SC), (SC)->dtq_us, \ MID_MK_TXQ_ENI((CNT), (CHAN), (END), (BCODE))); #define MID_MK_RXQ(SC,CNT,VCI,END,BCODE) \ EN_WRITE((SC), (SC)->drq_us, \ MID_MK_RXQ_ENI((CNT), (VCI), (END), (BCODE))); #elif defined(MIDWAY_ADPONLY) #define MID_MK_TXQ(SC,CNT,CHAN,END,JK) \ EN_WRITE((SC), (SC)->dtq_us, \ MID_MK_TXQ_ADP((CNT), (CHAN), (END), (JK))); #define MID_MK_RXQ(SC,CNT,VCI,END,JK) \ EN_WRITE((SC), (SC)->drq_us, \ MID_MK_RXQ_ADP((CNT), (VCI), (END), (JK))); #else #define MID_MK_TXQ(SC,CNT,CHAN,END,JK_OR_BCODE) { \ if ((SC)->is_adaptec) \ EN_WRITE((SC), (SC)->dtq_us, \ MID_MK_TXQ_ADP((CNT), (CHAN), (END), (JK_OR_BCODE))); \ else \ EN_WRITE((SC), (SC)->dtq_us, \ MID_MK_TXQ_ENI((CNT), (CHAN), (END), (JK_OR_BCODE))); \ } #define MID_MK_RXQ(SC,CNT,VCI,END,JK_OR_BCODE) { \ if ((SC)->is_adaptec) \ EN_WRITE((SC), (SC)->drq_us, \ MID_MK_RXQ_ADP((CNT), (VCI), (END), (JK_OR_BCODE))); \ else \ EN_WRITE((SC), (SC)->drq_us, \ MID_MK_RXQ_ENI((CNT), (VCI), (END), (JK_OR_BCODE))); \ } #endif /* add an item to the DTQ */ #define EN_DTQADD(SC,CNT,CHAN,JK_OR_BCODE,ADDR,LEN,END) { \ if (END) \ (SC)->dtq[MID_DTQ_A2REG((SC)->dtq_us)] = EN_DQ_MK(CHAN,LEN); \ MID_MK_TXQ(SC,CNT,CHAN,END,JK_OR_BCODE); \ (SC)->dtq_us += 4; \ EN_WRITE((SC), (SC)->dtq_us, (ADDR)); \ EN_WRAPADD(MID_DTQOFF, MID_DTQEND, (SC)->dtq_us, 4); \ (SC)->dtq_free--; \ if (END) \ EN_WRITE((SC), MID_DMA_WRTX, MID_DTQ_A2REG((SC)->dtq_us)); \ } /* DRQ add macro */ #define EN_DRQADD(SC,CNT,VCI,JK_OR_BCODE,ADDR,LEN,SLOT,END) { \ if (END) \ (SC)->drq[MID_DRQ_A2REG((SC)->drq_us)] = EN_DQ_MK(SLOT,LEN); \ MID_MK_RXQ(SC,CNT,VCI,END,JK_OR_BCODE); \ (SC)->drq_us += 4; \ EN_WRITE((SC), (SC)->drq_us, (ADDR)); \ EN_WRAPADD(MID_DRQOFF, 
MID_DRQEND, (SC)->drq_us, 4); \ (SC)->drq_free--; \ if (END) \ EN_WRITE((SC), MID_DMA_WRRX, MID_DRQ_A2REG((SC)->drq_us)); \ } /* * the driver code * * the code is arranged in a specific way: * [1] short/inline functions * [2] autoconfig stuff * [3] ioctl stuff * [4] reset -> init -> trasmit -> intr -> receive functions * */ /***********************************************************************/ /* * en_read: read a word from the card. this is the only function * that reads from the card. */ STATIC INLINE u_int32_t en_read(sc, r) struct en_softc *sc; u_int32_t r; { #ifdef EN_DEBUG_RANGE if (r > MID_MAXOFF || (r % 4)) panic("en_read out of range, r=0x%x", r); #endif return(bus_space_read_4(sc->en_memt, sc->en_base, r)); } /* * en_write: write a word to the card. this is the only function that * writes to the card. */ STATIC INLINE void en_write(sc, r, v) struct en_softc *sc; u_int32_t r, v; { #ifdef EN_DEBUG_RANGE if (r > MID_MAXOFF || (r % 4)) panic("en_write out of range, r=0x%x", r); #endif bus_space_write_4(sc->en_memt, sc->en_base, r, v); } /* * en_k2sz: convert KBytes to a size parameter (a log2) */ STATIC INLINE int en_k2sz(k) int k; { switch(k) { case 1: return(0); case 2: return(1); case 4: return(2); case 8: return(3); case 16: return(4); case 32: return(5); case 64: return(6); case 128: return(7); default: panic("en_k2sz"); } return(0); } #define en_log2(X) en_k2sz(X) /* * en_b2sz: convert a DMA burst code to its byte size */ STATIC INLINE int en_b2sz(b) int b; { switch (b) { case MIDDMA_WORD: return(1*4); case MIDDMA_2WMAYBE: case MIDDMA_2WORD: return(2*4); case MIDDMA_4WMAYBE: case MIDDMA_4WORD: return(4*4); case MIDDMA_8WMAYBE: case MIDDMA_8WORD: return(8*4); case MIDDMA_16WMAYBE: case MIDDMA_16WORD: return(16*4); default: panic("en_b2sz"); } return(0); } /* * en_sz2b: convert a burst size (bytes) to DMA burst code */ STATIC INLINE int en_sz2b(sz) int sz; { switch (sz) { case 1*4: return(MIDDMA_WORD); case 2*4: return(MIDDMA_2WORD); case 4*4: 
return(MIDDMA_4WORD); case 8*4: return(MIDDMA_8WORD); case 16*4: return(MIDDMA_16WORD); default: panic("en_sz2b"); } return(0); } /* * en_dqneed: calculate number of DTQ/DRQ's needed for a buffer */ STATIC INLINE int en_dqneed(sc, data, len, tx) struct en_softc *sc; caddr_t data; u_int len, tx; { int result, needalign, sz; #if !defined(MIDWAY_ENIONLY) #if !defined(MIDWAY_ADPONLY) if (sc->is_adaptec) #endif /* !MIDWAY_ADPONLY */ return(1); /* adaptec can DMA anything in one go */ #endif #if !defined(MIDWAY_ADPONLY) result = 0; if (len < EN_MINDMA) { if (!tx) /* XXX: conservative */ return(1); /* will copy/DMA_JK */ } if (tx) { /* byte burst? */ needalign = (((uintptr_t) (void *) data) % sizeof(u_int32_t)); if (needalign) { result++; sz = min(len, sizeof(u_int32_t) - needalign); len -= sz; data += sz; } } if (sc->alburst && len) { needalign = (((uintptr_t) (void *) data) & sc->bestburstmask); if (needalign) { result++; /* alburst */ sz = min(len, sc->bestburstlen - needalign); len -= sz; } } if (len >= sc->bestburstlen) { sz = len / sc->bestburstlen; sz = sz * sc->bestburstlen; len -= sz; result++; /* best shot */ } if (len) { result++; /* clean up */ if (tx && (len % sizeof(u_int32_t)) != 0) result++; /* byte cleanup */ } return(result); #endif /* !MIDWAY_ADPONLY */ } /* * en_mget: get an mbuf chain that can hold totlen bytes and return it * (for recv) [based on am7990_get from if_le and ieget from if_ie] * after this call the sum of all the m_len's in the chain will be totlen. 
*/ STATIC INLINE struct mbuf *en_mget(sc, totlen, drqneed) struct en_softc *sc; u_int totlen, *drqneed; { struct mbuf *m; struct mbuf *top, **mp; *drqneed = 0; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return(NULL); m->m_pkthdr.rcvif = &sc->enif; m->m_pkthdr.len = totlen; m->m_len = MHLEN; top = NULL; mp = ⊤ /* if (top != NULL) then we've already got 1 mbuf on the chain */ while (totlen > 0) { if (top) { MGET(m, M_DONTWAIT, MT_DATA); if (!m) { m_freem(top); return(NULL); /* out of mbufs */ } m->m_len = MLEN; } if (totlen >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); m_freem(top); return(NULL); /* out of mbuf clusters */ } m->m_len = MCLBYTES; } m->m_len = min(totlen, m->m_len); totlen -= m->m_len; *mp = m; mp = &m->m_next; *drqneed += en_dqneed(sc, m->m_data, m->m_len, 0); } return(top); } /***********************************************************************/ /* * autoconfig stuff */ void en_attach(sc) struct en_softc *sc; { struct ifnet *ifp = &sc->enif; int sz; u_int32_t reg, lcv, check, ptr, sav, midvloc; /* * probe card to determine memory size. the stupid ENI card always * reports to PCI that it needs 4MB of space (2MB regs and 2MB RAM). * if it has less than 2MB RAM the addresses wrap in the RAM address space. * (i.e. on a 512KB card addresses 0x3ffffc, 0x37fffc, and 0x2ffffc * are aliases for 0x27fffc [note that RAM starts at offset 0x200000]). */ if (sc->en_busreset) sc->en_busreset(sc); EN_WRITE(sc, MID_RESID, 0x0); /* reset card before touching RAM */ for (lcv = MID_PROBEOFF; lcv <= MID_MAXOFF ; lcv += MID_PROBSIZE) { EN_WRITE(sc, lcv, lcv); /* data[address] = address */ for (check = MID_PROBEOFF ; check < lcv ; check += MID_PROBSIZE) { reg = EN_READ(sc, check); if (reg != check) { /* found an alias! 
*/ goto done_probe; /* and quit */ } } } done_probe: lcv -= MID_PROBSIZE; /* take one step back */ sc->en_obmemsz = (lcv + 4) - MID_RAMOFF; /* * determine the largest DMA burst supported */ en_dmaprobe(sc); /* * "hello world" */ if (sc->en_busreset) sc->en_busreset(sc); EN_WRITE(sc, MID_RESID, 0x0); /* reset */ for (lcv = MID_RAMOFF ; lcv < MID_RAMOFF + sc->en_obmemsz ; lcv += 4) EN_WRITE(sc, lcv, 0); /* zero memory */ reg = EN_READ(sc, MID_RESID); printf("%s: ATM midway v%d, board IDs %d.%d, %s%s%s, %ldKB on-board RAM\n", sc->sc_dev.dv_xname, MID_VER(reg), MID_MID(reg), MID_DID(reg), (MID_IS_SABRE(reg)) ? "sabre controller, " : "", (MID_IS_SUNI(reg)) ? "SUNI" : "Utopia", (!MID_IS_SUNI(reg) && MID_IS_UPIPE(reg)) ? " (pipelined)" : "", (long)sc->en_obmemsz / 1024); if (sc->is_adaptec) { if (sc->bestburstlen == 64 && sc->alburst == 0) printf("%s: passed 64 byte DMA test\n", sc->sc_dev.dv_xname); else printf("%s: FAILED DMA TEST: burst=%d, alburst=%d\n", sc->sc_dev.dv_xname, sc->bestburstlen, sc->alburst); } else { printf("%s: maximum DMA burst length = %d bytes%s\n", sc->sc_dev.dv_xname, sc->bestburstlen, (sc->alburst) ? 
" (must align)" : ""); } /* * link into network subsystem and prepare card */ #if defined(__NetBSD__) || defined(__OpenBSD__) bcopy(sc->sc_dev.dv_xname, sc->enif.if_xname, IFNAMSIZ); #endif sc->enif.if_softc = sc; ifp->if_flags = IFF_SIMPLEX|IFF_NOTRAILERS; ifp->if_ioctl = en_ioctl; ifp->if_output = atm_output; ifp->if_start = en_start; /* * init softc */ for (lcv = 0 ; lcv < MID_N_VC ; lcv++) { sc->rxvc2slot[lcv] = RX_NONE; sc->txspeed[lcv] = 0; /* full */ sc->txvc2slot[lcv] = 0; /* full speed == slot 0 */ } sz = sc->en_obmemsz - (MID_BUFOFF - MID_RAMOFF); ptr = sav = MID_BUFOFF; ptr = roundup(ptr, EN_TXSZ * 1024); /* align */ sz = sz - (ptr - sav); if (EN_TXSZ*1024 * EN_NTX > sz) { printf("%s: EN_NTX/EN_TXSZ too big\n", sc->sc_dev.dv_xname); return; } for (lcv = 0 ; lcv < EN_NTX ; lcv++) { sc->txslot[lcv].mbsize = 0; sc->txslot[lcv].start = ptr; ptr += (EN_TXSZ * 1024); sz -= (EN_TXSZ * 1024); sc->txslot[lcv].stop = ptr; sc->txslot[lcv].nref = 0; bzero(&sc->txslot[lcv].indma, sizeof(sc->txslot[lcv].indma)); bzero(&sc->txslot[lcv].q, sizeof(sc->txslot[lcv].q)); #ifdef EN_DEBUG printf("%s: tx%d: start 0x%x, stop 0x%x\n", sc->sc_dev.dv_xname, lcv, sc->txslot[lcv].start, sc->txslot[lcv].stop); #endif } sav = ptr; ptr = roundup(ptr, EN_RXSZ * 1024); /* align */ sz = sz - (ptr - sav); sc->en_nrx = sz / (EN_RXSZ * 1024); if (sc->en_nrx <= 0) { printf("%s: EN_NTX/EN_TXSZ/EN_RXSZ too big\n", sc->sc_dev.dv_xname); return; } /* * ensure that there is always one VC slot on the service list free * so that we can tell the difference between a full and empty list. 
*/ if (sc->en_nrx >= MID_N_VC) sc->en_nrx = MID_N_VC - 1; for (lcv = 0 ; lcv < sc->en_nrx ; lcv++) { sc->rxslot[lcv].rxhand = NULL; sc->rxslot[lcv].oth_flags = ENOTHER_FREE; bzero(&sc->rxslot[lcv].indma, sizeof(sc->rxslot[lcv].indma)); bzero(&sc->rxslot[lcv].q, sizeof(sc->rxslot[lcv].q)); midvloc = sc->rxslot[lcv].start = ptr; ptr += (EN_RXSZ * 1024); sz -= (EN_RXSZ * 1024); sc->rxslot[lcv].stop = ptr; midvloc = midvloc - MID_RAMOFF; midvloc = (midvloc & ~((EN_RXSZ*1024) - 1)) >> 2; /* mask, cvt to words */ midvloc = midvloc >> MIDV_LOCTOPSHFT; /* we only want the top 11 bits */ midvloc = (midvloc & MIDV_LOCMASK) << MIDV_LOCSHIFT; sc->rxslot[lcv].mode = midvloc | (en_k2sz(EN_RXSZ) << MIDV_SZSHIFT) | MIDV_TRASH; #ifdef EN_DEBUG printf("%s: rx%d: start 0x%x, stop 0x%x, mode 0x%x\n", sc->sc_dev.dv_xname, lcv, sc->rxslot[lcv].start, sc->rxslot[lcv].stop, sc->rxslot[lcv].mode); #endif } #ifdef EN_STAT sc->vtrash = sc->otrash = sc->mfix = sc->txmbovr = sc->dmaovr = 0; sc->txoutspace = sc->txdtqout = sc->launch = sc->lheader = sc->ltail = 0; sc->hwpull = sc->swadd = sc->rxqnotus = sc->rxqus = sc->rxoutboth = 0; sc->rxdrqout = sc->ttrash = sc->rxmbufout = sc->mfixfail = 0; sc->headbyte = sc->tailbyte = sc->tailflush = 0; #endif sc->need_drqs = sc->need_dtqs = 0; printf("%s: %d %dKB receive buffers, %d %dKB transmit buffers allocated\n", sc->sc_dev.dv_xname, sc->en_nrx, EN_RXSZ, EN_NTX, EN_TXSZ); printf("%s: End Station Identifier (mac address) %6D\n", sc->sc_dev.dv_xname, sc->macaddr, ":"); /* * final commit */ if_attach(ifp); atm_ifattach(ifp); #if NBPF > 0 BPFATTACH(ifp, DLT_ATM_RFC1483, sizeof(struct atmllc)); #endif } /* * en_dmaprobe: helper function for en_attach. * * see how the card handles DMA by running a few DMA tests. we need * to figure out the largest number of bytes we can DMA in one burst * ("bestburstlen"), and if the starting address for a burst needs to * be aligned on any sort of boundary or not ("alburst"). 
* * typical findings: * sparc1: bestburstlen=4, alburst=0 (ick, broken DMA!) * sparc2: bestburstlen=64, alburst=1 * p166: bestburstlen=64, alburst=0 */ #if defined(__FreeBSD__) && defined(__i386__) #define NBURSTS 3 /* number of bursts to use for dmaprobe */ #define BOUNDARY 1024 /* test misaligned dma crossing the boundary. should be n * 64. at least 64*(NBURSTS+1). dell P6 with EDO DRAM has 1K boundary problem */ #endif STATIC void en_dmaprobe(sc) struct en_softc *sc; { #ifdef NBURSTS /* be careful. kernel stack is only 8K */ u_int8_t buffer[BOUNDARY * 2 + 64 * (NBURSTS + 1)]; #else u_int32_t srcbuf[64], dstbuf[64]; #endif u_int8_t *sp, *dp; int bestalgn, bestnotalgn, lcv, try; sc->alburst = 0; #ifdef NBURSTS /* setup src and dst buf at the end of the boundary */ sp = (u_int8_t *)roundup((uintptr_t)(void *)buffer, 64); while (((uintptr_t)(void *)sp & (BOUNDARY - 1)) != (BOUNDARY - 64)) sp += 64; dp = sp + BOUNDARY; /* * we can't dma across page boundary so that, if buf is at a page * boundary, move it to the next page. but still either src or dst * will be at the boundary, which should be ok.
*/ if ((((uintptr_t)(void *)sp + 64) & PAGE_MASK) == 0) sp += 64; if ((((uintptr_t)(void *)dp + 64) & PAGE_MASK) == 0) dp += 64; #else /* !NBURSTS */ sp = (u_int8_t *) srcbuf; while ((((unsigned long) sp) % MIDDMA_MAXBURST) != 0) sp += 4; dp = (u_int8_t *) dstbuf; while ((((unsigned long) dp) % MIDDMA_MAXBURST) != 0) dp += 4; #endif /* !NBURSTS */ bestalgn = bestnotalgn = en_dmaprobe_doit(sc, sp, dp, 0); for (lcv = 4 ; lcv < MIDDMA_MAXBURST ; lcv += 4) { try = en_dmaprobe_doit(sc, sp+lcv, dp+lcv, 0); #ifdef NBURSTS if (try < bestnotalgn) { bestnotalgn = try; break; } #else if (try < bestnotalgn) bestnotalgn = try; #endif } if (bestalgn != bestnotalgn) /* need bursts aligned */ sc->alburst = 1; sc->bestburstlen = bestalgn; sc->bestburstshift = en_log2(bestalgn); sc->bestburstmask = sc->bestburstlen - 1; /* must be power of 2 */ sc->bestburstcode = en_sz2b(bestalgn); #if 1 /* __FreeBSD__ */ /* * correct pci chipsets should be able to handle misaligned-64-byte DMA. * but there are too many broken chipsets around. we try to work around * by finding the best workable dma size, but still some broken machines * exhibit the problem later. so warn it here. */ if (bestalgn != 64 || sc->alburst != 0) { printf("%s: WARNING: DMA test detects a broken PCI chipset!\n", sc->sc_dev.dv_xname); printf(" trying to work around the problem... 
but if this doesn't\n"); printf(" work for you, you'd better switch to a newer motherboard.\n"); } #endif /* 1 */ return; } /* * en_dmaprobe_doit: do actual testing */ STATIC int en_dmaprobe_doit(sc, sp, dp, wmtry) struct en_softc *sc; u_int8_t *sp, *dp; int wmtry; { int lcv, retval = 4, cnt, count; u_int32_t reg, bcode, midvloc; /* * set up a 1k buffer at MID_BUFOFF */ if (sc->en_busreset) sc->en_busreset(sc); EN_WRITE(sc, MID_RESID, 0x0); /* reset card before touching RAM */ midvloc = ((MID_BUFOFF - MID_RAMOFF) / sizeof(u_int32_t)) >> MIDV_LOCTOPSHFT; EN_WRITE(sc, MIDX_PLACE(0), MIDX_MKPLACE(en_k2sz(1), midvloc)); EN_WRITE(sc, MID_VC(0), (midvloc << MIDV_LOCSHIFT) | (en_k2sz(1) << MIDV_SZSHIFT) | MIDV_TRASH); EN_WRITE(sc, MID_DST_RP(0), 0); EN_WRITE(sc, MID_WP_ST_CNT(0), 0); #ifdef NBURSTS for (lcv = 0 ; lcv < 64*NBURSTS; lcv++) /* set up sample data */ #else for (lcv = 0 ; lcv < 68 ; lcv++) /* set up sample data */ #endif sp[lcv] = lcv+1; EN_WRITE(sc, MID_MAST_CSR, MID_MCSR_ENDMA); /* enable DMA (only) */ sc->drq_chip = MID_DRQ_REG2A(EN_READ(sc, MID_DMA_RDRX)); sc->dtq_chip = MID_DTQ_REG2A(EN_READ(sc, MID_DMA_RDTX)); /* * try it now . . . DMA it out, then DMA it back in and compare * * note: in order to get the dma stuff to reverse directions it wants * the "end" flag set! since we are not dma'ing valid data we may * get an ident mismatch interrupt (which we will ignore). 
* * note: we've got two different tests rolled up in the same loop * if (wmtry) * then we are doing a wmaybe test and wmtry is a byte count * else we are doing a burst test */ for (lcv = 8 ; lcv <= MIDDMA_MAXBURST ; lcv = lcv * 2) { #ifdef EN_DEBUG printf("DMA test lcv=%d, sp=0x%lx, dp=0x%lx, wmtry=%d\n", lcv, (unsigned long)sp, (unsigned long)dp, wmtry); #endif /* zero SRAM and dest buffer */ for (cnt = 0 ; cnt < 1024; cnt += 4) EN_WRITE(sc, MID_BUFOFF+cnt, 0); /* zero memory */ #ifdef NBURSTS for (cnt = 0 ; cnt < 64*NBURSTS; cnt++) #else for (cnt = 0 ; cnt < 68 ; cnt++) #endif dp[cnt] = 0; if (wmtry) { count = (sc->bestburstlen - sizeof(u_int32_t)) / sizeof(u_int32_t); bcode = en_dmaplan[count].bcode; count = wmtry >> en_dmaplan[count].divshift; } else { bcode = en_sz2b(lcv); count = 1; } #ifdef NBURSTS /* build lcv-byte-DMA x NBURSTS */ if (sc->is_adaptec) EN_WRITE(sc, sc->dtq_chip, MID_MK_TXQ_ADP(lcv*NBURSTS, 0, MID_DMA_END, 0)); else EN_WRITE(sc, sc->dtq_chip, MID_MK_TXQ_ENI(count*NBURSTS, 0, MID_DMA_END, bcode)); EN_WRITE(sc, sc->dtq_chip+4, vtophys(sp)); EN_WRAPADD(MID_DTQOFF, MID_DTQEND, sc->dtq_chip, 8); EN_WRITE(sc, MID_DMA_WRTX, MID_DTQ_A2REG(sc->dtq_chip)); cnt = 1000; while (EN_READ(sc, MID_DMA_RDTX) != MID_DTQ_A2REG(sc->dtq_chip)) { DELAY(1); cnt--; if (cnt == 0) { printf("%s: unexpected timeout in tx DMA test\n", sc->sc_dev.dv_xname); /* printf(" alignment=0x%x, burst size=%d, dma addr reg=0x%x\n", (u_long)sp & 63, lcv, EN_READ(sc, MID_DMA_ADDR)); */ return(retval); /* timeout, give up */ } } #else /* !NBURSTS */ if (sc->is_adaptec) EN_WRITE(sc, sc->dtq_chip, MID_MK_TXQ_ADP(lcv, 0, MID_DMA_END, 0)); else EN_WRITE(sc, sc->dtq_chip, MID_MK_TXQ_ENI(count, 0, MID_DMA_END, bcode)); EN_WRITE(sc, sc->dtq_chip+4, vtophys(sp)); EN_WRITE(sc, MID_DMA_WRTX, MID_DTQ_A2REG(sc->dtq_chip+8)); cnt = 1000; while (EN_READ(sc, MID_DMA_RDTX) == MID_DTQ_A2REG(sc->dtq_chip)) { DELAY(1); cnt--; if (cnt == 0) { printf("%s: unexpected timeout in tx DMA test\n", 
sc->sc_dev.dv_xname); return(retval); /* timeout, give up */ } } EN_WRAPADD(MID_DTQOFF, MID_DTQEND, sc->dtq_chip, 8); #endif /* !NBURSTS */ reg = EN_READ(sc, MID_INTACK); if ((reg & MID_INT_DMA_TX) != MID_INT_DMA_TX) { printf("%s: unexpected status in tx DMA test: 0x%x\n", sc->sc_dev.dv_xname, reg); return(retval); } EN_WRITE(sc, MID_MAST_CSR, MID_MCSR_ENDMA); /* re-enable DMA (only) */ /* "return to sender..." address is known ... */ #ifdef NBURSTS /* build lcv-byte-DMA x NBURSTS */ if (sc->is_adaptec) EN_WRITE(sc, sc->drq_chip, MID_MK_RXQ_ADP(lcv*NBURSTS, 0, MID_DMA_END, 0)); else EN_WRITE(sc, sc->drq_chip, MID_MK_RXQ_ENI(count*NBURSTS, 0, MID_DMA_END, bcode)); EN_WRITE(sc, sc->drq_chip+4, vtophys(dp)); EN_WRAPADD(MID_DRQOFF, MID_DRQEND, sc->drq_chip, 8); EN_WRITE(sc, MID_DMA_WRRX, MID_DRQ_A2REG(sc->drq_chip)); cnt = 1000; while (EN_READ(sc, MID_DMA_RDRX) != MID_DRQ_A2REG(sc->drq_chip)) { DELAY(1); cnt--; if (cnt == 0) { printf("%s: unexpected timeout in rx DMA test\n", sc->sc_dev.dv_xname); return(retval); /* timeout, give up */ } } #else /* !NBURSTS */ if (sc->is_adaptec) EN_WRITE(sc, sc->drq_chip, MID_MK_RXQ_ADP(lcv, 0, MID_DMA_END, 0)); else EN_WRITE(sc, sc->drq_chip, MID_MK_RXQ_ENI(count, 0, MID_DMA_END, bcode)); EN_WRITE(sc, sc->drq_chip+4, vtophys(dp)); EN_WRITE(sc, MID_DMA_WRRX, MID_DRQ_A2REG(sc->drq_chip+8)); cnt = 1000; while (EN_READ(sc, MID_DMA_RDRX) == MID_DRQ_A2REG(sc->drq_chip)) { DELAY(1); cnt--; if (cnt == 0) { printf("%s: unexpected timeout in rx DMA test\n", sc->sc_dev.dv_xname); return(retval); /* timeout, give up */ } } EN_WRAPADD(MID_DRQOFF, MID_DRQEND, sc->drq_chip, 8); #endif /* !NBURSTS */ reg = EN_READ(sc, MID_INTACK); if ((reg & MID_INT_DMA_RX) != MID_INT_DMA_RX) { printf("%s: unexpected status in rx DMA test: 0x%x\n", sc->sc_dev.dv_xname, reg); return(retval); } EN_WRITE(sc, MID_MAST_CSR, MID_MCSR_ENDMA); /* re-enable DMA (only) */ if (wmtry) { return(bcmp(sp, dp, wmtry)); /* wmtry always exits here, no looping */ } #ifdef NBURSTS if 
(bcmp(sp, dp, lcv * NBURSTS)) { /* printf("DMA test failed! lcv=%d, sp=0x%x, dp=0x%x\n", lcv, sp, dp); */ return(retval); /* failed, use last value */ } #else if (bcmp(sp, dp, lcv)) return(retval); /* failed, use last value */ #endif retval = lcv; } return(retval); /* studly 64 byte DMA present! oh baby!! */ } /***********************************************************************/ /* * en_ioctl: handle ioctl requests * * NOTE: if you add an ioctl to set txspeed, you should choose a new * TX channel/slot. Choose the one with the lowest sc->txslot[slot].nref * value, subtract one from sc->txslot[0].nref, add one to the * sc->txslot[slot].nref, set sc->txvc2slot[vci] = slot, and then set * txspeed[vci]. */ STATIC int en_ioctl(ifp, cmd, data) struct ifnet *ifp; EN_IOCTL_CMDT cmd; caddr_t data; { struct en_softc *sc = (struct en_softc *) ifp->if_softc; struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; struct atm_pseudoioctl *api = (struct atm_pseudoioctl *)data; #ifdef NATM struct atm_rawioctl *ario = (struct atm_rawioctl *)data; int slot; #endif int s, error = 0; s = splnet(); switch (cmd) { case SIOCATMENA: /* enable circuit for recv */ error = en_rxctl(sc, api, 1); break; case SIOCATMDIS: /* disable circuit for recv */ error = en_rxctl(sc, api, 0); break; #ifdef NATM case SIOCXRAWATM: if ((slot = sc->rxvc2slot[ario->npcb->npcb_vci]) == RX_NONE) { error = EINVAL; break; } if (ario->rawvalue > EN_RXSZ*1024) ario->rawvalue = EN_RXSZ*1024; if (ario->rawvalue) { sc->rxslot[slot].oth_flags |= ENOTHER_RAW; sc->rxslot[slot].raw_threshold = ario->rawvalue; } else { sc->rxslot[slot].oth_flags &= (~ENOTHER_RAW); sc->rxslot[slot].raw_threshold = 0; } #ifdef EN_DEBUG printf("%s: rxvci%d: turn %s raw (boodi) mode\n", sc->sc_dev.dv_xname, ario->npcb->npcb_vci, (ario->rawvalue) ? 
"on" : "off"); #endif break; #endif case SIOCSIFADDR: ifp->if_flags |= IFF_UP; #if defined(INET) || defined(INET6) if (ifa->ifa_addr->sa_family == AF_INET || ifa->ifa_addr->sa_family == AF_INET6) { en_reset(sc); en_init(sc); ifa->ifa_rtrequest = atm_rtrequest; /* ??? */ break; } #endif /* INET */ /* what to do if not INET? */ en_reset(sc); en_init(sc); break; case SIOCGIFADDR: error = EINVAL; break; case SIOCSIFFLAGS: error = EINVAL; break; #if defined(SIOCSIFMTU) /* ??? copied from if_de */ #if !defined(ifr_mtu) #define ifr_mtu ifr_metric #endif case SIOCSIFMTU: /* * Set the interface MTU. */ #ifdef notsure if (ifr->ifr_mtu > ATMMTU) { error = EINVAL; break; } #endif ifp->if_mtu = ifr->ifr_mtu; /* XXXCDC: do we really need to reset on MTU size change? */ en_reset(sc); en_init(sc); break; #endif /* SIOCSIFMTU */ default: error = EINVAL; break; } splx(s); return error; } /* * en_rxctl: turn on and off VCs for recv. */ STATIC int en_rxctl(sc, pi, on) struct en_softc *sc; struct atm_pseudoioctl *pi; int on; { u_int s, vci, flags, slot; u_int32_t oldmode, newmode; vci = ATM_PH_VCI(&pi->aph); flags = ATM_PH_FLAGS(&pi->aph); #ifdef EN_DEBUG printf("%s: %s vpi=%d, vci=%d, flags=%d\n", sc->sc_dev.dv_xname, (on) ? "enable" : "disable", ATM_PH_VPI(&pi->aph), vci, flags); #endif if (ATM_PH_VPI(&pi->aph) || vci >= MID_N_VC) return(EINVAL); /* * turn on VCI! */ if (on) { if (sc->rxvc2slot[vci] != RX_NONE) return(EINVAL); for (slot = 0 ; slot < sc->en_nrx ; slot++) if (sc->rxslot[slot].oth_flags & ENOTHER_FREE) break; if (slot == sc->en_nrx) return(ENOSPC); sc->rxvc2slot[vci] = slot; sc->rxslot[slot].rxhand = NULL; oldmode = sc->rxslot[slot].mode; newmode = (flags & ATM_PH_AAL5) ? 
MIDV_AAL5 : MIDV_NOAAL; sc->rxslot[slot].mode = MIDV_SETMODE(oldmode, newmode); sc->rxslot[slot].atm_vci = vci; sc->rxslot[slot].atm_flags = flags; sc->rxslot[slot].oth_flags = 0; sc->rxslot[slot].rxhand = pi->rxhand; if (sc->rxslot[slot].indma.ifq_head || sc->rxslot[slot].q.ifq_head) panic("en_rxctl: left over mbufs on enable"); sc->txspeed[vci] = 0; /* full speed to start */ sc->txvc2slot[vci] = 0; /* init value */ sc->txslot[0].nref++; /* bump reference count */ en_loadvc(sc, vci); /* does debug printf for us */ return(0); } /* * turn off VCI */ if (sc->rxvc2slot[vci] == RX_NONE) return(EINVAL); slot = sc->rxvc2slot[vci]; if ((sc->rxslot[slot].oth_flags & (ENOTHER_FREE|ENOTHER_DRAIN)) != 0) return(EINVAL); s = splimp(); /* block out enintr() */ oldmode = EN_READ(sc, MID_VC(vci)); newmode = MIDV_SETMODE(oldmode, MIDV_TRASH) & ~MIDV_INSERVICE; EN_WRITE(sc, MID_VC(vci), (newmode | (oldmode & MIDV_INSERVICE))); /* halt in tracks, be careful to preserve inserivce bit */ DELAY(27); sc->rxslot[slot].rxhand = NULL; sc->rxslot[slot].mode = newmode; sc->txslot[sc->txvc2slot[vci]].nref--; sc->txspeed[vci] = 0; sc->txvc2slot[vci] = 0; /* if stuff is still going on we are going to have to drain it out */ if (sc->rxslot[slot].indma.ifq_head || sc->rxslot[slot].q.ifq_head || (sc->rxslot[slot].oth_flags & ENOTHER_SWSL) != 0) { sc->rxslot[slot].oth_flags |= ENOTHER_DRAIN; } else { sc->rxslot[slot].oth_flags = ENOTHER_FREE; sc->rxslot[slot].atm_vci = RX_NONE; sc->rxvc2slot[vci] = RX_NONE; } splx(s); /* enable enintr() */ #ifdef EN_DEBUG printf("%s: rx%d: VCI %d is now %s\n", sc->sc_dev.dv_xname, slot, vci, (sc->rxslot[slot].oth_flags & ENOTHER_DRAIN) ? "draining" : "free"); #endif return(0); } /***********************************************************************/ /* * en_reset: reset the board, throw away work in progress. * must en_init to recover. 
*/ void en_reset(sc) struct en_softc *sc; { struct mbuf *m; int lcv, slot; #ifdef EN_DEBUG printf("%s: reset\n", sc->sc_dev.dv_xname); #endif if (sc->en_busreset) sc->en_busreset(sc); EN_WRITE(sc, MID_RESID, 0x0); /* reset hardware */ /* * recv: dump any mbufs we are dma'ing into, if DRAINing, then a reset * will free us! */ for (lcv = 0 ; lcv < MID_N_VC ; lcv++) { if (sc->rxvc2slot[lcv] == RX_NONE) continue; slot = sc->rxvc2slot[lcv]; while (1) { _IF_DEQUEUE(&sc->rxslot[slot].indma, m); if (m == NULL) break; /* >>> exit 'while(1)' here <<< */ m_freem(m); } while (1) { _IF_DEQUEUE(&sc->rxslot[slot].q, m); if (m == NULL) break; /* >>> exit 'while(1)' here <<< */ m_freem(m); } sc->rxslot[slot].oth_flags &= ~ENOTHER_SWSL; if (sc->rxslot[slot].oth_flags & ENOTHER_DRAIN) { sc->rxslot[slot].oth_flags = ENOTHER_FREE; sc->rxvc2slot[lcv] = RX_NONE; #ifdef EN_DEBUG printf("%s: rx%d: VCI %d is now free\n", sc->sc_dev.dv_xname, slot, lcv); #endif } } /* * xmit: dump everything */ for (lcv = 0 ; lcv < EN_NTX ; lcv++) { while (1) { _IF_DEQUEUE(&sc->txslot[lcv].indma, m); if (m == NULL) break; /* >>> exit 'while(1)' here <<< */ m_freem(m); } while (1) { _IF_DEQUEUE(&sc->txslot[lcv].q, m); if (m == NULL) break; /* >>> exit 'while(1)' here <<< */ m_freem(m); } sc->txslot[lcv].mbsize = 0; } return; } /* * en_init: init board and sync the card with the data in the softc. */ STATIC void en_init(sc) struct en_softc *sc; { int vc, slot; u_int32_t loc; if ((sc->enif.if_flags & IFF_UP) == 0) { #ifdef EN_DEBUG printf("%s: going down\n", sc->sc_dev.dv_xname); #endif en_reset(sc); /* to be safe */ sc->enif.if_flags &= ~IFF_RUNNING; /* disable */ return; } #ifdef EN_DEBUG printf("%s: going up\n", sc->sc_dev.dv_xname); #endif sc->enif.if_flags |= IFF_RUNNING; /* enable */ if (sc->en_busreset) sc->en_busreset(sc); EN_WRITE(sc, MID_RESID, 0x0); /* reset */ /* * init obmem data structures: vc tab, dma q's, slist. 
* * note that we set drq_free/dtq_free to one less than the total number * of DTQ/DRQs present. we do this because the card uses the condition * (drq_chip == drq_us) to mean "list is empty"... but if you allow the * circular list to be completely full then (drq_chip == drq_us) [i.e. * the drq_us pointer will wrap all the way around]. by restricting * the number of active requests to (N - 1) we prevent the list from * becoming completely full. note that the card will sometimes give * us an interrupt for a DTQ/DRQ we have already processes... this helps * keep that interrupt from messing us up. */ for (vc = 0 ; vc < MID_N_VC ; vc++) en_loadvc(sc, vc); bzero(&sc->drq, sizeof(sc->drq)); sc->drq_free = MID_DRQ_N - 1; /* N - 1 */ sc->drq_chip = MID_DRQ_REG2A(EN_READ(sc, MID_DMA_RDRX)); EN_WRITE(sc, MID_DMA_WRRX, MID_DRQ_A2REG(sc->drq_chip)); /* ensure zero queue */ sc->drq_us = sc->drq_chip; bzero(&sc->dtq, sizeof(sc->dtq)); sc->dtq_free = MID_DTQ_N - 1; /* N - 1 */ sc->dtq_chip = MID_DTQ_REG2A(EN_READ(sc, MID_DMA_RDTX)); EN_WRITE(sc, MID_DMA_WRTX, MID_DRQ_A2REG(sc->dtq_chip)); /* ensure zero queue */ sc->dtq_us = sc->dtq_chip; sc->hwslistp = MID_SL_REG2A(EN_READ(sc, MID_SERV_WRITE)); sc->swsl_size = sc->swsl_head = sc->swsl_tail = 0; #ifdef EN_DEBUG printf("%s: drq free/chip: %d/0x%x, dtq free/chip: %d/0x%x, hwslist: 0x%x\n", sc->sc_dev.dv_xname, sc->drq_free, sc->drq_chip, sc->dtq_free, sc->dtq_chip, sc->hwslistp); #endif for (slot = 0 ; slot < EN_NTX ; slot++) { sc->txslot[slot].bfree = EN_TXSZ * 1024; EN_WRITE(sc, MIDX_READPTR(slot), 0); EN_WRITE(sc, MIDX_DESCSTART(slot), 0); loc = sc->txslot[slot].cur = sc->txslot[slot].start; loc = loc - MID_RAMOFF; loc = (loc & ~((EN_TXSZ*1024) - 1)) >> 2; /* mask, cvt to words */ loc = loc >> MIDV_LOCTOPSHFT; /* top 11 bits */ EN_WRITE(sc, MIDX_PLACE(slot), MIDX_MKPLACE(en_k2sz(EN_TXSZ), loc)); #ifdef EN_DEBUG printf("%s: tx%d: place 0x%x\n", sc->sc_dev.dv_xname, slot, (u_int)EN_READ(sc, MIDX_PLACE(slot))); #endif } /* * enable! 
*/ EN_WRITE(sc, MID_INTENA, MID_INT_TX|MID_INT_DMA_OVR|MID_INT_IDENT| MID_INT_LERR|MID_INT_DMA_ERR|MID_INT_DMA_RX|MID_INT_DMA_TX| MID_INT_SERVICE| /* >>> MID_INT_SUNI| XXXCDC<<< */ MID_INT_STATS); EN_WRITE(sc, MID_MAST_CSR, MID_SETIPL(sc->ipl)|MID_MCSR_ENDMA| MID_MCSR_ENTX|MID_MCSR_ENRX); } /* * en_loadvc: load a vc tab entry from a slot */ STATIC void en_loadvc(sc, vc) struct en_softc *sc; int vc; { int slot; u_int32_t reg = EN_READ(sc, MID_VC(vc)); reg = MIDV_SETMODE(reg, MIDV_TRASH); EN_WRITE(sc, MID_VC(vc), reg); DELAY(27); if ((slot = sc->rxvc2slot[vc]) == RX_NONE) return; /* no need to set CRC */ EN_WRITE(sc, MID_DST_RP(vc), 0); /* read pointer = 0, desc. start = 0 */ EN_WRITE(sc, MID_WP_ST_CNT(vc), 0); /* write pointer = 0 */ EN_WRITE(sc, MID_VC(vc), sc->rxslot[slot].mode); /* set mode, size, loc */ sc->rxslot[slot].cur = sc->rxslot[slot].start; #ifdef EN_DEBUG printf("%s: rx%d: assigned to VCI %d\n", sc->sc_dev.dv_xname, slot, vc); #endif } /* * en_start: start transmitting the next packet that needs to go out * if there is one. note that atm_output() has already splimp()'d us. */ STATIC void en_start(ifp) struct ifnet *ifp; { struct en_softc *sc = (struct en_softc *) ifp->if_softc; struct ifqueue *ifq = &ifp->if_snd; /* if INPUT QUEUE */ struct mbuf *m, *lastm, *prev; struct atm_pseudohdr *ap, *new_ap; int txchan, mlen, got, need, toadd, cellcnt, first; u_int32_t atm_vpi, atm_vci, atm_flags, *dat, aal; u_int8_t *cp; if ((ifp->if_flags & IFF_RUNNING) == 0) return; /* * remove everything from interface queue since we handle all queueing * locally ... */ while (1) { IF_DEQUEUE(ifq, m); if (m == NULL) return; /* EMPTY: >>> exit here <<< */ /* * calculate size of packet (in bytes) * also, if we are not doing transmit DMA we eliminate all stupid * (non-word) alignments here using en_mfix(). calls to en_mfix() * seem to be due to tcp retransmits for the most part. 
* * after this loop mlen total length of mbuf chain (including atm_ph), * and lastm is a pointer to the last mbuf on the chain. */ lastm = m; mlen = 0; prev = NULL; while (1) { /* no DMA? */ if ((!sc->is_adaptec && EN_ENIDMAFIX) || EN_NOTXDMA || !en_dma) { if ( ((uintptr_t)mtod(lastm, void *) % sizeof(u_int32_t)) != 0 || ((lastm->m_len % sizeof(u_int32_t)) != 0 && lastm->m_next)) { first = (lastm == m); if (en_mfix(sc, &lastm, prev) == 0) { /* failed? */ m_freem(m); m = NULL; break; } if (first) m = lastm; /* update */ } prev = lastm; } mlen += lastm->m_len; if (lastm->m_next == NULL) break; lastm = lastm->m_next; } if (m == NULL) /* happens only if mfix fails */ continue; ap = mtod(m, struct atm_pseudohdr *); atm_vpi = ATM_PH_VPI(ap); atm_vci = ATM_PH_VCI(ap); atm_flags = ATM_PH_FLAGS(ap) & ~(EN_OBHDR|EN_OBTRL); aal = ((atm_flags & ATM_PH_AAL5) != 0) ? MID_TBD_AAL5 : MID_TBD_NOAAL5; /* * check that vpi/vci is one we can use */ if (atm_vpi || atm_vci > MID_N_VC) { printf("%s: output vpi=%d, vci=%d out of card range, dropping...\n", sc->sc_dev.dv_xname, atm_vpi, atm_vci); m_freem(m); continue; } /* * computing how much padding we need on the end of the mbuf, then * see if we can put the TBD at the front of the mbuf where the * link header goes (well behaved protocols will reserve room for us). * last, check if room for PDU tail. * * got = number of bytes of data we have * cellcnt = number of cells in this mbuf * need = number of bytes of data + padding we need (excludes TBD) * toadd = number of bytes of data we need to add to end of mbuf, * [including AAL5 PDU, if AAL5] */ got = mlen - sizeof(struct atm_pseudohdr); toadd = (aal == MID_TBD_AAL5) ? 
MID_PDU_SIZE : 0; /* PDU */ cellcnt = (got + toadd + (MID_ATMDATASZ - 1)) / MID_ATMDATASZ; need = cellcnt * MID_ATMDATASZ; toadd = need - got; /* recompute, including zero padding */ #ifdef EN_DEBUG printf("%s: txvci%d: mlen=%d, got=%d, need=%d, toadd=%d, cell#=%d\n", sc->sc_dev.dv_xname, atm_vci, mlen, got, need, toadd, cellcnt); printf(" leading_space=%d, trailing_space=%d\n", (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(lastm)); #endif #ifdef EN_MBUF_OPT /* * note: external storage (M_EXT) can be shared between mbufs * to avoid copying (see m_copym()). this means that the same * data buffer could be shared by several mbufs, and thus it isn't * a good idea to try and write TBDs or PDUs to M_EXT data areas. */ if (M_LEADINGSPACE(m) >= MID_TBD_SIZE && (m->m_flags & M_EXT) == 0) { m->m_data -= MID_TBD_SIZE; m->m_len += MID_TBD_SIZE; mlen += MID_TBD_SIZE; new_ap = mtod(m, struct atm_pseudohdr *); *new_ap = *ap; /* move it back */ ap = new_ap; dat = ((u_int32_t *) ap) + 1; /* make sure the TBD is in proper byte order */ *dat++ = htonl(MID_TBD_MK1(aal, sc->txspeed[atm_vci], cellcnt)); *dat = htonl(MID_TBD_MK2(atm_vci, 0, 0)); atm_flags |= EN_OBHDR; } if (toadd && (lastm->m_flags & M_EXT) == 0 && M_TRAILINGSPACE(lastm) >= toadd) { cp = mtod(lastm, u_int8_t *) + lastm->m_len; lastm->m_len += toadd; mlen += toadd; if (aal == MID_TBD_AAL5) { bzero(cp, toadd - MID_PDU_SIZE); dat = (u_int32_t *)(cp + toadd - MID_PDU_SIZE); /* make sure the PDU is in proper byte order */ *dat = htonl(MID_PDU_MK1(0, 0, got)); } else { bzero(cp, toadd); } atm_flags |= EN_OBTRL; } ATM_PH_FLAGS(ap) = atm_flags; /* update EN_OBHDR/EN_OBTRL bits */ #endif /* EN_MBUF_OPT */ /* * get assigned channel (will be zero unless txspeed[atm_vci] is set) */ txchan = sc->txvc2slot[atm_vci]; if (sc->txslot[txchan].mbsize > EN_TXHIWAT) { EN_COUNT(sc->txmbovr); m_freem(m); #ifdef EN_DEBUG printf("%s: tx%d: buffer space shortage\n", sc->sc_dev.dv_xname, txchan); #endif continue; } sc->txslot[txchan].mbsize += 
mlen; #ifdef EN_DEBUG printf("%s: tx%d: VPI=%d, VCI=%d, FLAGS=0x%x, speed=0x%x\n", sc->sc_dev.dv_xname, txchan, atm_vpi, atm_vci, atm_flags, sc->txspeed[atm_vci]); printf(" adjusted mlen=%d, mbsize=%d\n", mlen, sc->txslot[txchan].mbsize); #endif _IF_ENQUEUE(&sc->txslot[txchan].q, m); en_txdma(sc, txchan); } /*NOTREACHED*/ } /* * en_mfix: fix a stupid mbuf */ #ifndef __FreeBSD__ STATIC int en_mfix(sc, mm, prev) struct en_softc *sc; struct mbuf **mm, *prev; { struct mbuf *m, *new; u_char *d, *cp; int off; struct mbuf *nxt; m = *mm; EN_COUNT(sc->mfix); /* count # of calls */ #ifdef EN_DEBUG printf("%s: mfix mbuf m_data=%p, m_len=%d\n", sc->sc_dev.dv_xname, m->m_data, m->m_len); #endif d = mtod(m, u_char *); off = ((unsigned long) d) % sizeof(u_int32_t); if (off) { if ((m->m_flags & M_EXT) == 0) { bcopy(d, d - off, m->m_len); /* ALIGN! (with costly data copy...) */ d -= off; m->m_data = (caddr_t)d; } else { /* can't write to an M_EXT mbuf since it may be shared */ MGET(new, M_DONTWAIT, MT_DATA); if (!new) { EN_COUNT(sc->mfixfail); return(0); } MCLGET(new, M_DONTWAIT); if ((new->m_flags & M_EXT) == 0) { m_free(new); EN_COUNT(sc->mfixfail); return(0); } bcopy(d, new->m_data, m->m_len); /* ALIGN! (with costly data copy...) 
*/ new->m_len = m->m_len; new->m_next = m->m_next; if (prev) prev->m_next = new; m_free(m); *mm = m = new; /* note: 'd' now invalid */ } } off = m->m_len % sizeof(u_int32_t); if (off == 0) return(1); d = mtod(m, u_char *) + m->m_len; off = sizeof(u_int32_t) - off; nxt = m->m_next; while (off--) { for ( ; nxt != NULL && nxt->m_len == 0 ; nxt = nxt->m_next) /*null*/; if (nxt == NULL) { /* out of data, zero fill */ *d++ = 0; continue; /* next "off" */ } cp = mtod(nxt, u_char *); *d++ = *cp++; m->m_len++; nxt->m_len--; nxt->m_data = (caddr_t)cp; } return(1); } #else /* __FreeBSD__ */ STATIC int en_makeexclusive(struct en_softc *, struct mbuf **, struct mbuf *); STATIC int en_makeexclusive(sc, mm, prev) struct en_softc *sc; struct mbuf **mm, *prev; { struct mbuf *m, *new; m = *mm; if (m->m_flags & M_EXT) { if (m->m_ext.ext_type != EXT_CLUSTER) { /* external buffer isn't an ordinary mbuf cluster! */ printf("%s: mfix: special buffer! can't make a copy!\n", sc->sc_dev.dv_xname); return (0); } if (MEXT_IS_REF(m)) { /* make a real copy of the M_EXT mbuf since it is shared */ MGET(new, M_DONTWAIT, MT_DATA); if (!new) { EN_COUNT(sc->mfixfail); return(0); } if (m->m_flags & M_PKTHDR) M_MOVE_PKTHDR(new, m); MCLGET(new, M_DONTWAIT); if ((new->m_flags & M_EXT) == 0) { m_free(new); EN_COUNT(sc->mfixfail); return(0); } bcopy(m->m_data, new->m_data, m->m_len); new->m_len = m->m_len; new->m_next = m->m_next; if (prev) prev->m_next = new; m_free(m); *mm = new; } else { /* the buffer is not shared, align the data offset using this buffer.
*/ u_char *d = mtod(m, u_char *); int off = ((uintptr_t)(void *)d) % sizeof(u_int32_t); if (off > 0) { bcopy(d, d - off, m->m_len); m->m_data = (caddr_t)d - off; } } } return (1); } STATIC int en_mfix(sc, mm, prev) struct en_softc *sc; struct mbuf **mm, *prev; { struct mbuf *m; u_char *d, *cp; int off; struct mbuf *nxt; m = *mm; EN_COUNT(sc->mfix); /* count # of calls */ #ifdef EN_DEBUG printf("%s: mfix mbuf m_data=%p, m_len=%d\n", sc->sc_dev.dv_xname, m->m_data, m->m_len); #endif d = mtod(m, u_char *); off = ((uintptr_t) (void *) d) % sizeof(u_int32_t); if (off) { if ((m->m_flags & M_EXT) == 0) { bcopy(d, d - off, m->m_len); /* ALIGN! (with costly data copy...) */ d -= off; m->m_data = (caddr_t)d; } else { /* can't write to an M_EXT mbuf since it may be shared */ if (en_makeexclusive(sc, &m, prev) == 0) return (0); *mm = m; /* note: 'd' now invalid */ } } off = m->m_len % sizeof(u_int32_t); if (off == 0) return(1); if (m->m_flags & M_EXT) { /* can't write to an M_EXT mbuf since it may be shared */ if (en_makeexclusive(sc, &m, prev) == 0) return (0); *mm = m; /* note: 'd' now invalid */ } d = mtod(m, u_char *) + m->m_len; off = sizeof(u_int32_t) - off; nxt = m->m_next; while (off--) { if (nxt != NULL && nxt->m_len == 0) { /* remove an empty mbuf. this avoids odd byte padding to an empty last mbuf. 
*/ m->m_next = nxt = m_free(nxt); } if (nxt == NULL) { /* out of data, zero fill */ *d++ = 0; continue; /* next "off" */ } cp = mtod(nxt, u_char *); *d++ = *cp++; m->m_len++; nxt->m_len--; nxt->m_data = (caddr_t)cp; } if (nxt != NULL && nxt->m_len == 0) m->m_next = m_free(nxt); return(1); } #endif /* __FreeBSD__ */ /* * en_txdma: start transmit DMA, if possible */ STATIC void en_txdma(sc, chan) struct en_softc *sc; int chan; { struct mbuf *tmp; struct atm_pseudohdr *ap; struct en_launch launch; int datalen = 0, dtqneed, len, ncells; u_int8_t *cp; struct ifnet *ifp; #ifdef EN_DEBUG printf("%s: tx%d: starting...\n", sc->sc_dev.dv_xname, chan); #endif /* * note: now that txlaunch handles non-word aligned/sized requests * the only time you can safely set launch.nodma is if you've en_mfix()'d * the mbuf chain. this happens only if EN_NOTXDMA || !en_dma. */ launch.nodma = (EN_NOTXDMA || !en_dma); again: /* * get an mbuf waiting for DMA */ launch.t = sc->txslot[chan].q.ifq_head; /* peek at head of queue */ if (launch.t == NULL) { #ifdef EN_DEBUG printf("%s: tx%d: ...done!\n", sc->sc_dev.dv_xname, chan); #endif return; /* >>> exit here if no data waiting for DMA <<< */ } /* * get flags, vci * * note: launch.need = # bytes we need to get on the card * dtqneed = # of DTQs we need for this packet * launch.mlen = # of bytes in mbuf chain (<= launch.need) */ ap = mtod(launch.t, struct atm_pseudohdr *); launch.atm_vci = ATM_PH_VCI(ap); launch.atm_flags = ATM_PH_FLAGS(ap); launch.aal = ((launch.atm_flags & ATM_PH_AAL5) != 0) ? MID_TBD_AAL5 : MID_TBD_NOAAL5; /* * XXX: have to recompute the length again, even though we already did * it in en_start(). might as well compute dtqneed here as well, so * this isn't that bad.
*/ if ((launch.atm_flags & EN_OBHDR) == 0) { dtqneed = 1; /* header still needs to be added */ launch.need = MID_TBD_SIZE; /* not includeded with mbuf */ } else { dtqneed = 0; /* header on-board, dma with mbuf */ launch.need = 0; } launch.mlen = 0; for (tmp = launch.t ; tmp != NULL ; tmp = tmp->m_next) { len = tmp->m_len; launch.mlen += len; cp = mtod(tmp, u_int8_t *); if (tmp == launch.t) { len -= sizeof(struct atm_pseudohdr); /* don't count this! */ cp += sizeof(struct atm_pseudohdr); } launch.need += len; if (len == 0) continue; /* atm_pseudohdr alone in first mbuf */ dtqneed += en_dqneed(sc, (caddr_t) cp, len, 1); } if ((launch.need % sizeof(u_int32_t)) != 0) dtqneed++; /* need DTQ to FLUSH internal buffer */ if ((launch.atm_flags & EN_OBTRL) == 0) { if (launch.aal == MID_TBD_AAL5) { datalen = launch.need - MID_TBD_SIZE; launch.need += MID_PDU_SIZE; /* AAL5: need PDU tail */ } dtqneed++; /* need to work on the end a bit */ } /* * finish calculation of launch.need (need to figure out how much padding * we will need). launch.need includes MID_TBD_SIZE, but we need to * remove that to so we can round off properly. we have to add * MID_TBD_SIZE back in after calculating ncells. */ launch.need = roundup(launch.need - MID_TBD_SIZE, MID_ATMDATASZ); ncells = launch.need / MID_ATMDATASZ; launch.need += MID_TBD_SIZE; if (launch.need > EN_TXSZ * 1024) { printf("%s: tx%d: packet larger than xmit buffer (%d > %d)\n", sc->sc_dev.dv_xname, chan, launch.need, EN_TXSZ * 1024); goto dequeue_drop; } /* * note: don't use the entire buffer space. if WRTX becomes equal * to RDTX, the transmitter stops assuming the buffer is empty! --kjc */ if (launch.need >= sc->txslot[chan].bfree) { EN_COUNT(sc->txoutspace); #ifdef EN_DEBUG printf("%s: tx%d: out of transmit space\n", sc->sc_dev.dv_xname, chan); #endif return; /* >>> exit here if out of obmem buffer space <<< */ } /* * ensure we have enough dtqs to go, if not, wait for more. 
*/ if (launch.nodma) { dtqneed = 1; } if (dtqneed > sc->dtq_free) { sc->need_dtqs = 1; EN_COUNT(sc->txdtqout); #ifdef EN_DEBUG printf("%s: tx%d: out of transmit DTQs\n", sc->sc_dev.dv_xname, chan); #endif return; /* >>> exit here if out of dtqs <<< */ } /* * it is a go, commit! dequeue mbuf start working on the xfer. */ _IF_DEQUEUE(&sc->txslot[chan].q, tmp); #ifdef EN_DIAG if (launch.t != tmp) panic("en dequeue"); #endif /* EN_DIAG */ /* * launch! */ EN_COUNT(sc->launch); ifp = &sc->enif; ifp->if_opackets++; if ((launch.atm_flags & EN_OBHDR) == 0) { EN_COUNT(sc->lheader); /* store tbd1/tbd2 in host byte order */ launch.tbd1 = MID_TBD_MK1(launch.aal, sc->txspeed[launch.atm_vci], ncells); launch.tbd2 = MID_TBD_MK2(launch.atm_vci, 0, 0); } if ((launch.atm_flags & EN_OBTRL) == 0 && launch.aal == MID_TBD_AAL5) { EN_COUNT(sc->ltail); launch.pdu1 = MID_PDU_MK1(0, 0, datalen); /* host byte order */ } en_txlaunch(sc, chan, &launch); #if NBPF > 0 if (ifp->if_bpf) { /* * adjust the top of the mbuf to skip the pseudo atm header * (and TBD, if present) before passing the packet to bpf, * restore it afterwards. */ int size = sizeof(struct atm_pseudohdr); if (launch.atm_flags & EN_OBHDR) size += MID_TBD_SIZE; launch.t->m_data += size; launch.t->m_len -= size; BPF_MTAP(ifp, launch.t); launch.t->m_data -= size; launch.t->m_len += size; } #endif /* NBPF > 0 */ /* * do some housekeeping and get the next packet */ sc->txslot[chan].bfree -= launch.need; _IF_ENQUEUE(&sc->txslot[chan].indma, launch.t); goto again; /* * END of txdma loop! */ /* * error handles */ dequeue_drop: _IF_DEQUEUE(&sc->txslot[chan].q, tmp); if (launch.t != tmp) panic("en dequeue drop"); m_freem(launch.t); sc->txslot[chan].mbsize -= launch.mlen; goto again; } /* * en_txlaunch: launch an mbuf into the dma pool! 
*/

/*
 * Copies/DMAs one packet (described by *l, already resource-checked by
 * en_txdma) into the channel's on-board transmit buffer, inserting the
 * TBD header, AAL5 PDU trailer and pad words by hand when they are not
 * already on-board.  Advances txslot[chan].cur past everything written.
 * The caller has already verified buffer space and DTQ availability.
 */
STATIC void en_txlaunch(sc, chan, l)

struct en_softc *sc;
int chan;
struct en_launch *l;

{
  struct mbuf *tmp;
  u_int32_t cur = sc->txslot[chan].cur,
	    start = sc->txslot[chan].start,
	    stop = sc->txslot[chan].stop,
	    dma, *data, *datastop, count, bcode;
  int pad, addtail, need, len, needalign, cnt, end, mx;

  /*
   * vars:
   *   need = # bytes card still needs (decr. to zero)
   *   len = # of bytes left in current mbuf
   *   cur = our current pointer
   *   dma = last place we programmed into the DMA
   *   data = pointer into data area of mbuf that needs to go next
   *   cnt = # of bytes to transfer in this DTQ
   *   bcode/count = DMA burst code, and chip's version of cnt
   *
   *   a single buffer can require up to 5 DTQs depending on its size
   *   and alignment requirements.  the 5 possible requests are:
   *   [1] 1, 2, or 3 byte DMA to align src data pointer to word boundary
   *   [2] alburst DMA to align src data pointer to bestburstlen
   *   [3] 1 or more bestburstlen DMAs
   *   [4] clean up burst (to last word boundary)
   *   [5] 1, 2, or 3 byte final clean up DMA
   */

  need = l->need;
  dma = cur;
  addtail = (l->atm_flags & EN_OBTRL) == 0;	/* add a tail? */

#ifdef EN_DIAG
  /* NOTE(review): "trasmit" typo is in the original log string; left as-is */
  if ((need - MID_TBD_SIZE) % MID_ATMDATASZ)
    printf("%s: tx%d: bogus trasmit needs (%d)\n", sc->sc_dev.dv_xname, chan,
	need);
#endif
#ifdef EN_DEBUG
  printf("%s: tx%d: launch mbuf %p! cur=0x%x[%d], need=%d, addtail=%d\n",
	sc->sc_dev.dv_xname, chan, l->t, cur, (cur-start)/4, need, addtail);
  count = EN_READ(sc, MIDX_PLACE(chan));
  printf(" HW: base_address=0x%x, size=%d, read=%d, descstart=%d\n",
	(u_int)MIDX_BASE(count), MIDX_SZ(count),
	(int)EN_READ(sc, MIDX_READPTR(chan)),
	(int)EN_READ(sc, MIDX_DESCSTART(chan)));
#endif

  /*
   * do we need to insert the TBD by hand?
   * note that tbd1/tbd2/pdu1 are in host byte order.
   */

  if ((l->atm_flags & EN_OBHDR) == 0) {
#ifdef EN_DEBUG
    printf("%s: tx%d: insert header 0x%x 0x%x\n", sc->sc_dev.dv_xname,
	chan, l->tbd1, l->tbd2);
#endif
    EN_WRITE(sc, cur, l->tbd1);
    EN_WRAPADD(start, stop, cur, 4);
    EN_WRITE(sc, cur, l->tbd2);
    EN_WRAPADD(start, stop, cur, 4);
    need -= 8;
  }

  /*
   * now do the mbufs...
   */

  for (tmp = l->t ; tmp != NULL ; tmp = tmp->m_next) {

    /* get pointer to data and length */
    data = mtod(tmp, u_int32_t *);
    len = tmp->m_len;
    if (tmp == l->t) {
      /* skip the atm_pseudohdr in the first mbuf */
      data += sizeof(struct atm_pseudohdr)/sizeof(u_int32_t);
      len -= sizeof(struct atm_pseudohdr);
    }

    /* now, determine if we should copy it */
    if (l->nodma || (len < EN_MINDMA &&
       (len % 4) == 0 && ((uintptr_t) (void *) data % 4) == 0 &&
       (cur % 4) == 0)) {

      /*
       * roundup len: the only time this will change the value of len
       * is when l->nodma is true, tmp is the last mbuf, and there is
       * a non-word number of bytes to transmit.  in this case it is
       * safe to round up because we've en_mfix'd the mbuf (so the first
       * byte is word aligned there must be enough free bytes at the end
       * to round off to the next word boundary)...
       */
      len = roundup(len, sizeof(u_int32_t));
      datastop = data + (len / sizeof(u_int32_t));

      /* copy loop: preserve byte order!!!  use WRITEDAT */
      while (data != datastop) {
	EN_WRITEDAT(sc, cur, *data);
	data++;
	EN_WRAPADD(start, stop, cur, 4);
      }

      need -= len;

#ifdef EN_DEBUG
      printf("%s: tx%d: copied %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, len, need, cur);
#endif

      continue;		/* continue on to next mbuf */
    }

    /* going to do DMA, first make sure the dtq is in sync. */
    if (dma != cur) {
      EN_DTQADD(sc, WORD_IDX(start,cur), chan, MIDDMA_JK, 0, 0, 0);
#ifdef EN_DEBUG
      printf("%s: tx%d: dtq_sync: advance pointer to %d\n",
		sc->sc_dev.dv_xname, chan, cur);
#endif
    }

    /*
     * if this is the last buffer, and it looks like we are going to need to
     * flush the internal buffer, can we extend the length of this mbuf to
     * avoid the FLUSH?
     */

    if (tmp->m_next == NULL) {
      cnt = (need - len) % sizeof(u_int32_t);
      if (cnt && M_TRAILINGSPACE(tmp) >= cnt)
	len += cnt;			/* pad for FLUSH */
    }

#if !defined(MIDWAY_ENIONLY)

    /*
     * the adaptec DMA engine is smart and handles everything for us.
     */

    if (sc->is_adaptec) {
      /* need to DMA "len" bytes out to card */
      need -= len;
      EN_WRAPADD(start, stop, cur, len);
#ifdef EN_DEBUG
      printf("%s: tx%d: adp_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, len, need, cur);
#endif
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, len, chan, 0, vtophys(data), l->mlen, end);
      if (end)
	goto done;
      dma = cur;	/* update dma pointer */
      continue;
    }
#endif /* !MIDWAY_ENIONLY */

#if !defined(MIDWAY_ADPONLY)

    /*
     * the ENI DMA engine is not so smart and need more help from us
     */

    /* do we need to do a DMA op to align to word boundary? */
    needalign = (uintptr_t) (void *) data % sizeof(u_int32_t);
    if (needalign) {
      EN_COUNT(sc->headbyte);
      cnt = sizeof(u_int32_t) - needalign;
      if (cnt == 2 && len >= cnt) {
	count = 1;
	bcode = MIDDMA_2BYTE;
      } else {
	cnt = min(cnt, len);		/* prevent overflow */
	count = cnt;
	bcode = MIDDMA_BYTE;
      }
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: tx%d: small al_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, cnt, need, cur);
#endif
      len -= cnt;
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, count, chan, bcode, vtophys(data), l->mlen, end);
      if (end)
	goto done;
      data = (u_int32_t *) ((u_char *)data + cnt);
    }

    /* do we need to do a DMA op to align? */
    if (sc->alburst &&
	(needalign = (((uintptr_t) (void *) data) & sc->bestburstmask)) != 0 &&
	len >= sizeof(u_int32_t)) {
      cnt = sc->bestburstlen - needalign;
      mx = len & ~(sizeof(u_int32_t)-1);	/* don't go past end */
      if (cnt > mx) {
	cnt = mx;
	count = cnt / sizeof(u_int32_t);
	bcode = MIDDMA_WORD;
      } else {
	count = cnt / sizeof(u_int32_t);
	bcode = en_dmaplan[count].bcode;
	count = cnt >> en_dmaplan[count].divshift;
      }
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: tx%d: al_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, cnt, need, cur);
#endif
      len -= cnt;
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, count, chan, bcode, vtophys(data), l->mlen, end);
      if (end)
	goto done;
      data = (u_int32_t *) ((u_char *)data + cnt);
    }

    /* do we need to do a max-sized burst? */
    if (len >= sc->bestburstlen) {
      count = len >> sc->bestburstshift;
      cnt = count << sc->bestburstshift;
      bcode = sc->bestburstcode;
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: tx%d: best_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, cnt, need, cur);
#endif
      len -= cnt;
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, count, chan, bcode, vtophys(data), l->mlen, end);
      if (end)
	goto done;
      data = (u_int32_t *) ((u_char *)data + cnt);
    }

    /* do we need to do a cleanup burst? */
    cnt = len & ~(sizeof(u_int32_t)-1);
    if (cnt) {
      count = cnt / sizeof(u_int32_t);
      bcode = en_dmaplan[count].bcode;
      count = cnt >> en_dmaplan[count].divshift;
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: tx%d: cleanup_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, cnt, need, cur);
#endif
      len -= cnt;
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, count, chan, bcode, vtophys(data), l->mlen, end);
      if (end)
	goto done;
      data = (u_int32_t *) ((u_char *)data + cnt);
    }

    /* any word fragments left? */
    if (len) {
      EN_COUNT(sc->tailbyte);
      if (len == 2) {
	count = 1;
	bcode = MIDDMA_2BYTE;		/* use 2byte mode */
      } else {
	count = len;
	bcode = MIDDMA_BYTE;		/* use 1 byte mode */
      }
      need -= len;
      EN_WRAPADD(start, stop, cur, len);
#ifdef EN_DEBUG
      printf("%s: tx%d: byte cleanup_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, len, need, cur);
#endif
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, count, chan, bcode, vtophys(data), l->mlen, end);
      if (end)
	goto done;
    }

    dma = cur;		/* update dma pointer */

#endif /* !MIDWAY_ADPONLY */

  } /* next mbuf, please */

  /*
   * all mbuf data has been copied out to the obmem (or set up to be DMAd).
   * if the trailer or padding needs to be put in, do it now.
   *
   * NOTE: experimental results reveal the following fact:
   *   if you DMA "X" bytes to the card, where X is not a multiple of 4,
   *   then the card will internally buffer the last (X % 4) bytes (in
   *   hopes of getting (4 - (X % 4)) more bytes to make a complete word).
   *   it is imporant to make sure we don't leave any important data in
   *   this internal buffer because it is discarded on the last (end) DTQ.
   *   one way to do this is to DMA in (4 - (X % 4)) more bytes to flush
   *   the darn thing out.
   */

  if (addtail) {

    pad = need % sizeof(u_int32_t);
    if (pad) {
      /*
       * FLUSH internal data buffer.  pad out with random data from the front
       * of the mbuf chain...
       */
      bcode = (sc->is_adaptec) ? 0 : MIDDMA_BYTE;
      EN_COUNT(sc->tailflush);
      EN_WRAPADD(start, stop, cur, pad);
      EN_DTQADD(sc, pad, chan, bcode, vtophys(l->t->m_data), 0, 0);
      need -= pad;
#ifdef EN_DEBUG
      printf("%s: tx%d: pad/FLUSH dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, pad, need, cur);
#endif
    }

    /* copy data */
    pad = need / sizeof(u_int32_t);	/* round *down* */
    if (l->aal == MID_TBD_AAL5)
      pad -= 2;				/* reserve room for the PDU trailer */
#ifdef EN_DEBUG
    printf("%s: tx%d: padding %d bytes (cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, (int)(pad * sizeof(u_int32_t)),
		cur);
#endif
    while (pad--) {
      EN_WRITEDAT(sc, cur, 0);	/* no byte order issues with zero */
      EN_WRAPADD(start, stop, cur, 4);
    }
    if (l->aal == MID_TBD_AAL5) {
      EN_WRITE(sc, cur, l->pdu1);	/* in host byte order */
      EN_WRAPADD(start, stop, cur, 8);
    }
  }

  if (addtail || dma != cur) {
    /* write final descritor */
    EN_DTQADD(sc, WORD_IDX(start,cur), chan, MIDDMA_JK, 0,
				l->mlen, MID_DMA_END);
    /* dma = cur; */	/* not necessary since we are done */
  }

done:
  /* update current pointer */
  sc->txslot[chan].cur = cur;
#ifdef EN_DEBUG
  printf("%s: tx%d: DONE! cur now = 0x%x\n", sc->sc_dev.dv_xname, chan, cur);
#endif

  return;
}

/*
 * interrupt handler
 */

EN_INTR_TYPE en_intr(arg)

void *arg;

{
  struct en_softc *sc = (struct en_softc *) arg;
  struct mbuf *m;
  struct atm_pseudohdr ah;
  struct ifnet *ifp;
  u_int32_t reg, kick, val, mask, chip, vci, slot, dtq, drq;
  int lcv, idx, need_softserv = 0;

  /* reading MID_INTACK acknowledges (clears) the pending interrupts */
  reg = EN_READ(sc, MID_INTACK);
  if ((reg & MID_INT_ANY) == 0)
    EN_INTR_RET(0);	/* not us */

#ifdef EN_DEBUG
  printf("%s: interrupt=0x%b\n", sc->sc_dev.dv_xname, reg, MID_INTBITS);
#endif

  /*
   * unexpected errors that need a reset
   */

  if ((reg & (MID_INT_IDENT|MID_INT_LERR|MID_INT_DMA_ERR|MID_INT_SUNI)) != 0) {
    printf("%s: unexpected interrupt=0x%b, resetting card\n",
	sc->sc_dev.dv_xname, reg, MID_INTBITS);
#ifdef EN_DEBUG
#ifdef DDB
#ifdef __FreeBSD__
    Debugger("en: unexpected error");
#else
    Debugger();
#endif
#endif	/* DDB */
    sc->enif.if_flags &= ~IFF_RUNNING; /* FREEZE!
*/
#else
    en_reset(sc);
    en_init(sc);
#endif
    EN_INTR_RET(1);	/* for us */
  }

  /*******************
   * xmit interrupts *
   ******************/

  kick = 0;				/* bitmask of channels to kick */
  if (reg & MID_INT_TX) {		/* TX done! */

    /*
     * check for tx complete, if detected then this means that some space
     * has come free on the card.  we must account for it and arrange to
     * kick the channel to life (in case it is stalled waiting on the card).
     */

    for (mask = 1, lcv = 0 ; lcv < EN_NTX ; lcv++, mask = mask * 2) {
      if (reg & MID_TXCHAN(lcv)) {
	kick = kick | mask;	/* want to kick later */
	val = EN_READ(sc, MIDX_READPTR(lcv));	/* current read pointer */
	val = (val * sizeof(u_int32_t)) + sc->txslot[lcv].start;
						/* convert to offset */
	if (val > sc->txslot[lcv].cur)
	  sc->txslot[lcv].bfree = val - sc->txslot[lcv].cur;
	else
	  sc->txslot[lcv].bfree = (val + (EN_TXSZ*1024)) - sc->txslot[lcv].cur;
#ifdef EN_DEBUG
	printf("%s: tx%d: trasmit done. %d bytes now free in buffer\n",
		sc->sc_dev.dv_xname, lcv, sc->txslot[lcv].bfree);
#endif
      }
    }
  }

  if (reg & MID_INT_DMA_TX) {		/* TX DMA done! */

    /*
     * check for TX DMA complete, if detected then this means that some DTQs
     * are now free.  it also means some indma mbufs can be freed.
     * if we needed DTQs, kick all channels.
     */

    val = EN_READ(sc, MID_DMA_RDTX);	/* chip's current location */
    idx = MID_DTQ_A2REG(sc->dtq_chip);	/* where we last saw chip */

    if (sc->need_dtqs) {
      kick = MID_NTX_CH - 1;	/* assume power of 2, kick all! */
      sc->need_dtqs = 0;	/* recalculated in "kick" loop below */
#ifdef EN_DEBUG
      printf("%s: cleared need DTQ condition\n", sc->sc_dev.dv_xname);
#endif
    }

    /* walk the DTQ ring from our last sync point to the chip's position */
    while (idx != val) {
      sc->dtq_free++;
      if ((dtq = sc->dtq[idx]) != 0) {
	sc->dtq[idx] = 0;	/* don't forget to zero it out when done */
	slot = EN_DQ_SLOT(dtq);
	_IF_DEQUEUE(&sc->txslot[slot].indma, m);
	if (!m) panic("enintr: dtqsync");
	sc->txslot[slot].mbsize -= EN_DQ_LEN(dtq);
#ifdef EN_DEBUG
	printf("%s: tx%d: free %d dma bytes, mbsize now %d\n",
		sc->sc_dev.dv_xname, slot, EN_DQ_LEN(dtq),
		sc->txslot[slot].mbsize);
#endif
	m_freem(m);
      }
      EN_WRAPADD(0, MID_DTQ_N, idx, 1);
    };
    sc->dtq_chip = MID_DTQ_REG2A(val);	/* sync softc */
  }

  /*
   * kick xmit channels as needed
   */

  if (kick) {
#ifdef EN_DEBUG
    printf("%s: tx kick mask = 0x%x\n", sc->sc_dev.dv_xname, kick);
#endif
    for (mask = 1, lcv = 0 ; lcv < EN_NTX ; lcv++, mask = mask * 2) {
      if ((kick & mask) && sc->txslot[lcv].q.ifq_head) {
	en_txdma(sc, lcv);	/* kick it! */
      }
    }		/* for each slot */
  }		/* if kick */

  /*******************
   * recv interrupts *
   ******************/

  /*
   * check for RX DMA complete, and pass the data "upstairs"
   */

  if (reg & MID_INT_DMA_RX) {
    val = EN_READ(sc, MID_DMA_RDRX);	/* chip's current location */
    idx = MID_DRQ_A2REG(sc->drq_chip);	/* where we last saw chip */

    while (idx != val) {
      sc->drq_free++;
      if ((drq = sc->drq[idx]) != 0) {
	sc->drq[idx] = 0;	/* don't forget to zero it out when done */
	slot = EN_DQ_SLOT(drq);
	if (EN_DQ_LEN(drq) == 0) {	/* "JK" trash DMA? */
	  m = NULL;
	} else {
	  _IF_DEQUEUE(&sc->rxslot[slot].indma, m);
	  if (!m) panic("enintr: drqsync: %s: lost mbuf in slot %d!",
			sc->sc_dev.dv_xname, slot);
	}
	/* do something with this mbuf */
	if (sc->rxslot[slot].oth_flags & ENOTHER_DRAIN) {  /* drain? */
	  if (m) m_freem(m);
	  vci = sc->rxslot[slot].atm_vci;
	  if (sc->rxslot[slot].indma.ifq_head == NULL &&
		sc->rxslot[slot].q.ifq_head == NULL &&
		(EN_READ(sc, MID_VC(vci)) & MIDV_INSERVICE) == 0 &&
		(sc->rxslot[slot].oth_flags & ENOTHER_SWSL) == 0) {
	    sc->rxslot[slot].oth_flags = ENOTHER_FREE;	/* done drain */
	    sc->rxslot[slot].atm_vci = RX_NONE;
	    sc->rxvc2slot[vci] = RX_NONE;
#ifdef EN_DEBUG
	    printf("%s: rx%d: VCI %d now free\n", sc->sc_dev.dv_xname,
			slot, vci);
#endif
	  }
	} else if (m != NULL) {
	  ATM_PH_FLAGS(&ah) = sc->rxslot[slot].atm_flags;
	  ATM_PH_VPI(&ah) = 0;
	  ATM_PH_SETVCI(&ah, sc->rxslot[slot].atm_vci);
#ifdef EN_DEBUG
	  printf("%s: rx%d: rxvci%d: atm_input, mbuf %p, len %d, hand %p\n",
		sc->sc_dev.dv_xname, slot, sc->rxslot[slot].atm_vci, m,
		EN_DQ_LEN(drq), sc->rxslot[slot].rxhand);
#endif
	  ifp = &sc->enif;
	  ifp->if_ipackets++;
#if NBPF > 0
	  if (ifp->if_bpf)
	    BPF_MTAP(ifp, m);
#endif
	  atm_input(ifp, &ah, m, sc->rxslot[slot].rxhand);
	}
      }
      EN_WRAPADD(0, MID_DRQ_N, idx, 1);
    };
    sc->drq_chip = MID_DRQ_REG2A(val);	/* sync softc */

    if (sc->need_drqs) {	/* true if we had a DRQ shortage */
      need_softserv = 1;
      sc->need_drqs = 0;
#ifdef EN_DEBUG
      printf("%s: cleared need DRQ condition\n", sc->sc_dev.dv_xname);
#endif
    }
  }

  /*
   * handle service interrupts
   */

  if (reg & MID_INT_SERVICE) {
    chip = MID_SL_REG2A(EN_READ(sc, MID_SERV_WRITE));

    while (sc->hwslistp != chip) {

      /* fetch and remove it from hardware service list */
      vci = EN_READ(sc, sc->hwslistp);
      EN_WRAPADD(MID_SLOFF, MID_SLEND, sc->hwslistp, 4);/* advance hw ptr */
      slot = sc->rxvc2slot[vci];
      if (slot == RX_NONE) {
#ifdef EN_DEBUG
	printf("%s: unexpected rx interrupt on VCI %d\n",
		sc->sc_dev.dv_xname, vci);
#endif
	EN_WRITE(sc, MID_VC(vci), MIDV_TRASH);	/* rx off, damn it!
*
 */

/*
 * Pulls slots off the software service list (swslist) and, for each one,
 * drains newly-arrived data from the card's receive buffer: allocates
 * mbufs, copies or DMAs the data in, strips RBD/cell headers as
 * appropriate, and hands finished chains to the RX DMA-complete path via
 * rxslot[slot].indma.  Runs until the swslist is empty or we run out of
 * mbufs/DRQs (in which case state is parked on rxslot[slot].q for later).
 */
STATIC void en_service(sc)

struct en_softc *sc;

{
  struct mbuf *m, *tmp;
  u_int32_t cur, dstart, rbd, pdu, *sav, dma, bcode, count, *data, *datastop;
  u_int32_t start, stop, cnt, needalign;
  int slot, raw, aal5, llc, vci, fill, mlen, tlen, drqneed, need, needfill,
      end;

  aal5 = 0;		/* Silence gcc (only read after !raw sets it) */

next_vci:
  if (sc->swsl_size == 0) {
#ifdef EN_DEBUG
    printf("%s: en_service done\n", sc->sc_dev.dv_xname);
#endif
    return;	/* >>> exit here if swsl now empty <<< */
  }

  /*
   * get slot/vci to service
   */

  slot = sc->swslist[sc->swsl_head];
  vci = sc->rxslot[slot].atm_vci;
#ifdef EN_DIAG
  if (sc->rxvc2slot[vci] != slot) panic("en_service rx slot/vci sync");
#endif

  /*
   * determine our mode and if we've got any work to do
   */

  raw = sc->rxslot[slot].oth_flags & ENOTHER_RAW;
  start= sc->rxslot[slot].start;
  stop= sc->rxslot[slot].stop;
  cur = sc->rxslot[slot].cur;

#ifdef EN_DEBUG
  printf("%s: rx%d: service vci=%d raw=%d start/stop/cur=0x%x 0x%x 0x%x\n",
	sc->sc_dev.dv_xname, slot, vci, raw, start, stop, cur);
#endif

same_vci:
  dstart = MIDV_DSTART(EN_READ(sc, MID_DST_RP(vci)));
  dstart = (dstart * sizeof(u_int32_t)) + start;

  /* check to see if there is any data at all */
  if (dstart == cur) {
defer:		/* defer processing */
    EN_WRAPADD(0, MID_SL_N, sc->swsl_head, 1);
    sc->rxslot[slot].oth_flags &= ~ENOTHER_SWSL;
    sc->swsl_size--;
				/* >>> remove from swslist <<< */
#ifdef EN_DEBUG
    printf("%s: rx%d: remove vci %d from swslist\n",
	sc->sc_dev.dv_xname, slot, vci);
#endif
    goto next_vci;
  }

  /*
   * figure out how many bytes we need
   * [mlen = # bytes to go in mbufs, fill = # bytes to dump (MIDDMA_JK)]
   */

  if (raw) {

    /* raw mode (aka boodi mode) */
    fill = 0;
    if (dstart > cur)
      mlen = dstart - cur;
    else
      mlen = (dstart + (EN_RXSZ*1024)) - cur;

    if (mlen < sc->rxslot[slot].raw_threshold)
      goto defer;		/* too little data to deal with */

  } else {

    /* normal mode */
    aal5 = (sc->rxslot[slot].atm_flags & ATM_PH_AAL5);
    llc = (aal5 && (sc->rxslot[slot].atm_flags & ATM_PH_LLCSNAP)) ? 1 : 0;
    rbd = EN_READ(sc, cur);
    if (MID_RBD_ID(rbd) != MID_RBD_STDID)
      panic("en_service: id mismatch");

    if (rbd & MID_RBD_T) {
      mlen = 0;			/* we've got trash */
      fill = MID_RBD_SIZE;
      EN_COUNT(sc->ttrash);
#ifdef EN_DEBUG
      printf("RX overflow lost %d cells!\n", MID_RBD_CNT(rbd));
#endif
    } else if (!aal5) {
      mlen = MID_RBD_SIZE + MID_CHDR_SIZE + MID_ATMDATASZ; /* 1 cell (ick!) */
      fill = 0;
    } else {
      struct ifnet *ifp;

      tlen = (MID_RBD_CNT(rbd) * MID_ATMDATASZ) + MID_RBD_SIZE;
      pdu = cur + tlen - MID_PDU_SIZE;
      if (pdu >= stop)
	pdu -= (EN_RXSZ*1024);
      pdu = EN_READ(sc, pdu);	/* get PDU in correct byte order */
      fill = tlen - MID_RBD_SIZE - MID_PDU_LEN(pdu);
      if (fill < 0 || (rbd & MID_RBD_CRCERR) != 0) {
	static int first = 1;

	if (first) {
	  printf("%s: %s, dropping frame\n", sc->sc_dev.dv_xname,
		(rbd & MID_RBD_CRCERR) ?
		"CRC error" : "invalid AAL5 PDU length");
	  printf("%s: got %d cells (%d bytes), AAL5 len is %d bytes (pdu=0x%x)\n",
		sc->sc_dev.dv_xname, MID_RBD_CNT(rbd),
		tlen - MID_RBD_SIZE, MID_PDU_LEN(pdu), pdu);
#ifndef EN_DEBUG
	  printf("CRC error report disabled from now on!\n");
	  first = 0;
#endif
	}
	fill = tlen;
	ifp = &sc->enif;
	ifp->if_ierrors++;
      }
      mlen = tlen - fill;
    }

  }

  /*
   * now allocate mbufs for mlen bytes of data, if out of mbufs, trash all
   *
   * notes:
   *  1. it is possible that we've already allocated an mbuf for this pkt
   *     but ran out of DRQs, in which case we saved the allocated mbuf on
   *     "q".
   *  2. if we save an mbuf in "q" we store the "cur" (pointer) in the front
   *     of the mbuf as an identity (that we can check later), and we also
   *     store drqneed (so we don't have to recompute it).
   *  3. after this block of code, if m is still NULL then we ran out of mbufs
   */

  m = sc->rxslot[slot].q.ifq_head;
  drqneed = 1;
  if (m) {
    sav = mtod(m, u_int32_t *);
    if (sav[0] != cur) {
#ifdef EN_DEBUG
      printf("%s: rx%d: q'ed mbuf %p not ours\n",
		sc->sc_dev.dv_xname, slot, m);
#endif
      m = NULL;			/* wasn't ours */
      EN_COUNT(sc->rxqnotus);
    } else {
      EN_COUNT(sc->rxqus);
      _IF_DEQUEUE(&sc->rxslot[slot].q, m);
      drqneed = sav[1];
#ifdef EN_DEBUG
      printf("%s: rx%d: recovered q'ed mbuf %p (drqneed=%d)\n",
	sc->sc_dev.dv_xname, slot, m, drqneed);
#endif
    }
  }

  if (mlen != 0 && m == NULL) {
    m = en_mget(sc, mlen, &drqneed);		/* allocate! */
    if (m == NULL) {
      fill += mlen;
      mlen = 0;
      EN_COUNT(sc->rxmbufout);
#ifdef EN_DEBUG
      printf("%s: rx%d: out of mbufs\n", sc->sc_dev.dv_xname, slot);
#endif
    }
#ifdef EN_DEBUG
    printf("%s: rx%d: allocate mbuf %p, mlen=%d, drqneed=%d\n",
	sc->sc_dev.dv_xname, slot, m, mlen, drqneed);
#endif
  }

#ifdef EN_DEBUG
  printf("%s: rx%d: VCI %d, mbuf_chain %p, mlen %d, fill %d\n",
	sc->sc_dev.dv_xname, slot, vci, m, mlen, fill);
#endif

  /*
   * now check to see if we've got the DRQs needed.  if we are out of
   * DRQs we must quit (saving our mbuf, if we've got one).
   */

  needfill = (fill) ? 1 : 0;
  if (drqneed + needfill > sc->drq_free) {
    sc->need_drqs = 1;	/* flag condition */
    if (m == NULL) {
      EN_COUNT(sc->rxoutboth);
#ifdef EN_DEBUG
      printf("%s: rx%d: out of DRQs *and* mbufs!\n", sc->sc_dev.dv_xname, slot);
#endif
      return;	/* >>> exit here if out of both mbufs and DRQs <<< */
    }
    sav = mtod(m, u_int32_t *);
    sav[0] = cur;
    sav[1] = drqneed;
    _IF_ENQUEUE(&sc->rxslot[slot].q, m);
    EN_COUNT(sc->rxdrqout);
#ifdef EN_DEBUG
    printf("%s: rx%d: out of DRQs\n", sc->sc_dev.dv_xname, slot);
#endif
    return;	/* >>> exit here if out of DRQs <<< */
  }

  /*
   * at this point all resources have been allocated and we are commited
   * to servicing this slot.
   *
   * dma = last location we told chip about
   * cur = current location
   * mlen = space in the mbuf we want
   * need = bytes to xfer in (decrs to zero)
   * fill = how much fill we need
   * tlen = how much data to transfer to this mbuf
   * cnt/bcode/count =
   *
   * 'needfill' not used after this point
   */

  dma = cur;		/* dma = last location we told chip about */
  need = roundup(mlen, sizeof(u_int32_t));
  fill = fill - (need - mlen);	/* note: may invalidate 'needfill' */

  for (tmp = m ; tmp != NULL && need > 0 ; tmp = tmp->m_next) {
    tlen = roundup(tmp->m_len, sizeof(u_int32_t)); /* m_len set by en_mget */
    data = mtod(tmp, u_int32_t *);

#ifdef EN_DEBUG
    printf("%s: rx%d: load mbuf %p, m_len=%d, m_data=%p, tlen=%d\n",
	sc->sc_dev.dv_xname, slot, tmp, tmp->m_len, tmp->m_data, tlen);
#endif

    /* copy data */
    if (EN_NORXDMA || !en_dma || tlen < EN_MINDMA) {
      datastop = (u_int32_t *)((u_char *) data + tlen);
      /* copy loop: preserve byte order!!!  use READDAT */
      while (data != datastop) {
	*data = EN_READDAT(sc, cur);
	data++;
	EN_WRAPADD(start, stop, cur, 4);
      }
      need -= tlen;
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: copied %d bytes (%d left)\n",
		sc->sc_dev.dv_xname, slot, vci, tlen, need);
#endif
      continue;
    }

    /* DMA data (check to see if we need to sync DRQ first) */
    if (dma != cur) {
      EN_DRQADD(sc, WORD_IDX(start,cur), vci, MIDDMA_JK, 0, 0, 0, 0);
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: drq_sync: advance pointer to %d\n",
		sc->sc_dev.dv_xname, slot, vci, cur);
#endif
    }

#if !defined(MIDWAY_ENIONLY)

    /*
     * the adaptec DMA engine is smart and handles everything for us.
     */

    if (sc->is_adaptec) {
      need -= tlen;
      EN_WRAPADD(start, stop, cur, tlen);
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: adp_dma %d bytes (%d left)\n",
		sc->sc_dev.dv_xname, slot, vci, tlen, need);
#endif
      end = (need == 0 && !fill) ? MID_DMA_END : 0;
      EN_DRQADD(sc, tlen, vci, 0, vtophys(data), mlen, slot, end);
      if (end)
	goto done;
      dma = cur;	/* update dma pointer */
      continue;
    }
#endif /* !MIDWAY_ENIONLY */

#if !defined(MIDWAY_ADPONLY)

    /*
     * the ENI DMA engine is not so smart and need more help from us
     */

    /* do we need to do a DMA op to align? */
    if (sc->alburst &&
      (needalign = (((uintptr_t) (void *) data) & sc->bestburstmask)) != 0) {
      cnt = sc->bestburstlen - needalign;
      if (cnt > tlen) {
	cnt = tlen;
	count = cnt / sizeof(u_int32_t);
	bcode = MIDDMA_WORD;
      } else {
	count = cnt / sizeof(u_int32_t);
	bcode = en_dmaplan[count].bcode;
	count = cnt >> en_dmaplan[count].divshift;
      }
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: al_dma %d bytes (%d left)\n",
		sc->sc_dev.dv_xname, slot, vci, cnt, need);
#endif
      tlen -= cnt;
      end = (need == 0 && !fill) ? MID_DMA_END : 0;
      EN_DRQADD(sc, count, vci, bcode, vtophys(data), mlen, slot, end);
      if (end)
	goto done;
      data = (u_int32_t *)((u_char *) data + cnt);
    }

    /* do we need a max-sized burst? */
    if (tlen >= sc->bestburstlen) {
      count = tlen >> sc->bestburstshift;
      cnt = count << sc->bestburstshift;
      bcode = sc->bestburstcode;
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: best_dma %d bytes (%d left)\n",
		sc->sc_dev.dv_xname, slot, vci, cnt, need);
#endif
      tlen -= cnt;
      end = (need == 0 && !fill) ? MID_DMA_END : 0;
      EN_DRQADD(sc, count, vci, bcode, vtophys(data), mlen, slot, end);
      if (end)
	goto done;
      data = (u_int32_t *)((u_char *) data + cnt);
    }

    /* do we need to do a cleanup burst? */
    if (tlen) {
      count = tlen / sizeof(u_int32_t);
      bcode = en_dmaplan[count].bcode;
      count = tlen >> en_dmaplan[count].divshift;
      need -= tlen;
      EN_WRAPADD(start, stop, cur, tlen);
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: cleanup_dma %d bytes (%d left)\n",
		sc->sc_dev.dv_xname, slot, vci, tlen, need);
#endif
      end = (need == 0 && !fill) ? MID_DMA_END : 0;
      EN_DRQADD(sc, count, vci, bcode, vtophys(data), mlen, slot, end);
      if (end)
	goto done;
    }

    dma = cur;		/* update dma pointer */

#endif /* !MIDWAY_ADPONLY */

  }

  /* skip the end */
  if (fill || dma != cur) {
#ifdef EN_DEBUG
    if (fill)
      printf("%s: rx%d: vci%d: skipping %d bytes of fill\n",
		sc->sc_dev.dv_xname, slot, vci, fill);
    else
      printf("%s: rx%d: vci%d: syncing chip from 0x%x to 0x%x [cur]\n",
		sc->sc_dev.dv_xname, slot, vci, dma, cur);
#endif
    EN_WRAPADD(start, stop, cur, fill);
    EN_DRQADD(sc, WORD_IDX(start,cur), vci, MIDDMA_JK, 0, mlen,
					slot, MID_DMA_END);
    /* dma = cur; */	/* not necessary since we are done */
  }

  /*
   * done, remove stuff we don't want to pass up:
   *   raw mode (boodi mode): pass everything up for later processing
   *   aal5: remove RBD
   *   aal0: remove RBD + cell header
   */

done:
  if (m) {
    if (!raw) {
      cnt = MID_RBD_SIZE;
      if (!aal5) cnt += MID_CHDR_SIZE;
      m->m_len -= cnt;				/* chop! */
      m->m_pkthdr.len -= cnt;
      m->m_data += cnt;
    }
    _IF_ENQUEUE(&sc->rxslot[slot].indma, m);
  }
  sc->rxslot[slot].cur = cur;		/* update master copy of 'cur' */

#ifdef EN_DEBUG
  printf("%s: rx%d: vci%d: DONE! cur now =0x%x\n",
	sc->sc_dev.dv_xname, slot, vci, cur);
#endif

  goto same_vci;	/* get next packet in this slot */
}

#ifdef EN_DDBHOOK
/*
 * functions we can call from ddb
 */

/*
 * en_dump: dump the state
 */

#define END_SWSL	0x00000040		/* swsl state */
#define END_DRQ		0x00000020		/* drq state */
#define END_DTQ		0x00000010		/* dtq state */
#define END_RX		0x00000008		/* rx state */
#define END_TX		0x00000004		/* tx state */
#define END_MREGS	0x00000002		/* registers */
#define END_STATS	0x00000001		/* dump stats */

#define END_BITS "\20\7SWSL\6DRQ\5DTQ\4RX\3TX\2MREGS\1STATS"

/* Do not staticize - meant for calling from DDB!
*/

/*
 * Dump driver/card state to the console (DDB helper, not static on purpose).
 *
 * unit  = unit number to dump, or -1 for all units
 * level = bitmask of END_* flags selecting which sections to print
 *
 * Always returns 0 so it can be invoked conveniently from ddb.
 */
int en_dump(unit, level)

int unit, level;

{
  struct en_softc *sc;
  int lcv, cnt, slot;
  u_int32_t ptr, reg;

#ifdef __FreeBSD__
  devclass_t dc;
  int maxunit;

  dc = devclass_find("en");
  if (dc == NULL) {
    printf("en_dump: can't find devclass!\n");
    return 0;
  }
  maxunit = devclass_get_maxunit(dc);
  for (lcv = 0 ; lcv < maxunit ; lcv++) {
    sc = devclass_get_softc(dc, lcv);
#else
  for (lcv = 0 ; lcv < en_cd.cd_ndevs ; lcv++) {
    sc = (struct en_softc *) en_cd.cd_devs[lcv];
#endif
    if (sc == NULL) continue;
    if (unit != -1 && unit != lcv)
      continue;

    printf("dumping device %s at level 0x%b\n", sc->sc_dev.dv_xname, level,
			END_BITS);

    if (sc->dtq_us == 0) {
      /*
       * NOTE(review): this message text appears to have been lost in
       * extraction (likely originally said the hardware is not attached /
       * initialized) -- confirm against repository history before editing.
       */
      printf("\n");
      continue;
    }

    if (level & END_STATS) {
      /* NOTE(review): "trasmit"/"unusal" typos below are in the original
       * log strings and are preserved byte-for-byte. */
      printf(" en_stats:\n");
      printf(" %d mfix (%d failed); %d/%d head/tail byte DMAs, %d flushes\n",
	   sc->mfix, sc->mfixfail, sc->headbyte, sc->tailbyte, sc->tailflush);
      printf(" %d rx dma overflow interrupts\n", sc->dmaovr);
      printf(" %d times we ran out of TX space and stalled\n",
							sc->txoutspace);
      printf(" %d times we ran out of DTQs\n", sc->txdtqout);
      printf(" %d times we launched a packet\n", sc->launch);
      printf(" %d times we launched without on-board header\n", sc->lheader);
      printf(" %d times we launched without on-board tail\n", sc->ltail);
      printf(" %d times we pulled the hw service list\n", sc->hwpull);
      printf(" %d times we pushed a vci on the sw service list\n", sc->swadd);
      printf(" %d times RX pulled an mbuf from Q that wasn't ours\n",
							 sc->rxqnotus);
      printf(" %d times RX pulled a good mbuf from Q\n", sc->rxqus);
      printf(" %d times we ran out of mbufs *and* DRQs\n", sc->rxoutboth);
      printf(" %d times we ran out of DRQs\n", sc->rxdrqout);
      printf(" %d trasmit packets dropped due to mbsize\n", sc->txmbovr);
      printf(" %d cells trashed due to turned off rxvc\n", sc->vtrash);
      printf(" %d cells trashed due to totally full buffer\n", sc->otrash);
      printf(" %d cells trashed due almost full buffer\n", sc->ttrash);
      printf(" %d rx mbuf allocation failures\n", sc->rxmbufout);
#ifdef NATM
      printf(" %d drops at natmintrq\n", natmintrq.ifq_drops);
#ifdef NATM_STAT
      printf(" natmintr so_rcv: ok/drop cnt: %d/%d, ok/drop bytes: %d/%d\n",
	natm_sookcnt, natm_sodropcnt, natm_sookbytes, natm_sodropbytes);
#endif
#endif
    }

    if (level & END_MREGS) {
      printf("mregs:\n");
      printf("resid = 0x%x\n", EN_READ(sc, MID_RESID));
      printf("interrupt status = 0x%b\n",
				(int)EN_READ(sc, MID_INTSTAT), MID_INTBITS);
      printf("interrupt enable = 0x%b\n",
				(int)EN_READ(sc, MID_INTENA), MID_INTBITS);
      printf("mcsr = 0x%b\n", (int)EN_READ(sc, MID_MAST_CSR), MID_MCSRBITS);
      printf("serv_write = [chip=%u] [us=%u]\n", EN_READ(sc, MID_SERV_WRITE),
			MID_SL_A2REG(sc->hwslistp));
      printf("dma addr = 0x%x\n", EN_READ(sc, MID_DMA_ADDR));
      printf("DRQ: chip[rd=0x%x,wr=0x%x], sc[chip=0x%x,us=0x%x]\n",
	MID_DRQ_REG2A(EN_READ(sc, MID_DMA_RDRX)),
	MID_DRQ_REG2A(EN_READ(sc, MID_DMA_WRRX)), sc->drq_chip, sc->drq_us);
      printf("DTQ: chip[rd=0x%x,wr=0x%x], sc[chip=0x%x,us=0x%x]\n",
	MID_DTQ_REG2A(EN_READ(sc, MID_DMA_RDTX)),
	MID_DTQ_REG2A(EN_READ(sc, MID_DMA_WRTX)), sc->dtq_chip, sc->dtq_us);

      printf(" unusal txspeeds: ");
      for (cnt = 0 ; cnt < MID_N_VC ; cnt++)
	if (sc->txspeed[cnt])
	  printf(" vci%d=0x%x", cnt, sc->txspeed[cnt]);
      printf("\n");

      printf(" rxvc slot mappings: ");
      for (cnt = 0 ; cnt < MID_N_VC ; cnt++)
	if (sc->rxvc2slot[cnt] != RX_NONE)
	  printf(" %d->%d", cnt, sc->rxvc2slot[cnt]);
      printf("\n");
    }

    if (level & END_TX) {
      printf("tx:\n");
      for (slot = 0 ; slot < EN_NTX; slot++) {
	printf("tx%d: start/stop/cur=0x%x/0x%x/0x%x [%d] ", slot,
	  sc->txslot[slot].start, sc->txslot[slot].stop, sc->txslot[slot].cur,
		(sc->txslot[slot].cur - sc->txslot[slot].start)/4);
	printf("mbsize=%d, bfree=%d\n", sc->txslot[slot].mbsize,
		sc->txslot[slot].bfree);
	printf("txhw: base_address=0x%x, size=%u, read=%u, descstart=%u\n",
	  (u_int)MIDX_BASE(EN_READ(sc, MIDX_PLACE(slot))),
	  MIDX_SZ(EN_READ(sc, MIDX_PLACE(slot))),
	  EN_READ(sc, MIDX_READPTR(slot)), EN_READ(sc, MIDX_DESCSTART(slot)));
      }
    }

    if (level & END_RX) {
      printf(" recv slots:\n");
      for (slot = 0 ; slot < sc->en_nrx; slot++) {
	printf("rx%d: vci=%d: start/stop/cur=0x%x/0x%x/0x%x ", slot,
	  sc->rxslot[slot].atm_vci, sc->rxslot[slot].start,
	  sc->rxslot[slot].stop, sc->rxslot[slot].cur);
	printf("mode=0x%x, atm_flags=0x%x, oth_flags=0x%x\n",
	  sc->rxslot[slot].mode, sc->rxslot[slot].atm_flags,
	  sc->rxslot[slot].oth_flags);
	printf("RXHW: mode=0x%x, DST_RP=0x%x, WP_ST_CNT=0x%x\n",
	  EN_READ(sc, MID_VC(sc->rxslot[slot].atm_vci)),
	  EN_READ(sc, MID_DST_RP(sc->rxslot[slot].atm_vci)),
	  EN_READ(sc, MID_WP_ST_CNT(sc->rxslot[slot].atm_vci)));
      }
    }

    if (level & END_DTQ) {
      printf(" dtq [need_dtqs=%d,dtq_free=%d]:\n",
					sc->need_dtqs, sc->dtq_free);
      ptr = sc->dtq_chip;
      while (ptr != sc->dtq_us) {
	reg = EN_READ(sc, ptr);
	printf("\t0x%x=[cnt=%d, chan=%d, end=%d, type=%d @ 0x%x]\n",
	    sc->dtq[MID_DTQ_A2REG(ptr)], MID_DMA_CNT(reg),
	    MID_DMA_TXCHAN(reg), (reg & MID_DMA_END) != 0,
	    MID_DMA_TYPE(reg), EN_READ(sc, ptr+4));
	EN_WRAPADD(MID_DTQOFF, MID_DTQEND, ptr, 8);
      }
    }

    if (level & END_DRQ) {
      printf(" drq [need_drqs=%d,drq_free=%d]:\n",
					sc->need_drqs, sc->drq_free);
      ptr = sc->drq_chip;
      while (ptr != sc->drq_us) {
	reg = EN_READ(sc, ptr);
	printf("\t0x%x=[cnt=%d, chan=%d, end=%d, type=%d @ 0x%x]\n",
	  sc->drq[MID_DRQ_A2REG(ptr)], MID_DMA_CNT(reg),
	  MID_DMA_RXVCI(reg), (reg & MID_DMA_END) != 0,
	  MID_DMA_TYPE(reg), EN_READ(sc, ptr+4));
	EN_WRAPADD(MID_DRQOFF, MID_DRQEND, ptr, 8);
      }
    }

    if (level & END_SWSL) {
      printf(" swslist [size=%d]: ", sc->swsl_size);
      for (cnt = sc->swsl_head ; cnt != sc->swsl_tail ;
			cnt = (cnt + 1) % MID_SL_N)
	printf("0x%x ", sc->swslist[cnt]);
      printf("\n");
    }
  }
  return(0);
}

/*
 * en_dumpmem: dump the memory
 */

/* Do not staticize - meant for calling from DDB!
*/ int en_dumpmem(unit, addr, len) int unit, addr, len; { struct en_softc *sc; u_int32_t reg; #ifdef __FreeBSD__ devclass_t dc; dc = devclass_find("en"); if (dc == NULL) { printf("en_dumpmem: can't find devclass!\n"); return 0; } sc = devclass_get_softc(dc, unit); #else if (unit < 0 || unit > en_cd.cd_ndevs || (sc = (struct en_softc *) en_cd.cd_devs[unit]) == NULL) { printf("invalid unit number: %d\n", unit); return(0); } #endif addr = addr & ~3; if (addr < MID_RAMOFF || addr + len*4 > MID_MAXOFF || len <= 0) { printf("invalid addr/len number: %d, %d\n", addr, len); return(0); } printf("dumping %d words starting at offset 0x%x\n", len, addr); while (len--) { reg = EN_READ(sc, addr); printf("mem[0x%x] = 0x%x\n", addr, reg); addr += 4; } return(0); } #endif Index: head/sys/dev/hifn/hifn7751.c =================================================================== --- head/sys/dev/hifn/hifn7751.c (revision 108465) +++ head/sys/dev/hifn/hifn7751.c (revision 108466) @@ -1,2637 +1,2638 @@ /* $FreeBSD$ */ /* $OpenBSD: hifn7751.c,v 1.120 2002/05/17 00:33:34 deraadt Exp $ */ /* * Invertex AEON / Hifn 7751 driver * Copyright (c) 1999 Invertex Inc. All rights reserved. * Copyright (c) 1999 Theo de Raadt * Copyright (c) 2000-2001 Network Security Technologies, Inc. * http://www.netsec.net * * This driver is based on a previous driver by Invertex, for which they * requested: Please send any comments, feedback, bug-fixes, or feature * requests to software@invertex.com. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. * */ #define HIFN_DEBUG /* * Driver for the Hifn 7751 encryption processor. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Prototypes and count for the pci_device structure */ static int hifn_probe(device_t); static int hifn_attach(device_t); static int hifn_detach(device_t); static int hifn_suspend(device_t); static int hifn_resume(device_t); static void hifn_shutdown(device_t); static device_method_t hifn_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hifn_probe), DEVMETHOD(device_attach, hifn_attach), DEVMETHOD(device_detach, hifn_detach), DEVMETHOD(device_suspend, hifn_suspend), DEVMETHOD(device_resume, hifn_resume), DEVMETHOD(device_shutdown, hifn_shutdown), /* bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), { 0, 0 } }; static driver_t hifn_driver = { "hifn", hifn_methods, sizeof (struct hifn_softc) }; static devclass_t hifn_devclass; DRIVER_MODULE(hifn, pci, hifn_driver, hifn_devclass, 0, 0); MODULE_DEPEND(hifn, crypto, 1, 1, 1); static void hifn_reset_board(struct hifn_softc *, int); static void hifn_reset_puc(struct hifn_softc *); static void hifn_puc_wait(struct hifn_softc *); static int hifn_enable_crypto(struct hifn_softc *); static void hifn_set_retry(struct hifn_softc *sc); static void hifn_init_dma(struct hifn_softc *); static void hifn_init_pci_registers(struct hifn_softc *); static int hifn_sramsize(struct hifn_softc *); static int hifn_dramsize(struct hifn_softc *); static int hifn_ramtype(struct hifn_softc *); static void hifn_sessions(struct hifn_softc *); static void hifn_intr(void *); static u_int hifn_write_command(struct hifn_command *, u_int8_t *); static u_int32_t hifn_next_signature(u_int32_t a, u_int cnt); static int hifn_newsession(void *, u_int32_t *, struct cryptoini *); static int hifn_freesession(void *, u_int64_t); static int hifn_process(void *, struct 
cryptop *, int); static void hifn_callback(struct hifn_softc *, struct hifn_command *, u_int8_t *); static int hifn_crypto(struct hifn_softc *, struct hifn_command *, struct cryptop *, int); static int hifn_readramaddr(struct hifn_softc *, int, u_int8_t *); static int hifn_writeramaddr(struct hifn_softc *, int, u_int8_t *); static int hifn_dmamap_load_src(struct hifn_softc *, struct hifn_command *); static int hifn_dmamap_load_dst(struct hifn_softc *, struct hifn_command *); static int hifn_init_pubrng(struct hifn_softc *); static void hifn_rng(void *); static void hifn_tick(void *); static void hifn_abort(struct hifn_softc *); static void hifn_alloc_slot(struct hifn_softc *, int *, int *, int *, int *); static void hifn_write_reg_0(struct hifn_softc *, bus_size_t, u_int32_t); static void hifn_write_reg_1(struct hifn_softc *, bus_size_t, u_int32_t); static __inline__ u_int32_t READ_REG_0(struct hifn_softc *sc, bus_size_t reg) { u_int32_t v = bus_space_read_4(sc->sc_st0, sc->sc_sh0, reg); sc->sc_bar0_lastreg = (bus_size_t) -1; return (v); } #define WRITE_REG_0(sc, reg, val) hifn_write_reg_0(sc, reg, val) static __inline__ u_int32_t READ_REG_1(struct hifn_softc *sc, bus_size_t reg) { u_int32_t v = bus_space_read_4(sc->sc_st1, sc->sc_sh1, reg); sc->sc_bar1_lastreg = (bus_size_t) -1; return (v); } #define WRITE_REG_1(sc, reg, val) hifn_write_reg_1(sc, reg, val) #ifdef HIFN_DEBUG static int hifn_debug = 0; SYSCTL_INT(_debug, OID_AUTO, hifn, CTLFLAG_RW, &hifn_debug, 0, "Hifn driver debugging printfs"); #endif static struct hifn_stats hifnstats; SYSCTL_STRUCT(_kern, OID_AUTO, hifn_stats, CTLFLAG_RD, &hifnstats, hifn_stats, "Hifn driver statistics"); static int hifn_maxbatch = 2; /* XXX tune based on part+sys speed */ SYSCTL_INT(_kern, OID_AUTO, hifn_maxbatch, CTLFLAG_RW, &hifn_maxbatch, 0, "Hifn driver: max ops to batch w/o interrupt"); /* * Probe for a supported device. The PCI vendor and device * IDs are used to detect devices we know how to handle. 
*/ static int hifn_probe(device_t dev) { if (pci_get_vendor(dev) == PCI_VENDOR_INVERTEX && pci_get_device(dev) == PCI_PRODUCT_INVERTEX_AEON) return (0); if (pci_get_vendor(dev) == PCI_VENDOR_HIFN && (pci_get_device(dev) == PCI_PRODUCT_HIFN_7751 || pci_get_device(dev) == PCI_PRODUCT_HIFN_7951 || pci_get_device(dev) == PCI_PRODUCT_HIFN_7811)) return (0); if (pci_get_vendor(dev) == PCI_VENDOR_NETSEC && pci_get_device(dev) == PCI_PRODUCT_NETSEC_7751) return (0); return (ENXIO); } static void hifn_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *paddr = (bus_addr_t*) arg; *paddr = segs->ds_addr; } static const char* hifn_partname(struct hifn_softc *sc) { /* XXX sprintf numbers when not decoded */ switch (pci_get_vendor(sc->sc_dev)) { case PCI_VENDOR_HIFN: switch (pci_get_device(sc->sc_dev)) { case PCI_PRODUCT_HIFN_6500: return "Hifn 6500"; case PCI_PRODUCT_HIFN_7751: return "Hifn 7751"; case PCI_PRODUCT_HIFN_7811: return "Hifn 7811"; case PCI_PRODUCT_HIFN_7951: return "Hifn 7951"; } return "Hifn unknown-part"; case PCI_VENDOR_INVERTEX: switch (pci_get_device(sc->sc_dev)) { case PCI_PRODUCT_INVERTEX_AEON: return "Invertex AEON"; } return "Invertex unknown-part"; case PCI_VENDOR_NETSEC: switch (pci_get_device(sc->sc_dev)) { case PCI_PRODUCT_NETSEC_7751: return "NetSec 7751"; } return "NetSec unknown-part"; } return "Unknown-vendor unknown-part"; } /* * Attach an interface that successfully probed. */ static int hifn_attach(device_t dev) { struct hifn_softc *sc = device_get_softc(dev); u_int32_t cmd; caddr_t kva; int rseg, rid; char rbase; u_int16_t ena, rev; KASSERT(sc != NULL, ("hifn_attach: null software carrier!")); bzero(sc, sizeof (*sc)); sc->sc_dev = dev; mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "crypto driver", MTX_DEF); /* XXX handle power management */ /* * The 7951 has a random number generator and * public key support; note this. 
*/ if (pci_get_vendor(dev) == PCI_VENDOR_HIFN && pci_get_device(dev) == PCI_PRODUCT_HIFN_7951) sc->sc_flags = HIFN_HAS_RNG | HIFN_HAS_PUBLIC; /* * The 7811 has a random number generator and * we also note it's identity 'cuz of some quirks. */ if (pci_get_vendor(dev) == PCI_VENDOR_HIFN && pci_get_device(dev) == PCI_PRODUCT_HIFN_7811) sc->sc_flags |= HIFN_IS_7811 | HIFN_HAS_RNG; /* * Configure support for memory-mapped access to * registers and for DMA operations. */ #define PCIM_ENA (PCIM_CMD_MEMEN|PCIM_CMD_BUSMASTEREN) cmd = pci_read_config(dev, PCIR_COMMAND, 4); cmd |= PCIM_ENA; pci_write_config(dev, PCIR_COMMAND, cmd, 4); cmd = pci_read_config(dev, PCIR_COMMAND, 4); if ((cmd & PCIM_ENA) != PCIM_ENA) { device_printf(dev, "failed to enable %s\n", (cmd & PCIM_ENA) == 0 ? "memory mapping & bus mastering" : (cmd & PCIM_CMD_MEMEN) == 0 ? "memory mapping" : "bus mastering"); goto fail_pci; } #undef PCIM_ENA /* * Setup PCI resources. Note that we record the bus * tag and handle for each register mapping, this is * used by the READ_REG_0, WRITE_REG_0, READ_REG_1, * and WRITE_REG_1 macros throughout the driver. */ rid = HIFN_BAR0; sc->sc_bar0res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, ~0, 1, RF_ACTIVE); if (sc->sc_bar0res == NULL) { device_printf(dev, "cannot map bar%d register space\n", 0); goto fail_pci; } sc->sc_st0 = rman_get_bustag(sc->sc_bar0res); sc->sc_sh0 = rman_get_bushandle(sc->sc_bar0res); sc->sc_bar0_lastreg = (bus_size_t) -1; rid = HIFN_BAR1; sc->sc_bar1res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, ~0, 1, RF_ACTIVE); if (sc->sc_bar1res == NULL) { device_printf(dev, "cannot map bar%d register space\n", 1); goto fail_io0; } sc->sc_st1 = rman_get_bustag(sc->sc_bar1res); sc->sc_sh1 = rman_get_bushandle(sc->sc_bar1res); sc->sc_bar1_lastreg = (bus_size_t) -1; hifn_set_retry(sc); /* * Setup the area where the Hifn DMA's descriptors * and associated data structures. 
*/ if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment,boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HIFN_MAX_DMALEN, /* maxsize */ MAX_SCATTER, /* nsegments */ HIFN_MAX_SEGLEN, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ &sc->sc_dmat)) { device_printf(dev, "cannot allocate DMA tag\n"); goto fail_io1; } if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &sc->sc_dmamap)) { device_printf(dev, "cannot create dma map\n"); bus_dma_tag_destroy(sc->sc_dmat); goto fail_io1; } if (bus_dmamem_alloc(sc->sc_dmat, (void**) &kva, BUS_DMA_NOWAIT, &sc->sc_dmamap)) { device_printf(dev, "cannot alloc dma buffer\n"); bus_dmamap_destroy(sc->sc_dmat, sc->sc_dmamap); bus_dma_tag_destroy(sc->sc_dmat); goto fail_io1; } if (bus_dmamap_load(sc->sc_dmat, sc->sc_dmamap, kva, sizeof (*sc->sc_dma), hifn_dmamap_cb, &sc->sc_dma_physaddr, BUS_DMA_NOWAIT)) { device_printf(dev, "cannot load dma map\n"); bus_dmamem_free(sc->sc_dmat, kva, sc->sc_dmamap); bus_dmamap_destroy(sc->sc_dmat, sc->sc_dmamap); bus_dma_tag_destroy(sc->sc_dmat); goto fail_io1; } sc->sc_dma = (struct hifn_dma *)kva; bzero(sc->sc_dma, sizeof(*sc->sc_dma)); KASSERT(sc->sc_st0 != NULL, ("hifn_attach: null bar0 tag!")); KASSERT(sc->sc_sh0 != NULL, ("hifn_attach: null bar0 handle!")); KASSERT(sc->sc_st1 != NULL, ("hifn_attach: null bar1 tag!")); KASSERT(sc->sc_sh1 != NULL, ("hifn_attach: null bar1 handle!")); /* * Reset the board and do the ``secret handshake'' * to enable the crypto support. Then complete the * initialization procedure by setting up the interrupt * and hooking in to the system crypto support so we'll * get used for system services like the crypto device, * IPsec, RNG device, etc. 
*/ hifn_reset_board(sc, 0); if (hifn_enable_crypto(sc) != 0) { device_printf(dev, "crypto enabling failed\n"); goto fail_mem; } hifn_reset_puc(sc); hifn_init_dma(sc); hifn_init_pci_registers(sc); if (hifn_ramtype(sc)) goto fail_mem; if (sc->sc_drammodel == 0) hifn_sramsize(sc); else hifn_dramsize(sc); /* * Workaround for NetSec 7751 rev A: half ram size because two * of the address lines were left floating */ if (pci_get_vendor(dev) == PCI_VENDOR_NETSEC && pci_get_device(dev) == PCI_PRODUCT_NETSEC_7751 && pci_get_revid(dev) == 0x61) /*XXX???*/ sc->sc_ramsize >>= 1; /* * Arrange the interrupt line. */ rid = 0; sc->sc_irq = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 1, RF_SHAREABLE|RF_ACTIVE); if (sc->sc_irq == NULL) { device_printf(dev, "could not map interrupt\n"); goto fail_mem; } /* * NB: Network code assumes we are blocked with splimp() * so make sure the IRQ is marked appropriately. */ if (bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET, hifn_intr, sc, &sc->sc_intrhand)) { device_printf(dev, "could not setup interrupt\n"); goto fail_intr2; } hifn_sessions(sc); /* * NB: Keep only the low 16 bits; this masks the chip id * from the 7951. */ rev = READ_REG_1(sc, HIFN_1_REVID) & 0xffff; rseg = sc->sc_ramsize / 1024; rbase = 'K'; if (sc->sc_ramsize >= (1024 * 1024)) { rbase = 'M'; rseg /= 1024; } device_printf(sc->sc_dev, "%s, rev %u, %d%cB %cram, %u sessions\n", hifn_partname(sc), rev, rseg, rbase, sc->sc_drammodel ? 
'd' : 's', sc->sc_maxses); sc->sc_cid = crypto_get_driverid(0); if (sc->sc_cid < 0) { device_printf(dev, "could not get crypto driver id\n"); goto fail_intr; } WRITE_REG_0(sc, HIFN_0_PUCNFG, READ_REG_0(sc, HIFN_0_PUCNFG) | HIFN_PUCNFG_CHIPID); ena = READ_REG_0(sc, HIFN_0_PUSTAT) & HIFN_PUSTAT_CHIPENA; switch (ena) { case HIFN_PUSTAT_ENA_2: crypto_register(sc->sc_cid, CRYPTO_3DES_CBC, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); crypto_register(sc->sc_cid, CRYPTO_ARC4, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); /*FALLTHROUGH*/ case HIFN_PUSTAT_ENA_1: crypto_register(sc->sc_cid, CRYPTO_MD5, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); crypto_register(sc->sc_cid, CRYPTO_SHA1, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); crypto_register(sc->sc_cid, CRYPTO_DES_CBC, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); break; } bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (sc->sc_flags & (HIFN_HAS_PUBLIC | HIFN_HAS_RNG)) hifn_init_pubrng(sc); /* NB: 1 means the callout runs w/o Giant locked */ callout_init(&sc->sc_tickto, 1); callout_reset(&sc->sc_tickto, hz, hifn_tick, sc); return (0); fail_intr: bus_teardown_intr(dev, sc->sc_irq, sc->sc_intrhand); fail_intr2: /* XXX don't store rid */ bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); fail_mem: bus_dmamap_unload(sc->sc_dmat, sc->sc_dmamap); bus_dmamem_free(sc->sc_dmat, sc->sc_dma, sc->sc_dmamap); bus_dmamap_destroy(sc->sc_dmat, sc->sc_dmamap); bus_dma_tag_destroy(sc->sc_dmat); /* Turn off DMA polling */ WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); fail_io1: bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR1, sc->sc_bar1res); fail_io0: 
bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR0, sc->sc_bar0res); fail_pci: mtx_destroy(&sc->sc_mtx); return (ENXIO); } /* * Detach an interface that successfully probed. */ static int hifn_detach(device_t dev) { struct hifn_softc *sc = device_get_softc(dev); KASSERT(sc != NULL, ("hifn_detach: null software carrier!")); HIFN_LOCK(sc); /*XXX other resources */ callout_stop(&sc->sc_tickto); callout_stop(&sc->sc_rngto); /* Turn off DMA polling */ WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); crypto_unregister_all(sc->sc_cid); bus_generic_detach(dev); /*XXX should be no children, right? */ bus_teardown_intr(dev, sc->sc_irq, sc->sc_intrhand); /* XXX don't store rid */ bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); bus_dmamap_unload(sc->sc_dmat, sc->sc_dmamap); bus_dmamem_free(sc->sc_dmat, sc->sc_dma, sc->sc_dmamap); bus_dmamap_destroy(sc->sc_dmat, sc->sc_dmamap); bus_dma_tag_destroy(sc->sc_dmat); bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR1, sc->sc_bar1res); bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR0, sc->sc_bar0res); HIFN_UNLOCK(sc); mtx_destroy(&sc->sc_mtx); return (0); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static void hifn_shutdown(device_t dev) { #ifdef notyet hifn_stop(device_get_softc(dev)); #endif } /* * Device suspend routine. Stop the interface and save some PCI * settings in case the BIOS doesn't restore them properly on * resume. 
*/
static int
hifn_suspend(device_t dev)
{
    struct hifn_softc *sc = device_get_softc(dev);
#ifdef notyet
    int i;

    hifn_stop(sc);
    /*
     * Save the BAR mappings, BIOS address, interrupt line, and
     * cache-line/latency settings so resume can restore them if
     * the BIOS does not.
     */
    for (i = 0; i < 5; i++)
        sc->saved_maps[i] = pci_read_config(dev, PCIR_MAPS + i * 4, 4);
    sc->saved_biosaddr = pci_read_config(dev, PCIR_BIOS, 4);
    sc->saved_intline = pci_read_config(dev, PCIR_INTLINE, 1);
    sc->saved_cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
    sc->saved_lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
#endif
    /* Only the suspended flag is maintained until the code above works. */
    sc->sc_suspended = 1;

    return (0);
}

/*
 * Device resume routine.  Restore some PCI settings in case the BIOS
 * doesn't, re-enable busmastering, and restart the interface if
 * appropriate.
 */
static int
hifn_resume(device_t dev)
{
    struct hifn_softc *sc = device_get_softc(dev);
#ifdef notyet
    int i;

    /* better way to do this? */
    for (i = 0; i < 5; i++)
        pci_write_config(dev, PCIR_MAPS + i * 4, sc->saved_maps[i], 4);
    pci_write_config(dev, PCIR_BIOS, sc->saved_biosaddr, 4);
    pci_write_config(dev, PCIR_INTLINE, sc->saved_intline, 1);
    pci_write_config(dev, PCIR_CACHELNSZ, sc->saved_cachelnsz, 1);
    pci_write_config(dev, PCIR_LATTIMER, sc->saved_lattimer, 1);

    /* reenable busmastering */
    pci_enable_busmaster(dev);
    pci_enable_io(dev, HIFN_RES);

    /* reinitialize interface if necessary */
    /*
     * NOTE(review): this "notyet" code appears copy-pasted from the
     * rl(4) driver -- neither "ifp" nor rl_init() is defined in this
     * driver; it would not compile if enabled.  Verify before enabling.
     */
    if (ifp->if_flags & IFF_UP)
        rl_init(sc);
#endif
    sc->sc_suspended = 0;

    return (0);
}

/*
 * Reset and enable the public-key unit and the RNG, as indicated by
 * sc_flags (HIFN_HAS_PUBLIC / HIFN_HAS_RNG), and start the periodic
 * RNG harvest callout.  Returns 0 on success, 1 if the public key
 * engine fails to come out of reset.
 */
static int
hifn_init_pubrng(struct hifn_softc *sc)
{
    u_int32_t r;
    int i;

    if ((sc->sc_flags & HIFN_IS_7811) == 0) {
        /* Reset 7951 public key/rng engine */
        WRITE_REG_1(sc, HIFN_1_PUB_RESET,
            READ_REG_1(sc, HIFN_1_PUB_RESET) | HIFN_PUBRST_RESET);

        /* Poll (up to 100ms) for the reset bit to self-clear. */
        for (i = 0; i < 100; i++) {
            DELAY(1000);
            if ((READ_REG_1(sc, HIFN_1_PUB_RESET) &
                HIFN_PUBRST_RESET) == 0)
                break;
        }

        if (i == 100) {
            device_printf(sc->sc_dev, "public key init failed\n");
            return (1);
        }
    }

    /* Enable the rng, if available */
    if (sc->sc_flags & HIFN_HAS_RNG) {
        if (sc->sc_flags & HIFN_IS_7811) {
            /*
             * 7811: if the RNG is already enabled, disable it
             * first so the new configuration takes effect.
             */
            r = READ_REG_1(sc, HIFN_1_7811_RNGENA);
            if (r & HIFN_7811_RNGENA_ENA) {
                r &= ~HIFN_7811_RNGENA_ENA;
                WRITE_REG_1(sc, HIFN_1_7811_RNGENA, r);
            }
            WRITE_REG_1(sc, HIFN_1_7811_RNGCFG,
                HIFN_7811_RNGCFG_DEFL);
            r |= HIFN_7811_RNGENA_ENA;
            WRITE_REG_1(sc, HIFN_1_7811_RNGENA, r);
        } else
            WRITE_REG_1(sc, HIFN_1_RNG_CONFIG,
                READ_REG_1(sc, HIFN_1_RNG_CONFIG) |
                HIFN_RNGCFG_ENA);

        /* First RNG read is discarded -- see hifn_rng(). */
        sc->sc_rngfirst = 1;
        /* Harvest at ~100Hz, or every tick if hz is slower. */
        if (hz >= 100)
            sc->sc_rnghz = hz / 100;
        else
            sc->sc_rnghz = 1;
        /* NB: 1 means the callout runs w/o Giant locked */
        callout_init(&sc->sc_rngto, 1);
        callout_reset(&sc->sc_rngto, sc->sc_rnghz, hifn_rng, sc);
    }

    /* Enable public key engine, if available */
    if (sc->sc_flags & HIFN_HAS_PUBLIC) {
        WRITE_REG_1(sc, HIFN_1_PUB_IEN, HIFN_PUBIEN_DONE);
        sc->sc_dmaier |= HIFN_DMAIER_PUBDONE;
        WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier);
    }

    return (0);
}

/*
 * Periodic callout: pull words from the hardware RNG and feed them to
 * random_harvest(), then reschedule itself at sc_rnghz.  The very first
 * read after enabling is discarded.  On a 7811 FIFO underflow the RNG
 * is abandoned (no reschedule).
 */
static void
hifn_rng(void *vsc)
{
#define RANDOM_BITS(n) (n)*sizeof (u_int32_t), (n)*sizeof (u_int32_t)*NBBY, 0
    struct hifn_softc *sc = vsc;
    u_int32_t sts, num[2];
    int i;

    if (sc->sc_flags & HIFN_IS_7811) {
        /* Drain up to five pairs of words per invocation. */
        for (i = 0; i < 5; i++) {
            sts = READ_REG_1(sc, HIFN_1_7811_RNGSTS);
            if (sts & HIFN_7811_RNGSTS_UFL) {
                device_printf(sc->sc_dev,
                    "RNG underflow: disabling\n");
                return;
            }
            if ((sts & HIFN_7811_RNGSTS_RDY) == 0)
                break;

            /*
             * There are at least two words in the RNG FIFO
             * at this point.
             */
            num[0] = READ_REG_1(sc, HIFN_1_7811_RNGDAT);
            num[1] = READ_REG_1(sc, HIFN_1_7811_RNGDAT);
            /* NB: discard first data read */
            if (sc->sc_rngfirst)
                sc->sc_rngfirst = 0;
            else
                random_harvest(num, RANDOM_BITS(2), RANDOM_PURE);
        }
    } else {
        num[0] = READ_REG_1(sc, HIFN_1_RNG_DATA);

        /* NB: discard first data read */
        if (sc->sc_rngfirst)
            sc->sc_rngfirst = 0;
        else
            random_harvest(num, RANDOM_BITS(1), RANDOM_PURE);
    }

    callout_reset(&sc->sc_rngto, sc->sc_rnghz, hifn_rng, sc);
#undef RANDOM_BITS
}

/*
 * Spin (up to 5000 x 1us) waiting for the processing unit's reset bit
 * to clear; complain if it never does.
 */
static void
hifn_puc_wait(struct hifn_softc *sc)
{
    int i;

    for (i = 5000; i > 0; i--) {
        DELAY(1);
        if (!(READ_REG_0(sc, HIFN_0_PUCTRL) & HIFN_PUCTRL_RESET))
            break;
    }
    if (!i)
        device_printf(sc->sc_dev, "proc unit did not reset\n");
}

/*
 * Reset the processing unit.
*/ static void hifn_reset_puc(struct hifn_softc *sc) { /* Reset processing unit */ WRITE_REG_0(sc, HIFN_0_PUCTRL, HIFN_PUCTRL_DMAENA); hifn_puc_wait(sc); } /* * Set the Retry and TRDY registers; note that we set them to * zero because the 7811 locks up when forced to retry (section * 3.6 of "Specification Update SU-0014-04". Not clear if we * should do this for all Hifn parts, but it doesn't seem to hurt. */ static void hifn_set_retry(struct hifn_softc *sc) { /* NB: RETRY only responds to 8-bit reads/writes */ pci_write_config(sc->sc_dev, HIFN_RETRY_TIMEOUT, 0, 1); pci_write_config(sc->sc_dev, HIFN_TRDY_TIMEOUT, 0, 4); } /* * Resets the board. Values in the regesters are left as is * from the reset (i.e. initial values are assigned elsewhere). */ static void hifn_reset_board(struct hifn_softc *sc, int full) { u_int32_t reg; /* * Set polling in the DMA configuration register to zero. 0x7 avoids * resetting the board and zeros out the other fields. */ WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); /* * Now that polling has been disabled, we have to wait 1 ms * before resetting the board. 
*/
    DELAY(1000);

    /* Reset the DMA unit */
    if (full) {
        WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MODE);
        DELAY(1000);
    } else {
        WRITE_REG_1(sc, HIFN_1_DMA_CNFG,
            HIFN_DMACNFG_MODE | HIFN_DMACNFG_MSTRESET);
        hifn_reset_puc(sc);
    }

    KASSERT(sc->sc_dma != NULL, ("hifn_reset_board: null DMA tag!"));
    bzero(sc->sc_dma, sizeof(*sc->sc_dma));

    /* Bring dma unit out of reset */
    WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET |
        HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE);

    hifn_puc_wait(sc);
    hifn_set_retry(sc);

    if (sc->sc_flags & HIFN_IS_7811) {
        /* Poll (up to 1000 x 1ms) for the 7811's CRAM init to finish. */
        for (reg = 0; reg < 1000; reg++) {
            if (READ_REG_1(sc, HIFN_1_7811_MIPSRST) &
                HIFN_MIPSRST_CRAMINIT)
                break;
            DELAY(1000);
        }
        if (reg == 1000)
            printf(": cram init timeout\n");
    }
}

/*
 * Advance the unlock-sequence generator by cnt steps.  Each step
 * computes the parity of the bits selected by mask 0x80080125 and
 * feeds it back into a left shift of 'a' (a shift-register-with-
 * feedback step).  Used by hifn_enable_crypto() to derive the values
 * written during the "secret handshake".
 */
static u_int32_t
hifn_next_signature(u_int32_t a, u_int cnt)
{
    int i;
    u_int32_t v;

    for (i = 0; i < cnt; i++) {

        /* get the parity */
        v = a & 0x80080125;
        v ^= v >> 16;
        v ^= v >> 8;
        v ^= v >> 4;
        v ^= v >> 2;
        v ^= v >> 1;

        a = (v & 1) ^ (a << 1);
    }

    return a;
}

/* Maps a PCI vendor/product pair to the 13-byte unlock key for that card. */
struct pci2id {
    u_short     pci_vendor;
    u_short     pci_prod;
    char        card_id[13];
};
static struct pci2id pci2id[] = {
    {
        PCI_VENDOR_HIFN,
        PCI_PRODUCT_HIFN_7951,
        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00 }
    }, {
        PCI_VENDOR_NETSEC,
        PCI_PRODUCT_NETSEC_7751,
        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00 }
    }, {
        PCI_VENDOR_INVERTEX,
        PCI_PRODUCT_INVERTEX_AEON,
        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00 }
    }, {
        PCI_VENDOR_HIFN,
        PCI_PRODUCT_HIFN_7811,
        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00 }
    }, {
        /*
         * Other vendors share this PCI ID as well, such as
         * http://www.powercrypt.com, and obviously they also
         * use the same key.
         */
        PCI_VENDOR_HIFN,
        PCI_PRODUCT_HIFN_7751,
        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00 }
    },
};

/*
 * Checks to see if crypto is already enabled.  If crypto isn't enabled,
 * "hifn_enable_crypto" is called to enable it.
The check is important, * as enabling crypto twice will lock the board. */ static int hifn_enable_crypto(struct hifn_softc *sc) { u_int32_t dmacfg, ramcfg, encl, addr, i; char *offtbl = NULL; for (i = 0; i < sizeof(pci2id)/sizeof(pci2id[0]); i++) { if (pci2id[i].pci_vendor == pci_get_vendor(sc->sc_dev) && pci2id[i].pci_prod == pci_get_device(sc->sc_dev)) { offtbl = pci2id[i].card_id; break; } } if (offtbl == NULL) { device_printf(sc->sc_dev, "Unknown card!\n"); return (1); } ramcfg = READ_REG_0(sc, HIFN_0_PUCNFG); dmacfg = READ_REG_1(sc, HIFN_1_DMA_CNFG); /* * The RAM config register's encrypt level bit needs to be set before * every read performed on the encryption level register. */ WRITE_REG_0(sc, HIFN_0_PUCNFG, ramcfg | HIFN_PUCNFG_CHIPID); encl = READ_REG_0(sc, HIFN_0_PUSTAT) & HIFN_PUSTAT_CHIPENA; /* * Make sure we don't re-unlock. Two unlocks kills chip until the * next reboot. */ if (encl == HIFN_PUSTAT_ENA_1 || encl == HIFN_PUSTAT_ENA_2) { #ifdef HIFN_DEBUG if (hifn_debug) device_printf(sc->sc_dev, "Strong crypto already enabled!\n"); #endif goto report; } if (encl != 0 && encl != HIFN_PUSTAT_ENA_0) { #ifdef HIFN_DEBUG if (hifn_debug) device_printf(sc->sc_dev, "Unknown encryption level 0x%x\n", encl); #endif return 1; } WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_UNLOCK | HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); DELAY(1000); addr = READ_REG_1(sc, HIFN_UNLOCK_SECRET1); DELAY(1000); WRITE_REG_1(sc, HIFN_UNLOCK_SECRET2, 0); DELAY(1000); for (i = 0; i <= 12; i++) { addr = hifn_next_signature(addr, offtbl[i] + 0x101); WRITE_REG_1(sc, HIFN_UNLOCK_SECRET2, addr); DELAY(1000); } WRITE_REG_0(sc, HIFN_0_PUCNFG, ramcfg | HIFN_PUCNFG_CHIPID); encl = READ_REG_0(sc, HIFN_0_PUSTAT) & HIFN_PUSTAT_CHIPENA; #ifdef HIFN_DEBUG if (hifn_debug) { if (encl != HIFN_PUSTAT_ENA_1 && encl != HIFN_PUSTAT_ENA_2) device_printf(sc->sc_dev, "Engine is permanently " "locked until next system reset!\n"); else device_printf(sc->sc_dev, "Engine enabled " 
"successfully!\n"); } #endif report: WRITE_REG_0(sc, HIFN_0_PUCNFG, ramcfg); WRITE_REG_1(sc, HIFN_1_DMA_CNFG, dmacfg); switch (encl) { case HIFN_PUSTAT_ENA_1: case HIFN_PUSTAT_ENA_2: break; case HIFN_PUSTAT_ENA_0: default: device_printf(sc->sc_dev, "disabled"); break; } return 0; } /* * Give initial values to the registers listed in the "Register Space" * section of the HIFN Software Development reference manual. */ static void hifn_init_pci_registers(struct hifn_softc *sc) { /* write fixed values needed by the Initialization registers */ WRITE_REG_0(sc, HIFN_0_PUCTRL, HIFN_PUCTRL_DMAENA); WRITE_REG_0(sc, HIFN_0_FIFOCNFG, HIFN_FIFOCNFG_THRESHOLD); WRITE_REG_0(sc, HIFN_0_PUIER, HIFN_PUIER_DSTOVER); /* write all 4 ring address registers */ WRITE_REG_1(sc, HIFN_1_DMA_CRAR, sc->sc_dma_physaddr + offsetof(struct hifn_dma, cmdr[0])); WRITE_REG_1(sc, HIFN_1_DMA_SRAR, sc->sc_dma_physaddr + offsetof(struct hifn_dma, srcr[0])); WRITE_REG_1(sc, HIFN_1_DMA_DRAR, sc->sc_dma_physaddr + offsetof(struct hifn_dma, dstr[0])); WRITE_REG_1(sc, HIFN_1_DMA_RRAR, sc->sc_dma_physaddr + offsetof(struct hifn_dma, resr[0])); DELAY(2000); /* write status register */ WRITE_REG_1(sc, HIFN_1_DMA_CSR, HIFN_DMACSR_D_CTRL_DIS | HIFN_DMACSR_R_CTRL_DIS | HIFN_DMACSR_S_CTRL_DIS | HIFN_DMACSR_C_CTRL_DIS | HIFN_DMACSR_D_ABORT | HIFN_DMACSR_D_DONE | HIFN_DMACSR_D_LAST | HIFN_DMACSR_D_WAIT | HIFN_DMACSR_D_OVER | HIFN_DMACSR_R_ABORT | HIFN_DMACSR_R_DONE | HIFN_DMACSR_R_LAST | HIFN_DMACSR_R_WAIT | HIFN_DMACSR_R_OVER | HIFN_DMACSR_S_ABORT | HIFN_DMACSR_S_DONE | HIFN_DMACSR_S_LAST | HIFN_DMACSR_S_WAIT | HIFN_DMACSR_C_ABORT | HIFN_DMACSR_C_DONE | HIFN_DMACSR_C_LAST | HIFN_DMACSR_C_WAIT | HIFN_DMACSR_ENGINE | ((sc->sc_flags & HIFN_HAS_PUBLIC) ? HIFN_DMACSR_PUBDONE : 0) | ((sc->sc_flags & HIFN_IS_7811) ? 
HIFN_DMACSR_ILLW | HIFN_DMACSR_ILLR : 0)); sc->sc_d_busy = sc->sc_r_busy = sc->sc_s_busy = sc->sc_c_busy = 0; sc->sc_dmaier |= HIFN_DMAIER_R_DONE | HIFN_DMAIER_C_ABORT | HIFN_DMAIER_D_OVER | HIFN_DMAIER_R_OVER | HIFN_DMAIER_S_ABORT | HIFN_DMAIER_D_ABORT | HIFN_DMAIER_R_ABORT | ((sc->sc_flags & HIFN_IS_7811) ? HIFN_DMAIER_ILLW | HIFN_DMAIER_ILLR : 0); sc->sc_dmaier &= ~HIFN_DMAIER_C_WAIT; WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier); WRITE_REG_0(sc, HIFN_0_PUCNFG, HIFN_PUCNFG_COMPSING | HIFN_PUCNFG_DRFR_128 | HIFN_PUCNFG_TCALLPHASES | HIFN_PUCNFG_TCDRVTOTEM | HIFN_PUCNFG_BUS32 | (sc->sc_drammodel ? HIFN_PUCNFG_DRAM : HIFN_PUCNFG_SRAM)); WRITE_REG_0(sc, HIFN_0_PUISR, HIFN_PUISR_DSTOVER); WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE | HIFN_DMACNFG_LAST | ((HIFN_POLL_FREQUENCY << 16 ) & HIFN_DMACNFG_POLLFREQ) | ((HIFN_POLL_SCALAR << 8) & HIFN_DMACNFG_POLLINVAL)); } /* * The maximum number of sessions supported by the card * is dependent on the amount of context ram, which * encryption algorithms are enabled, and how compression * is configured. This should be configured before this * routine is called. */ static void hifn_sessions(struct hifn_softc *sc) { u_int32_t pucnfg; int ctxsize; pucnfg = READ_REG_0(sc, HIFN_0_PUCNFG); if (pucnfg & HIFN_PUCNFG_COMPSING) { if (pucnfg & HIFN_PUCNFG_ENCCNFG) ctxsize = 128; else ctxsize = 512; sc->sc_maxses = 1 + ((sc->sc_ramsize - 32768) / ctxsize); } else sc->sc_maxses = sc->sc_ramsize / 16384; if (sc->sc_maxses > 2048) sc->sc_maxses = 2048; } /* * Determine ram type (sram or dram). Board should be just out of a reset * state when this is called. 
*/
static int
hifn_ramtype(struct hifn_softc *sc)
{
    u_int8_t data[8], dataexpect[8];
    int i;

    /*
     * Write-then-read a test pattern at offset 0: if the readback
     * does not match, the part has DRAM (sc_drammodel = 1), otherwise
     * SRAM is assumed.  Two complementary patterns (0x55, 0xaa) are
     * tried so stuck bits in either direction are detected.
     */
    for (i = 0; i < sizeof(data); i++)
        data[i] = dataexpect[i] = 0x55;
    if (hifn_writeramaddr(sc, 0, data))
        return (-1);
    if (hifn_readramaddr(sc, 0, data))
        return (-1);
    if (bcmp(data, dataexpect, sizeof(data)) != 0) {
        sc->sc_drammodel = 1;
        return (0);
    }

    for (i = 0; i < sizeof(data); i++)
        data[i] = dataexpect[i] = 0xaa;
    if (hifn_writeramaddr(sc, 0, data))
        return (-1);
    if (hifn_readramaddr(sc, 0, data))
        return (-1);
    if (bcmp(data, dataexpect, sizeof(data)) != 0) {
        sc->sc_drammodel = 1;
        return (0);
    }

    return (0);
}

#define HIFN_SRAM_MAX           (32 << 20)
#define HIFN_SRAM_STEP_SIZE     16384
#define HIFN_SRAM_GRANULARITY   (HIFN_SRAM_MAX / HIFN_SRAM_STEP_SIZE)

/*
 * Probe the size of the SRAM by writing each 16KB step's index into
 * the RAM (from the top down, so aliased addresses are overwritten by
 * lower ones), then reading back from the bottom up; sc_ramsize ends
 * at the last step that read back correctly.  Always returns 0.
 */
static int
hifn_sramsize(struct hifn_softc *sc)
{
    u_int32_t a;
    u_int8_t data[8];
    u_int8_t dataexpect[sizeof(data)];
    int32_t i;

    for (i = 0; i < sizeof(data); i++)
        data[i] = dataexpect[i] = i ^ 0x5a;

    /* Write high-to-low so aliases resolve to the lowest address. */
    for (i = HIFN_SRAM_GRANULARITY - 1; i >= 0; i--) {
        a = i * HIFN_SRAM_STEP_SIZE;
        /* NB: only sizeof(i) == 4 of the 8 pattern bytes get the index. */
        bcopy(&i, data, sizeof(i));
        hifn_writeramaddr(sc, a, data);
    }

    for (i = 0; i < HIFN_SRAM_GRANULARITY; i++) {
        a = i * HIFN_SRAM_STEP_SIZE;
        bcopy(&i, dataexpect, sizeof(i));
        if (hifn_readramaddr(sc, a, data) < 0)
            return (0);
        if (bcmp(data, dataexpect, sizeof(data)) != 0)
            return (0);
        sc->sc_ramsize = a + HIFN_SRAM_STEP_SIZE;
    }

    return (0);
}

/*
 * XXX For dram boards, one should really try all of the
 * HIFN_PUCNFG_DSZ_*'s.  This just assumes that PUCNFG
 * is already set up correctly.
 */
static int
hifn_dramsize(struct hifn_softc *sc)
{
	u_int32_t cnfg;

	/* Decode the DRAM size field of PUCNFG: 2^(field+18) bytes. */
	cnfg = READ_REG_0(sc, HIFN_0_PUCNFG) & HIFN_PUCNFG_DRAMMASK;
	sc->sc_ramsize = 1 << ((cnfg >> 13) + 18);
	return (0);
}

/*
 * Grab one slot from each of the four descriptor rings, wrapping via
 * the trailing JUMP descriptor when an index reaches the end of its
 * ring.  The chosen indices are returned through cmdp/srcp/dstp/resp
 * and the "k" (cleanup) indices are advanced to match.
 */
static void
hifn_alloc_slot(struct hifn_softc *sc, int *cmdp, int *srcp, int *dstp, int *resp)
{
	struct hifn_dma *dma = sc->sc_dma;

	if (dma->cmdi == HIFN_D_CMD_RSIZE) {
		dma->cmdi = 0;
		dma->cmdr[HIFN_D_CMD_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_CMDR_SYNC(sc, HIFN_D_CMD_RSIZE,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	}
	*cmdp = dma->cmdi++;
	dma->cmdk = dma->cmdi;

	if (dma->srci == HIFN_D_SRC_RSIZE) {
		dma->srci = 0;
		dma->srcr[HIFN_D_SRC_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_SRCR_SYNC(sc, HIFN_D_SRC_RSIZE,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	}
	*srcp = dma->srci++;
	dma->srck = dma->srci;

	if (dma->dsti == HIFN_D_DST_RSIZE) {
		dma->dsti = 0;
		dma->dstr[HIFN_D_DST_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_DSTR_SYNC(sc, HIFN_D_DST_RSIZE,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	}
	*dstp = dma->dsti++;
	dma->dstk = dma->dsti;

	if (dma->resi == HIFN_D_RES_RSIZE) {
		dma->resi = 0;
		dma->resr[HIFN_D_RES_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_RESR_SYNC(sc, HIFN_D_RES_RSIZE,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	}
	*resp = dma->resi++;
	dma->resk = dma->resi;
}

/*
 * Write 8 bytes (data) to external RAM at addr using a "write RAM"
 * (3 << 13) base command and the scratch test_src buffer in the DMA
 * descriptor area.  Returns 0 on success, -1 if the result descriptor
 * never completed.
 */
static int
hifn_writeramaddr(struct hifn_softc *sc, int addr, u_int8_t *data)
{
	struct hifn_dma *dma = sc->sc_dma;
	hifn_base_command_t wc;
	const u_int32_t masks = HIFN_D_VALID | HIFN_D_LAST | HIFN_D_MASKDONEIRQ;
	int r, cmdi, resi, srci, dsti;

	/* addr is split: high bits in session_num, low 14 in dest count. */
	wc.masks = htole16(3 << 13);
	wc.session_num = htole16(addr >> 14);
	wc.total_source_count = htole16(8);
	wc.total_dest_count = htole16(addr & 0x3fff);

	hifn_alloc_slot(sc, &cmdi, &srci, &dsti, &resi);

	WRITE_REG_1(sc, HIFN_1_DMA_CSR,
	    HIFN_DMACSR_C_CTRL_ENA | HIFN_DMACSR_S_CTRL_ENA |
	    HIFN_DMACSR_D_CTRL_ENA | HIFN_DMACSR_R_CTRL_ENA);

	/* build write command */
	bzero(dma->command_bufs[cmdi], HIFN_MAX_COMMAND);
	*(hifn_base_command_t *)dma->command_bufs[cmdi] = wc;
	bcopy(data, &dma->test_src, sizeof(dma->test_src));

	dma->srcr[srci].p = htole32(sc->sc_dma_physaddr
	    + offsetof(struct hifn_dma, test_src));
	dma->dstr[dsti].p = htole32(sc->sc_dma_physaddr
	    + offsetof(struct hifn_dma, test_dst));

	dma->cmdr[cmdi].l = htole32(16 | masks);
	dma->srcr[srci].l = htole32(8 | masks);
	dma->dstr[dsti].l = htole32(4 | masks);
	dma->resr[resi].l = htole32(4 | masks);

	bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/* Poll (up to 10000 x 10us) for the result descriptor to complete. */
	for (r = 10000; r >= 0; r--) {
		DELAY(10);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if ((dma->resr[resi].l & htole32(HIFN_D_VALID)) == 0)
			break;
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	}

	if (r == 0) {
		device_printf(sc->sc_dev, "writeramaddr -- "
		    "result[%d](addr %d) still valid\n", resi, addr);
		r = -1;
		/*
		 * NOTE(review): this early return skips the DMA-disable
		 * write below, unlike hifn_readramaddr() which falls
		 * through -- verify this asymmetry is intentional.
		 */
		return (-1);
	} else
		r = 0;

	WRITE_REG_1(sc, HIFN_1_DMA_CSR,
	    HIFN_DMACSR_C_CTRL_DIS | HIFN_DMACSR_S_CTRL_DIS |
	    HIFN_DMACSR_D_CTRL_DIS | HIFN_DMACSR_R_CTRL_DIS);

	return (r);
}

/*
 * Read 8 bytes from external RAM at addr into data, using a "read RAM"
 * (2 << 13) base command and the scratch test_src/test_dst buffers in
 * the DMA descriptor area.  Returns 0 on success, -1 if the result
 * descriptor never completed.
 */
static int
hifn_readramaddr(struct hifn_softc *sc, int addr, u_int8_t *data)
{
	struct hifn_dma *dma = sc->sc_dma;
	hifn_base_command_t rc;
	const u_int32_t masks = HIFN_D_VALID | HIFN_D_LAST | HIFN_D_MASKDONEIRQ;
	int r, cmdi, srci, dsti, resi;

	/* addr split: high bits in session_num, low 14 in source count. */
	rc.masks = htole16(2 << 13);
	rc.session_num = htole16(addr >> 14);
	rc.total_source_count = htole16(addr & 0x3fff);
	rc.total_dest_count = htole16(8);

	hifn_alloc_slot(sc, &cmdi, &srci, &dsti, &resi);

	WRITE_REG_1(sc, HIFN_1_DMA_CSR,
	    HIFN_DMACSR_C_CTRL_ENA | HIFN_DMACSR_S_CTRL_ENA |
	    HIFN_DMACSR_D_CTRL_ENA | HIFN_DMACSR_R_CTRL_ENA);

	bzero(dma->command_bufs[cmdi], HIFN_MAX_COMMAND);
	*(hifn_base_command_t *)dma->command_bufs[cmdi] = rc;

	dma->srcr[srci].p = htole32(sc->sc_dma_physaddr +
	    offsetof(struct hifn_dma, test_src));
	dma->test_src = 0;
	dma->dstr[dsti].p = htole32(sc->sc_dma_physaddr +
	    offsetof(struct hifn_dma, test_dst));
	dma->test_dst = 0;
	dma->cmdr[cmdi].l = htole32(8 | masks);
	dma->srcr[srci].l = htole32(8 | masks);
	dma->dstr[dsti].l = htole32(8 | masks);
	dma->resr[resi].l = htole32(HIFN_MAX_RESULT | masks);

	bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/* Poll (up to 10000 x 10us) for the result descriptor to complete. */
	for (r = 10000; r >= 0; r--) {
		DELAY(10);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if ((dma->resr[resi].l & htole32(HIFN_D_VALID)) == 0)
			break;
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	}

	if (r == 0) {
		device_printf(sc->sc_dev, "readramaddr -- "
		    "result[%d](addr %d) still valid\n", resi, addr);
		r = -1;
	} else {
		r = 0;
		bcopy(&dma->test_dst, data, sizeof(dma->test_dst));
	}

	WRITE_REG_1(sc, HIFN_1_DMA_CSR,
	    HIFN_DMACSR_C_CTRL_DIS | HIFN_DMACSR_S_CTRL_DIS |
	    HIFN_DMACSR_D_CTRL_DIS | HIFN_DMACSR_R_CTRL_DIS);

	return (r);
}

/*
 * Initialize the descriptor rings.
 */
static void
hifn_init_dma(struct hifn_softc *sc)
{
	struct hifn_dma *dma = sc->sc_dma;
	int i;

	hifn_set_retry(sc);

	/* initialize static pointer values */
	for (i = 0; i < HIFN_D_CMD_RSIZE; i++)
		dma->cmdr[i].p = htole32(sc->sc_dma_physaddr +
		    offsetof(struct hifn_dma, command_bufs[i][0]));
	for (i = 0; i < HIFN_D_RES_RSIZE; i++)
		dma->resr[i].p = htole32(sc->sc_dma_physaddr +
		    offsetof(struct hifn_dma, result_bufs[i][0]));

	/* Trailing JUMP descriptors point each ring back to its start. */
	dma->cmdr[HIFN_D_CMD_RSIZE].p =
	    htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, cmdr[0]));
	dma->srcr[HIFN_D_SRC_RSIZE].p =
	    htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, srcr[0]));
	dma->dstr[HIFN_D_DST_RSIZE].p =
	    htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, dstr[0]));
	dma->resr[HIFN_D_RES_RSIZE].p =
	    htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, resr[0]));

	dma->cmdu = dma->srcu = dma->dstu = dma->resu = 0;
	dma->cmdi = dma->srci = dma->dsti = dma->resi = 0;
	dma->cmdk = dma->srck = dma->dstk = dma->resk = 0;
}

/*
 * Writes out the raw command buffer space.
 * Returns the
 * command buffer size.
 */
static u_int
hifn_write_command(struct hifn_command *cmd, u_int8_t *buf)
{
#define	MIN(a,b)	((a)<(b)?(a):(b))
	u_int8_t *buf_pos;
	hifn_base_command_t *base_cmd;
	hifn_mac_command_t *mac_cmd;
	hifn_crypt_command_t *cry_cmd;
	int using_mac, using_crypt, len;
	u_int32_t dlen, slen;

	buf_pos = buf;
	using_mac = cmd->base_masks & HIFN_BASE_CMD_MAC;
	using_crypt = cmd->base_masks & HIFN_BASE_CMD_CRYPT;

	/* Base command: totals split into low 16 bits plus high nibbles. */
	base_cmd = (hifn_base_command_t *)buf_pos;
	base_cmd->masks = htole16(cmd->base_masks);
	slen = cmd->src_mapsize;
	if (cmd->sloplen)
		/* Slop tail is replaced by one u_int32_t staged in the ring. */
		dlen = cmd->dst_mapsize - cmd->sloplen + sizeof(u_int32_t);
	else
		dlen = cmd->dst_mapsize;
	base_cmd->total_source_count = htole16(slen & HIFN_BASE_CMD_LENMASK_LO);
	base_cmd->total_dest_count = htole16(dlen & HIFN_BASE_CMD_LENMASK_LO);
	dlen >>= 16;
	slen >>= 16;
	base_cmd->session_num = htole16(cmd->session_num |
	    ((slen << HIFN_BASE_CMD_SRCLEN_S) & HIFN_BASE_CMD_SRCLEN_M) |
	    ((dlen << HIFN_BASE_CMD_DSTLEN_S) & HIFN_BASE_CMD_DSTLEN_M));
	buf_pos += sizeof(hifn_base_command_t);

	if (using_mac) {
		mac_cmd = (hifn_mac_command_t *)buf_pos;
		dlen = cmd->maccrd->crd_len;
		mac_cmd->source_count = htole16(dlen & 0xffff);
		dlen >>= 16;
		mac_cmd->masks = htole16(cmd->mac_masks |
		    ((dlen << HIFN_MAC_CMD_SRCLEN_S) & HIFN_MAC_CMD_SRCLEN_M));
		mac_cmd->header_skip = htole16(cmd->maccrd->crd_skip);
		mac_cmd->reserved = 0;
		buf_pos += sizeof(hifn_mac_command_t);
	}

	if (using_crypt) {
		cry_cmd = (hifn_crypt_command_t *)buf_pos;
		dlen = cmd->enccrd->crd_len;
		cry_cmd->source_count = htole16(dlen & 0xffff);
		dlen >>= 16;
		cry_cmd->masks = htole16(cmd->cry_masks |
		    ((dlen << HIFN_CRYPT_CMD_SRCLEN_S) & HIFN_CRYPT_CMD_SRCLEN_M));
		cry_cmd->header_skip = htole16(cmd->enccrd->crd_skip);
		cry_cmd->reserved = 0;
		buf_pos += sizeof(hifn_crypt_command_t);
	}

	/* Optional new MAC key follows the command structures. */
	if (using_mac && cmd->mac_masks & HIFN_MAC_CMD_NEW_KEY) {
		bcopy(cmd->mac, buf_pos, HIFN_MAC_KEY_LENGTH);
		buf_pos += HIFN_MAC_KEY_LENGTH;
	}

	if (using_crypt && cmd->cry_masks & HIFN_CRYPT_CMD_NEW_KEY) {
		switch (cmd->cry_masks & HIFN_CRYPT_CMD_ALG_MASK) {
		case HIFN_CRYPT_CMD_ALG_3DES:
			bcopy(cmd->ck, buf_pos, HIFN_3DES_KEY_LENGTH);
			buf_pos += HIFN_3DES_KEY_LENGTH;
			break;
		case HIFN_CRYPT_CMD_ALG_DES:
			/*
			 * NOTE(review): copies HIFN_DES_KEY_LENGTH bytes
			 * but advances by cklen -- equivalent only when
			 * cklen == HIFN_DES_KEY_LENGTH; confirm.
			 */
			bcopy(cmd->ck, buf_pos, HIFN_DES_KEY_LENGTH);
			buf_pos += cmd->cklen;
			break;
		case HIFN_CRYPT_CMD_ALG_RC4:
			/* Replicate the key to fill the 256-byte RC4 area. */
			len = 256;
			do {
				int clen;

				clen = MIN(cmd->cklen, len);
				bcopy(cmd->ck, buf_pos, clen);
				len -= clen;
				buf_pos += clen;
			} while (len > 0);
			bzero(buf_pos, 4);
			buf_pos += 4;
			break;
		}
	}

	if (using_crypt && cmd->cry_masks & HIFN_CRYPT_CMD_NEW_IV) {
		bcopy(cmd->iv, buf_pos, HIFN_IV_LENGTH);
		buf_pos += HIFN_IV_LENGTH;
	}

	/* Pure pass-through commands still need 8 bytes of padding. */
	if ((cmd->base_masks & (HIFN_BASE_CMD_MAC|HIFN_BASE_CMD_CRYPT)) == 0) {
		bzero(buf_pos, 8);
		buf_pos += 8;
	}

	return (buf_pos - buf);
#undef MIN
}

/*
 * Check that all segments of a DMA operand are longword (4-byte)
 * aligned; all but the last segment must also have a length that is
 * a multiple of 4.  Returns 1 if aligned, 0 otherwise.
 */
static int
hifn_dmamap_aligned(struct hifn_operand *op)
{
	int i;

	for (i = 0; i < op->nsegs; i++) {
		if (op->segs[i].ds_addr & 3)
			return (0);
		if ((i != (op->nsegs - 1)) && (op->segs[i].ds_len & 3))
			return (0);
	}
	return (1);
}

/*
 * Load the destination operand's segments into the dst descriptor
 * ring.  When sloplen is non-zero the final (unaligned) bytes are
 * redirected into the per-command slop word in the DMA area.
 */
static int
hifn_dmamap_load_dst(struct hifn_softc *sc, struct hifn_command *cmd)
{
	struct hifn_dma *dma = sc->sc_dma;
	struct hifn_operand *dst = &cmd->dst;
	u_int32_t p, l;
	int idx, used = 0, i;

	idx = dma->dsti;
	for (i = 0; i < dst->nsegs - 1; i++) {
		dma->dstr[idx].p = htole32(dst->segs[i].ds_addr);
		dma->dstr[idx].l = htole32(HIFN_D_VALID |
		    HIFN_D_MASKDONEIRQ | dst->segs[i].ds_len);
		HIFN_DSTR_SYNC(sc, idx,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		used++;

		if (++idx == HIFN_D_DST_RSIZE) {
			dma->dstr[idx].l = htole32(HIFN_D_VALID |
			    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
			HIFN_DSTR_SYNC(sc, idx,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			idx = 0;
		}
	}

	if (cmd->sloplen == 0) {
		p = dst->segs[i].ds_addr;
		l = HIFN_D_VALID | HIFN_D_MASKDONEIRQ | HIFN_D_LAST |
		    dst->segs[i].ds_len;
	} else {
		p = sc->sc_dma_physaddr +
		    offsetof(struct hifn_dma, slop[cmd->slopidx]);
		l = HIFN_D_VALID | HIFN_D_MASKDONEIRQ | HIFN_D_LAST |
		    sizeof(u_int32_t);

		if ((dst->segs[i].ds_len - cmd->sloplen) != 0) {
			dma->dstr[idx].p =
			    htole32(dst->segs[i].ds_addr);
			dma->dstr[idx].l = htole32(HIFN_D_VALID |
			    HIFN_D_MASKDONEIRQ |
			    (dst->segs[i].ds_len - cmd->sloplen));
			HIFN_DSTR_SYNC(sc, idx,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			used++;

			if (++idx == HIFN_D_DST_RSIZE) {
				dma->dstr[idx].l = htole32(HIFN_D_VALID |
				    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
				HIFN_DSTR_SYNC(sc, idx,
				    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
				idx = 0;
			}
		}
	}

	/* Final (LAST-marked) destination descriptor. */
	dma->dstr[idx].p = htole32(p);
	dma->dstr[idx].l = htole32(l);
	HIFN_DSTR_SYNC(sc, idx, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	used++;

	if (++idx == HIFN_D_DST_RSIZE) {
		dma->dstr[idx].l = htole32(HIFN_D_VALID | HIFN_D_JUMP |
		    HIFN_D_MASKDONEIRQ);
		HIFN_DSTR_SYNC(sc, idx,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		idx = 0;
	}

	dma->dsti = idx;
	dma->dstu += used;
	return (idx);
}

/*
 * Load the source operand's segments into the src descriptor ring;
 * the last segment is marked HIFN_D_LAST.  Returns the next free
 * source ring index.
 */
static int
hifn_dmamap_load_src(struct hifn_softc *sc, struct hifn_command *cmd)
{
	struct hifn_dma *dma = sc->sc_dma;
	struct hifn_operand *src = &cmd->src;
	int idx, i;
	u_int32_t last = 0;

	idx = dma->srci;
	for (i = 0; i < src->nsegs; i++) {
		if (i == src->nsegs - 1)
			last = HIFN_D_LAST;

		dma->srcr[idx].p = htole32(src->segs[i].ds_addr);
		dma->srcr[idx].l = htole32(src->segs[i].ds_len |
		    HIFN_D_VALID | HIFN_D_MASKDONEIRQ | last);
		HIFN_SRCR_SYNC(sc, idx,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);

		if (++idx == HIFN_D_SRC_RSIZE) {
			dma->srcr[idx].l = htole32(HIFN_D_VALID |
			    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
			HIFN_SRCR_SYNC(sc, HIFN_D_SRC_RSIZE,
			    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
			idx = 0;
		}
	}
	dma->srci = idx;
	dma->srcu += src->nsegs;
	return (idx);
}

/*
 * bus_dmamap_load callback: record the segment list and total mapped
 * size into the hifn_operand.
 */
static void
hifn_op_cb(void* arg, bus_dma_segment_t *seg, int nsegs, bus_size_t mapsize, int error)
{
	struct hifn_operand *op = arg;

	KASSERT(nsegs <= MAX_SCATTER,
	    ("hifn_op_cb: too many DMA segments (%u > %u) "
	     "returned when mapping operand", nsegs, MAX_SCATTER));
	op->mapsize = mapsize;
	op->nsegs = nsegs;
	bcopy(seg, op->segs, nsegs * sizeof (seg[0]));
}

/*
 * Dispatch one crypto operation to the card: map the source (and, if
 * needed, a realigned destination), then post command, source, result
 * and destination descriptors.  Returns 0 on success, ERESTART when
 * ring/mbuf resources are exhausted (caller requeues), or an errno.
 */
static int
hifn_crypto(struct hifn_softc *sc, struct hifn_command *cmd,
    struct cryptop *crp, int hint)
{
	struct hifn_dma *dma = sc->sc_dma;
	u_int32_t cmdlen;
	int cmdi, resi, err = 0;

	/*
	 * need 1 cmd, and 1 res
	 *
	 * NB: check this first since it's easy.
	 */
	if ((dma->cmdu + 1) > HIFN_D_CMD_RSIZE ||
	    (dma->resu + 1) > HIFN_D_RES_RSIZE) {
#ifdef HIFN_DEBUG
		if (hifn_debug) {
			device_printf(sc->sc_dev,
			    "cmd/result exhaustion, cmdu %u resu %u\n",
			    dma->cmdu, dma->resu);
		}
#endif
		hifnstats.hst_nomem_cr++;
		return (ERESTART);
	}

	if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &cmd->src_map)) {
		hifnstats.hst_nomem_map++;
		return (ENOMEM);
	}

	if (crp->crp_flags & CRYPTO_F_IMBUF) {
		if (bus_dmamap_load_mbuf(sc->sc_dmat, cmd->src_map,
		    cmd->src_m, hifn_op_cb, &cmd->src, BUS_DMA_NOWAIT)) {
			hifnstats.hst_nomem_load++;
			err = ENOMEM;
			goto err_srcmap1;
		}
	} else if (crp->crp_flags & CRYPTO_F_IOV) {
		if (bus_dmamap_load_uio(sc->sc_dmat, cmd->src_map,
		    cmd->src_io, hifn_op_cb, &cmd->src, BUS_DMA_NOWAIT)) {
			hifnstats.hst_nomem_load++;
			err = ENOMEM;
			goto err_srcmap1;
		}
	} else {
		err = EINVAL;
		goto err_srcmap1;
	}

	if (hifn_dmamap_aligned(&cmd->src)) {
		/* Aligned: destination shares the source mapping. */
		cmd->sloplen = cmd->src_mapsize & 3;
		cmd->dst = cmd->src;
	} else {
		if (crp->crp_flags & CRYPTO_F_IOV) {
			err = EINVAL;
			goto err_srcmap;
		} else if (crp->crp_flags & CRYPTO_F_IMBUF) {
			int totlen, len;
			struct mbuf *m, *m0, *mlast;

			KASSERT(cmd->dst_m == cmd->src_m,
			    ("hifn_crypto: dst_m initialized improperly"));
			hifnstats.hst_unaligned++;
			/*
			 * Source is not aligned on a longword boundary.
			 * Copy the data to insure alignment.  If we fail
			 * to allocate mbufs or clusters while doing this
			 * we return ERESTART so the operation is requeued
			 * at the crypto later, but only if there are
			 * ops already posted to the hardware; otherwise we
			 * have no guarantee that we'll be re-entered.
*/ totlen = cmd->src_mapsize; if (cmd->src_m->m_flags & M_PKTHDR) { len = MHLEN; MGETHDR(m0, M_DONTWAIT, MT_DATA); + if (m0 && !m_dup_pkthdr(m0, cmd->src_m, M_DONTWAIT)) { + m_free(m0); + m0 = NULL; + } } else { len = MLEN; MGET(m0, M_DONTWAIT, MT_DATA); } if (m0 == NULL) { hifnstats.hst_nomem_mbuf++; err = dma->cmdu ? ERESTART : ENOMEM; goto err_srcmap; - } - if (len == MHLEN) { - M_COPY_PKTHDR(m0, cmd->src_m); } if (totlen >= MINCLSIZE) { MCLGET(m0, M_DONTWAIT); if ((m0->m_flags & M_EXT) == 0) { hifnstats.hst_nomem_mcl++; err = dma->cmdu ? ERESTART : ENOMEM; m_freem(m0); goto err_srcmap; } len = MCLBYTES; } totlen -= len; m0->m_pkthdr.len = m0->m_len = len; mlast = m0; while (totlen > 0) { MGET(m, M_DONTWAIT, MT_DATA); if (m == NULL) { hifnstats.hst_nomem_mbuf++; err = dma->cmdu ? ERESTART : ENOMEM; m_freem(m0); goto err_srcmap; } len = MLEN; if (totlen >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { hifnstats.hst_nomem_mcl++; err = dma->cmdu ? ERESTART : ENOMEM; mlast->m_next = m; m_freem(m0); goto err_srcmap; } len = MCLBYTES; } m->m_len = len; m0->m_pkthdr.len += len; totlen -= len; mlast->m_next = m; mlast = m; } cmd->dst_m = m0; } } if (cmd->dst_map == NULL) { if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &cmd->dst_map)) { hifnstats.hst_nomem_map++; err = ENOMEM; goto err_srcmap; } if (crp->crp_flags & CRYPTO_F_IMBUF) { if (bus_dmamap_load_mbuf(sc->sc_dmat, cmd->dst_map, cmd->dst_m, hifn_op_cb, &cmd->dst, BUS_DMA_NOWAIT)) { hifnstats.hst_nomem_map++; err = ENOMEM; goto err_dstmap1; } } else if (crp->crp_flags & CRYPTO_F_IOV) { if (bus_dmamap_load_uio(sc->sc_dmat, cmd->dst_map, cmd->dst_io, hifn_op_cb, &cmd->dst, BUS_DMA_NOWAIT)) { hifnstats.hst_nomem_load++; err = ENOMEM; goto err_dstmap1; } } } #ifdef HIFN_DEBUG if (hifn_debug) { device_printf(sc->sc_dev, "Entering cmd: stat %8x ien %8x u %d/%d/%d/%d n %d/%d\n", READ_REG_1(sc, HIFN_1_DMA_CSR), READ_REG_1(sc, HIFN_1_DMA_IER), dma->cmdu, dma->srcu, dma->dstu, dma->resu, 
		    cmd->src_nsegs, cmd->dst_nsegs);
	}
#endif

	if (cmd->src_map == cmd->dst_map) {
		bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
		    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
	} else {
		bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
		    BUS_DMASYNC_PREWRITE);
		bus_dmamap_sync(sc->sc_dmat, cmd->dst_map,
		    BUS_DMASYNC_PREREAD);
	}

	/*
	 * need N src, and N dst
	 */
	if ((dma->srcu + cmd->src_nsegs) > HIFN_D_SRC_RSIZE ||
	    (dma->dstu + cmd->dst_nsegs + 1) > HIFN_D_DST_RSIZE) {
#ifdef HIFN_DEBUG
		if (hifn_debug) {
			device_printf(sc->sc_dev,
			    "src/dst exhaustion, srcu %u+%u dstu %u+%u\n",
			    dma->srcu, cmd->src_nsegs,
			    dma->dstu, cmd->dst_nsegs);
		}
#endif
		hifnstats.hst_nomem_sd++;
		err = ERESTART;
		goto err_dstmap;
	}

	if (dma->cmdi == HIFN_D_CMD_RSIZE) {
		dma->cmdi = 0;
		dma->cmdr[HIFN_D_CMD_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_CMDR_SYNC(sc, HIFN_D_CMD_RSIZE,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	}
	cmdi = dma->cmdi++;
	cmdlen = hifn_write_command(cmd, dma->command_bufs[cmdi]);
	HIFN_CMD_SYNC(sc, cmdi, BUS_DMASYNC_PREWRITE);

	/* .p for command/result already set */
	dma->cmdr[cmdi].l = htole32(cmdlen | HIFN_D_VALID | HIFN_D_LAST |
	    HIFN_D_MASKDONEIRQ);
	HIFN_CMDR_SYNC(sc, cmdi,
	    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	dma->cmdu++;
	if (sc->sc_c_busy == 0) {
		WRITE_REG_1(sc, HIFN_1_DMA_CSR, HIFN_DMACSR_C_CTRL_ENA);
		sc->sc_c_busy = 1;
	}

	/*
	 * We don't worry about missing an interrupt (which a "command wait"
	 * interrupt salvages us from), unless there is more than one command
	 * in the queue.
	 */
	if (dma->cmdu > 1) {
		sc->sc_dmaier |= HIFN_DMAIER_C_WAIT;
		WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier);
	}

	hifnstats.hst_ipackets++;
	hifnstats.hst_ibytes += cmd->src_mapsize;

	hifn_dmamap_load_src(sc, cmd);
	if (sc->sc_s_busy == 0) {
		WRITE_REG_1(sc, HIFN_1_DMA_CSR, HIFN_DMACSR_S_CTRL_ENA);
		sc->sc_s_busy = 1;
	}

	/*
	 * Unlike other descriptors, we don't mask done interrupt from
	 * result descriptor.
	 */
#ifdef HIFN_DEBUG
	if (hifn_debug)
		printf("load res\n");
#endif
	if (dma->resi == HIFN_D_RES_RSIZE) {
		dma->resi = 0;
		dma->resr[HIFN_D_RES_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_RESR_SYNC(sc, HIFN_D_RES_RSIZE,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	}
	resi = dma->resi++;
	KASSERT(dma->hifn_commands[resi] == NULL,
	    ("hifn_crypto: command slot %u busy", resi));
	dma->hifn_commands[resi] = cmd;
	HIFN_RES_SYNC(sc, resi, BUS_DMASYNC_PREREAD);
	/* Batch (mask done IRQ) when the caller hints more work is coming. */
	if ((hint & CRYPTO_HINT_MORE) && sc->sc_curbatch < hifn_maxbatch) {
		dma->resr[resi].l = htole32(HIFN_MAX_RESULT |
		    HIFN_D_VALID | HIFN_D_LAST | HIFN_D_MASKDONEIRQ);
		sc->sc_curbatch++;
		if (sc->sc_curbatch > hifnstats.hst_maxbatch)
			hifnstats.hst_maxbatch = sc->sc_curbatch;
		hifnstats.hst_totbatch++;
	} else {
		dma->resr[resi].l = htole32(HIFN_MAX_RESULT |
		    HIFN_D_VALID | HIFN_D_LAST);
		sc->sc_curbatch = 0;
	}
	HIFN_RESR_SYNC(sc, resi,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	dma->resu++;
	if (sc->sc_r_busy == 0) {
		WRITE_REG_1(sc, HIFN_1_DMA_CSR, HIFN_DMACSR_R_CTRL_ENA);
		sc->sc_r_busy = 1;
	}

	if (cmd->sloplen)
		cmd->slopidx = resi;

	hifn_dmamap_load_dst(sc, cmd);
	if (sc->sc_d_busy == 0) {
		WRITE_REG_1(sc, HIFN_1_DMA_CSR, HIFN_DMACSR_D_CTRL_ENA);
		sc->sc_d_busy = 1;
	}

#ifdef HIFN_DEBUG
	if (hifn_debug) {
		device_printf(sc->sc_dev, "command: stat %8x ier %8x\n",
		    READ_REG_1(sc, HIFN_1_DMA_CSR),
		    READ_REG_1(sc, HIFN_1_DMA_IER));
	}
#endif

	sc->sc_active = 5;
	KASSERT(err == 0, ("hifn_crypto: success with error %u", err));
	return (err);		/* success */

err_dstmap:
	if (cmd->src_map != cmd->dst_map)
		bus_dmamap_unload(sc->sc_dmat, cmd->dst_map);
err_dstmap1:
	if (cmd->src_map != cmd->dst_map)
		bus_dmamap_destroy(sc->sc_dmat, cmd->dst_map);
err_srcmap:
	if (crp->crp_flags & CRYPTO_F_IMBUF) {
		if (cmd->src_m != cmd->dst_m)
			m_freem(cmd->dst_m);
	}
	bus_dmamap_unload(sc->sc_dmat, cmd->src_map);
err_srcmap1:
	bus_dmamap_destroy(sc->sc_dmat, cmd->src_map);
	return (err);
}

/*
 * Once-a-second watchdog: after ~5 idle seconds (sc_active counts
 * down), disable any DMA engines whose rings have drained.
 */
static void
hifn_tick(void* vsc)
{
	struct hifn_softc *sc = vsc;

	HIFN_LOCK(sc);
	if (sc->sc_active == 0) {
		struct hifn_dma *dma = sc->sc_dma;
		u_int32_t r = 0;

		if (dma->cmdu == 0 && sc->sc_c_busy) {
			sc->sc_c_busy = 0;
			r |= HIFN_DMACSR_C_CTRL_DIS;
		}
		if (dma->srcu == 0 && sc->sc_s_busy) {
			sc->sc_s_busy = 0;
			r |= HIFN_DMACSR_S_CTRL_DIS;
		}
		if (dma->dstu == 0 && sc->sc_d_busy) {
			sc->sc_d_busy = 0;
			r |= HIFN_DMACSR_D_CTRL_DIS;
		}
		if (dma->resu == 0 && sc->sc_r_busy) {
			sc->sc_r_busy = 0;
			r |= HIFN_DMACSR_R_CTRL_DIS;
		}
		if (r)
			WRITE_REG_1(sc, HIFN_1_DMA_CSR, r);
	} else
		sc->sc_active--;
	HIFN_UNLOCK(sc);
	callout_reset(&sc->sc_tickto, hz, hifn_tick, sc);
}

/*
 * Interrupt handler: acknowledge DMA status, handle overruns/aborts,
 * and reap completed result/src/cmd descriptors.
 */
static void
hifn_intr(void *arg)
{
	struct hifn_softc *sc = arg;
	struct hifn_dma *dma;
	u_int32_t dmacsr, restart;
	int i, u;

	HIFN_LOCK(sc);
	dma = sc->sc_dma;

	dmacsr = READ_REG_1(sc, HIFN_1_DMA_CSR);
#ifdef HIFN_DEBUG
	if (hifn_debug) {
		device_printf(sc->sc_dev,
		    "irq: stat %08x ien %08x damier %08x i %d/%d/%d/%d k %d/%d/%d/%d u %d/%d/%d/%d\n",
		    dmacsr, READ_REG_1(sc, HIFN_1_DMA_IER), sc->sc_dmaier,
		    dma->cmdi, dma->srci, dma->dsti, dma->resi,
		    dma->cmdk, dma->srck, dma->dstk, dma->resk,
		    dma->cmdu, dma->srcu, dma->dstu, dma->resu);
	}
#endif

	/* Nothing in the DMA unit interrupted */
	if ((dmacsr & sc->sc_dmaier) == 0) {
		hifnstats.hst_noirq++;
		HIFN_UNLOCK(sc);
		return;
	}

	WRITE_REG_1(sc, HIFN_1_DMA_CSR, dmacsr & sc->sc_dmaier);

	if ((sc->sc_flags & HIFN_HAS_PUBLIC) &&
	    (dmacsr & HIFN_DMACSR_PUBDONE))
		WRITE_REG_1(sc, HIFN_1_PUB_STATUS,
		    READ_REG_1(sc, HIFN_1_PUB_STATUS) | HIFN_PUBSTS_DONE);

	restart = dmacsr & (HIFN_DMACSR_D_OVER | HIFN_DMACSR_R_OVER);
	if (restart)
		device_printf(sc->sc_dev, "overrun %x\n", dmacsr);

	if (sc->sc_flags & HIFN_IS_7811) {
		if (dmacsr & HIFN_DMACSR_ILLR)
			device_printf(sc->sc_dev, "illegal read\n");
		if (dmacsr & HIFN_DMACSR_ILLW)
			device_printf(sc->sc_dev, "illegal write\n");
	}

	restart = dmacsr & (HIFN_DMACSR_C_ABORT | HIFN_DMACSR_S_ABORT |
	    HIFN_DMACSR_D_ABORT | HIFN_DMACSR_R_ABORT);
	if (restart) {
		device_printf(sc->sc_dev, "abort, resetting.\n");
		hifnstats.hst_abort++;
		hifn_abort(sc);
		HIFN_UNLOCK(sc);
		return;
	}

	if ((dmacsr & HIFN_DMACSR_C_WAIT) && (dma->cmdu == 0)) {
		/*
		 * If no slots to process and we receive a "waiting on
		 * command" interrupt, we disable the "waiting on command"
		 * (by clearing it).
		 */
		sc->sc_dmaier &= ~HIFN_DMAIER_C_WAIT;
		WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier);
	}

	/* clear the rings */
	i = dma->resk; u = dma->resu;
	while (u != 0) {
		HIFN_RESR_SYNC(sc, i,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if (dma->resr[i].l & htole32(HIFN_D_VALID)) {
			/* Still owned by hardware; stop reaping. */
			HIFN_RESR_SYNC(sc, i,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			break;
		}

		if (i != HIFN_D_RES_RSIZE) {
			struct hifn_command *cmd;
			u_int8_t *macbuf = NULL;

			HIFN_RES_SYNC(sc, i, BUS_DMASYNC_POSTREAD);
			cmd = dma->hifn_commands[i];
			KASSERT(cmd != NULL,
			    ("hifn_intr: null command slot %u", i));
			dma->hifn_commands[i] = NULL;

			if (cmd->base_masks & HIFN_BASE_CMD_MAC) {
				/* MAC digest starts 12 bytes into result. */
				macbuf = dma->result_bufs[i];
				macbuf += 12;
			}

			hifn_callback(sc, cmd, macbuf);
			hifnstats.hst_opackets++;
			u--;
		}

		if (++i == (HIFN_D_RES_RSIZE + 1))
			i = 0;
	}
	dma->resk = i; dma->resu = u;

	i = dma->srck; u = dma->srcu;
	while (u != 0) {
		if (i == HIFN_D_SRC_RSIZE)
			i = 0;
		HIFN_SRCR_SYNC(sc, i,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if (dma->srcr[i].l & htole32(HIFN_D_VALID)) {
			HIFN_SRCR_SYNC(sc, i,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			break;
		}
		i++, u--;
	}
	dma->srck = i; dma->srcu = u;

	i = dma->cmdk; u = dma->cmdu;
	while (u != 0) {
		HIFN_CMDR_SYNC(sc, i,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if (dma->cmdr[i].l & htole32(HIFN_D_VALID)) {
			HIFN_CMDR_SYNC(sc, i,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			break;
		}
		if (i != HIFN_D_CMD_RSIZE) {
			u--;
			HIFN_CMD_SYNC(sc, i, BUS_DMASYNC_POSTWRITE);
		}
		if (++i == (HIFN_D_CMD_RSIZE + 1))
			i = 0;
	}
	dma->cmdk = i; dma->cmdu = u;

	if (sc->sc_needwakeup) {		/* XXX check high watermark */
		int wakeup = sc->sc_needwakeup & (CRYPTO_SYMQ|CRYPTO_ASYMQ);
#ifdef HIFN_DEBUG
		if (hifn_debug)
			device_printf(sc->sc_dev,
			    "wakeup crypto (%x) u %d/%d/%d/%d\n",
			    sc->sc_needwakeup,
			    dma->cmdu, dma->srcu, dma->dstu, dma->resu);
#endif
		sc->sc_needwakeup &= ~wakeup;
		crypto_unblock(sc->sc_cid, wakeup);
	}
	HIFN_UNLOCK(sc);
}

/*
 * Allocate a new 'session' and return an encoded session id.  'sidp'
 * contains our registration id, and should contain an encoded session
 * id on successful allocation.
 */
static int
hifn_newsession(void *arg, u_int32_t *sidp, struct cryptoini *cri)
{
	struct cryptoini *c;
	struct hifn_softc *sc = arg;
	int i, mac = 0, cry = 0;

	KASSERT(sc != NULL, ("hifn_newsession: null softc"));
	if (sidp == NULL || cri == NULL || sc == NULL)
		return (EINVAL);

	/* Find a free session slot. */
	for (i = 0; i < sc->sc_maxses; i++)
		if (sc->sc_sessions[i].hs_state == HS_STATE_FREE)
			break;
	if (i == sc->sc_maxses)
		return (ENOMEM);

	/* At most one MAC and one cipher descriptor are accepted. */
	for (c = cri; c != NULL; c = c->cri_next) {
		switch (c->cri_alg) {
		case CRYPTO_MD5:
		case CRYPTO_SHA1:
		case CRYPTO_MD5_HMAC:
		case CRYPTO_SHA1_HMAC:
			if (mac)
				return (EINVAL);
			mac = 1;
			break;
		case CRYPTO_DES_CBC:
		case CRYPTO_3DES_CBC:
			/* XXX this may read fewer, does it matter? */
			read_random(sc->sc_sessions[i].hs_iv, HIFN_IV_LENGTH);
			/*FALLTHROUGH*/
		case CRYPTO_ARC4:
			if (cry)
				return (EINVAL);
			cry = 1;
			break;
		default:
			return (EINVAL);
		}
	}
	if (mac == 0 && cry == 0)
		return (EINVAL);

	*sidp = HIFN_SID(device_get_unit(sc->sc_dev), i);
	sc->sc_sessions[i].hs_state = HS_STATE_USED;

	return (0);
}

/*
 * Deallocate a session.
 * XXX this routine should run a zero'd mac/encrypt key into context ram.
 * XXX to blow away any keys already stored there.
 */
static int
hifn_freesession(void *arg, u_int64_t tid)
{
	struct hifn_softc *sc = arg;
	int session;
	u_int32_t sid = ((u_int32_t) tid) & 0xffffffff;

	KASSERT(sc != NULL, ("hifn_freesession: null softc"));
	if (sc == NULL)
		return (EINVAL);

	session = HIFN_SESSION(sid);
	if (session >= sc->sc_maxses)
		return (EINVAL);

	bzero(&sc->sc_sessions[session], sizeof(sc->sc_sessions[session]));
	return (0);
}

/*
 * opencrypto dispatch entry point: validate the request, classify the
 * descriptors into at most one cipher and one MAC, set up IVs/keys in
 * a hifn_command, and hand it to hifn_crypto().
 */
static int
hifn_process(void *arg, struct cryptop *crp, int hint)
{
	struct hifn_softc *sc = arg;
	struct hifn_command *cmd = NULL;
	int session, err;
	struct cryptodesc *crd1, *crd2, *maccrd, *enccrd;

	if (crp == NULL || crp->crp_callback == NULL) {
		hifnstats.hst_invalid++;
		return (EINVAL);
	}
	session = HIFN_SESSION(crp->crp_sid);

	if (sc == NULL || session >= sc->sc_maxses) {
		err = EINVAL;
		goto errout;
	}

	cmd = malloc(sizeof(struct hifn_command), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (cmd == NULL) {
		hifnstats.hst_nomem++;
		err = ENOMEM;
		goto errout;
	}

	if (crp->crp_flags & CRYPTO_F_IMBUF) {
		cmd->src_m = (struct mbuf *)crp->crp_buf;
		cmd->dst_m = (struct mbuf *)crp->crp_buf;
	} else if (crp->crp_flags & CRYPTO_F_IOV) {
		cmd->src_io = (struct uio *)crp->crp_buf;
		cmd->dst_io = (struct uio *)crp->crp_buf;
	} else {
		err = EINVAL;
		goto errout;	/* XXX we don't handle contiguous buffers! */
	}

	crd1 = crp->crp_desc;
	if (crd1 == NULL) {
		err = EINVAL;
		goto errout;
	}
	crd2 = crd1->crd_next;

	if (crd2 == NULL) {
		/* Single descriptor: either MAC-only or cipher-only. */
		if (crd1->crd_alg == CRYPTO_MD5_HMAC ||
		    crd1->crd_alg == CRYPTO_SHA1_HMAC ||
		    crd1->crd_alg == CRYPTO_SHA1 ||
		    crd1->crd_alg == CRYPTO_MD5) {
			maccrd = crd1;
			enccrd = NULL;
		} else if (crd1->crd_alg == CRYPTO_DES_CBC ||
		    crd1->crd_alg == CRYPTO_3DES_CBC ||
		    crd1->crd_alg == CRYPTO_ARC4) {
			if ((crd1->crd_flags & CRD_F_ENCRYPT) == 0)
				cmd->base_masks |= HIFN_BASE_CMD_DECODE;
			maccrd = NULL;
			enccrd = crd1;
		} else {
			err = EINVAL;
			goto errout;
		}
	} else {
		/*
		 * Two descriptors: the chip only supports MAC-then-decrypt
		 * and encrypt-then-MAC orderings.
		 */
		if ((crd1->crd_alg == CRYPTO_MD5_HMAC ||
		    crd1->crd_alg == CRYPTO_SHA1_HMAC ||
		    crd1->crd_alg == CRYPTO_MD5 ||
		    crd1->crd_alg == CRYPTO_SHA1) &&
		    (crd2->crd_alg == CRYPTO_DES_CBC ||
		     crd2->crd_alg == CRYPTO_3DES_CBC ||
		     crd2->crd_alg == CRYPTO_ARC4) &&
		    ((crd2->crd_flags & CRD_F_ENCRYPT) == 0)) {
			cmd->base_masks = HIFN_BASE_CMD_DECODE;
			maccrd = crd1;
			enccrd = crd2;
		} else if ((crd1->crd_alg == CRYPTO_DES_CBC ||
		    crd1->crd_alg == CRYPTO_ARC4 ||
		    crd1->crd_alg == CRYPTO_3DES_CBC) &&
		    (crd2->crd_alg == CRYPTO_MD5_HMAC ||
		     crd2->crd_alg == CRYPTO_SHA1_HMAC ||
		     crd2->crd_alg == CRYPTO_MD5 ||
		     crd2->crd_alg == CRYPTO_SHA1) &&
		    (crd1->crd_flags & CRD_F_ENCRYPT)) {
			enccrd = crd1;
			maccrd = crd2;
		} else {
			/*
			 * We cannot order the 7751 as requested
			 */
			err = EINVAL;
			goto errout;
		}
	}

	if (enccrd) {
		cmd->enccrd = enccrd;
		cmd->base_masks |= HIFN_BASE_CMD_CRYPT;
		switch (enccrd->crd_alg) {
		case CRYPTO_ARC4:
			cmd->cry_masks |= HIFN_CRYPT_CMD_ALG_RC4;
			/* Direction change forces the RC4 state reload. */
			if ((enccrd->crd_flags & CRD_F_ENCRYPT) !=
			    sc->sc_sessions[session].hs_prev_op)
				sc->sc_sessions[session].hs_state =
				    HS_STATE_USED;
			break;
		case CRYPTO_DES_CBC:
			cmd->cry_masks |= HIFN_CRYPT_CMD_ALG_DES |
			    HIFN_CRYPT_CMD_MODE_CBC |
			    HIFN_CRYPT_CMD_NEW_IV;
			break;
		case CRYPTO_3DES_CBC:
			cmd->cry_masks |= HIFN_CRYPT_CMD_ALG_3DES |
			    HIFN_CRYPT_CMD_MODE_CBC |
			    HIFN_CRYPT_CMD_NEW_IV;
			break;
		default:
			err = EINVAL;
			goto errout;
		}
		if (enccrd->crd_alg != CRYPTO_ARC4) {
			if (enccrd->crd_flags & CRD_F_ENCRYPT) {
				/* Encrypt: pick IV, optionally inject it. */
				if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
					bcopy(enccrd->crd_iv, cmd->iv,
					    HIFN_IV_LENGTH);
				else
					bcopy(sc->sc_sessions[session].hs_iv,
					    cmd->iv, HIFN_IV_LENGTH);

				if ((enccrd->crd_flags & CRD_F_IV_PRESENT)
				    == 0) {
					if (crp->crp_flags & CRYPTO_F_IMBUF)
						m_copyback(cmd->src_m,
						    enccrd->crd_inject,
						    HIFN_IV_LENGTH, cmd->iv);
					else if (crp->crp_flags & CRYPTO_F_IOV)
						cuio_copyback(cmd->src_io,
						    enccrd->crd_inject,
						    HIFN_IV_LENGTH, cmd->iv);
				}
			} else {
				/* Decrypt: IV comes from caller or buffer. */
				if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
					bcopy(enccrd->crd_iv, cmd->iv,
					    HIFN_IV_LENGTH);
				else if (crp->crp_flags & CRYPTO_F_IMBUF)
					m_copydata(cmd->src_m,
					    enccrd->crd_inject,
					    HIFN_IV_LENGTH, cmd->iv);
				else if (crp->crp_flags & CRYPTO_F_IOV)
					cuio_copydata(cmd->src_io,
					    enccrd->crd_inject,
					    HIFN_IV_LENGTH, cmd->iv);
			}
		}

		cmd->ck = enccrd->crd_key;
		cmd->cklen = enccrd->crd_klen >> 3;

		if (sc->sc_sessions[session].hs_state == HS_STATE_USED)
			cmd->cry_masks |= HIFN_CRYPT_CMD_NEW_KEY;
	}

	if (maccrd) {
		cmd->maccrd = maccrd;
		cmd->base_masks |= HIFN_BASE_CMD_MAC;

		switch (maccrd->crd_alg) {
		case CRYPTO_MD5:
			cmd->mac_masks |= HIFN_MAC_CMD_ALG_MD5 |
			    HIFN_MAC_CMD_RESULT | HIFN_MAC_CMD_MODE_HASH |
			    HIFN_MAC_CMD_POS_IPSEC;
			break;
		case CRYPTO_MD5_HMAC:
			cmd->mac_masks |= HIFN_MAC_CMD_ALG_MD5 |
			    HIFN_MAC_CMD_RESULT | HIFN_MAC_CMD_MODE_HMAC |
			    HIFN_MAC_CMD_POS_IPSEC | HIFN_MAC_CMD_TRUNC;
			break;
		case CRYPTO_SHA1:
			cmd->mac_masks |= HIFN_MAC_CMD_ALG_SHA1 |
			    HIFN_MAC_CMD_RESULT | HIFN_MAC_CMD_MODE_HASH |
			    HIFN_MAC_CMD_POS_IPSEC;
			break;
		case CRYPTO_SHA1_HMAC:
			cmd->mac_masks |= HIFN_MAC_CMD_ALG_SHA1 |
			    HIFN_MAC_CMD_RESULT | HIFN_MAC_CMD_MODE_HMAC |
			    HIFN_MAC_CMD_POS_IPSEC | HIFN_MAC_CMD_TRUNC;
			break;
		}

		/* HMAC key is zero-padded to the full key area. */
		if ((maccrd->crd_alg == CRYPTO_SHA1_HMAC ||
		     maccrd->crd_alg == CRYPTO_MD5_HMAC) &&
		    sc->sc_sessions[session].hs_state == HS_STATE_USED) {
			cmd->mac_masks |= HIFN_MAC_CMD_NEW_KEY;
			bcopy(maccrd->crd_key, cmd->mac, maccrd->crd_klen >> 3);
			bzero(cmd->mac + (maccrd->crd_klen >> 3),
			    HIFN_MAC_KEY_LENGTH - (maccrd->crd_klen >> 3));
		}
	}

	cmd->crp = crp;
	cmd->session_num = session;
	cmd->softc = sc;

	err = hifn_crypto(sc, cmd, crp, hint);
	if (!err) {
		if (enccrd)
			sc->sc_sessions[session].hs_prev_op =
				enccrd->crd_flags & CRD_F_ENCRYPT;
		if (sc->sc_sessions[session].hs_state == HS_STATE_USED)
			sc->sc_sessions[session].hs_state = HS_STATE_KEY;
		return 0;
	} else if (err == ERESTART) {
		/*
		 * There weren't enough resources to dispatch the request
		 * to the part.  Notify the caller so they'll requeue this
		 * request and resubmit it again soon.
		 */
#ifdef HIFN_DEBUG
		if (hifn_debug)
			device_printf(sc->sc_dev, "requeue request\n");
#endif
		free(cmd, M_DEVBUF);
		sc->sc_needwakeup |= CRYPTO_SYMQ;
		return (err);
	}

errout:
	if (cmd != NULL)
		free(cmd, M_DEVBUF);
	if (err == EINVAL)
		hifnstats.hst_invalid++;
	else
		hifnstats.hst_nomem++;
	crp->crp_etype = err;
	crypto_done(crp);
	return (err);
}

/*
 * Abort handling after a DMA abort interrupt: salvage any results
 * that did complete, fail the rest with ENOMEM, then reset and
 * reinitialize the board.
 */
static void
hifn_abort(struct hifn_softc *sc)
{
	struct hifn_dma *dma = sc->sc_dma;
	struct hifn_command *cmd;
	struct cryptop *crp;
	int i, u;

	i = dma->resk; u = dma->resu;
	while (u != 0) {
		cmd = dma->hifn_commands[i];
		KASSERT(cmd != NULL, ("hifn_abort: null command slot %u", i));
		dma->hifn_commands[i] = NULL;
		crp = cmd->crp;

		if ((dma->resr[i].l & htole32(HIFN_D_VALID)) == 0) {
			/* Salvage what we can. */
			u_int8_t *macbuf;

			if (cmd->base_masks & HIFN_BASE_CMD_MAC) {
				macbuf = dma->result_bufs[i];
				macbuf += 12;
			} else
				macbuf = NULL;
			hifnstats.hst_opackets++;
			hifn_callback(sc, cmd, macbuf);
		} else {
			if (cmd->src_map == cmd->dst_map) {
				bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
				    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
			} else {
				bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_sync(sc->sc_dmat, cmd->dst_map,
				    BUS_DMASYNC_POSTREAD);
			}

			if (cmd->src_m != cmd->dst_m) {
				m_freem(cmd->src_m);
				crp->crp_buf = (caddr_t)cmd->dst_m;
			}

			/* non-shared buffers cannot be restarted */
			if (cmd->src_map != cmd->dst_map) {
				/*
				 * XXX should be EAGAIN, delayed until
				 * after the reset.
				 */
				crp->crp_etype = ENOMEM;
				bus_dmamap_unload(sc->sc_dmat, cmd->dst_map);
				bus_dmamap_destroy(sc->sc_dmat, cmd->dst_map);
			} else
				crp->crp_etype = ENOMEM;

			bus_dmamap_unload(sc->sc_dmat, cmd->src_map);
			bus_dmamap_destroy(sc->sc_dmat, cmd->src_map);

			free(cmd, M_DEVBUF);
			if (crp->crp_etype != EAGAIN)
				crypto_done(crp);
		}

		if (++i == HIFN_D_RES_RSIZE)
			i = 0;
		u--;
	}
	dma->resk = i; dma->resu = u;

	/* Force upload of key next time */
	for (i = 0; i < sc->sc_maxses; i++)
		if (sc->sc_sessions[i].hs_state == HS_STATE_KEY)
			sc->sc_sessions[i].hs_state = HS_STATE_USED;

	hifn_reset_board(sc, 1);
	hifn_init_dma(sc);
	hifn_init_pci_registers(sc);
}

/*
 * Completion path for one finished command: sync/unmap the operand
 * buffers, copy back slop bytes, save the next-IV for CBC encrypt,
 * deliver the MAC digest, and call crypto_done().
 */
static void
hifn_callback(struct hifn_softc *sc, struct hifn_command *cmd, u_int8_t *macbuf)
{
	struct hifn_dma *dma = sc->sc_dma;
	struct cryptop *crp = cmd->crp;
	struct cryptodesc *crd;
	struct mbuf *m;
	int totlen, i, u;

	if (cmd->src_map == cmd->dst_map) {
		bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
		    BUS_DMASYNC_POSTWRITE | BUS_DMASYNC_POSTREAD);
	} else {
		bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
		    BUS_DMASYNC_POSTWRITE);
		bus_dmamap_sync(sc->sc_dmat, cmd->dst_map,
		    BUS_DMASYNC_POSTREAD);
	}

	if (crp->crp_flags & CRYPTO_F_IMBUF) {
		if (cmd->src_m != cmd->dst_m) {
			/* Realigned copy: trim dst chain to source length. */
			crp->crp_buf = (caddr_t)cmd->dst_m;
			totlen = cmd->src_mapsize;
			for (m = cmd->dst_m; m != NULL; m = m->m_next) {
				if (totlen < m->m_len) {
					m->m_len = totlen;
					totlen = 0;
				} else
					totlen -= m->m_len;
			}
			cmd->dst_m->m_pkthdr.len = cmd->src_m->m_pkthdr.len;
			m_freem(cmd->src_m);
		}
	}

	if (cmd->sloplen != 0) {
		/* Copy the staged slop word back into the data buffer. */
		if (crp->crp_flags & CRYPTO_F_IMBUF)
			m_copyback((struct mbuf *)crp->crp_buf,
			    cmd->src_mapsize - cmd->sloplen,
			    cmd->sloplen, (caddr_t)&dma->slop[cmd->slopidx]);
		else if (crp->crp_flags & CRYPTO_F_IOV)
			cuio_copyback((struct uio *)crp->crp_buf,
			    cmd->src_mapsize - cmd->sloplen,
			    cmd->sloplen, (caddr_t)&dma->slop[cmd->slopidx]);
	}

	i = dma->dstk; u = dma->dstu;
	while (u != 0) {
		if (i == HIFN_D_DST_RSIZE)
			i = 0;
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if (dma->dstr[i].l & htole32(HIFN_D_VALID)) {
			bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			break;
		}
		i++, u--;
	}
	dma->dstk = i; dma->dstu = u;

	hifnstats.hst_obytes += cmd->dst_mapsize;

	/* For CBC encrypt, remember the last ciphertext block as next IV. */
	if ((cmd->base_masks & (HIFN_BASE_CMD_CRYPT | HIFN_BASE_CMD_DECODE)) ==
	    HIFN_BASE_CMD_CRYPT) {
		for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
			if (crd->crd_alg != CRYPTO_DES_CBC &&
			    crd->crd_alg != CRYPTO_3DES_CBC)
				continue;
			if (crp->crp_flags & CRYPTO_F_IMBUF)
				m_copydata((struct mbuf *)crp->crp_buf,
				    crd->crd_skip + crd->crd_len - HIFN_IV_LENGTH,
				    HIFN_IV_LENGTH,
				    cmd->softc->sc_sessions[cmd->session_num].hs_iv);
			else if (crp->crp_flags & CRYPTO_F_IOV) {
				cuio_copydata((struct uio *)crp->crp_buf,
				    crd->crd_skip + crd->crd_len - HIFN_IV_LENGTH,
				    HIFN_IV_LENGTH,
				    cmd->softc->sc_sessions[cmd->session_num].hs_iv);
			}
			break;
		}
	}

	if (macbuf != NULL) {
		for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
			int len;

			if (crd->crd_alg == CRYPTO_MD5)
				len = 16;
			else if (crd->crd_alg == CRYPTO_SHA1)
				len = 20;
			else if (crd->crd_alg == CRYPTO_MD5_HMAC ||
			    crd->crd_alg == CRYPTO_SHA1_HMAC)
				len = 12;
			else
				continue;

			if (crp->crp_flags & CRYPTO_F_IMBUF)
				m_copyback((struct mbuf *)crp->crp_buf,
				    crd->crd_inject, len, macbuf);
			else if ((crp->crp_flags & CRYPTO_F_IOV) && crp->crp_mac)
				bcopy((caddr_t)macbuf, crp->crp_mac, len);
			break;
		}
	}

	if (cmd->src_map != cmd->dst_map) {
		bus_dmamap_unload(sc->sc_dmat, cmd->dst_map);
		bus_dmamap_destroy(sc->sc_dmat, cmd->dst_map);
	}
	bus_dmamap_unload(sc->sc_dmat, cmd->src_map);
	bus_dmamap_destroy(sc->sc_dmat, cmd->src_map);
	free(cmd, M_DEVBUF);
	crypto_done(crp);
}

/*
 * 7811 PB3 rev/2 parts lock-up on burst writes to Group 0
 * and Group 1 registers; avoid conditions that could create
 * burst writes by doing a read in between the writes.
 *
 * NB: The read we interpose is always to the same register;
 *     we do this because reading from an arbitrary (e.g. last)
 *     register may not always work.
*/
/*
 * hifn_write_reg_0:
 *
 * Write a BAR0 (Group 0) register.  On 7811 parts, when this write
 * would immediately follow a write to the adjacent lower register
 * (lastreg == reg - 4), interpose a read of HIFN_0_PUCNFG first so
 * the two writes cannot be combined into a burst (see note above).
 */
static void
hifn_write_reg_0(struct hifn_softc *sc, bus_size_t reg, u_int32_t val)
{
	if (sc->sc_flags & HIFN_IS_7811) {
		/* Break up a potential back-to-back adjacent write. */
		if (sc->sc_bar0_lastreg == reg - 4)
			bus_space_read_4(sc->sc_st0, sc->sc_sh0, HIFN_0_PUCNFG);
		/* Remember the last register written for the next call. */
		sc->sc_bar0_lastreg = reg;
	}
	bus_space_write_4(sc->sc_st0, sc->sc_sh0, reg, val);
}

/*
 * hifn_write_reg_1:
 *
 * Write a BAR1 (Group 1) register; same 7811 burst-avoidance as
 * hifn_write_reg_0, with HIFN_1_REVID as the interposed read.
 */
static void
hifn_write_reg_1(struct hifn_softc *sc, bus_size_t reg, u_int32_t val)
{
	if (sc->sc_flags & HIFN_IS_7811) {
		if (sc->sc_bar1_lastreg == reg - 4)
			bus_space_read_4(sc->sc_st1, sc->sc_sh1, HIFN_1_REVID);
		sc->sc_bar1_lastreg = reg;
	}
	bus_space_write_4(sc->sc_st1, sc->sc_sh1, reg, val);
}
Index: head/sys/dev/ubsec/ubsec.c
===================================================================
--- head/sys/dev/ubsec/ubsec.c	(revision 108465)
+++ head/sys/dev/ubsec/ubsec.c	(revision 108466)
@@ -1,2763 +1,2765 @@
/* $FreeBSD$ */
/* $OpenBSD: ubsec.c,v 1.115 2002/09/24 18:33:26 jason Exp $ */

/*
 * Copyright (c) 2000 Jason L. Wright (jason@thought.net)
 * Copyright (c) 2000 Theo de Raadt (deraadt@openbsd.org)
 * Copyright (c) 2001 Patrik Lindergren (patrik@ipunplugged.com)
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Jason L. Wright
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. * */ #define UBSEC_DEBUG /* * uBsec 5[56]01, 58xx hardware crypto accelerator */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* grr, #defines for gratuitous incompatibility in queue.h */ #define SIMPLEQ_HEAD STAILQ_HEAD #define SIMPLEQ_ENTRY STAILQ_ENTRY #define SIMPLEQ_INIT STAILQ_INIT #define SIMPLEQ_INSERT_TAIL STAILQ_INSERT_TAIL #define SIMPLEQ_EMPTY STAILQ_EMPTY #define SIMPLEQ_FIRST STAILQ_FIRST #define SIMPLEQ_REMOVE_HEAD STAILQ_REMOVE_HEAD_UNTIL /* ditto for endian.h */ #define letoh16(x) le16toh(x) #define letoh32(x) le32toh(x) #include #include /* * Prototypes and count for the pci_device structure */ static int ubsec_probe(device_t); static int ubsec_attach(device_t); static int ubsec_detach(device_t); static int ubsec_suspend(device_t); static int ubsec_resume(device_t); static void ubsec_shutdown(device_t); static device_method_t ubsec_methods[] = { /* 
Device interface */ DEVMETHOD(device_probe, ubsec_probe), DEVMETHOD(device_attach, ubsec_attach), DEVMETHOD(device_detach, ubsec_detach), DEVMETHOD(device_suspend, ubsec_suspend), DEVMETHOD(device_resume, ubsec_resume), DEVMETHOD(device_shutdown, ubsec_shutdown), /* bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), { 0, 0 } }; static driver_t ubsec_driver = { "ubsec", ubsec_methods, sizeof (struct ubsec_softc) }; static devclass_t ubsec_devclass; DRIVER_MODULE(ubsec, pci, ubsec_driver, ubsec_devclass, 0, 0); MODULE_DEPEND(ubsec, crypto, 1, 1, 1); static void ubsec_intr(void *); static int ubsec_newsession(void *, u_int32_t *, struct cryptoini *); static int ubsec_freesession(void *, u_int64_t); static int ubsec_process(void *, struct cryptop *, int); static void ubsec_callback(struct ubsec_softc *, struct ubsec_q *); static int ubsec_feed(struct ubsec_softc *); static void ubsec_mcopy(struct mbuf *, struct mbuf *, int, int); static void ubsec_callback2(struct ubsec_softc *, struct ubsec_q2 *); static int ubsec_feed2(struct ubsec_softc *); static void ubsec_rng(void *); static int ubsec_dma_malloc(struct ubsec_softc *, bus_size_t, struct ubsec_dma_alloc *, int); static void ubsec_dma_free(struct ubsec_softc *, struct ubsec_dma_alloc *); static int ubsec_dmamap_aligned(struct ubsec_operand *op); static void ubsec_reset_board(struct ubsec_softc *sc); static void ubsec_init_board(struct ubsec_softc *sc); static void ubsec_init_pciregs(device_t dev); static void ubsec_totalreset(struct ubsec_softc *sc); static int ubsec_free_q(struct ubsec_softc *sc, struct ubsec_q *q); static int ubsec_kprocess(void*, struct cryptkop *, int); static int ubsec_kprocess_modexp_hw(struct ubsec_softc *, struct cryptkop *, int); static int ubsec_kprocess_modexp_sw(struct ubsec_softc *, struct cryptkop *, int); static int ubsec_kprocess_rsapriv(struct ubsec_softc *, struct cryptkop *, int); static void 
ubsec_kfree(struct ubsec_softc *, struct ubsec_q2 *); static int ubsec_ksigbits(struct crparam *); static void ubsec_kshift_r(u_int, u_int8_t *, u_int, u_int8_t *, u_int); static void ubsec_kshift_l(u_int, u_int8_t *, u_int, u_int8_t *, u_int); #ifdef UBSEC_DEBUG static void ubsec_dump_pb(volatile struct ubsec_pktbuf *); static void ubsec_dump_mcr(struct ubsec_mcr *); static void ubsec_dump_ctx2(struct ubsec_ctx_keyop *); static int ubsec_debug = 0; SYSCTL_INT(_debug, OID_AUTO, ubsec, CTLFLAG_RW, &ubsec_debug, 0, "UBSEC driver debugging printfs"); #endif #define READ_REG(sc,r) \ bus_space_read_4((sc)->sc_st, (sc)->sc_sh, (r)) #define WRITE_REG(sc,reg,val) \ bus_space_write_4((sc)->sc_st, (sc)->sc_sh, reg, val) #define SWAP32(x) (x) = htole32(ntohl((x))) #define HTOLE32(x) (x) = htole32(x) struct ubsec_stats ubsecstats; SYSCTL_STRUCT(_kern, OID_AUTO, ubsec_stats, CTLFLAG_RD, &ubsecstats, ubsec_stats, "Broadcom driver statistics"); static int ubsec_maxbatch = 2; /* XXX tune based on part+sys speed */ SYSCTL_INT(_kern, OID_AUTO, ubsec_maxbatch, CTLFLAG_RW, &ubsec_maxbatch, 0, "Broadcom driver: max ops to batch w/o interrupt"); static int ubsec_probe(device_t dev) { if (pci_get_vendor(dev) == PCI_VENDOR_BLUESTEEL && (pci_get_device(dev) == PCI_PRODUCT_BLUESTEEL_5501 || pci_get_device(dev) == PCI_PRODUCT_BLUESTEEL_5601)) return (0); if (pci_get_vendor(dev) == PCI_VENDOR_BROADCOM && (pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5805 || pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5820 || pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5821 || pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5822)) return (0); return (ENXIO); } static const char* ubsec_partname(struct ubsec_softc *sc) { /* XXX sprintf numbers when not decoded */ switch (pci_get_vendor(sc->sc_dev)) { case PCI_VENDOR_BROADCOM: switch (pci_get_device(sc->sc_dev)) { case PCI_PRODUCT_BROADCOM_5805: return "Broadcom 5805"; case PCI_PRODUCT_BROADCOM_5820: return "Broadcom 5820"; case PCI_PRODUCT_BROADCOM_5821: return 
"Broadcom 5821"; case PCI_PRODUCT_BROADCOM_5822: return "Broadcom 5822"; } return "Broadcom unknown-part"; case PCI_VENDOR_BLUESTEEL: switch (pci_get_device(sc->sc_dev)) { case PCI_PRODUCT_BLUESTEEL_5601: return "Bluesteel 5601"; } return "Bluesteel unknown-part"; } return "Unknown-vendor unknown-part"; } static int ubsec_attach(device_t dev) { struct ubsec_softc *sc = device_get_softc(dev); struct ubsec_dma *dmap; u_int32_t cmd, i; int rid; KASSERT(sc != NULL, ("ubsec_attach: null software carrier!")); bzero(sc, sizeof (*sc)); sc->sc_dev = dev; mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "crypto driver", MTX_DEF); SIMPLEQ_INIT(&sc->sc_queue); SIMPLEQ_INIT(&sc->sc_qchip); SIMPLEQ_INIT(&sc->sc_queue2); SIMPLEQ_INIT(&sc->sc_qchip2); SIMPLEQ_INIT(&sc->sc_q2free); /* XXX handle power management */ sc->sc_statmask = BS_STAT_MCR1_DONE | BS_STAT_DMAERR; if (pci_get_vendor(dev) == PCI_VENDOR_BLUESTEEL && pci_get_device(dev) == PCI_PRODUCT_BLUESTEEL_5601) sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG; if (pci_get_vendor(dev) == PCI_VENDOR_BROADCOM && pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5805) sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG; if (pci_get_vendor(dev) == PCI_VENDOR_BROADCOM && pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5820) sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX | UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY; if (pci_get_vendor(dev) == PCI_VENDOR_BROADCOM && (pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5821 || pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5822)) { /* NB: the 5821/5822 defines some additional status bits */ sc->sc_statmask |= BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY; sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX | UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY; } /* XXX no PK key support until we sort out the bus_dma stuff */ sc->sc_flags &= ~UBS_FLAGS_KEY; cmd = pci_read_config(dev, PCIR_COMMAND, 4); cmd |= PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, cmd, 4); cmd = 
pci_read_config(dev, PCIR_COMMAND, 4); if (!(cmd & PCIM_CMD_MEMEN)) { device_printf(dev, "failed to enable memory mapping\n"); goto bad; } if (!(cmd & PCIM_CMD_BUSMASTEREN)) { device_printf(dev, "failed to enable bus mastering\n"); goto bad; } /* * Setup memory-mapping of PCI registers. */ rid = BS_BAR; sc->sc_sr = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, ~0, 1, RF_ACTIVE); if (sc->sc_sr == NULL) { device_printf(dev, "cannot map register space\n"); goto bad; } sc->sc_st = rman_get_bustag(sc->sc_sr); sc->sc_sh = rman_get_bushandle(sc->sc_sr); /* * Arrange interrupt line. */ rid = 0; sc->sc_irq = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 1, RF_SHAREABLE|RF_ACTIVE); if (sc->sc_irq == NULL) { device_printf(dev, "could not map interrupt\n"); goto bad; } /* * NB: Network code assumes we are blocked with splimp() * so make sure the IRQ is mapped appropriately. */ if (bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET, ubsec_intr, sc, &sc->sc_ih)) { device_printf(dev, "could not establish interrupt\n"); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr); goto bad; } sc->sc_cid = crypto_get_driverid(0); if (sc->sc_cid < 0) { device_printf(dev, "could not get crypto driver id\n"); bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr); goto bad; } /* * Setup DMA descriptor area. 
*/ if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ 0x3ffff, /* maxsize XXX */ UBS_MAX_SCATTER, /* nsegments */ 0xffff, /* maxsegsize XXX */ BUS_DMA_ALLOCNOW, /* flags */ &sc->sc_dmat)) { device_printf(dev, "cannot allocate DMA tag\n"); crypto_unregister_all(sc->sc_cid); bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr); goto bad; } SIMPLEQ_INIT(&sc->sc_freequeue); dmap = sc->sc_dmaa; for (i = 0; i < UBS_MAX_NQUEUE; i++, dmap++) { struct ubsec_q *q; q = (struct ubsec_q *)malloc(sizeof(struct ubsec_q), M_DEVBUF, M_NOWAIT); if (q == NULL) { device_printf(dev, "cannot allocate queue buffers\n"); break; } if (ubsec_dma_malloc(sc, sizeof(struct ubsec_dmachunk), &dmap->d_alloc, 0)) { device_printf(dev, "cannot allocate dma buffers\n"); free(q, M_DEVBUF); break; } dmap->d_dma = (struct ubsec_dmachunk *)dmap->d_alloc.dma_vaddr; q->q_dma = dmap; sc->sc_queuea[i] = q; SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next); } device_printf(sc->sc_dev, "%s\n", ubsec_partname(sc)); crypto_register(sc->sc_cid, CRYPTO_3DES_CBC, 0, 0, ubsec_newsession, ubsec_freesession, ubsec_process, sc); crypto_register(sc->sc_cid, CRYPTO_DES_CBC, 0, 0, ubsec_newsession, ubsec_freesession, ubsec_process, sc); crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC, 0, 0, ubsec_newsession, ubsec_freesession, ubsec_process, sc); crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC, 0, 0, ubsec_newsession, ubsec_freesession, ubsec_process, sc); /* * Reset Broadcom chip */ ubsec_reset_board(sc); /* * Init Broadcom specific PCI settings */ ubsec_init_pciregs(dev); /* * Init Broadcom chip */ ubsec_init_board(sc); #ifndef UBSEC_NO_RNG if (sc->sc_flags & UBS_FLAGS_RNG) { sc->sc_statmask |= BS_STAT_MCR2_DONE; if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr), &sc->sc_rng.rng_q.q_mcr, 0)) 
goto skip_rng; if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_rngbypass), &sc->sc_rng.rng_q.q_ctx, 0)) { ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr); goto skip_rng; } if (ubsec_dma_malloc(sc, sizeof(u_int32_t) * UBSEC_RNG_BUFSIZ, &sc->sc_rng.rng_buf, 0)) { ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_ctx); ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr); goto skip_rng; } if (hz >= 100) sc->sc_rnghz = hz / 100; else sc->sc_rnghz = 1; /* NB: 1 means the callout runs w/o Giant locked */ callout_init(&sc->sc_rngto, 1); callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc); skip_rng: ; } #endif /* UBSEC_NO_RNG */ if (sc->sc_flags & UBS_FLAGS_KEY) { sc->sc_statmask |= BS_STAT_MCR2_DONE; crypto_kregister(sc->sc_cid, CRK_MOD_EXP, 0, ubsec_kprocess, sc); #if 0 crypto_kregister(sc->sc_cid, CRK_MOD_EXP_CRT, 0, ubsec_kprocess, sc); #endif } return (0); bad: mtx_destroy(&sc->sc_mtx); return (ENXIO); } /* * Detach a device that successfully probed. */ static int ubsec_detach(device_t dev) { struct ubsec_softc *sc = device_get_softc(dev); KASSERT(sc != NULL, ("ubsec_detach: null software carrier")); UBSEC_LOCK(sc); callout_stop(&sc->sc_rngto); crypto_unregister_all(sc->sc_cid); bus_generic_detach(dev); bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); bus_dma_tag_destroy(sc->sc_dmat); bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr); UBSEC_UNLOCK(sc); mtx_destroy(&sc->sc_mtx); return (0); } /* * Stop all chip i/o so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static void ubsec_shutdown(device_t dev) { #ifdef notyet ubsec_stop(device_get_softc(dev)); #endif } /* * Device suspend routine. 
*/
/*
 * Device suspend routine: record the suspended state only; actually
 * stopping the device is not yet implemented (see "notyet" below).
 */
static int
ubsec_suspend(device_t dev)
{
	struct ubsec_softc *sc = device_get_softc(dev);

	KASSERT(sc != NULL, ("ubsec_suspend: null software carrier"));
#ifdef notyet
	/* XXX stop the device and save PCI settings */
#endif
	sc->sc_suspended = 1;

	return (0);
}

/*
 * Device resume routine: clear the suspended flag; restarting the
 * device is not yet implemented.
 */
static int
ubsec_resume(device_t dev)
{
	struct ubsec_softc *sc = device_get_softc(dev);

	KASSERT(sc != NULL, ("ubsec_resume: null software carrier"));
#ifdef notyet
	/* XXX restore PCI settings and start the device */
#endif
	sc->sc_suspended = 0;

	return (0);
}

/*
 * UBSEC Interrupt routine
 *
 * Ack the interrupt, complete any finished MCR1 (packet) and MCR2
 * (key setup / rng) requests, reset the board on a DMA error, and
 * unblock the crypto layer if we previously reported ourselves full.
 */
static void
ubsec_intr(void *arg)
{
	struct ubsec_softc *sc = arg;
	volatile u_int32_t stat;
	struct ubsec_q *q;
	struct ubsec_dma *dmap;
	int npkts = 0, i;

	UBSEC_LOCK(sc);

	stat = READ_REG(sc, BS_STAT);
	stat &= sc->sc_statmask;
	if (stat == 0) {
		/* Not our interrupt. */
		UBSEC_UNLOCK(sc);
		return;
	}

	WRITE_REG(sc, BS_STAT, stat);		/* IACK */

	/*
	 * Check to see if we have any packets waiting for us
	 */
	if ((stat & BS_STAT_MCR1_DONE)) {
		while (!SIMPLEQ_EMPTY(&sc->sc_qchip)) {
			q = SIMPLEQ_FIRST(&sc->sc_qchip);
			dmap = q->q_dma;

			/* Stop at the first MCR the chip has not finished. */
			if ((dmap->d_dma->d_mcr.mcr_flags &
			    htole16(UBS_MCR_DONE)) == 0)
				break;

			SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip, q, q_next);

			npkts = q->q_nstacked_mcrs;
			/*
			 * search for further sc_qchip ubsec_q's that share
			 * the same MCR, and complete them too, they must be
			 * at the top.
			 */
			for (i = 0; i < npkts; i++) {
				if(q->q_stacked_mcr[i]) {
					ubsec_callback(sc, q->q_stacked_mcr[i]);
					ubsecstats.hst_opackets++;
				} else {
					break;
				}
			}

			ubsec_callback(sc, q);
			ubsecstats.hst_opackets++;
		}

		/*
		 * Don't send any more packets to the chip if there has
		 * been a DMAERR.
		 */
		if (!(stat & BS_STAT_DMAERR))
			ubsec_feed(sc);
	}

	/*
	 * Check to see if we have any key setups/rng's waiting for us
	 */
	if ((sc->sc_flags & (UBS_FLAGS_KEY|UBS_FLAGS_RNG)) &&
	    (stat & BS_STAT_MCR2_DONE)) {
		struct ubsec_q2 *q2;
		struct ubsec_mcr *mcr;

		while (!SIMPLEQ_EMPTY(&sc->sc_qchip2)) {
			q2 = SIMPLEQ_FIRST(&sc->sc_qchip2);

			bus_dmamap_sync(sc->sc_dmat, q2->q_mcr.dma_map,
			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);

			mcr = (struct ubsec_mcr *)q2->q_mcr.dma_vaddr;
			if ((mcr->mcr_flags & htole16(UBS_MCR_DONE)) == 0) {
				/* Not done yet; re-arm the map and stop. */
				bus_dmamap_sync(sc->sc_dmat,
				    q2->q_mcr.dma_map,
				    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
				break;
			}
			SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip2, q2, q_next);
			ubsec_callback2(sc, q2);
			/*
			 * Don't send any more packets to the chip if there
			 * has been a DMAERR.
			 */
			if (!(stat & BS_STAT_DMAERR))
				ubsec_feed2(sc);
		}
	}

	/*
	 * Check to see if we got any DMA Error
	 */
	if (stat & BS_STAT_DMAERR) {
#ifdef UBSEC_DEBUG
		if (ubsec_debug) {
			volatile u_int32_t a = READ_REG(sc, BS_ERR);

			printf("dmaerr %s@%08x\n",
			    (a & BS_ERR_READ) ? "read" : "write",
			    a & BS_ERR_ADDR);
		}
#endif /* UBSEC_DEBUG */
		ubsecstats.hst_dmaerr++;
		ubsec_totalreset(sc);
		ubsec_feed(sc);
	}

	if (sc->sc_needwakeup) {		/* XXX check high watermark */
		int wakeup = sc->sc_needwakeup & (CRYPTO_SYMQ|CRYPTO_ASYMQ);
#ifdef UBSEC_DEBUG
		if (ubsec_debug)
			device_printf(sc->sc_dev, "wakeup crypto (%x)\n",
				sc->sc_needwakeup);
#endif /* UBSEC_DEBUG */
		sc->sc_needwakeup &= ~wakeup;
		crypto_unblock(sc->sc_cid, wakeup);
	}

	UBSEC_UNLOCK(sc);
}

/*
 * ubsec_feed() - aggregate and post requests to chip
 *
 * When two or more requests are queued, stack up to UBS_MAX_AGGR of
 * them behind a single MCR so the chip completes them with a single
 * interrupt; otherwise (feed1) post queued requests one at a time.
 */
static int
ubsec_feed(struct ubsec_softc *sc)
{
	struct ubsec_q *q, *q2;
	int npkts, i;
	void *v;
	u_int32_t stat;

	npkts = sc->sc_nqueue;
	if (npkts > UBS_MAX_AGGR)
		npkts = UBS_MAX_AGGR;
	if (npkts > ubsecstats.hst_maxbatch)
		ubsecstats.hst_maxbatch = npkts;
	if (npkts < 2)
		goto feed1;
	ubsecstats.hst_totbatch += npkts-1;

	if ((stat = READ_REG(sc, BS_STAT)) &
	    (BS_STAT_MCR1_FULL | BS_STAT_DMAERR)) {
		if(stat & BS_STAT_DMAERR) {
			ubsec_totalreset(sc);
			ubsecstats.hst_dmaerr++;
		}
		return (0);
	}

#ifdef UBSEC_DEBUG
	if (ubsec_debug)
		printf("merging %d records\n", npkts);
#endif /* UBSEC_DEBUG */

	q = SIMPLEQ_FIRST(&sc->sc_queue);
	SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, q, q_next);
	--sc->sc_nqueue;

	bus_dmamap_sync(sc->sc_dmat, q->q_src_map, BUS_DMASYNC_PREWRITE);
	if (q->q_dst_map != NULL)
		bus_dmamap_sync(sc->sc_dmat, q->q_dst_map, BUS_DMASYNC_PREREAD);

	q->q_nstacked_mcrs = npkts - 1;		/* Number of packets stacked */

	for (i = 0; i < q->q_nstacked_mcrs; i++) {
		q2 = SIMPLEQ_FIRST(&sc->sc_queue);
		bus_dmamap_sync(sc->sc_dmat, q2->q_src_map,
		    BUS_DMASYNC_PREWRITE);
		if (q2->q_dst_map != NULL)
			bus_dmamap_sync(sc->sc_dmat,
			    q2->q_dst_map, BUS_DMASYNC_PREREAD);
		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, q2, q_next);
		--sc->sc_nqueue;

		/*
		 * Append q2's MCR payload (everything past the common
		 * header) to the lead request's MCR "add" slot i.
		 */
		v = (void*)(((char *)&q2->q_dma->d_dma->d_mcr) +
		    sizeof(struct ubsec_mcr) - sizeof(struct ubsec_mcr_add));
		bcopy(v, &q->q_dma->d_dma->d_mcradd[i],
		    sizeof(struct ubsec_mcr_add));
		q->q_stacked_mcr[i] = q2;
	}
	q->q_dma->d_dma->d_mcr.mcr_pkts = htole16(npkts);
	SIMPLEQ_INSERT_TAIL(&sc->sc_qchip, q, q_next);
	bus_dmamap_sync(sc->sc_dmat, q->q_dma->d_alloc.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/* Hand the aggregated MCR's physical address to the chip. */
	WRITE_REG(sc, BS_MCR1, q->q_dma->d_alloc.dma_paddr +
	    offsetof(struct ubsec_dmachunk, d_mcr));
	return (0);

feed1:
	while (!SIMPLEQ_EMPTY(&sc->sc_queue)) {
		if ((stat = READ_REG(sc, BS_STAT)) &
		    (BS_STAT_MCR1_FULL | BS_STAT_DMAERR)) {
			if(stat & BS_STAT_DMAERR) {
				ubsec_totalreset(sc);
				ubsecstats.hst_dmaerr++;
			}
			break;
		}

		q = SIMPLEQ_FIRST(&sc->sc_queue);

		bus_dmamap_sync(sc->sc_dmat, q->q_src_map,
		    BUS_DMASYNC_PREWRITE);
		if (q->q_dst_map != NULL)
			bus_dmamap_sync(sc->sc_dmat, q->q_dst_map,
			    BUS_DMASYNC_PREREAD);
		bus_dmamap_sync(sc->sc_dmat, q->q_dma->d_alloc.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		WRITE_REG(sc, BS_MCR1, q->q_dma->d_alloc.dma_paddr +
		    offsetof(struct ubsec_dmachunk, d_mcr));
#ifdef UBSEC_DEBUG
		if (ubsec_debug)
			printf("feed: q->chip %p %08x stat %08x\n",
			    q, (u_int32_t)vtophys(&q->q_dma->d_dma->d_mcr),
			    stat);
#endif /* UBSEC_DEBUG */
		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, q, q_next);
		--sc->sc_nqueue;
		SIMPLEQ_INSERT_TAIL(&sc->sc_qchip, q, q_next);
	}
	return (0);
}

/*
 * Allocate a new 'session' and return an encoded session id.  'sidp'
 * contains our registration id, and should contain an encoded session
 * id on successful allocation.
*/
/*
 * Allocate a crypto session supporting at most one MAC descriptor
 * (MD5/SHA1 HMAC) and one cipher descriptor (DES/3DES CBC).  Session
 * state (3DES key words, precomputed HMAC inner/outer hash state,
 * initial IV) lives in the softc's session array, which grows on
 * demand.  On success writes the encoded session id to *sidp and
 * returns 0; otherwise returns an errno value.
 */
static int
ubsec_newsession(void *arg, u_int32_t *sidp, struct cryptoini *cri)
{
	struct cryptoini *c, *encini = NULL, *macini = NULL;
	struct ubsec_softc *sc = arg;
	struct ubsec_session *ses = NULL;
	MD5_CTX md5ctx;
	SHA1_CTX sha1ctx;
	int i, sesn;

	KASSERT(sc != NULL, ("ubsec_newsession: null softc"));
	if (sidp == NULL || cri == NULL || sc == NULL)
		return (EINVAL);

	/* Sort descriptors into at most one MAC and one cipher. */
	for (c = cri; c != NULL; c = c->cri_next) {
		if (c->cri_alg == CRYPTO_MD5_HMAC ||
		    c->cri_alg == CRYPTO_SHA1_HMAC) {
			if (macini)
				return (EINVAL);
			macini = c;
		} else if (c->cri_alg == CRYPTO_DES_CBC ||
		    c->cri_alg == CRYPTO_3DES_CBC) {
			if (encini)
				return (EINVAL);
			encini = c;
		} else
			return (EINVAL);
	}
	if (encini == NULL && macini == NULL)
		return (EINVAL);

	if (sc->sc_sessions == NULL) {
		/* First session ever: allocate a one-entry array. */
		ses = sc->sc_sessions = (struct ubsec_session *)malloc(
		    sizeof(struct ubsec_session), M_DEVBUF, M_NOWAIT);
		if (ses == NULL)
			return (ENOMEM);
		sesn = 0;
		sc->sc_nsessions = 1;
	} else {
		/* Reuse a free slot if one exists... */
		for (sesn = 0; sesn < sc->sc_nsessions; sesn++) {
			if (sc->sc_sessions[sesn].ses_used == 0) {
				ses = &sc->sc_sessions[sesn];
				break;
			}
		}

		if (ses == NULL) {
			/* ...otherwise grow the array by one entry. */
			sesn = sc->sc_nsessions;
			ses = (struct ubsec_session *)malloc((sesn + 1) *
			    sizeof(struct ubsec_session), M_DEVBUF, M_NOWAIT);
			if (ses == NULL)
				return (ENOMEM);
			bcopy(sc->sc_sessions, ses, sesn *
			    sizeof(struct ubsec_session));
			/* Zeroize the old array (holds keys) before free. */
			bzero(sc->sc_sessions, sesn *
			    sizeof(struct ubsec_session));
			free(sc->sc_sessions, M_DEVBUF);
			sc->sc_sessions = ses;
			ses = &sc->sc_sessions[sesn];
			sc->sc_nsessions++;
		}
	}

	bzero(ses, sizeof(struct ubsec_session));
	ses->ses_used = 1;
	if (encini) {
		/* get an IV, network byte order */
		/* XXX may read fewer than requested */
		read_random(ses->ses_iv, sizeof(ses->ses_iv));

		/* Go ahead and compute key in ubsec's byte order */
		if (encini->cri_alg == CRYPTO_DES_CBC) {
			/*
			 * Single DES: replicate the 8-byte key into all
			 * three 3DES key slots (word offsets 0, 2, 4),
			 * i.e. EDE with K1 == K2 == K3.
			 */
			bcopy(encini->cri_key, &ses->ses_deskey[0], 8);
			bcopy(encini->cri_key, &ses->ses_deskey[2], 8);
			bcopy(encini->cri_key, &ses->ses_deskey[4], 8);
		} else
			bcopy(encini->cri_key, ses->ses_deskey, 24);

		SWAP32(ses->ses_deskey[0]);
		SWAP32(ses->ses_deskey[1]);
		SWAP32(ses->ses_deskey[2]);
		SWAP32(ses->ses_deskey[3]);
		SWAP32(ses->ses_deskey[4]);
		SWAP32(ses->ses_deskey[5]);
	}

	if (macini) {
		/*
		 * Precompute the HMAC inner hash state: hash the
		 * ipad-XORed key padded out to the HMAC block length.
		 */
		for (i = 0; i < macini->cri_klen / 8; i++)
			macini->cri_key[i] ^= HMAC_IPAD_VAL;

		if (macini->cri_alg == CRYPTO_MD5_HMAC) {
			MD5Init(&md5ctx);
			MD5Update(&md5ctx, macini->cri_key,
			    macini->cri_klen / 8);
			MD5Update(&md5ctx, hmac_ipad_buffer,
			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
			bcopy(md5ctx.state, ses->ses_hminner,
			    sizeof(md5ctx.state));
		} else {
			SHA1Init(&sha1ctx);
			SHA1Update(&sha1ctx, macini->cri_key,
			    macini->cri_klen / 8);
			SHA1Update(&sha1ctx, hmac_ipad_buffer,
			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
			bcopy(sha1ctx.h.b32, ses->ses_hminner,
			    sizeof(sha1ctx.h.b32));
		}

		/* Flip the key from ipad-XORed to opad-XORed form. */
		for (i = 0; i < macini->cri_klen / 8; i++)
			macini->cri_key[i] ^= (HMAC_IPAD_VAL ^ HMAC_OPAD_VAL);

		/* Precompute the HMAC outer hash state likewise. */
		if (macini->cri_alg == CRYPTO_MD5_HMAC) {
			MD5Init(&md5ctx);
			MD5Update(&md5ctx, macini->cri_key,
			    macini->cri_klen / 8);
			MD5Update(&md5ctx, hmac_opad_buffer,
			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
			bcopy(md5ctx.state, ses->ses_hmouter,
			    sizeof(md5ctx.state));
		} else {
			SHA1Init(&sha1ctx);
			SHA1Update(&sha1ctx, macini->cri_key,
			    macini->cri_klen / 8);
			SHA1Update(&sha1ctx, hmac_opad_buffer,
			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
			bcopy(sha1ctx.h.b32, ses->ses_hmouter,
			    sizeof(sha1ctx.h.b32));
		}

		/* Restore the caller's key to its original value. */
		for (i = 0; i < macini->cri_klen / 8; i++)
			macini->cri_key[i] ^= HMAC_OPAD_VAL;
	}

	*sidp = UBSEC_SID(device_get_unit(sc->sc_dev), sesn);
	return (0);
}

/*
 * Deallocate a session.
*/ static int ubsec_freesession(void *arg, u_int64_t tid) { struct ubsec_softc *sc = arg; int session; u_int32_t sid = ((u_int32_t) tid) & 0xffffffff; KASSERT(sc != NULL, ("ubsec_freesession: null softc")); if (sc == NULL) return (EINVAL); session = UBSEC_SESSION(sid); if (session >= sc->sc_nsessions) return (EINVAL); bzero(&sc->sc_sessions[session], sizeof(sc->sc_sessions[session])); return (0); } static void ubsec_op_cb(void *arg, bus_dma_segment_t *seg, int nsegs, bus_size_t mapsize, int error) { struct ubsec_operand *op = arg; KASSERT(nsegs <= UBS_MAX_SCATTER, ("Too many DMA segments returned when mapping operand")); #ifdef UBSEC_DEBUG if (ubsec_debug) printf("ubsec_op_cb: mapsize %u nsegs %d\n", (u_int) mapsize, nsegs); #endif op->mapsize = mapsize; op->nsegs = nsegs; bcopy(seg, op->segs, nsegs * sizeof (seg[0])); } static int ubsec_process(void *arg, struct cryptop *crp, int hint) { struct ubsec_q *q = NULL; int err = 0, i, j, nicealign; struct ubsec_softc *sc = arg; struct cryptodesc *crd1, *crd2, *maccrd, *enccrd; int encoffset = 0, macoffset = 0, cpskip, cpoffset; int sskip, dskip, stheend, dtheend; int16_t coffset; struct ubsec_session *ses; struct ubsec_pktctx ctx; struct ubsec_dma *dmap = NULL; if (crp == NULL || crp->crp_callback == NULL || sc == NULL) { ubsecstats.hst_invalid++; return (EINVAL); } if (UBSEC_SESSION(crp->crp_sid) >= sc->sc_nsessions) { ubsecstats.hst_invalid++; return (EINVAL); } UBSEC_LOCK(sc); if (SIMPLEQ_EMPTY(&sc->sc_freequeue)) { ubsecstats.hst_queuefull++; sc->sc_needwakeup |= CRYPTO_SYMQ; UBSEC_UNLOCK(sc); return (ERESTART); } q = SIMPLEQ_FIRST(&sc->sc_freequeue); SIMPLEQ_REMOVE_HEAD(&sc->sc_freequeue, q, q_next); UBSEC_UNLOCK(sc); dmap = q->q_dma; /* Save dma pointer */ bzero(q, sizeof(struct ubsec_q)); bzero(&ctx, sizeof(ctx)); q->q_sesn = UBSEC_SESSION(crp->crp_sid); q->q_dma = dmap; ses = &sc->sc_sessions[q->q_sesn]; if (crp->crp_flags & CRYPTO_F_IMBUF) { q->q_src_m = (struct mbuf *)crp->crp_buf; q->q_dst_m = (struct mbuf 
*)crp->crp_buf; } else if (crp->crp_flags & CRYPTO_F_IOV) { q->q_src_io = (struct uio *)crp->crp_buf; q->q_dst_io = (struct uio *)crp->crp_buf; } else { ubsecstats.hst_invalid++; err = EINVAL; goto errout; /* XXX we don't handle contiguous blocks! */ } bzero(&dmap->d_dma->d_mcr, sizeof(struct ubsec_mcr)); dmap->d_dma->d_mcr.mcr_pkts = htole16(1); dmap->d_dma->d_mcr.mcr_flags = 0; q->q_crp = crp; crd1 = crp->crp_desc; if (crd1 == NULL) { ubsecstats.hst_invalid++; err = EINVAL; goto errout; } crd2 = crd1->crd_next; if (crd2 == NULL) { if (crd1->crd_alg == CRYPTO_MD5_HMAC || crd1->crd_alg == CRYPTO_SHA1_HMAC) { maccrd = crd1; enccrd = NULL; } else if (crd1->crd_alg == CRYPTO_DES_CBC || crd1->crd_alg == CRYPTO_3DES_CBC) { maccrd = NULL; enccrd = crd1; } else { ubsecstats.hst_invalid++; err = EINVAL; goto errout; } } else { if ((crd1->crd_alg == CRYPTO_MD5_HMAC || crd1->crd_alg == CRYPTO_SHA1_HMAC) && (crd2->crd_alg == CRYPTO_DES_CBC || crd2->crd_alg == CRYPTO_3DES_CBC) && ((crd2->crd_flags & CRD_F_ENCRYPT) == 0)) { maccrd = crd1; enccrd = crd2; } else if ((crd1->crd_alg == CRYPTO_DES_CBC || crd1->crd_alg == CRYPTO_3DES_CBC) && (crd2->crd_alg == CRYPTO_MD5_HMAC || crd2->crd_alg == CRYPTO_SHA1_HMAC) && (crd1->crd_flags & CRD_F_ENCRYPT)) { enccrd = crd1; maccrd = crd2; } else { /* * We cannot order the ubsec as requested */ ubsecstats.hst_invalid++; err = EINVAL; goto errout; } } if (enccrd) { encoffset = enccrd->crd_skip; ctx.pc_flags |= htole16(UBS_PKTCTX_ENC_3DES); if (enccrd->crd_flags & CRD_F_ENCRYPT) { q->q_flags |= UBSEC_QFLAGS_COPYOUTIV; if (enccrd->crd_flags & CRD_F_IV_EXPLICIT) bcopy(enccrd->crd_iv, ctx.pc_iv, 8); else { ctx.pc_iv[0] = ses->ses_iv[0]; ctx.pc_iv[1] = ses->ses_iv[1]; } if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) { if (crp->crp_flags & CRYPTO_F_IMBUF) m_copyback(q->q_src_m, enccrd->crd_inject, 8, (caddr_t)ctx.pc_iv); else if (crp->crp_flags & CRYPTO_F_IOV) cuio_copyback(q->q_src_io, enccrd->crd_inject, 8, (caddr_t)ctx.pc_iv); } } else { 
ctx.pc_flags |= htole16(UBS_PKTCTX_INBOUND); if (enccrd->crd_flags & CRD_F_IV_EXPLICIT) bcopy(enccrd->crd_iv, ctx.pc_iv, 8); else if (crp->crp_flags & CRYPTO_F_IMBUF) m_copydata(q->q_src_m, enccrd->crd_inject, 8, (caddr_t)ctx.pc_iv); else if (crp->crp_flags & CRYPTO_F_IOV) cuio_copydata(q->q_src_io, enccrd->crd_inject, 8, (caddr_t)ctx.pc_iv); } ctx.pc_deskey[0] = ses->ses_deskey[0]; ctx.pc_deskey[1] = ses->ses_deskey[1]; ctx.pc_deskey[2] = ses->ses_deskey[2]; ctx.pc_deskey[3] = ses->ses_deskey[3]; ctx.pc_deskey[4] = ses->ses_deskey[4]; ctx.pc_deskey[5] = ses->ses_deskey[5]; SWAP32(ctx.pc_iv[0]); SWAP32(ctx.pc_iv[1]); } if (maccrd) { macoffset = maccrd->crd_skip; if (maccrd->crd_alg == CRYPTO_MD5_HMAC) ctx.pc_flags |= htole16(UBS_PKTCTX_AUTH_MD5); else ctx.pc_flags |= htole16(UBS_PKTCTX_AUTH_SHA1); for (i = 0; i < 5; i++) { ctx.pc_hminner[i] = ses->ses_hminner[i]; ctx.pc_hmouter[i] = ses->ses_hmouter[i]; HTOLE32(ctx.pc_hminner[i]); HTOLE32(ctx.pc_hmouter[i]); } } if (enccrd && maccrd) { /* * ubsec cannot handle packets where the end of encryption * and authentication are not the same, or where the * encrypted part begins before the authenticated part. 
*/ if ((encoffset + enccrd->crd_len) != (macoffset + maccrd->crd_len)) { ubsecstats.hst_lenmismatch++; err = EINVAL; goto errout; } if (enccrd->crd_skip < maccrd->crd_skip) { ubsecstats.hst_skipmismatch++; err = EINVAL; goto errout; } sskip = maccrd->crd_skip; cpskip = dskip = enccrd->crd_skip; stheend = maccrd->crd_len; dtheend = enccrd->crd_len; coffset = enccrd->crd_skip - maccrd->crd_skip; cpoffset = cpskip + dtheend; #ifdef UBSEC_DEBUG if (ubsec_debug) { printf("mac: skip %d, len %d, inject %d\n", maccrd->crd_skip, maccrd->crd_len, maccrd->crd_inject); printf("enc: skip %d, len %d, inject %d\n", enccrd->crd_skip, enccrd->crd_len, enccrd->crd_inject); printf("src: skip %d, len %d\n", sskip, stheend); printf("dst: skip %d, len %d\n", dskip, dtheend); printf("ubs: coffset %d, pktlen %d, cpskip %d, cpoffset %d\n", coffset, stheend, cpskip, cpoffset); } #endif } else { cpskip = dskip = sskip = macoffset + encoffset; dtheend = stheend = (enccrd)?enccrd->crd_len:maccrd->crd_len; cpoffset = cpskip + dtheend; coffset = 0; } ctx.pc_offset = htole16(coffset >> 2); if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &q->q_src_map)) { ubsecstats.hst_nomap++; err = ENOMEM; goto errout; } if (crp->crp_flags & CRYPTO_F_IMBUF) { if (bus_dmamap_load_mbuf(sc->sc_dmat, q->q_src_map, q->q_src_m, ubsec_op_cb, &q->q_src, BUS_DMA_NOWAIT) != 0) { bus_dmamap_destroy(sc->sc_dmat, q->q_src_map); q->q_src_map = NULL; ubsecstats.hst_noload++; err = ENOMEM; goto errout; } } else if (crp->crp_flags & CRYPTO_F_IOV) { if (bus_dmamap_load_uio(sc->sc_dmat, q->q_src_map, q->q_src_io, ubsec_op_cb, &q->q_src, BUS_DMA_NOWAIT) != 0) { bus_dmamap_destroy(sc->sc_dmat, q->q_src_map); q->q_src_map = NULL; ubsecstats.hst_noload++; err = ENOMEM; goto errout; } } nicealign = ubsec_dmamap_aligned(&q->q_src); dmap->d_dma->d_mcr.mcr_pktlen = htole16(stheend); #ifdef UBSEC_DEBUG if (ubsec_debug) printf("src skip: %d nicealign: %u\n", sskip, nicealign); #endif for (i = j = 0; i < q->q_src_nsegs; i++) { struct 
ubsec_pktbuf *pb; bus_size_t packl = q->q_src_segs[i].ds_len; bus_addr_t packp = q->q_src_segs[i].ds_addr; if (sskip >= packl) { sskip -= packl; continue; } packl -= sskip; packp += sskip; sskip = 0; if (packl > 0xfffc) { err = EIO; goto errout; } if (j == 0) pb = &dmap->d_dma->d_mcr.mcr_ipktbuf; else pb = &dmap->d_dma->d_sbuf[j - 1]; pb->pb_addr = htole32(packp); if (stheend) { if (packl > stheend) { pb->pb_len = htole32(stheend); stheend = 0; } else { pb->pb_len = htole32(packl); stheend -= packl; } } else pb->pb_len = htole32(packl); if ((i + 1) == q->q_src_nsegs) pb->pb_next = 0; else pb->pb_next = htole32(dmap->d_alloc.dma_paddr + offsetof(struct ubsec_dmachunk, d_sbuf[j])); j++; } if (enccrd == NULL && maccrd != NULL) { dmap->d_dma->d_mcr.mcr_opktbuf.pb_addr = 0; dmap->d_dma->d_mcr.mcr_opktbuf.pb_len = 0; dmap->d_dma->d_mcr.mcr_opktbuf.pb_next = htole32(dmap->d_alloc.dma_paddr + offsetof(struct ubsec_dmachunk, d_macbuf[0])); #ifdef UBSEC_DEBUG if (ubsec_debug) printf("opkt: %x %x %x\n", dmap->d_dma->d_mcr.mcr_opktbuf.pb_addr, dmap->d_dma->d_mcr.mcr_opktbuf.pb_len, dmap->d_dma->d_mcr.mcr_opktbuf.pb_next); #endif } else { if (crp->crp_flags & CRYPTO_F_IOV) { if (!nicealign) { ubsecstats.hst_iovmisaligned++; err = EINVAL; goto errout; } if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &q->q_dst_map)) { ubsecstats.hst_nomap++; err = ENOMEM; goto errout; } if (bus_dmamap_load_uio(sc->sc_dmat, q->q_dst_map, q->q_dst_io, ubsec_op_cb, &q->q_dst, BUS_DMA_NOWAIT) != 0) { bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map); q->q_dst_map = NULL; ubsecstats.hst_noload++; err = ENOMEM; goto errout; } } else if (crp->crp_flags & CRYPTO_F_IMBUF) { if (nicealign) { q->q_dst = q->q_src; } else { int totlen, len; struct mbuf *m, *top, **mp; ubsecstats.hst_unaligned++; totlen = q->q_src_mapsize; if (q->q_src_m->m_flags & M_PKTHDR) { len = MHLEN; MGETHDR(m, M_DONTWAIT, MT_DATA); + if (m && !m_dup_pkthdr(m, q->q_src_m, M_DONTWAIT)) { + m_free(m); + m = NULL; + } } else { len = MLEN; 
MGET(m, M_DONTWAIT, MT_DATA); } if (m == NULL) { ubsecstats.hst_nombuf++; err = sc->sc_nqueue ? ERESTART : ENOMEM; goto errout; } - if (len == MHLEN) - M_COPY_PKTHDR(m, q->q_src_m); if (totlen >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); ubsecstats.hst_nomcl++; err = sc->sc_nqueue ? ERESTART : ENOMEM; goto errout; } len = MCLBYTES; } m->m_len = len; top = NULL; mp = ⊤ while (totlen > 0) { if (top) { MGET(m, M_DONTWAIT, MT_DATA); if (m == NULL) { m_freem(top); ubsecstats.hst_nombuf++; err = sc->sc_nqueue ? ERESTART : ENOMEM; goto errout; } len = MLEN; } if (top && totlen >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { *mp = m; m_freem(top); ubsecstats.hst_nomcl++; err = sc->sc_nqueue ? ERESTART : ENOMEM; goto errout; } len = MCLBYTES; } m->m_len = len = min(totlen, len); totlen -= len; *mp = m; mp = &m->m_next; } q->q_dst_m = top; ubsec_mcopy(q->q_src_m, q->q_dst_m, cpskip, cpoffset); if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &q->q_dst_map) != 0) { ubsecstats.hst_nomap++; err = ENOMEM; goto errout; } if (bus_dmamap_load_mbuf(sc->sc_dmat, q->q_dst_map, q->q_dst_m, ubsec_op_cb, &q->q_dst, BUS_DMA_NOWAIT) != 0) { bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map); q->q_dst_map = NULL; ubsecstats.hst_noload++; err = ENOMEM; goto errout; } } } else { ubsecstats.hst_invalid++; err = EINVAL; goto errout; } #ifdef UBSEC_DEBUG if (ubsec_debug) printf("dst skip: %d\n", dskip); #endif for (i = j = 0; i < q->q_dst_nsegs; i++) { struct ubsec_pktbuf *pb; bus_size_t packl = q->q_dst_segs[i].ds_len; bus_addr_t packp = q->q_dst_segs[i].ds_addr; if (dskip >= packl) { dskip -= packl; continue; } packl -= dskip; packp += dskip; dskip = 0; if (packl > 0xfffc) { err = EIO; goto errout; } if (j == 0) pb = &dmap->d_dma->d_mcr.mcr_opktbuf; else pb = &dmap->d_dma->d_dbuf[j - 1]; pb->pb_addr = htole32(packp); if (dtheend) { if (packl > dtheend) { pb->pb_len = htole32(dtheend); dtheend = 0; } else { pb->pb_len = htole32(packl); 
dtheend -= packl; } } else pb->pb_len = htole32(packl); if ((i + 1) == q->q_dst_nsegs) { if (maccrd) pb->pb_next = htole32(dmap->d_alloc.dma_paddr + offsetof(struct ubsec_dmachunk, d_macbuf[0])); else pb->pb_next = 0; } else pb->pb_next = htole32(dmap->d_alloc.dma_paddr + offsetof(struct ubsec_dmachunk, d_dbuf[j])); j++; } } dmap->d_dma->d_mcr.mcr_cmdctxp = htole32(dmap->d_alloc.dma_paddr + offsetof(struct ubsec_dmachunk, d_ctx)); if (sc->sc_flags & UBS_FLAGS_LONGCTX) { struct ubsec_pktctx_long *ctxl; ctxl = (struct ubsec_pktctx_long *)(dmap->d_alloc.dma_vaddr + offsetof(struct ubsec_dmachunk, d_ctx)); /* transform small context into long context */ ctxl->pc_len = htole16(sizeof(struct ubsec_pktctx_long)); ctxl->pc_type = htole16(UBS_PKTCTX_TYPE_IPSEC); ctxl->pc_flags = ctx.pc_flags; ctxl->pc_offset = ctx.pc_offset; for (i = 0; i < 6; i++) ctxl->pc_deskey[i] = ctx.pc_deskey[i]; for (i = 0; i < 5; i++) ctxl->pc_hminner[i] = ctx.pc_hminner[i]; for (i = 0; i < 5; i++) ctxl->pc_hmouter[i] = ctx.pc_hmouter[i]; ctxl->pc_iv[0] = ctx.pc_iv[0]; ctxl->pc_iv[1] = ctx.pc_iv[1]; } else bcopy(&ctx, dmap->d_alloc.dma_vaddr + offsetof(struct ubsec_dmachunk, d_ctx), sizeof(struct ubsec_pktctx)); UBSEC_LOCK(sc); SIMPLEQ_INSERT_TAIL(&sc->sc_queue, q, q_next); sc->sc_nqueue++; ubsecstats.hst_ipackets++; ubsecstats.hst_ibytes += dmap->d_alloc.dma_size; if ((hint & CRYPTO_HINT_MORE) == 0 || sc->sc_nqueue >= ubsec_maxbatch) ubsec_feed(sc); UBSEC_UNLOCK(sc); return (0); errout: if (q != NULL) { if ((q->q_dst_m != NULL) && (q->q_src_m != q->q_dst_m)) m_freem(q->q_dst_m); if (q->q_dst_map != NULL && q->q_dst_map != q->q_src_map) { bus_dmamap_unload(sc->sc_dmat, q->q_dst_map); bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map); } if (q->q_src_map != NULL) { bus_dmamap_unload(sc->sc_dmat, q->q_src_map); bus_dmamap_destroy(sc->sc_dmat, q->q_src_map); } UBSEC_LOCK(sc); SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next); UBSEC_UNLOCK(sc); } if (err != ERESTART) { crp->crp_etype = err; 
crypto_done(crp); } else { sc->sc_needwakeup |= CRYPTO_SYMQ; } return (err); } static void ubsec_callback(struct ubsec_softc *sc, struct ubsec_q *q) { struct cryptop *crp = (struct cryptop *)q->q_crp; struct cryptodesc *crd; struct ubsec_dma *dmap = q->q_dma; bus_dmamap_sync(sc->sc_dmat, dmap->d_alloc.dma_map, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); if (q->q_dst_map != NULL && q->q_dst_map != q->q_src_map) { bus_dmamap_sync(sc->sc_dmat, q->q_dst_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_dmat, q->q_dst_map); bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map); } bus_dmamap_sync(sc->sc_dmat, q->q_src_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, q->q_src_map); bus_dmamap_destroy(sc->sc_dmat, q->q_src_map); if ((crp->crp_flags & CRYPTO_F_IMBUF) && (q->q_src_m != q->q_dst_m)) { m_freem(q->q_src_m); crp->crp_buf = (caddr_t)q->q_dst_m; } ubsecstats.hst_obytes += ((struct mbuf *)crp->crp_buf)->m_len; /* copy out IV for future use */ if (q->q_flags & UBSEC_QFLAGS_COPYOUTIV) { for (crd = crp->crp_desc; crd; crd = crd->crd_next) { if (crd->crd_alg != CRYPTO_DES_CBC && crd->crd_alg != CRYPTO_3DES_CBC) continue; if (crp->crp_flags & CRYPTO_F_IMBUF) m_copydata((struct mbuf *)crp->crp_buf, crd->crd_skip + crd->crd_len - 8, 8, (caddr_t)sc->sc_sessions[q->q_sesn].ses_iv); else if (crp->crp_flags & CRYPTO_F_IOV) { cuio_copydata((struct uio *)crp->crp_buf, crd->crd_skip + crd->crd_len - 8, 8, (caddr_t)sc->sc_sessions[q->q_sesn].ses_iv); } break; } } for (crd = crp->crp_desc; crd; crd = crd->crd_next) { if (crd->crd_alg != CRYPTO_MD5_HMAC && crd->crd_alg != CRYPTO_SHA1_HMAC) continue; if (crp->crp_flags & CRYPTO_F_IMBUF) m_copyback((struct mbuf *)crp->crp_buf, crd->crd_inject, 12, (caddr_t)dmap->d_dma->d_macbuf); else if (crp->crp_flags & CRYPTO_F_IOV && crp->crp_mac) bcopy((caddr_t)dmap->d_dma->d_macbuf, crp->crp_mac, 12); break; } SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next); crypto_done(crp); } static void ubsec_mcopy(struct mbuf *srcm, struct mbuf 
*dstm, int hoffset, int toffset) { int i, j, dlen, slen; caddr_t dptr, sptr; j = 0; sptr = srcm->m_data; slen = srcm->m_len; dptr = dstm->m_data; dlen = dstm->m_len; while (1) { for (i = 0; i < min(slen, dlen); i++) { if (j < hoffset || j >= toffset) *dptr++ = *sptr++; slen--; dlen--; j++; } if (slen == 0) { srcm = srcm->m_next; if (srcm == NULL) return; sptr = srcm->m_data; slen = srcm->m_len; } if (dlen == 0) { dstm = dstm->m_next; if (dstm == NULL) return; dptr = dstm->m_data; dlen = dstm->m_len; } } } /* * feed the key generator, must be called at splimp() or higher. */ static int ubsec_feed2(struct ubsec_softc *sc) { struct ubsec_q2 *q; while (!SIMPLEQ_EMPTY(&sc->sc_queue2)) { if (READ_REG(sc, BS_STAT) & BS_STAT_MCR2_FULL) break; q = SIMPLEQ_FIRST(&sc->sc_queue2); bus_dmamap_sync(sc->sc_dmat, q->q_mcr.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, q->q_ctx.dma_map, BUS_DMASYNC_PREWRITE); WRITE_REG(sc, BS_MCR2, q->q_mcr.dma_paddr); SIMPLEQ_REMOVE_HEAD(&sc->sc_queue2, q, q_next); --sc->sc_nqueue2; SIMPLEQ_INSERT_TAIL(&sc->sc_qchip2, q, q_next); } return (0); } /* * Callback for handling random numbers */ static void ubsec_callback2(struct ubsec_softc *sc, struct ubsec_q2 *q) { struct cryptkop *krp; struct ubsec_ctx_keyop *ctx; ctx = (struct ubsec_ctx_keyop *)q->q_ctx.dma_vaddr; bus_dmamap_sync(sc->sc_dmat, q->q_ctx.dma_map, BUS_DMASYNC_POSTWRITE); switch (q->q_type) { #ifndef UBSEC_NO_RNG case UBS_CTXOP_RNGBYPASS: { struct ubsec_q2_rng *rng = (struct ubsec_q2_rng *)q; bus_dmamap_sync(sc->sc_dmat, rng->rng_buf.dma_map, BUS_DMASYNC_POSTREAD); random_harvest(rng->rng_buf.dma_vaddr, UBSEC_RNG_BUFSIZ*sizeof (u_int32_t), UBSEC_RNG_BUFSIZ*sizeof (u_int32_t)*NBBY, 0, RANDOM_PURE); rng->rng_used = 0; callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc); break; } #endif case UBS_CTXOP_MODEXP: { struct ubsec_q2_modexp *me = (struct ubsec_q2_modexp *)q; u_int rlen, clen; krp = me->me_krp; rlen = (me->me_modbits + 7) / 8; clen = 
(krp->krp_param[krp->krp_iparams].crp_nbits + 7) / 8; bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map, BUS_DMASYNC_POSTWRITE); if (clen < rlen) krp->krp_status = E2BIG; else { if (sc->sc_flags & UBS_FLAGS_HWNORM) { bzero(krp->krp_param[krp->krp_iparams].crp_p, (krp->krp_param[krp->krp_iparams].crp_nbits + 7) / 8); bcopy(me->me_C.dma_vaddr, krp->krp_param[krp->krp_iparams].crp_p, (me->me_modbits + 7) / 8); } else ubsec_kshift_l(me->me_shiftbits, me->me_C.dma_vaddr, me->me_normbits, krp->krp_param[krp->krp_iparams].crp_p, krp->krp_param[krp->krp_iparams].crp_nbits); } crypto_kdone(krp); /* bzero all potentially sensitive data */ bzero(me->me_E.dma_vaddr, me->me_E.dma_size); bzero(me->me_M.dma_vaddr, me->me_M.dma_size); bzero(me->me_C.dma_vaddr, me->me_C.dma_size); bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size); /* Can't free here, so put us on the free list. */ SIMPLEQ_INSERT_TAIL(&sc->sc_q2free, &me->me_q, q_next); break; } case UBS_CTXOP_RSAPRIV: { struct ubsec_q2_rsapriv *rp = (struct ubsec_q2_rsapriv *)q; u_int len; krp = rp->rpr_krp; bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgin.dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgout.dma_map, BUS_DMASYNC_POSTREAD); len = (krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT].crp_nbits + 7) / 8; bcopy(rp->rpr_msgout.dma_vaddr, krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT].crp_p, len); crypto_kdone(krp); bzero(rp->rpr_msgin.dma_vaddr, rp->rpr_msgin.dma_size); bzero(rp->rpr_msgout.dma_vaddr, rp->rpr_msgout.dma_size); bzero(rp->rpr_q.q_ctx.dma_vaddr, rp->rpr_q.q_ctx.dma_size); /* Can't free here, so put us on the free list. 
*/ SIMPLEQ_INSERT_TAIL(&sc->sc_q2free, &rp->rpr_q, q_next); break; } default: device_printf(sc->sc_dev, "unknown ctx op: %x\n", letoh16(ctx->ctx_op)); break; } } #ifndef UBSEC_NO_RNG static void ubsec_rng(void *vsc) { struct ubsec_softc *sc = vsc; struct ubsec_q2_rng *rng = &sc->sc_rng; struct ubsec_mcr *mcr; struct ubsec_ctx_rngbypass *ctx; UBSEC_LOCK(sc); if (rng->rng_used) { UBSEC_UNLOCK(sc); return; } sc->sc_nqueue2++; if (sc->sc_nqueue2 >= UBS_MAX_NQUEUE) goto out; mcr = (struct ubsec_mcr *)rng->rng_q.q_mcr.dma_vaddr; ctx = (struct ubsec_ctx_rngbypass *)rng->rng_q.q_ctx.dma_vaddr; mcr->mcr_pkts = htole16(1); mcr->mcr_flags = 0; mcr->mcr_cmdctxp = htole32(rng->rng_q.q_ctx.dma_paddr); mcr->mcr_ipktbuf.pb_addr = mcr->mcr_ipktbuf.pb_next = 0; mcr->mcr_ipktbuf.pb_len = 0; mcr->mcr_reserved = mcr->mcr_pktlen = 0; mcr->mcr_opktbuf.pb_addr = htole32(rng->rng_buf.dma_paddr); mcr->mcr_opktbuf.pb_len = htole32(((sizeof(u_int32_t) * UBSEC_RNG_BUFSIZ)) & UBS_PKTBUF_LEN); mcr->mcr_opktbuf.pb_next = 0; ctx->rbp_len = htole16(sizeof(struct ubsec_ctx_rngbypass)); ctx->rbp_op = htole16(UBS_CTXOP_RNGBYPASS); rng->rng_q.q_type = UBS_CTXOP_RNGBYPASS; bus_dmamap_sync(sc->sc_dmat, rng->rng_buf.dma_map, BUS_DMASYNC_PREREAD); SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &rng->rng_q, q_next); rng->rng_used = 1; ubsec_feed2(sc); ubsecstats.hst_rng++; UBSEC_UNLOCK(sc); return; out: /* * Something weird happened, generate our own call back. 
*/ sc->sc_nqueue2--; UBSEC_UNLOCK(sc); callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc); } #endif /* UBSEC_NO_RNG */ static void ubsec_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *paddr = (bus_addr_t*) arg; *paddr = segs->ds_addr; } static int ubsec_dma_malloc( struct ubsec_softc *sc, bus_size_t size, struct ubsec_dma_alloc *dma, int mapflags ) { int r; r = bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &dma->dma_map); if (r != 0) goto fail_0; r = bus_dmamem_alloc(sc->sc_dmat, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT, &dma->dma_map); if (r != 0) goto fail_1; r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr, size, ubsec_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (r != 0) goto fail_2; dma->dma_size = size; return (0); fail_2: bus_dmamap_unload(sc->sc_dmat, dma->dma_map); fail_1: bus_dmamem_free(sc->sc_dmat, dma->dma_vaddr, dma->dma_map); fail_0: bus_dmamap_destroy(sc->sc_dmat, dma->dma_map); dma->dma_map = NULL; return (r); } static void ubsec_dma_free(struct ubsec_softc *sc, struct ubsec_dma_alloc *dma) { bus_dmamap_unload(sc->sc_dmat, dma->dma_map); bus_dmamem_free(sc->sc_dmat, dma->dma_vaddr, dma->dma_map); bus_dmamap_destroy(sc->sc_dmat, dma->dma_map); } /* * Resets the board. Values in the regesters are left as is * from the reset (i.e. initial values are assigned elsewhere). */ static void ubsec_reset_board(struct ubsec_softc *sc) { volatile u_int32_t ctrl; ctrl = READ_REG(sc, BS_CTRL); ctrl |= BS_CTRL_RESET; WRITE_REG(sc, BS_CTRL, ctrl); /* * Wait aprox. 
30 PCI clocks = 900 ns = 0.9 us */ DELAY(10); } /* * Init Broadcom registers */ static void ubsec_init_board(struct ubsec_softc *sc) { u_int32_t ctrl; ctrl = READ_REG(sc, BS_CTRL); ctrl &= ~(BS_CTRL_BE32 | BS_CTRL_BE64); ctrl |= BS_CTRL_LITTLE_ENDIAN | BS_CTRL_MCR1INT; if (sc->sc_flags & (UBS_FLAGS_KEY|UBS_FLAGS_RNG)) ctrl |= BS_CTRL_MCR2INT; else ctrl &= ~BS_CTRL_MCR2INT; if (sc->sc_flags & UBS_FLAGS_HWNORM) ctrl &= ~BS_CTRL_SWNORM; WRITE_REG(sc, BS_CTRL, ctrl); } /* * Init Broadcom PCI registers */ static void ubsec_init_pciregs(device_t dev) { #if 0 u_int32_t misc; misc = pci_conf_read(pc, pa->pa_tag, BS_RTY_TOUT); misc = (misc & ~(UBS_PCI_RTY_MASK << UBS_PCI_RTY_SHIFT)) | ((UBS_DEF_RTY & 0xff) << UBS_PCI_RTY_SHIFT); misc = (misc & ~(UBS_PCI_TOUT_MASK << UBS_PCI_TOUT_SHIFT)) | ((UBS_DEF_TOUT & 0xff) << UBS_PCI_TOUT_SHIFT); pci_conf_write(pc, pa->pa_tag, BS_RTY_TOUT, misc); #endif /* * This will set the cache line size to 1, this will * force the BCM58xx chip just to do burst read/writes. * Cache line read/writes are to slow */ pci_write_config(dev, PCIR_CACHELNSZ, UBS_DEF_CACHELINE, 1); } /* * Clean up after a chip crash. 
* It is assumed that the caller in splimp() */ static void ubsec_cleanchip(struct ubsec_softc *sc) { struct ubsec_q *q; while (!SIMPLEQ_EMPTY(&sc->sc_qchip)) { q = SIMPLEQ_FIRST(&sc->sc_qchip); SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip, q, q_next); ubsec_free_q(sc, q); } } /* * free a ubsec_q * It is assumed that the caller is within spimp() */ static int ubsec_free_q(struct ubsec_softc *sc, struct ubsec_q *q) { struct ubsec_q *q2; struct cryptop *crp; int npkts; int i; npkts = q->q_nstacked_mcrs; for (i = 0; i < npkts; i++) { if(q->q_stacked_mcr[i]) { q2 = q->q_stacked_mcr[i]; if ((q2->q_dst_m != NULL) && (q2->q_src_m != q2->q_dst_m)) m_freem(q2->q_dst_m); crp = (struct cryptop *)q2->q_crp; SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q2, q_next); crp->crp_etype = EFAULT; crypto_done(crp); } else { break; } } /* * Free header MCR */ if ((q->q_dst_m != NULL) && (q->q_src_m != q->q_dst_m)) m_freem(q->q_dst_m); crp = (struct cryptop *)q->q_crp; SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next); crp->crp_etype = EFAULT; crypto_done(crp); return(0); } /* * Routine to reset the chip and clean up. 
* It is assumed that the caller is in splimp() */ static void ubsec_totalreset(struct ubsec_softc *sc) { ubsec_reset_board(sc); ubsec_init_board(sc); ubsec_cleanchip(sc); } static int ubsec_dmamap_aligned(struct ubsec_operand *op) { int i; for (i = 0; i < op->nsegs; i++) { if (op->segs[i].ds_addr & 3) return (0); if ((i != (op->nsegs - 1)) && (op->segs[i].ds_len & 3)) return (0); } return (1); } static void ubsec_kfree(struct ubsec_softc *sc, struct ubsec_q2 *q) { switch (q->q_type) { case UBS_CTXOP_MODEXP: { struct ubsec_q2_modexp *me = (struct ubsec_q2_modexp *)q; ubsec_dma_free(sc, &me->me_q.q_mcr); ubsec_dma_free(sc, &me->me_q.q_ctx); ubsec_dma_free(sc, &me->me_M); ubsec_dma_free(sc, &me->me_E); ubsec_dma_free(sc, &me->me_C); ubsec_dma_free(sc, &me->me_epb); free(me, M_DEVBUF); break; } case UBS_CTXOP_RSAPRIV: { struct ubsec_q2_rsapriv *rp = (struct ubsec_q2_rsapriv *)q; ubsec_dma_free(sc, &rp->rpr_q.q_mcr); ubsec_dma_free(sc, &rp->rpr_q.q_ctx); ubsec_dma_free(sc, &rp->rpr_msgin); ubsec_dma_free(sc, &rp->rpr_msgout); free(rp, M_DEVBUF); break; } default: device_printf(sc->sc_dev, "invalid kfree 0x%x\n", q->q_type); break; } } static int ubsec_kprocess(void *arg, struct cryptkop *krp, int hint) { struct ubsec_softc *sc = arg; int r; if (krp == NULL || krp->krp_callback == NULL) return (EINVAL); while (!SIMPLEQ_EMPTY(&sc->sc_q2free)) { struct ubsec_q2 *q; q = SIMPLEQ_FIRST(&sc->sc_q2free); SIMPLEQ_REMOVE_HEAD(&sc->sc_q2free, q, q_next); ubsec_kfree(sc, q); } switch (krp->krp_op) { case CRK_MOD_EXP: if (sc->sc_flags & UBS_FLAGS_HWNORM) r = ubsec_kprocess_modexp_hw(sc, krp, hint); else r = ubsec_kprocess_modexp_sw(sc, krp, hint); break; case CRK_MOD_EXP_CRT: return (ubsec_kprocess_rsapriv(sc, krp, hint)); default: device_printf(sc->sc_dev, "kprocess: invalid op 0x%x\n", krp->krp_op); krp->krp_status = EOPNOTSUPP; crypto_kdone(krp); return (0); } return (0); /* silence compiler */ } /* * Start computation of cr[C] = (cr[M] ^ cr[E]) mod cr[N] (sw normalization) */ 
static int ubsec_kprocess_modexp_sw(struct ubsec_softc *sc, struct cryptkop *krp, int hint) { struct ubsec_q2_modexp *me; struct ubsec_mcr *mcr; struct ubsec_ctx_modexp *ctx; struct ubsec_pktbuf *epb; int err = 0; u_int nbits, normbits, mbits, shiftbits, ebits; me = (struct ubsec_q2_modexp *)malloc(sizeof *me, M_DEVBUF, M_NOWAIT); if (me == NULL) { err = ENOMEM; goto errout; } bzero(me, sizeof *me); me->me_krp = krp; me->me_q.q_type = UBS_CTXOP_MODEXP; nbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_N]); if (nbits <= 512) normbits = 512; else if (nbits <= 768) normbits = 768; else if (nbits <= 1024) normbits = 1024; else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 1536) normbits = 1536; else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 2048) normbits = 2048; else { err = E2BIG; goto errout; } shiftbits = normbits - nbits; me->me_modbits = nbits; me->me_shiftbits = shiftbits; me->me_normbits = normbits; /* Sanity check: result bits must be >= true modulus bits. */ if (krp->krp_param[krp->krp_iparams].crp_nbits < nbits) { err = ERANGE; goto errout; } if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr), &me->me_q.q_mcr, 0)) { err = ENOMEM; goto errout; } mcr = (struct ubsec_mcr *)me->me_q.q_mcr.dma_vaddr; if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_modexp), &me->me_q.q_ctx, 0)) { err = ENOMEM; goto errout; } mbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_M]); if (mbits > nbits) { err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, normbits / 8, &me->me_M, 0)) { err = ENOMEM; goto errout; } ubsec_kshift_r(shiftbits, krp->krp_param[UBS_MODEXP_PAR_M].crp_p, mbits, me->me_M.dma_vaddr, normbits); if (ubsec_dma_malloc(sc, normbits / 8, &me->me_C, 0)) { err = ENOMEM; goto errout; } bzero(me->me_C.dma_vaddr, me->me_C.dma_size); ebits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_E]); if (ebits > nbits) { err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, normbits / 8, &me->me_E, 0)) { err = ENOMEM; goto errout; } ubsec_kshift_r(shiftbits, 
krp->krp_param[UBS_MODEXP_PAR_E].crp_p, ebits, me->me_E.dma_vaddr, normbits); if (ubsec_dma_malloc(sc, sizeof(struct ubsec_pktbuf), &me->me_epb, 0)) { err = ENOMEM; goto errout; } epb = (struct ubsec_pktbuf *)me->me_epb.dma_vaddr; epb->pb_addr = htole32(me->me_E.dma_paddr); epb->pb_next = 0; epb->pb_len = htole32(normbits / 8); #ifdef UBSEC_DEBUG if (ubsec_debug) { printf("Epb "); ubsec_dump_pb(epb); } #endif mcr->mcr_pkts = htole16(1); mcr->mcr_flags = 0; mcr->mcr_cmdctxp = htole32(me->me_q.q_ctx.dma_paddr); mcr->mcr_reserved = 0; mcr->mcr_pktlen = 0; mcr->mcr_ipktbuf.pb_addr = htole32(me->me_M.dma_paddr); mcr->mcr_ipktbuf.pb_len = htole32(normbits / 8); mcr->mcr_ipktbuf.pb_next = htole32(me->me_epb.dma_paddr); mcr->mcr_opktbuf.pb_addr = htole32(me->me_C.dma_paddr); mcr->mcr_opktbuf.pb_next = 0; mcr->mcr_opktbuf.pb_len = htole32(normbits / 8); #ifdef DIAGNOSTIC /* Misaligned output buffer will hang the chip. */ if ((letoh32(mcr->mcr_opktbuf.pb_addr) & 3) != 0) panic("%s: modexp invalid addr 0x%x\n", device_get_nameunit(sc->sc_dev), letoh32(mcr->mcr_opktbuf.pb_addr)); if ((letoh32(mcr->mcr_opktbuf.pb_len) & 3) != 0) panic("%s: modexp invalid len 0x%x\n", device_get_nameunit(sc->sc_dev), letoh32(mcr->mcr_opktbuf.pb_len)); #endif ctx = (struct ubsec_ctx_modexp *)me->me_q.q_ctx.dma_vaddr; bzero(ctx, sizeof(*ctx)); ubsec_kshift_r(shiftbits, krp->krp_param[UBS_MODEXP_PAR_N].crp_p, nbits, ctx->me_N, normbits); ctx->me_len = htole16((normbits / 8) + (4 * sizeof(u_int16_t))); ctx->me_op = htole16(UBS_CTXOP_MODEXP); ctx->me_E_len = htole16(nbits); ctx->me_N_len = htole16(nbits); #ifdef UBSEC_DEBUG if (ubsec_debug) { ubsec_dump_mcr(mcr); ubsec_dump_ctx2((struct ubsec_ctx_keyop *)ctx); } #endif /* * ubsec_feed2 will sync mcr and ctx, we just need to sync * everything else. 
*/ bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map, BUS_DMASYNC_PREREAD); bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map, BUS_DMASYNC_PREWRITE); /* Enqueue and we're done... */ UBSEC_LOCK(sc); SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &me->me_q, q_next); ubsec_feed2(sc); ubsecstats.hst_modexp++; UBSEC_UNLOCK(sc); return (0); errout: if (me != NULL) { if (me->me_q.q_mcr.dma_map != NULL) ubsec_dma_free(sc, &me->me_q.q_mcr); if (me->me_q.q_ctx.dma_map != NULL) { bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size); ubsec_dma_free(sc, &me->me_q.q_ctx); } if (me->me_M.dma_map != NULL) { bzero(me->me_M.dma_vaddr, me->me_M.dma_size); ubsec_dma_free(sc, &me->me_M); } if (me->me_E.dma_map != NULL) { bzero(me->me_E.dma_vaddr, me->me_E.dma_size); ubsec_dma_free(sc, &me->me_E); } if (me->me_C.dma_map != NULL) { bzero(me->me_C.dma_vaddr, me->me_C.dma_size); ubsec_dma_free(sc, &me->me_C); } if (me->me_epb.dma_map != NULL) ubsec_dma_free(sc, &me->me_epb); free(me, M_DEVBUF); } krp->krp_status = err; crypto_kdone(krp); return (0); } /* * Start computation of cr[C] = (cr[M] ^ cr[E]) mod cr[N] (hw normalization) */ static int ubsec_kprocess_modexp_hw(struct ubsec_softc *sc, struct cryptkop *krp, int hint) { struct ubsec_q2_modexp *me; struct ubsec_mcr *mcr; struct ubsec_ctx_modexp *ctx; struct ubsec_pktbuf *epb; int err = 0; u_int nbits, normbits, mbits, shiftbits, ebits; me = (struct ubsec_q2_modexp *)malloc(sizeof *me, M_DEVBUF, M_NOWAIT); if (me == NULL) { err = ENOMEM; goto errout; } bzero(me, sizeof *me); me->me_krp = krp; me->me_q.q_type = UBS_CTXOP_MODEXP; nbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_N]); if (nbits <= 512) normbits = 512; else if (nbits <= 768) normbits = 768; else if (nbits <= 1024) normbits = 1024; else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 1536) normbits = 1536; else if (sc->sc_flags & UBS_FLAGS_BIGKEY 
&& nbits <= 2048) normbits = 2048; else { err = E2BIG; goto errout; } shiftbits = normbits - nbits; /* XXX ??? */ me->me_modbits = nbits; me->me_shiftbits = shiftbits; me->me_normbits = normbits; /* Sanity check: result bits must be >= true modulus bits. */ if (krp->krp_param[krp->krp_iparams].crp_nbits < nbits) { err = ERANGE; goto errout; } if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr), &me->me_q.q_mcr, 0)) { err = ENOMEM; goto errout; } mcr = (struct ubsec_mcr *)me->me_q.q_mcr.dma_vaddr; if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_modexp), &me->me_q.q_ctx, 0)) { err = ENOMEM; goto errout; } mbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_M]); if (mbits > nbits) { err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, normbits / 8, &me->me_M, 0)) { err = ENOMEM; goto errout; } bzero(me->me_M.dma_vaddr, normbits / 8); bcopy(krp->krp_param[UBS_MODEXP_PAR_M].crp_p, me->me_M.dma_vaddr, (mbits + 7) / 8); if (ubsec_dma_malloc(sc, normbits / 8, &me->me_C, 0)) { err = ENOMEM; goto errout; } bzero(me->me_C.dma_vaddr, me->me_C.dma_size); ebits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_E]); if (ebits > nbits) { err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, normbits / 8, &me->me_E, 0)) { err = ENOMEM; goto errout; } bzero(me->me_E.dma_vaddr, normbits / 8); bcopy(krp->krp_param[UBS_MODEXP_PAR_E].crp_p, me->me_E.dma_vaddr, (ebits + 7) / 8); if (ubsec_dma_malloc(sc, sizeof(struct ubsec_pktbuf), &me->me_epb, 0)) { err = ENOMEM; goto errout; } epb = (struct ubsec_pktbuf *)me->me_epb.dma_vaddr; epb->pb_addr = htole32(me->me_E.dma_paddr); epb->pb_next = 0; epb->pb_len = htole32((ebits + 7) / 8); #ifdef UBSEC_DEBUG printf("Epb "); ubsec_dump_pb(epb); #endif mcr->mcr_pkts = htole16(1); mcr->mcr_flags = 0; mcr->mcr_cmdctxp = htole32(me->me_q.q_ctx.dma_paddr); mcr->mcr_reserved = 0; mcr->mcr_pktlen = 0; mcr->mcr_ipktbuf.pb_addr = htole32(me->me_M.dma_paddr); mcr->mcr_ipktbuf.pb_len = htole32(normbits / 8); mcr->mcr_ipktbuf.pb_next = 
htole32(me->me_epb.dma_paddr); mcr->mcr_opktbuf.pb_addr = htole32(me->me_C.dma_paddr); mcr->mcr_opktbuf.pb_next = 0; mcr->mcr_opktbuf.pb_len = htole32(normbits / 8); #ifdef DIAGNOSTIC /* Misaligned output buffer will hang the chip. */ if ((letoh32(mcr->mcr_opktbuf.pb_addr) & 3) != 0) panic("%s: modexp invalid addr 0x%x\n", device_get_nameunit(sc->sc_dev), letoh32(mcr->mcr_opktbuf.pb_addr)); if ((letoh32(mcr->mcr_opktbuf.pb_len) & 3) != 0) panic("%s: modexp invalid len 0x%x\n", device_get_nameunit(sc->sc_dev), letoh32(mcr->mcr_opktbuf.pb_len)); #endif ctx = (struct ubsec_ctx_modexp *)me->me_q.q_ctx.dma_vaddr; bzero(ctx, sizeof(*ctx)); bcopy(krp->krp_param[UBS_MODEXP_PAR_N].crp_p, ctx->me_N, (nbits + 7) / 8); ctx->me_len = htole16((normbits / 8) + (4 * sizeof(u_int16_t))); ctx->me_op = htole16(UBS_CTXOP_MODEXP); ctx->me_E_len = htole16(ebits); ctx->me_N_len = htole16(nbits); #ifdef UBSEC_DEBUG if (ubsec_debug) { ubsec_dump_mcr(mcr); ubsec_dump_ctx2((struct ubsec_ctx_keyop *)ctx); } #endif /* * ubsec_feed2 will sync mcr and ctx, we just need to sync * everything else. */ bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map, BUS_DMASYNC_PREREAD); bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map, BUS_DMASYNC_PREWRITE); /* Enqueue and we're done... 
*/ UBSEC_LOCK(sc); SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &me->me_q, q_next); ubsec_feed2(sc); UBSEC_UNLOCK(sc); return (0); errout: if (me != NULL) { if (me->me_q.q_mcr.dma_map != NULL) ubsec_dma_free(sc, &me->me_q.q_mcr); if (me->me_q.q_ctx.dma_map != NULL) { bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size); ubsec_dma_free(sc, &me->me_q.q_ctx); } if (me->me_M.dma_map != NULL) { bzero(me->me_M.dma_vaddr, me->me_M.dma_size); ubsec_dma_free(sc, &me->me_M); } if (me->me_E.dma_map != NULL) { bzero(me->me_E.dma_vaddr, me->me_E.dma_size); ubsec_dma_free(sc, &me->me_E); } if (me->me_C.dma_map != NULL) { bzero(me->me_C.dma_vaddr, me->me_C.dma_size); ubsec_dma_free(sc, &me->me_C); } if (me->me_epb.dma_map != NULL) ubsec_dma_free(sc, &me->me_epb); free(me, M_DEVBUF); } krp->krp_status = err; crypto_kdone(krp); return (0); } static int ubsec_kprocess_rsapriv(struct ubsec_softc *sc, struct cryptkop *krp, int hint) { struct ubsec_q2_rsapriv *rp = NULL; struct ubsec_mcr *mcr; struct ubsec_ctx_rsapriv *ctx; int err = 0; u_int padlen, msglen; msglen = ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_P]); padlen = ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_Q]); if (msglen > padlen) padlen = msglen; if (padlen <= 256) padlen = 256; else if (padlen <= 384) padlen = 384; else if (padlen <= 512) padlen = 512; else if (sc->sc_flags & UBS_FLAGS_BIGKEY && padlen <= 768) padlen = 768; else if (sc->sc_flags & UBS_FLAGS_BIGKEY && padlen <= 1024) padlen = 1024; else { err = E2BIG; goto errout; } if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_DP]) > padlen) { err = E2BIG; goto errout; } if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_DQ]) > padlen) { err = E2BIG; goto errout; } if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_PINV]) > padlen) { err = E2BIG; goto errout; } rp = (struct ubsec_q2_rsapriv *)malloc(sizeof *rp, M_DEVBUF, M_NOWAIT); if (rp == NULL) return (ENOMEM); bzero(rp, sizeof *rp); rp->rpr_krp = krp; rp->rpr_q.q_type = UBS_CTXOP_RSAPRIV; if 
(ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr), &rp->rpr_q.q_mcr, 0)) { err = ENOMEM; goto errout; } mcr = (struct ubsec_mcr *)rp->rpr_q.q_mcr.dma_vaddr; if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_rsapriv), &rp->rpr_q.q_ctx, 0)) { err = ENOMEM; goto errout; } ctx = (struct ubsec_ctx_rsapriv *)rp->rpr_q.q_ctx.dma_vaddr; bzero(ctx, sizeof *ctx); /* Copy in p */ bcopy(krp->krp_param[UBS_RSAPRIV_PAR_P].crp_p, &ctx->rpr_buf[0 * (padlen / 8)], (krp->krp_param[UBS_RSAPRIV_PAR_P].crp_nbits + 7) / 8); /* Copy in q */ bcopy(krp->krp_param[UBS_RSAPRIV_PAR_Q].crp_p, &ctx->rpr_buf[1 * (padlen / 8)], (krp->krp_param[UBS_RSAPRIV_PAR_Q].crp_nbits + 7) / 8); /* Copy in dp */ bcopy(krp->krp_param[UBS_RSAPRIV_PAR_DP].crp_p, &ctx->rpr_buf[2 * (padlen / 8)], (krp->krp_param[UBS_RSAPRIV_PAR_DP].crp_nbits + 7) / 8); /* Copy in dq */ bcopy(krp->krp_param[UBS_RSAPRIV_PAR_DQ].crp_p, &ctx->rpr_buf[3 * (padlen / 8)], (krp->krp_param[UBS_RSAPRIV_PAR_DQ].crp_nbits + 7) / 8); /* Copy in pinv */ bcopy(krp->krp_param[UBS_RSAPRIV_PAR_PINV].crp_p, &ctx->rpr_buf[4 * (padlen / 8)], (krp->krp_param[UBS_RSAPRIV_PAR_PINV].crp_nbits + 7) / 8); msglen = padlen * 2; /* Copy in input message (aligned buffer/length). */ if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_MSGIN]) > msglen) { /* Is this likely? */ err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, (msglen + 7) / 8, &rp->rpr_msgin, 0)) { err = ENOMEM; goto errout; } bzero(rp->rpr_msgin.dma_vaddr, (msglen + 7) / 8); bcopy(krp->krp_param[UBS_RSAPRIV_PAR_MSGIN].crp_p, rp->rpr_msgin.dma_vaddr, (krp->krp_param[UBS_RSAPRIV_PAR_MSGIN].crp_nbits + 7) / 8); /* Prepare space for output message (aligned buffer/length). */ if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT]) < msglen) { /* Is this likely? 
*/ err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, (msglen + 7) / 8, &rp->rpr_msgout, 0)) { err = ENOMEM; goto errout; } bzero(rp->rpr_msgout.dma_vaddr, (msglen + 7) / 8); mcr->mcr_pkts = htole16(1); mcr->mcr_flags = 0; mcr->mcr_cmdctxp = htole32(rp->rpr_q.q_ctx.dma_paddr); mcr->mcr_ipktbuf.pb_addr = htole32(rp->rpr_msgin.dma_paddr); mcr->mcr_ipktbuf.pb_next = 0; mcr->mcr_ipktbuf.pb_len = htole32(rp->rpr_msgin.dma_size); mcr->mcr_reserved = 0; mcr->mcr_pktlen = htole16(msglen); mcr->mcr_opktbuf.pb_addr = htole32(rp->rpr_msgout.dma_paddr); mcr->mcr_opktbuf.pb_next = 0; mcr->mcr_opktbuf.pb_len = htole32(rp->rpr_msgout.dma_size); #ifdef DIAGNOSTIC if (rp->rpr_msgin.dma_paddr & 3 || rp->rpr_msgin.dma_size & 3) { panic("%s: rsapriv: invalid msgin %x(0x%jx)", device_get_nameunit(sc->sc_dev), rp->rpr_msgin.dma_paddr, (uintmax_t)rp->rpr_msgin.dma_size); } if (rp->rpr_msgout.dma_paddr & 3 || rp->rpr_msgout.dma_size & 3) { panic("%s: rsapriv: invalid msgout %x(0x%jx)", device_get_nameunit(sc->sc_dev), rp->rpr_msgout.dma_paddr, (uintmax_t)rp->rpr_msgout.dma_size); } #endif ctx->rpr_len = (sizeof(u_int16_t) * 4) + (5 * (padlen / 8)); ctx->rpr_op = htole16(UBS_CTXOP_RSAPRIV); ctx->rpr_q_len = htole16(padlen); ctx->rpr_p_len = htole16(padlen); /* * ubsec_feed2 will sync mcr and ctx, we just need to sync * everything else. */ bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgin.dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgout.dma_map, BUS_DMASYNC_PREREAD); /* Enqueue and we're done... 
*/ UBSEC_LOCK(sc); SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &rp->rpr_q, q_next); ubsec_feed2(sc); ubsecstats.hst_modexpcrt++; UBSEC_UNLOCK(sc); return (0); errout: if (rp != NULL) { if (rp->rpr_q.q_mcr.dma_map != NULL) ubsec_dma_free(sc, &rp->rpr_q.q_mcr); if (rp->rpr_msgin.dma_map != NULL) { bzero(rp->rpr_msgin.dma_vaddr, rp->rpr_msgin.dma_size); ubsec_dma_free(sc, &rp->rpr_msgin); } if (rp->rpr_msgout.dma_map != NULL) { bzero(rp->rpr_msgout.dma_vaddr, rp->rpr_msgout.dma_size); ubsec_dma_free(sc, &rp->rpr_msgout); } free(rp, M_DEVBUF); } krp->krp_status = err; crypto_kdone(krp); return (0); } #ifdef UBSEC_DEBUG static void ubsec_dump_pb(volatile struct ubsec_pktbuf *pb) { printf("addr 0x%x (0x%x) next 0x%x\n", pb->pb_addr, pb->pb_len, pb->pb_next); } static void ubsec_dump_ctx2(struct ubsec_ctx_keyop *c) { printf("CTX (0x%x):\n", c->ctx_len); switch (letoh16(c->ctx_op)) { case UBS_CTXOP_RNGBYPASS: case UBS_CTXOP_RNGSHA1: break; case UBS_CTXOP_MODEXP: { struct ubsec_ctx_modexp *cx = (void *)c; int i, len; printf(" Elen %u, Nlen %u\n", letoh16(cx->me_E_len), letoh16(cx->me_N_len)); len = (cx->me_N_len + 7)/8; for (i = 0; i < len; i++) printf("%s%02x", (i == 0) ? " N: " : ":", cx->me_N[i]); printf("\n"); break; } default: printf("unknown context: %x\n", c->ctx_op); } printf("END CTX\n"); } static void ubsec_dump_mcr(struct ubsec_mcr *mcr) { volatile struct ubsec_mcr_add *ma; int i; printf("MCR:\n"); printf(" pkts: %u, flags 0x%x\n", letoh16(mcr->mcr_pkts), letoh16(mcr->mcr_flags)); ma = (volatile struct ubsec_mcr_add *)&mcr->mcr_cmdctxp; for (i = 0; i < letoh16(mcr->mcr_pkts); i++) { printf(" %d: ctx 0x%x len 0x%x rsvd 0x%x\n", i, letoh32(ma->mcr_cmdctxp), letoh16(ma->mcr_pktlen), letoh16(ma->mcr_reserved)); printf(" %d: ipkt ", i); ubsec_dump_pb(&ma->mcr_ipktbuf); printf(" %d: opkt ", i); ubsec_dump_pb(&ma->mcr_opktbuf); ma++; } printf("END MCR\n"); } #endif /* UBSEC_DEBUG */ /* * Return the number of significant bits of a big number. 
*/ static int ubsec_ksigbits(struct crparam *cr) { u_int plen = (cr->crp_nbits + 7) / 8; int i, sig = plen * 8; u_int8_t c, *p = cr->crp_p; for (i = plen - 1; i >= 0; i--) { c = p[i]; if (c != 0) { while ((c & 0x80) == 0) { sig--; c <<= 1; } break; } sig -= 8; } return (sig); } static void ubsec_kshift_r( u_int shiftbits, u_int8_t *src, u_int srcbits, u_int8_t *dst, u_int dstbits) { u_int slen, dlen; int i, si, di, n; slen = (srcbits + 7) / 8; dlen = (dstbits + 7) / 8; for (i = 0; i < slen; i++) dst[i] = src[i]; for (i = 0; i < dlen - slen; i++) dst[slen + i] = 0; n = shiftbits / 8; if (n != 0) { si = dlen - n - 1; di = dlen - 1; while (si >= 0) dst[di--] = dst[si--]; while (di >= 0) dst[di--] = 0; } n = shiftbits % 8; if (n != 0) { for (i = dlen - 1; i > 0; i--) dst[i] = (dst[i] << n) | (dst[i - 1] >> (8 - n)); dst[0] = dst[0] << n; } } static void ubsec_kshift_l( u_int shiftbits, u_int8_t *src, u_int srcbits, u_int8_t *dst, u_int dstbits) { int slen, dlen, i, n; slen = (srcbits + 7) / 8; dlen = (dstbits + 7) / 8; n = shiftbits / 8; for (i = 0; i < slen; i++) dst[i] = src[i + n]; for (i = 0; i < dlen - slen; i++) dst[slen + i] = 0; n = shiftbits % 8; if (n != 0) { for (i = 0; i < (dlen - 1); i++) dst[i] = (dst[i] >> n) | (dst[i + 1] << (8 - n)); dst[dlen - 1] = dst[dlen - 1] >> n; } } Index: head/sys/kern/uipc_mbuf.c =================================================================== --- head/sys/kern/uipc_mbuf.c (revision 108465) +++ head/sys/kern/uipc_mbuf.c (revision 108466) @@ -1,739 +1,779 @@ /* * Copyright (c) 1982, 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 * $FreeBSD$ */ #include "opt_mac.h" #include "opt_param.h" #include #include #include #include #include #include #include #include #include #include int max_linkhdr; int max_protohdr; int max_hdr; int max_datalen; /* * sysctl(8) exported objects */ SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, &max_linkhdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW, &max_protohdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW, &max_datalen, 0, ""); /* - * Copy mbuf pkthdr from "from" to "to". + * "Move" mbuf pkthdr from "from" to "to". * "from" must have M_PKTHDR set, and "to" must be empty. - * aux pointer will be moved to "to". */ void -m_copy_pkthdr(struct mbuf *to, struct mbuf *from) +m_move_pkthdr(struct mbuf *to, struct mbuf *from) { #if 0 + /* see below for why these are not enabled */ KASSERT(to->m_flags & M_PKTHDR, - ("m_copy_pkthdr() called on non-header")); + ("m_move_pkthdr: called on non-header")); + KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), + ("m_move_pkthdr: to has tags")); #endif + KASSERT((to->m_flags & M_EXT) == 0, ("m_move_pkthdr: to has cluster")); #ifdef MAC if (to->m_flags & M_PKTHDR) mac_destroy_mbuf(to); #endif + to->m_flags = from->m_flags & M_COPYFLAGS; to->m_data = to->m_pktdat; + to->m_pkthdr = from->m_pkthdr; /* especially tags */ +#ifdef MAC + mac_init_mbuf(to, 1); /* XXXMAC no way to fail */ + mac_create_mbuf_from_mbuf(from, to); +#endif + SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ + from->m_flags &= ~M_PKTHDR; +} + +/* + * Duplicate "from"'s mbuf pkthdr in "to". + * "from" must have M_PKTHDR set, and "to" must be empty. + * In particular, this does a deep copy of the packet tags. 
+ */ +int +m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how) +{ + +#if 0 + /* + * The mbuf allocator only initializes the pkthdr + * when the mbuf is allocated with MGETHDR. Many users + * (e.g. m_copy*, m_prepend) use MGET and then + * smash the pkthdr as needed causing these + * assertions to trip. For now just disable them. + */ + KASSERT(to->m_flags & M_PKTHDR, ("m_dup_pkthdr: called on non-header")); + KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags")); +#endif + KASSERT((to->m_flags & M_EXT) == 0, ("m_dup_pkthdr: to has cluster")); +#ifdef MAC + if (to->m_flags & M_PKTHDR) + mac_destroy_mbuf(to); +#endif to->m_flags = from->m_flags & M_COPYFLAGS; + to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; #ifdef MAC mac_init_mbuf(to, 1); /* XXXMAC no way to fail */ mac_create_mbuf_from_mbuf(from, to); #endif - SLIST_INIT(&from->m_pkthdr.tags); + SLIST_INIT(&to->m_pkthdr.tags); + return (m_tag_copy_chain(to, from, how)); } /* * Lesser-used path for M_PREPEND: * allocate new mbuf to prepend to chain, * copy junk along. */ struct mbuf * m_prepend(struct mbuf *m, int len, int how) { struct mbuf *mn; MGET(mn, how, m->m_type); if (mn == NULL) { m_freem(m); return (NULL); } if (m->m_flags & M_PKTHDR) { - M_COPY_PKTHDR(mn, m); + M_MOVE_PKTHDR(mn, m); #ifdef MAC mac_destroy_mbuf(m); #endif - m->m_flags &= ~M_PKTHDR; } mn->m_next = m; m = mn; if (len < MHLEN) MH_ALIGN(m, len); m->m_len = len; return (m); } /* * Make a copy of an mbuf chain starting "off0" bytes from the beginning, * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller. * Note that the copy is read-only, because clusters are not copied, * only their reference counts are incremented. 
*/ struct mbuf * m_copym(struct mbuf *m, int off0, int len, int wait) { struct mbuf *n, **np; int off = off0; struct mbuf *top; int copyhdr = 0; KASSERT(off >= 0, ("m_copym, negative off %d", off)); KASSERT(len >= 0, ("m_copym, negative len %d", len)); if (off == 0 && m->m_flags & M_PKTHDR) copyhdr = 1; while (off > 0) { KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } np = ⊤ top = 0; while (len > 0) { if (m == NULL) { KASSERT(len == M_COPYALL, ("m_copym, length > size of mbuf chain")); break; } MGET(n, wait, m->m_type); *np = n; if (n == NULL) goto nospace; if (copyhdr) { - M_COPY_PKTHDR(n, m); + if (!m_dup_pkthdr(n, m, wait)) + goto nospace; if (len == M_COPYALL) n->m_pkthdr.len -= off0; else n->m_pkthdr.len = len; copyhdr = 0; } n->m_len = min(len, m->m_len - off); if (m->m_flags & M_EXT) { n->m_data = m->m_data + off; n->m_ext = m->m_ext; n->m_flags |= M_EXT; MEXT_ADD_REF(m); } else bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), (u_int)n->m_len); if (len != M_COPYALL) len -= n->m_len; off = 0; m = m->m_next; np = &n->m_next; } if (top == NULL) mbstat.m_mcfail++; /* XXX: No consistency. */ return (top); nospace: m_freem(top); mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } /* * Copy an entire packet, including header (which must be present). * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. * Note that the copy is read-only, because clusters are not copied, * only their reference counts are incremented. * Preserve alignment of the first mbuf so if the creator has left * some room at the beginning (e.g. for inserting protocol headers) * the copies still have the room available. 
*/ struct mbuf * m_copypacket(struct mbuf *m, int how) { struct mbuf *top, *n, *o; MGET(n, how, m->m_type); top = n; if (n == NULL) goto nospace; - M_COPY_PKTHDR(n, m); + if (!m_dup_pkthdr(n, m, how)) + goto nospace; n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; n->m_ext = m->m_ext; n->m_flags |= M_EXT; MEXT_ADD_REF(m); } else { n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } m = m->m_next; while (m) { MGET(o, how, m->m_type); if (o == NULL) goto nospace; n->m_next = o; n = n->m_next; n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; n->m_ext = m->m_ext; n->m_flags |= M_EXT; MEXT_ADD_REF(m); } else { bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } m = m->m_next; } return top; nospace: m_freem(top); mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } /* * Copy data from an mbuf chain starting "off" bytes from the beginning, * continuing for "len" bytes, into the indicated buffer. */ void m_copydata(const struct mbuf *m, int off, int len, caddr_t cp) { u_int count; KASSERT(off >= 0, ("m_copydata, negative off %d", off)); KASSERT(len >= 0, ("m_copydata, negative len %d", len)); while (off > 0) { KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } while (len > 0) { KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); count = min(m->m_len - off, len); bcopy(mtod(m, caddr_t) + off, cp, count); len -= count; cp += count; off = 0; m = m->m_next; } } /* * Copy a packet header mbuf chain into a completely new chain, including * copying any mbuf clusters. Use this instead of m_copypacket() when * you need a writable copy of an mbuf chain. 
*/ struct mbuf * m_dup(struct mbuf *m, int how) { struct mbuf **p, *top = NULL; int remain, moff, nsize; /* Sanity check */ if (m == NULL) return (NULL); KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __func__)); /* While there's more data, get a new mbuf, tack it on, and fill it */ remain = m->m_pkthdr.len; moff = 0; p = ⊤ while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ struct mbuf *n; /* Get the next new mbuf */ MGET(n, how, m->m_type); if (n == NULL) goto nospace; if (top == NULL) { /* first one, must be PKTHDR */ - M_COPY_PKTHDR(n, m); + if (!m_dup_pkthdr(n, m, how)) + goto nospace; nsize = MHLEN; } else /* not the first one */ nsize = MLEN; if (remain >= MINCLSIZE) { MCLGET(n, how); if ((n->m_flags & M_EXT) == 0) { (void)m_free(n); goto nospace; } nsize = MCLBYTES; } n->m_len = 0; /* Link it into the new chain */ *p = n; p = &n->m_next; /* Copy data from original mbuf(s) into new mbuf */ while (n->m_len < nsize && m != NULL) { int chunk = min(nsize - n->m_len, m->m_len - moff); bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); moff += chunk; n->m_len += chunk; remain -= chunk; if (moff == m->m_len) { m = m->m_next; moff = 0; } } /* Check correct total mbuf length */ KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), ("%s: bogus m_pkthdr.len", __func__)); } return (top); nospace: m_freem(top); mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } /* * Concatenate mbuf chain n to m. * Both chains must be of the same type (e.g. MT_DATA). * Any m_pkthdr is not updated. 
*/ void m_cat(struct mbuf *m, struct mbuf *n) { while (m->m_next) m = m->m_next; while (n) { if (m->m_flags & M_EXT || m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { /* just join the two chains */ m->m_next = n; return; } /* splat the data from one into the other */ bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (u_int)n->m_len); m->m_len += n->m_len; n = m_free(n); } } void m_adj(struct mbuf *mp, int req_len) { int len = req_len; struct mbuf *m; int count; if ((m = mp) == NULL) return; if (len >= 0) { /* * Trim from head. */ while (m != NULL && len > 0) { if (m->m_len <= len) { len -= m->m_len; m->m_len = 0; m = m->m_next; } else { m->m_len -= len; m->m_data += len; len = 0; } } m = mp; if (mp->m_flags & M_PKTHDR) m->m_pkthdr.len -= (req_len - len); } else { /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ len = -len; count = 0; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len >= len) { m->m_len -= len; if (mp->m_flags & M_PKTHDR) mp->m_pkthdr.len -= len; return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ m = mp; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len = count; for (; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; break; } count -= m->m_len; } while (m->m_next) (m = m->m_next) ->m_len = 0; } } /* * Rearange an mbuf chain so that len bytes are contiguous * and in the data area of an mbuf (so that mtod and dtom * will work for a structure of size len). Returns the resulting * mbuf chain on success, frees it and returns null on failure. 
* If there is room, it will add up to max_protohdr-len extra bytes to the * contiguous region in an attempt to avoid being called next time. */ struct mbuf * m_pullup(struct mbuf *n, int len) { struct mbuf *m; int count; int space; /* * If first mbuf has no cluster, and has room for len bytes * without shifting current data, pullup into it, * otherwise allocate a new mbuf to prepend to the chain. */ if ((n->m_flags & M_EXT) == 0 && n->m_data + len < &n->m_dat[MLEN] && n->m_next) { if (n->m_len >= len) return (n); m = n; n = n->m_next; len -= m->m_len; } else { if (len > MHLEN) goto bad; MGET(m, M_DONTWAIT, n->m_type); if (m == NULL) goto bad; m->m_len = 0; - if (n->m_flags & M_PKTHDR) { - M_COPY_PKTHDR(m, n); - n->m_flags &= ~M_PKTHDR; - } + if (n->m_flags & M_PKTHDR) + M_MOVE_PKTHDR(m, n); } space = &m->m_dat[MLEN] - (m->m_data + m->m_len); do { count = min(min(max(len, max_protohdr), space), n->m_len); bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (u_int)count); len -= count; m->m_len += count; n->m_len -= count; space -= count; if (n->m_len) n->m_data += count; else n = m_free(n); } while (len > 0 && n); if (len > 0) { (void) m_free(m); goto bad; } m->m_next = n; return (m); bad: m_freem(n); mbstat.m_mpfail++; /* XXX: No consistency. */ return (NULL); } /* * Partition an mbuf chain in two pieces, returning the tail -- * all but the first len0 bytes. In case of failure, it returns NULL and * attempts to restore the chain to its original state. * * Note that the resulting mbufs might be read-only, because the new * mbuf can end up sharing an mbuf cluster with the original mbuf if * the "breaking point" happens to lie within a cluster mbuf. Use the * M_WRITABLE() macro to check for this case. 
*/ struct mbuf * m_split(struct mbuf *m0, int len0, int wait) { struct mbuf *m, *n; u_int len = len0, remain; for (m = m0; m && len > m->m_len; m = m->m_next) len -= m->m_len; if (m == NULL) return (NULL); remain = m->m_len - len; if (m0->m_flags & M_PKTHDR) { MGETHDR(n, wait, m0->m_type); if (n == NULL) return (NULL); n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; n->m_pkthdr.len = m0->m_pkthdr.len - len0; m0->m_pkthdr.len = len0; if (m->m_flags & M_EXT) goto extpacket; if (remain > MHLEN) { /* m can't be the lead packet */ MH_ALIGN(n, 0); n->m_next = m_split(m, len, wait); if (n->m_next == NULL) { (void) m_free(n); return (NULL); } else { n->m_len = 0; return (n); } } else MH_ALIGN(n, remain); } else if (remain == 0) { n = m->m_next; m->m_next = NULL; return (n); } else { MGET(n, wait, m->m_type); if (n == NULL) return (NULL); M_ALIGN(n, remain); } extpacket: if (m->m_flags & M_EXT) { n->m_flags |= M_EXT; n->m_ext = m->m_ext; MEXT_ADD_REF(m); n->m_data = m->m_data + len; } else { bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); } n->m_len = remain; m->m_len = len; n->m_next = m->m_next; m->m_next = NULL; return (n); } /* * Routine to copy from device local memory into mbufs. * Note that `off' argument is offset into first mbuf of target chain from * which to begin copying the data to. */ struct mbuf * m_devget(char *buf, int totlen, int off, struct ifnet *ifp, void (*copy)(char *from, caddr_t to, u_int len)) { struct mbuf *m; struct mbuf *top = 0, **mp = ⊤ int len; if (off < 0 || off > MHLEN) return (NULL); MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return (NULL); m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = totlen; len = MHLEN; while (totlen > 0) { if (top) { MGET(m, M_DONTWAIT, MT_DATA); if (m == NULL) { m_freem(top); return (NULL); } len = MLEN; } if (totlen + off >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if (m->m_flags & M_EXT) len = MCLBYTES; } else { /* * Place initial small packet/header at end of mbuf. 
*/ if (top == NULL && totlen + off + max_linkhdr <= len) { m->m_data += max_linkhdr; len -= max_linkhdr; } } if (off) { m->m_data += off; len -= off; off = 0; } m->m_len = len = min(totlen, len); if (copy) copy(buf, mtod(m, caddr_t), (u_int)len); else bcopy(buf, mtod(m, caddr_t), (u_int)len); buf += len; *mp = m; mp = &m->m_next; totlen -= len; } return (top); } /* * Copy data from a buffer back into the indicated mbuf chain, * starting "off" bytes from the beginning, extending the mbuf * chain if necessary. */ void m_copyback(struct mbuf *m0, int off, int len, caddr_t cp) { int mlen; struct mbuf *m = m0, *n; int totlen = 0; if (m0 == NULL) return; while (off > (mlen = m->m_len)) { off -= mlen; totlen += mlen; if (m->m_next == NULL) { n = m_get_clrd(M_DONTWAIT, m->m_type); if (n == NULL) goto out; n->m_len = min(MLEN, len + off); m->m_next = n; } m = m->m_next; } while (len > 0) { mlen = min (m->m_len - off, len); bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen); cp += mlen; len -= mlen; mlen += off; off = 0; totlen += mlen; if (len == 0) break; if (m->m_next == NULL) { n = m_get(M_DONTWAIT, m->m_type); if (n == NULL) break; n->m_len = min(MLEN, len); m->m_next = n; } m = m->m_next; } out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) m->m_pkthdr.len = totlen; } void m_print(const struct mbuf *m) { int len; const struct mbuf *m2; len = m->m_pkthdr.len; m2 = m; while (len) { printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-"); len -= m2->m_len; m2 = m2->m_next; } return; } u_int m_fixhdr(struct mbuf *m0) { u_int len; len = m_length(m0, NULL); m0->m_pkthdr.len = len; return (len); } u_int m_length(struct mbuf *m0, struct mbuf **last) { struct mbuf *m; u_int len; len = 0; for (m = m0; m != NULL; m = m->m_next) { len += m->m_len; if (m->m_next == NULL) break; } if (last != NULL) *last = m; return (len); } Index: head/sys/kern/uipc_mbuf2.c =================================================================== --- head/sys/kern/uipc_mbuf2.c 
(revision 108465) +++ head/sys/kern/uipc_mbuf2.c (revision 108466) @@ -1,456 +1,458 @@ /* $FreeBSD$ */ /* $KAME: uipc_mbuf2.c,v 1.31 2001/11/28 11:08:53 itojun Exp $ */ /* $NetBSD: uipc_mbuf.c,v 1.40 1999/04/01 00:23:25 thorpej Exp $ */ /* * Copyright (C) 1999 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 */ /*#define PULLDOWN_DEBUG*/ #include #include #include #include #include #include #include MALLOC_DEFINE(M_PACKET_TAGS, "tag", "packet-attached information"); /* can't call it m_dup(), as freebsd[34] uses m_dup() with different arg */ static struct mbuf *m_dup1(struct mbuf *, int, int, int); /* * ensure that [off, off + len) is contiguous on the mbuf chain "m". * packet chain before "off" is kept untouched. * if offp == NULL, the target will start at on resulting chain. * if offp != NULL, the target will start at on resulting chain. * * on error return (NULL return value), original "m" will be freed. * * XXX: M_TRAILINGSPACE/M_LEADINGSPACE only permitted on writable ext_buf. */ struct mbuf * m_pulldown(struct mbuf *m, int off, int len, int *offp) { struct mbuf *n, *o; int hlen, tlen, olen; int writable; /* check invalid arguments. */ if (m == NULL) panic("m == NULL in m_pulldown()"); if (len > MCLBYTES) { m_freem(m); return NULL; /* impossible */ } #ifdef PULLDOWN_DEBUG { struct mbuf *t; printf("before:"); for (t = m; t; t = t->m_next) printf(" %d", t->m_len); printf("\n"); } #endif n = m; while (n != NULL && off > 0) { if (n->m_len > off) break; off -= n->m_len; n = n->m_next; } /* be sure to point non-empty mbuf */ while (n != NULL && n->m_len == 0) n = n->m_next; if (!n) { m_freem(m); return NULL; /* mbuf chain too short */ } /* * XXX: This code is flawed because it considers a "writable" mbuf * data region to require all of the following: * (i) mbuf _has_ to have M_EXT set; if it is just a regular * mbuf, it is still not considered "writable." * (ii) since mbuf has M_EXT, the ext_type _has_ to be * EXT_CLUSTER. Anything else makes it non-writable. * (iii) M_WRITABLE() must evaluate true. * Ideally, the requirement should only be (iii). * * If we're writable, we're sure we're writable, because the ref. count * cannot increase from 1, as that would require posession of mbuf * n by someone else (which is impossible). 
However, if we're _not_ * writable, we may eventually become writable )if the ref. count drops * to 1), but we'll fail to notice it unless we re-evaluate * M_WRITABLE(). For now, we only evaluate once at the beginning and * live with this. */ /* * XXX: This is dumb. If we're just a regular mbuf with no M_EXT, * then we're not "writable," according to this code. */ writable = 0; if ((n->m_flags & M_EXT) == 0 || (n->m_ext.ext_type == EXT_CLUSTER && M_WRITABLE(n))) writable = 1; /* * the target data is on . * if we got enough data on the mbuf "n", we're done. */ if ((off == 0 || offp) && len <= n->m_len - off && writable) goto ok; /* * when len <= n->m_len - off and off != 0, it is a special case. * len bytes from sits in single mbuf, but the caller does * not like the starting position (off). * chop the current mbuf into two pieces, set off to 0. */ if (len <= n->m_len - off) { o = m_dup1(n, off, n->m_len - off, M_DONTWAIT); if (o == NULL) { m_freem(m); return NULL; /* ENOBUFS */ } n->m_len = off; o->m_next = n->m_next; n->m_next = o; n = n->m_next; off = 0; goto ok; } /* * we need to take hlen from and tlen from m_next, 0>, * and construct contiguous mbuf with m_len == len. * note that hlen + tlen == len, and tlen > 0. */ hlen = n->m_len - off; tlen = len - hlen; /* * ensure that we have enough trailing data on mbuf chain. * if not, we can do nothing about the chain. */ olen = 0; for (o = n->m_next; o != NULL; o = o->m_next) olen += o->m_len; if (hlen + olen < len) { m_freem(m); return NULL; /* mbuf chain too short */ } /* * easy cases first. * we need to use m_copydata() to get data from m_next, 0>. 
*/ if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen && writable) { m_copydata(n->m_next, 0, tlen, mtod(n, caddr_t) + n->m_len); n->m_len += tlen; m_adj(n->m_next, tlen); goto ok; } if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen && writable) { n->m_next->m_data -= hlen; n->m_next->m_len += hlen; bcopy(mtod(n, caddr_t) + off, mtod(n->m_next, caddr_t), hlen); n->m_len -= hlen; n = n->m_next; off = 0; goto ok; } /* * now, we need to do the hard way. don't m_copy as there's no room * on both end. */ MGET(o, M_DONTWAIT, m->m_type); if (o && len > MLEN) { MCLGET(o, M_DONTWAIT); if ((o->m_flags & M_EXT) == 0) { m_free(o); o = NULL; } } if (!o) { m_freem(m); return NULL; /* ENOBUFS */ } /* get hlen from into */ o->m_len = hlen; bcopy(mtod(n, caddr_t) + off, mtod(o, caddr_t), hlen); n->m_len -= hlen; /* get tlen from m_next, 0> into */ m_copydata(n->m_next, 0, tlen, mtod(o, caddr_t) + o->m_len); o->m_len += tlen; m_adj(n->m_next, tlen); o->m_next = n->m_next; n->m_next = o; n = o; off = 0; ok: #ifdef PULLDOWN_DEBUG { struct mbuf *t; printf("after:"); for (t = m; t; t = t->m_next) printf("%c%d", t == n ? '*' : ' ', t->m_len); printf(" (off=%d)\n", off); } #endif if (offp) *offp = off; return n; } static struct mbuf * m_dup1(struct mbuf *m, int off, int len, int wait) { struct mbuf *n; int l; int copyhdr; if (len > MCLBYTES) return NULL; if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { copyhdr = 1; MGETHDR(n, wait, m->m_type); l = MHLEN; } else { copyhdr = 0; MGET(n, wait, m->m_type); l = MLEN; } if (n && len > l) { MCLGET(n, wait); if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } if (!n) return NULL; - if (copyhdr) - M_COPY_PKTHDR(n, m); + if (copyhdr && !m_dup_pkthdr(n, m, wait)) { + m_free(n); + return NULL; + } m_copydata(m, off, len, mtod(n, caddr_t)); return n; } /* Get a packet tag structure along with specified data following. 
 */
struct m_tag *
m_tag_alloc(u_int32_t cookie, int type, int len, int wait)
{
	struct m_tag *t;

	if (len < 0)
		return NULL;
	/* The caller's data area lives immediately after the header (t + 1). */
	t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait);
	if (t == NULL)
		return NULL;
	t->m_tag_id = type;
	t->m_tag_len = len;
	t->m_tag_cookie = cookie;
	return t;
}

/* Free a packet tag. */
void
m_tag_free(struct m_tag *t)
{
	free(t, M_PACKET_TAGS);
}

/* Prepend a packet tag. */
void
m_tag_prepend(struct mbuf *m, struct m_tag *t)
{
	KASSERT(m && t, ("m_tag_prepend: null argument, m %p t %p", m, t));
	SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link);
}

/* Unlink a packet tag. */
void
m_tag_unlink(struct mbuf *m, struct m_tag *t)
{
	KASSERT(m && t, ("m_tag_unlink: null argument, m %p t %p", m, t));
	SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link);
}

/* Unlink and free a packet tag. */
void
m_tag_delete(struct mbuf *m, struct m_tag *t)
{
	KASSERT(m && t, ("m_tag_delete: null argument, m %p t %p", m, t));
	m_tag_unlink(m, t);
	m_tag_free(t);
}

/* Unlink and free a packet tag chain, starting from given tag. */
void
m_tag_delete_chain(struct mbuf *m, struct m_tag *t)
{
	struct m_tag *p, *q;

	KASSERT(m, ("m_tag_delete_chain: null mbuf"));
	if (t != NULL)
		p = t;
	else
		p = SLIST_FIRST(&m->m_pkthdr.tags);
	if (p == NULL)
		return;
	/* Delete p's successors first so a freed link is never followed. */
	while ((q = SLIST_NEXT(p, m_tag_link)) != NULL)
		m_tag_delete(m, q);
	m_tag_delete(m, p);
}

/* Find a tag, starting from a given position. */
struct m_tag *
m_tag_locate(struct mbuf *m, u_int32_t cookie, int type, struct m_tag *t)
{
	struct m_tag *p;

	KASSERT(m, ("m_tag_locate: null mbuf"));
	if (t == NULL)
		p = SLIST_FIRST(&m->m_pkthdr.tags);
	else
		p = SLIST_NEXT(t, m_tag_link);	/* resume search after t */
	while (p != NULL) {
		if (p->m_tag_cookie == cookie && p->m_tag_id == type)
			return p;
		p = SLIST_NEXT(p, m_tag_link);
	}
	return NULL;
}

/* Copy a single tag.
*/ struct m_tag * -m_tag_copy(struct m_tag *t) +m_tag_copy(struct m_tag *t, int how) { struct m_tag *p; KASSERT(t, ("m_tag_copy: null tag")); - p = m_tag_alloc(t->m_tag_cookie, t->m_tag_id, t->m_tag_len, M_NOWAIT); + p = m_tag_alloc(t->m_tag_cookie, t->m_tag_id, t->m_tag_len, how); if (p == NULL) return (NULL); bcopy(t + 1, p + 1, t->m_tag_len); /* Copy the data */ return p; } /* * Copy two tag chains. The destination mbuf (to) loses any attached * tags even if the operation fails. This should not be a problem, as * m_tag_copy_chain() is typically called with a newly-allocated * destination mbuf. */ int -m_tag_copy_chain(struct mbuf *to, struct mbuf *from) +m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int how) { struct m_tag *p, *t, *tprev = NULL; KASSERT(to && from, ("m_tag_copy_chain: null argument, to %p from %p", to, from)); m_tag_delete_chain(to, NULL); SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) { - t = m_tag_copy(p); + t = m_tag_copy(p, how); if (t == NULL) { m_tag_delete_chain(to, NULL); return 0; } if (tprev == NULL) SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link); else { SLIST_INSERT_AFTER(tprev, t, m_tag_link); tprev = t; } } return 1; } /* Initialize tags on an mbuf. */ void m_tag_init(struct mbuf *m) { SLIST_INIT(&m->m_pkthdr.tags); } /* Get first tag in chain. */ struct m_tag * m_tag_first(struct mbuf *m) { return SLIST_FIRST(&m->m_pkthdr.tags); } /* Get next tag in chain. */ struct m_tag * m_tag_next(struct mbuf *m, struct m_tag *t) { return SLIST_NEXT(t, m_tag_link); } Index: head/sys/net/if_loop.c =================================================================== --- head/sys/net/if_loop.c (revision 108465) +++ head/sys/net/if_loop.c (revision 108466) @@ -1,458 +1,458 @@ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_loop.c 8.2 (Berkeley) 1/9/95 * $FreeBSD$ */ /* * Loopback interface driver for protocol testing and timing. 
*/ #include "opt_atalk.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipx.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #ifdef IPX #include #include #endif #ifdef INET6 #ifndef INET #include #endif #include #include #endif #ifdef NS #include #include #endif #ifdef NETATALK #include #include #endif #ifdef TINY_LOMTU #define LOMTU (1024+512) #elif defined(LARGE_LOMTU) #define LOMTU 131072 #else #define LOMTU 16384 #endif #define LONAME "lo" struct lo_softc { struct ifnet sc_if; /* network-visible interface */ LIST_ENTRY(lo_softc) sc_next; }; int loioctl(struct ifnet *, u_long, caddr_t); static void lortrequest(int, struct rtentry *, struct rt_addrinfo *); int looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct rtentry *rt); int lo_clone_create(struct if_clone *, int); void lo_clone_destroy(struct ifnet *); struct ifnet *loif = NULL; /* Used externally */ static MALLOC_DEFINE(M_LO, LONAME, "Loopback Interface"); static LIST_HEAD(lo_list, lo_softc) lo_list; struct if_clone lo_cloner = IF_CLONE_INITIALIZER(LONAME, lo_clone_create, lo_clone_destroy, 1, IF_MAXUNIT); void lo_clone_destroy(ifp) struct ifnet *ifp; { struct lo_softc *sc; sc = ifp->if_softc; /* XXX: destroying lo0 will lead to panics. 
*/ KASSERT(loif != ifp, ("%s: destroying lo0", __func__)); bpfdetach(ifp); if_detach(ifp); LIST_REMOVE(sc, sc_next); free(sc, M_LO); } int lo_clone_create(ifc, unit) struct if_clone *ifc; int unit; { struct lo_softc *sc; MALLOC(sc, struct lo_softc *, sizeof(*sc), M_LO, M_WAITOK | M_ZERO); sc->sc_if.if_name = LONAME; sc->sc_if.if_unit = unit; sc->sc_if.if_mtu = LOMTU; sc->sc_if.if_flags = IFF_LOOPBACK | IFF_MULTICAST; sc->sc_if.if_ioctl = loioctl; sc->sc_if.if_output = looutput; sc->sc_if.if_type = IFT_LOOP; sc->sc_if.if_snd.ifq_maxlen = ifqmaxlen; sc->sc_if.if_softc = sc; if_attach(&sc->sc_if); bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int)); LIST_INSERT_HEAD(&lo_list, sc, sc_next); if (loif == NULL) loif = &sc->sc_if; return (0); } static int loop_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: LIST_INIT(&lo_list); if_clone_attach(&lo_cloner); break; case MOD_UNLOAD: printf("loop module unload - not possible for this module type\n"); return EINVAL; } return 0; } static moduledata_t loop_mod = { "loop", loop_modevent, 0 }; DECLARE_MODULE(loop, loop_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); int looutput(ifp, m, dst, rt) struct ifnet *ifp; register struct mbuf *m; struct sockaddr *dst; register struct rtentry *rt; { if ((m->m_flags & M_PKTHDR) == 0) panic("looutput no HDR"); if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { m_freem(m); return (rt->rt_flags & RTF_BLACKHOLE ? 0 : rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); } /* * KAME requires that the packet to be contiguous on the * mbuf. We need to make that sure. * this kind of code should be avoided. * XXX: fails to join if interface MTU > MCLBYTES. jumbogram? */ if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { struct mbuf *n; + /* XXX MT_HEADER should be m->m_type */ MGETHDR(n, M_DONTWAIT, MT_HEADER); if (!n) goto contiguousfail; - MCLGET(n, M_DONTWAIT); - if (! 
(n->m_flags & M_EXT)) { - m_freem(n); - goto contiguousfail; - } - - m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t)); - n->m_pkthdr = m->m_pkthdr; - n->m_len = m->m_pkthdr.len; - SLIST_INIT(&m->m_pkthdr.tags); + M_MOVE_PKTHDR(n, m); #ifdef MAC /* * XXXMAC: Once we put labels in tags and proper * primitives are used for relocating mbuf header * data, this will no longer be required. */ m->m_pkthdr.label.l_flags &= ~MAC_FLAG_INITIALIZED; #endif + MCLGET(n, M_DONTWAIT); + if (! (n->m_flags & M_EXT)) { + m_freem(n); + goto contiguousfail; + } + + m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t)); + n->m_len = m->m_pkthdr.len; m_freem(m); m = n; } if (0) { contiguousfail: printf("looutput: mbuf allocation failed\n"); } ifp->if_opackets++; ifp->if_obytes += m->m_pkthdr.len; #if 1 /* XXX */ switch (dst->sa_family) { case AF_INET: case AF_INET6: case AF_IPX: case AF_NS: case AF_APPLETALK: break; default: printf("looutput: af=%d unexpected\n", dst->sa_family); m_freem(m); return (EAFNOSUPPORT); } #endif return(if_simloop(ifp, m, dst->sa_family, 0)); } /* * if_simloop() * * This function is to support software emulation of hardware loopback, * i.e., for interfaces with the IFF_SIMPLEX attribute. Since they can't * hear their own broadcasts, we create a copy of the packet that we * would normally receive via a hardware loopback. * * This function expects the packet to include the media header of length hlen. 
*/ int if_simloop(ifp, m, af, hlen) struct ifnet *ifp; struct mbuf *m; int af; int hlen; { int isr; struct ifqueue *inq = 0; KASSERT((m->m_flags & M_PKTHDR) != 0, ("if_simloop: no HDR")); m->m_pkthdr.rcvif = ifp; /* BPF write needs to be handled specially */ if (af == AF_UNSPEC) { KASSERT(m->m_len >= sizeof(int), ("if_simloop: m_len")); af = *(mtod(m, int *)); m->m_len -= sizeof(int); m->m_pkthdr.len -= sizeof(int); m->m_data += sizeof(int); } /* Let BPF see incoming packet */ if (ifp->if_bpf) { struct mbuf m0, *n = m; if (ifp->if_bpf->bif_dlt == DLT_NULL) { /* * We need to prepend the address family as * a four byte field. Cons up a dummy header * to pacify bpf. This is safe because bpf * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer a to it). */ m0.m_next = m; m0.m_len = 4; m0.m_data = (char *)⁡ n = &m0; } BPF_MTAP(ifp, n); } /* Strip away media header */ if (hlen > 0) { m_adj(m, hlen); #if defined(__alpha__) || defined(__ia64__) || defined(__sparc64__) /* The alpha doesn't like unaligned data. 
* We move data down in the first mbuf */ if (mtod(m, vm_offset_t) & 3) { KASSERT(hlen >= 3, ("if_simloop: hlen too small")); bcopy(m->m_data, (char *)(mtod(m, vm_offset_t) - (mtod(m, vm_offset_t) & 3)), m->m_len); mtod(m,vm_offset_t) -= (mtod(m, vm_offset_t) & 3); } #endif } /* Deliver to upper layer protocol */ switch (af) { #ifdef INET case AF_INET: inq = &ipintrq; isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: m->m_flags |= M_LOOP; inq = &ip6intrq; isr = NETISR_IPV6; break; #endif #ifdef IPX case AF_IPX: inq = &ipxintrq; isr = NETISR_IPX; break; #endif #ifdef NS case AF_NS: inq = &nsintrq; isr = NETISR_NS; break; #endif #ifdef NETATALK case AF_APPLETALK: inq = &atintrq2; isr = NETISR_ATALK; break; #endif default: printf("if_simloop: can't handle af=%d\n", af); m_freem(m); return (EAFNOSUPPORT); } ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; (void) IF_HANDOFF(inq, m, NULL); schednetisr(isr); return (0); } /* ARGSUSED */ static void lortrequest(cmd, rt, info) int cmd; struct rtentry *rt; struct rt_addrinfo *info; { if (rt) { rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */ /* * For optimal performance, the send and receive buffers * should be at least twice the MTU plus a little more for * overhead. */ rt->rt_rmx.rmx_recvpipe = rt->rt_rmx.rmx_sendpipe = 3 * LOMTU; } } /* * Process an ioctl request. */ /* ARGSUSED */ int loioctl(ifp, cmd, data) register struct ifnet *ifp; u_long cmd; caddr_t data; { register struct ifaddr *ifa; register struct ifreq *ifr = (struct ifreq *)data; register int error = 0; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP | IFF_RUNNING; ifa = (struct ifaddr *)data; ifa->ifa_rtrequest = lortrequest; /* * Everything else is done at a higher level. 
*/ break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == 0) { error = EAFNOSUPPORT; /* XXX */ break; } switch (ifr->ifr_addr.sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: error = EAFNOSUPPORT; break; } break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; case SIOCSIFFLAGS: break; default: error = EINVAL; } return (error); } Index: head/sys/netatm/port.h =================================================================== --- head/sys/netatm/port.h (revision 108465) +++ head/sys/netatm/port.h (revision 108466) @@ -1,307 +1,306 @@ /* * * =================================== * HARP | Host ATM Research Platform * =================================== * * * This Host ATM Research Platform ("HARP") file (the "Software") is * made available by Network Computing Services, Inc. ("NetworkCS") * "AS IS". NetworkCS does not provide maintenance, improvements or * support of any kind. * * NETWORKCS MAKES NO WARRANTIES OR REPRESENTATIONS, EXPRESS OR IMPLIED, * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE, AS TO ANY ELEMENT OF THE * SOFTWARE OR ANY SUPPORT PROVIDED IN CONNECTION WITH THIS SOFTWARE. * In no event shall NetworkCS be responsible for any damages, including * but not limited to consequential damages, arising from or relating to * any use of the Software or related support. * * Copyright 1994-1998 Network Computing Services, Inc. * * Copies of this Software may be made, however, the above copyright * notice must be reproduced on all copies. * * @(#) $FreeBSD$ * */ /* * System Configuration * -------------------- * * Porting aides * */ #ifndef _NETATM_PORT_H #define _NETATM_PORT_H #ifdef _KERNEL /* * Kernel buffers * * KBuffer Typedef for a kernel buffer. * * KB_NEXT(bfr) Access next buffer in chain (r/w). * KB_LEN(bfr) Access length of data in this buffer (r/w). * KB_QNEXT(bfr) Access next buffer in queue (r/w). 
* * KB_ALLOC(bfr, size, flags, type) * Allocates a new kernel buffer of at least size bytes. * KB_ALLOCPKT(bfr, size, flags, type) * Allocates a new kernel packet header buffer of at * least size bytes. * KB_ALLOCEXT(bfr, size, flags, type) * Allocates a new kernel buffer with external storage * of at least size bytes. * KB_FREEONE(bfr, nxt) Free buffer bfr and set next buffer in chain in nxt. * KB_FREEALL(bfr) Free bfr's entire buffer chain. * KB_COPY(bfr, off, len, new, flags) * Copy len bytes of user data from buffer bfr starting at * byte offset off and return new buffer chain in new. * If len is KB_COPYALL, copy until end of chain. * KB_COPYDATA(bfr, off, len, datap) * Copy data from buffer bfr starting at byte offset off * for len bytes into the data area pointed to by datap. * Returns the number of bytes not copied to datap. * KB_PULLUP(bfr, n, new) * Get at least the first n bytes of data in the buffer * chain headed by bfr contiguous in the first buffer. * Returns the (potentially new) head of the chain in new. * On failure the chain is freed and NULL is returned. * KB_LINKHEAD(new, head) * Link the kernel buffer new at the head of the buffer * chain headed by head. If both new and head are * packet header buffers, new will become the packet * header for the chain. * KB_LINK(new, prev) * Link the kernel buffer new into the buffer chain * after the buffer prev. * KB_UNLINKHEAD(head, next) * Unlink the kernel buffer from the head of the buffer * chain headed by head. The buffer head will be freed * and the new chain head will be placed in next. * KB_UNLINK(old, prev, next) * Unlink the kernel buffer old with previous buffer prev * from its buffer chain. The following buffer in the * chain will be placed in next and the buffer old will * be freed. * KB_ISPKT(bfr) Tests whether bfr is a packet header buffer. * KB_ISEXT(bfr) Tests whether bfr has external storage. 
* KB_BFRSTART(bfr, x, t) * Sets x (cast to type t) to point to the start of the * buffer space in bfr. * KB_BFREND(bfr, x, t) * Sets x (cast to type t) to point one byte past the end * of the buffer space in bfr. * KB_BFRLEN(bfr) Returns length of buffer space in bfr. * KB_DATASTART(bfr, x, t) * Sets x (cast to type t) to point to the start of the * buffer data contained in bfr. * KB_DATAEND(bfr, x, t) * Sets x (cast to type t) to point one byte past the end * of the buffer data contained in bfr. * KB_HEADSET(bfr, n) Sets the start address for buffer data in buffer bfr to * n bytes from the beginning of the buffer space. * KB_HEADMOVE(bfr, n) Adjust buffer data controls to move data down (n > 0) * or up (n < 0) n bytes in the buffer bfr. * KB_HEADADJ(bfr, n) Adjust buffer data controls to add (n > 0) or subtract * (n < 0) n bytes of data to/from the beginning of bfr. * KB_TAILADJ(bfr, n) Adjust buffer data controls to add (n > 0) or subtract * (n < 0) n bytes of data to/from the end of bfr. * KB_TAILALIGN(bfr, n) Set buffer data controls to place an object of size n * at the end of bfr, longword aligned. * KB_HEADROOM(bfr, n) Set n to the amount of buffer space available before * the start of data in bfr. * KB_TAILROOM(bfr, n) Set n to the amount of buffer space available after * the end of data in bfr. * KB_PLENGET(bfr, n) Set n to bfr's packet length. * KB_PLENSET(bfr, n) Set bfr's packet length to n. * KB_PLENADJ(bfr, n) Adjust total packet length by n bytes. 
* */ #include typedef struct mbuf KBuffer; #define KB_F_WAIT M_TRYWAIT #define KB_F_NOWAIT M_DONTWAIT #define KB_T_HEADER MT_HEADER #define KB_T_DATA MT_DATA #define KB_COPYALL M_COPYALL #define KB_NEXT(bfr) (bfr)->m_next #define KB_LEN(bfr) (bfr)->m_len #define KB_QNEXT(bfr) (bfr)->m_nextpkt #define KB_ALLOC(bfr, size, flags, type) { \ if ((size) <= MLEN) { \ MGET((bfr), (flags), (type)); \ } else \ (bfr) = NULL; \ } #define KB_ALLOCPKT(bfr, size, flags, type) { \ if ((size) <= MHLEN) { \ MGETHDR((bfr), (flags), (type)); \ } else \ (bfr) = NULL; \ } #define KB_ALLOCEXT(bfr, size, flags, type) { \ if ((size) <= MCLBYTES) { \ MGET((bfr), (flags), (type)); \ if ((bfr) != NULL) { \ MCLGET((bfr), (flags)); \ if (((bfr)->m_flags & M_EXT) == 0) { \ m_freem((bfr)); \ (bfr) = NULL; \ } \ } \ } else \ (bfr) = NULL; \ } #define KB_FREEONE(bfr, nxt) { \ (nxt) = m_free(bfr); \ } #define KB_FREEALL(bfr) { \ m_freem(bfr); \ } #define KB_COPY(bfr, off, len, new, flags) { \ (new) = m_copym((bfr), (off), (len), (flags)); \ } #define KB_COPYDATA(bfr, off, len, datap) \ (m_copydata((bfr), (off), (len), (datap)), 0) #define KB_PULLUP(bfr, n, new) { \ (new) = m_pullup((bfr), (n)); \ } #define KB_LINKHEAD(new, head) { \ if ((head) && KB_ISPKT(new) && KB_ISPKT(head)) {\ - M_COPY_PKTHDR((new), (head)); \ - (head)->m_flags &= ~M_PKTHDR; \ + M_MOVE_PKTHDR((new), (head)); \ } \ (new)->m_next = (head); \ } #define KB_LINK(new, prev) { \ (new)->m_next = (prev)->m_next; \ (prev)->m_next = (new); \ } #define KB_UNLINKHEAD(head, next) { \ (next) = m_free((head)); \ (head) = NULL; \ } #define KB_UNLINK(old, prev, next) { \ (next) = m_free((old)); \ (old) = NULL; \ (prev)->m_next = (next); \ } #define KB_ISPKT(bfr) (((bfr)->m_flags & M_PKTHDR) != 0) #define KB_ISEXT(bfr) (((bfr)->m_flags & M_EXT) != 0) #define KB_BFRSTART(bfr, x, t) { \ if ((bfr)->m_flags & M_EXT) \ (x) = (t)((bfr)->m_ext.ext_buf); \ else if ((bfr)->m_flags & M_PKTHDR) \ (x) = (t)(&(bfr)->m_pktdat); \ else \ (x) = 
(t)((bfr)->m_dat); \ } #define KB_BFREND(bfr, x, t) { \ if ((bfr)->m_flags & M_EXT) \ (x) = (t)((bfr)->m_ext.ext_buf + (bfr)->m_ext.ext_size);\ else if ((bfr)->m_flags & M_PKTHDR) \ (x) = (t)(&(bfr)->m_pktdat + MHLEN); \ else \ (x) = (t)((bfr)->m_dat + MLEN); \ } #define KB_BFRLEN(bfr) \ (((bfr)->m_flags & M_EXT) ? (bfr)->m_ext.ext_size : \ (((bfr)->m_flags & M_PKTHDR) ? MHLEN : MLEN)) #define KB_DATASTART(bfr, x, t) { \ (x) = mtod((bfr), t); \ } #define KB_DATAEND(bfr, x, t) { \ (x) = (t)(mtod((bfr), caddr_t) + (bfr)->m_len); \ } #define KB_HEADSET(bfr, n) { \ if ((bfr)->m_flags & M_EXT) \ (bfr)->m_data = (bfr)->m_ext.ext_buf + (n); \ else if ((bfr)->m_flags & M_PKTHDR) \ (bfr)->m_data = (bfr)->m_pktdat + (n); \ else \ (bfr)->m_data = (bfr)->m_dat + (n); \ } #define KB_HEADMOVE(bfr, n) { \ (bfr)->m_data += (n); \ } #define KB_HEADADJ(bfr, n) { \ (bfr)->m_len += (n); \ (bfr)->m_data -= (n); \ } #define KB_TAILADJ(bfr, n) { \ (bfr)->m_len += (n); \ } #define KB_TAILALIGN(bfr, n) { \ (bfr)->m_len = (n); \ if ((bfr)->m_flags & M_EXT) \ (bfr)->m_data = (caddr_t)(((uintptr_t)(bfr)->m_ext.ext_buf \ + (bfr)->m_ext.ext_size - (n)) & ~(sizeof(long) - 1));\ else \ (bfr)->m_data = (caddr_t)(((uintptr_t)(bfr)->m_dat + MLEN - (n)) \ & ~(sizeof(long) - 1)); \ } #define KB_HEADROOM(bfr, n) { \ /* N = m_leadingspace(BFR) XXX */ \ (n) = ((bfr)->m_flags & M_EXT ? (bfr)->m_data - (bfr)->m_ext.ext_buf : \ (bfr)->m_flags & M_PKTHDR ? (bfr)->m_data - (bfr)->m_pktdat : \ (bfr)->m_data - (bfr)->m_dat); \ } #define KB_TAILROOM(bfr, n) { \ (n) = M_TRAILINGSPACE(bfr); \ } #define KB_PLENGET(bfr, n) { \ (n) = (bfr)->m_pkthdr.len; \ } #define KB_PLENSET(bfr, n) { \ (bfr)->m_pkthdr.len = (n); \ } #define KB_PLENADJ(bfr, n) { \ (bfr)->m_pkthdr.len += (n); \ } /* * Kernel time * * KTimeout_ret Typedef for timeout() function return * * KT_TIME(t) Sets t to the current time. 
* */ typedef void KTimeout_ret; #define KT_TIME(t) microtime(&t) #endif /* _KERNEL */ #ifndef NTOHL #if BYTE_ORDER == BIG_ENDIAN #define NTOHL(x) (x) #define NTOHS(x) (x) #define HTONL(x) (x) #define HTONS(x) (x) #else #define NTOHL(x) (x) = ntohl((u_long)(x)) #define NTOHS(x) (x) = ntohs((u_short)(x)) #define HTONL(x) (x) = htonl((u_long)(x)) #define HTONS(x) (x) = htons((u_short)(x)) #endif #endif /* NTOHL */ #ifndef MAX #define MAX(a,b) max((a),(b)) #endif #ifndef MIN #define MIN(a,b) min((a),(b)) #endif #endif /* _NETATM_PORT_H */ Index: head/sys/netinet/ip_input.c =================================================================== --- head/sys/netinet/ip_input.c (revision 108465) +++ head/sys/netinet/ip_input.c (revision 108466) @@ -1,2153 +1,2162 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 * $FreeBSD$ */ #include "opt_bootp.h" #include "opt_ipfw.h" #include "opt_ipdn.h" #include "opt_ipdivert.h" #include "opt_ipfilter.h" #include "opt_ipstealth.h" #include "opt_ipsec.h" #include "opt_mac.h" #include "opt_pfil_hooks.h" #include "opt_random_ip_id.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif #ifdef FAST_IPSEC #include #include #endif int rsvp_on = 0; int ipforwarding = 0; SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, &ipforwarding, 0, "Enable IP forwarding between interfaces"); static int ipsendredirects = 1; /* XXX */ SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW, &ipsendredirects, 0, "Enable sending IP redirects"); int ip_defttl = IPDEFTTL; SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW, &ip_defttl, 0, "Maximum TTL on IP packets"); static int ip_dosourceroute = 0; SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW, 
&ip_dosourceroute, 0, "Enable forwarding source routed IP packets"); static int ip_acceptsourceroute = 0; SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute, CTLFLAG_RW, &ip_acceptsourceroute, 0, "Enable accepting source routed IP packets"); static int ip_keepfaith = 0; SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW, &ip_keepfaith, 0, "Enable packet capture for FAITH IPv4->IPv6 translater daemon"); static int ip_nfragpackets = 0; static int ip_maxfragpackets; /* initialized in ip_init() */ SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW, &ip_maxfragpackets, 0, "Maximum number of IPv4 fragment reassembly queue entries"); static int ip_sendsourcequench = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW, &ip_sendsourcequench, 0, "Enable the transmission of source quench packets"); /* * XXX - Setting ip_checkinterface mostly implements the receive side of * the Strong ES model described in RFC 1122, but since the routing table * and transmit implementation do not implement the Strong ES model, * setting this to 1 results in an odd hybrid. * * XXX - ip_checkinterface currently must be disabled if you use ipnat * to translate the destination address to another local interface. * * XXX - ip_checkinterface must be disabled if you add IP aliases * to the loopback interface instead of the interface where the * packets for those addresses are received. 
*/ static int ip_checkinterface = 1; SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW, &ip_checkinterface, 0, "Verify packet arrives on correct interface"); #ifdef DIAGNOSTIC static int ipprintfs = 0; #endif static int ipqmaxlen = IFQ_MAXLEN; extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; struct in_ifaddrhead in_ifaddrhead; /* first inet address */ struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ u_long in_ifaddrhmask; /* mask for hash table */ SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW, &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue"); SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD, &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue"); struct ipstat ipstat; SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); /* Packet reassembly stuff */ #define IPREASS_NHASH_LOG2 6 #define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) #define IPREASS_HMASK (IPREASS_NHASH - 1) #define IPREASS_HASH(x,y) \ (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH]; static int nipq = 0; /* total # of reass queues */ static int maxnipq; #ifdef IPCTL_DEFMTU SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, &ip_mtu, 0, "Default MTU"); #endif #ifdef IPSTEALTH static int ipstealth = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW, &ipstealth, 0, ""); #endif /* Firewall hooks */ ip_fw_chk_t *ip_fw_chk_ptr; int fw_enable = 1 ; int fw_one_pass = 1; /* Dummynet hooks */ ip_dn_io_t *ip_dn_io_ptr; /* * XXX this is ugly -- the following two global variables are * used to store packet state while it travels through the stack. * Note that the code even makes assumptions on the size and * alignment of fields inside struct ip_srcrt so e.g. adding some * fields will break the code. 
This needs to be fixed. * * We need to save the IP options in case a protocol wants to respond * to an incoming packet over the same route if the packet got here * using IP source routing. This allows connection establishment and * maintenance when the remote end is on a network that is not known * to us. */ static int ip_nhops = 0; static struct ip_srcrt { struct in_addr dst; /* final destination */ char nop; /* one NOP to align */ char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; } ip_srcrt; static void save_rte(u_char *, struct in_addr); static int ip_dooptions(struct mbuf *m, int, struct sockaddr_in *next_hop); static void ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop); static void ip_freef(struct ipqhead *, struct ipq *); static struct mbuf *ip_reass(struct mbuf *, struct ipqhead *, struct ipq *, u_int32_t *, u_int16_t *); static void ipintr(void); /* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. */ void ip_init() { register struct protosw *pr; register int i; TAILQ_INIT(&in_ifaddrhead); in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask); pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == 0) panic("ip_init"); for (i = 0; i < IPPROTO_MAX; i++) ip_protox[i] = pr - inetsw; for (pr = inetdomain.dom_protosw; pr < inetdomain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) ip_protox[pr->pr_protocol] = pr - inetsw; for (i = 0; i < IPREASS_NHASH; i++) TAILQ_INIT(&ipq[i]); maxnipq = nmbclusters / 4; ip_maxfragpackets = nmbclusters / 4; #ifndef RANDOM_IP_ID ip_id = time_second & 0xffff; #endif ipintrq.ifq_maxlen = ipqmaxlen; mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF); ipintrq_present = 1; register_netisr(NETISR_IP, ipintr); } /* * XXX watch out this one. 
It is perhaps used as a cache for
 * the most recently used route ? it is cleared in in_addroute()
 * when a new route is successfully created.
 */
struct route ipforward_rt;

/*
 * Ip input routine.  Checksum and byte swap header.  If fragmented
 * try to reassemble.  Process options.  Pass to next level.
 */
void
ip_input(struct mbuf *m)
{
	struct ip *ip;
	struct ipq *fp;
	struct in_ifaddr *ia = NULL;
	struct ifaddr *ifa;
	int    i, hlen, checkif;
	u_short sum;
	struct in_addr pkt_dst;
	u_int32_t divert_info = 0;		/* packet divert/tee info */
	struct ip_fw_args args;
#ifdef PFIL_HOOKS
	struct packet_filter_hook *pfh;
	struct mbuf *m0;
	int rv;
#endif /* PFIL_HOOKS */
#ifdef FAST_IPSEC
	struct m_tag *mtag;
	struct tdb_ident *tdbi;
	struct secpolicy *sp;
	int s, error;
#endif /* FAST_IPSEC */

	args.eh = NULL;
	args.oif = NULL;
	args.rule = NULL;
	args.divert_rule = 0;			/* divert cookie */
	args.next_hop = NULL;

	/* Grab info from MT_TAG mbufs prepended to the chain. */
	for (; m && m->m_type == MT_TAG; m = m->m_next) {
		switch(m->_m_tag_id) {
		default:
			printf("ip_input: unrecognised MT_TAG tag %d\n",
			    m->_m_tag_id);
			break;

		case PACKET_TAG_DUMMYNET:
			args.rule = ((struct dn_pkt *)m)->rule;
			break;

		case PACKET_TAG_DIVERT:
			args.divert_rule = (intptr_t)m->m_hdr.mh_data & 0xffff;
			break;

		case PACKET_TAG_IPFORWARD:
			args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
			break;
		}
	}

	KASSERT(m != NULL && (m->m_flags & M_PKTHDR) != 0,
	    ("ip_input: no HDR"));

	if (args.rule) {	/* dummynet already filtered us */
		ip = mtod(m, struct ip *);
		hlen = ip->ip_hl << 2;
		goto iphack ;
	}

	ipstat.ips_total++;

	if (m->m_pkthdr.len < sizeof(struct ip))
		goto tooshort;

	if (m->m_len < sizeof (struct ip) &&
	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
		ipstat.ips_toosmall++;
		return;
	}
	ip = mtod(m, struct ip *);

	if (ip->ip_v != IPVERSION) {
		ipstat.ips_badvers++;
		goto bad;
	}

	hlen = ip->ip_hl << 2;
	if (hlen < sizeof(struct ip)) {	/* minimum header length */
		ipstat.ips_badhlen++;
		goto bad;
	}
	if (hlen > m->m_len) {
		if ((m = m_pullup(m, hlen)) == 0) {
			ipstat.ips_badhlen++;
			return;
		}
		ip = mtod(m, struct ip *);
	}

	/* 127/8 must not appear on wire - RFC1122 */
	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
			ipstat.ips_badaddr++;
			goto bad;
		}
	}

	/* Prefer a hardware-verified checksum when the NIC supplied one. */
	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
	} else {
		if (hlen == sizeof(struct ip)) {
			sum = in_cksum_hdr(ip);
		} else {
			sum = in_cksum(m, hlen);
		}
	}
	if (sum) {
		ipstat.ips_badsum++;
		goto bad;
	}

	/*
	 * Convert fields to host representation.
	 */
	ip->ip_len = ntohs(ip->ip_len);
	if (ip->ip_len < hlen) {
		ipstat.ips_badlen++;
		goto bad;
	}
	ip->ip_off = ntohs(ip->ip_off);

	/*
	 * Check that the amount of data in the buffers
	 * is as at least much as the IP header would have us expect.
	 * Trim mbufs if longer than we expect.
	 * Drop packet if shorter than we expect.
	 */
	if (m->m_pkthdr.len < ip->ip_len) {
tooshort:
		ipstat.ips_tooshort++;
		goto bad;
	}
	if (m->m_pkthdr.len > ip->ip_len) {
		if (m->m_len == m->m_pkthdr.len) {
			m->m_len = ip->ip_len;
			m->m_pkthdr.len = ip->ip_len;
		} else
			m_adj(m, ip->ip_len - m->m_pkthdr.len);
	}

	/*
	 * IpHack's section.
	 * Right now when no processing on packet has done
	 * and it is still fresh out of network we do our black
	 * deals with it.
	 * - Firewall: deny/allow/divert
	 * - Xlate: translate packet's addr/port (NAT).
	 * - Pipe: pass pkt through dummynet.
	 * - Wrap: fake packet's addr/port <unimpl.>
	 * - Encapsulate: put it in another IP and send out. <unimp.>
	 */
iphack:
#ifdef PFIL_HOOKS
	/*
	 * Run through list of hooks for input packets.  If there are any
	 * filters which require that additional packets in the flow are
	 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
	 * Note that filters must _never_ set this flag, as another filter
	 * in the list may have previously cleared it.
	 */
	m0 = m;
	pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
	for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
		if (pfh->pfil_func) {
			rv = pfh->pfil_func(ip, hlen, m->m_pkthdr.rcvif, 0, &m0);
			if (rv)
				return;
			m = m0;
			if (m == NULL)
				return;
			ip = mtod(m, struct ip *);
		}
#endif /* PFIL_HOOKS */

	if (fw_enable && IPFW_LOADED) {
		/*
		 * If we've been forwarded from the output side, then
		 * skip the firewall a second time
		 */
		if (args.next_hop)
			goto ours;

		args.m = m;
		i = ip_fw_chk_ptr(&args);
		m = args.m;

		if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
			if (m)
				m_freem(m);
			return;
		}
		ip = mtod(m, struct ip *); /* just in case m changed */
		if (i == 0 && args.next_hop == NULL)	/* common case */
			goto pass;

		if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
			/* Send packet to the appropriate pipe */
			ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
			return;
		}
#ifdef IPDIVERT
		if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
			/* Divert or tee packet */
			divert_info = i;
			goto ours;
		}
#endif
		if (i == 0 && args.next_hop != NULL)
			goto pass;
		/*
		 * if we get here, the packet must be dropped
		 */
		m_freem(m);
		return;
	}
pass:

	/*
	 * Process options and, if not destined for us,
	 * ship it on.  ip_dooptions returns 1 when an
	 * error was detected (causing an icmp message
	 * to be sent and the original packet to be freed).
	 */
	ip_nhops = 0;		/* for source routed packets */
	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop))
		return;

        /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
         * matter if it is destined to another node, or whether it is
         * a multicast one, RSVP wants it! and prevents it from being forwarded
         * anywhere else. Also checks if the rsvp daemon is running before
	 * grabbing the packet.
         */
	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
		goto ours;

	/*
	 * Check our list of addresses, to see if the packet is for us.
	 * If we don't have any addresses, assume any unicast packet
	 * we receive might be for us (and let the upper layers deal
	 * with it).
	 */
	if (TAILQ_EMPTY(&in_ifaddrhead) &&
	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
		goto ours;

	/*
	 * Cache the destination address of the packet; this may be
	 * changed by use of 'ipfw fwd'.
	 */
	pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;

	/*
	 * Enable a consistency check between the destination address
	 * and the arrival interface for a unicast packet (the RFC 1122
	 * strong ES model) if IP forwarding is disabled and the packet
	 * is not locally generated and the packet is not subject to
	 * 'ipfw fwd'.
	 *
	 * XXX - Checking also should be disabled if the destination
	 * address is ipnat'ed to a different interface.
	 *
	 * XXX - Checking is incompatible with IP aliases added
	 * to the loopback interface instead of the interface where
	 * the packets are received.
	 */
	checkif = ip_checkinterface && (ipforwarding == 0) &&
	    m->m_pkthdr.rcvif != NULL &&
	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
	    (args.next_hop == NULL);

	/*
	 * Check for exact addresses in the hash bucket.
	 */
	LIST_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) {
		/*
		 * If the address matches, verify that the packet
		 * arrived via the correct interface if checking is
		 * enabled.
		 */
		if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr &&
		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
			goto ours;
	}
	/*
	 * Check for broadcast addresses.
	 *
	 * Only accept broadcast packets that arrive via the matching
	 * interface.  Reception of forwarded directed broadcasts would
	 * be handled via ip_forward() and ether_output() with the loopback
	 * into the stack for SIMPLEX interfaces handled by ether_output().
	 */
	if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != AF_INET)
				continue;
			ia = ifatoia(ifa);
			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
			    pkt_dst.s_addr)
				goto ours;
			if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr)
				goto ours;
#ifdef BOOTP_COMPAT
			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
				goto ours;
#endif
		}
	}
	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
		struct in_multi *inm;
		if (ip_mrouter) {
			/*
			 * If we are acting as a multicast router, all
			 * incoming multicast packets are passed to the
			 * kernel-level multicast forwarding function.
			 * The packet is returned (relatively) intact; if
			 * ip_mforward() returns a non-zero value, the packet
			 * must be discarded, else it may be accepted below.
			 */
			if (ip_mforward &&
			    ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
				ipstat.ips_cantforward++;
				m_freem(m);
				return;
			}

			/*
			 * The process-level routing daemon needs to receive
			 * all multicast IGMP packets, whether or not this
			 * host belongs to their destination groups.
			 */
			if (ip->ip_p == IPPROTO_IGMP)
				goto ours;
			ipstat.ips_forward++;
		}
		/*
		 * See if we belong to the destination multicast group on the
		 * arrival interface.
		 */
		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
		if (inm == NULL) {
			ipstat.ips_notmember++;
			m_freem(m);
			return;
		}
		goto ours;
	}
	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
		goto ours;
	if (ip->ip_dst.s_addr == INADDR_ANY)
		goto ours;

	/*
	 * FAITH(Firewall Aided Internet Translator)
	 */
	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
		if (ip_keepfaith) {
			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
				goto ours;
		}
		m_freem(m);
		return;
	}

	/*
	 * Not for us; forward if possible and desirable.
	 */
	if (ipforwarding == 0) {
		ipstat.ips_cantforward++;
		m_freem(m);
	} else {
#ifdef IPSEC
		/*
		 * Enforce inbound IPsec SPD.
		 */
		if (ipsec4_in_reject(m, NULL)) {
			ipsecstat.in_polvio++;
			goto bad;
		}
#endif /* IPSEC */
#ifdef FAST_IPSEC
		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
		s = splnet();
		if (mtag != NULL) {
			tdbi = (struct tdb_ident *)(mtag + 1);
			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
		} else {
			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
						   IP_FORWARDING, &error);
		}
		if (sp == NULL) {	/* NB: can happen if error */
			splx(s);
			/*XXX error stat???*/
			DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
			goto bad;
		}

		/*
		 * Check security policy against packet attributes.
		 */
		error = ipsec_in_reject(sp, m);
		KEY_FREESP(&sp);
		splx(s);
		if (error) {
			ipstat.ips_cantforward++;
			goto bad;
		}
#endif /* FAST_IPSEC */
		ip_forward(m, 0, args.next_hop);
	}
	return;

ours:
#ifdef IPSTEALTH
	/*
	 * IPSTEALTH: Process non-routing options only
	 * if the packet is destined for us.
	 */
	if (ipstealth && hlen > sizeof (struct ip) &&
	    ip_dooptions(m, 1, args.next_hop))
		return;
#endif /* IPSTEALTH */

	/* Count the packet in the ip address stats */
	if (ia != NULL) {
		ia->ia_ifa.if_ipackets++;
		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
	}

	/*
	 * If offset or IP_MF are set, must reassemble.
	 * Otherwise, nothing need be done.
	 * (We could look in the reassembly queue to see
	 * if the packet was previously fragmented,
	 * but it's not worth the time; just let them time out.)
	 */
	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
		sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
		/*
		 * Look for queue of fragments
		 * of this datagram.
		 */
		TAILQ_FOREACH(fp, &ipq[sum], ipq_list)
			if (ip->ip_id == fp->ipq_id &&
			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
#ifdef MAC
			    mac_fragment_match(m, fp) &&
#endif
			    ip->ip_p == fp->ipq_p)
				goto found;

		fp = 0;

		/* check if there's a place for the new queue */
		if (nipq > maxnipq) {
			/*
			 * drop something from the tail of the current queue
			 * before proceeding further
			 */
			struct ipq *q = TAILQ_LAST(&ipq[sum], ipqhead);
			if (q == NULL) {   /* gak */
				for (i = 0; i < IPREASS_NHASH; i++) {
					struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
					if (r) {
						ip_freef(&ipq[i], r);
						break;
					}
				}
			} else
				ip_freef(&ipq[sum], q);
		}
found:
		/*
		 * Adjust ip_len to not reflect header,
		 * convert offset of this to bytes.
		 */
		ip->ip_len -= hlen;
		if (ip->ip_off & IP_MF) {
			/*
			 * Make sure that fragments have a data length
			 * that's a non-zero multiple of 8 bytes.
			 */
			if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
				ipstat.ips_toosmall++; /* XXX */
				goto bad;
			}
			m->m_flags |= M_FRAG;
		} else
			m->m_flags &= ~M_FRAG;
		ip->ip_off <<= 3;

		/*
		 * Attempt reassembly; if it succeeds, proceed.
		 * ip_reass() will return a different mbuf, and update
		 * the divert info in divert_info and args.divert_rule.
		 */
		ipstat.ips_fragments++;
		m->m_pkthdr.header = ip;
		m = ip_reass(m,
		    &ipq[sum], fp, &divert_info, &args.divert_rule);
		if (m == 0)
			return;
		ipstat.ips_reassembled++;
		ip = mtod(m, struct ip *);
		/* Get the header length of the reassembled packet */
		hlen = ip->ip_hl << 2;
#ifdef IPDIVERT
		/* Restore original checksum before diverting packet */
		if (divert_info != 0) {
			ip->ip_len += hlen;
			ip->ip_len = htons(ip->ip_len);
			ip->ip_off = htons(ip->ip_off);
			ip->ip_sum = 0;
			if (hlen == sizeof(struct ip))
				ip->ip_sum = in_cksum_hdr(ip);
			else
				ip->ip_sum = in_cksum(m, hlen);
			ip->ip_off = ntohs(ip->ip_off);
			ip->ip_len = ntohs(ip->ip_len);
			ip->ip_len -= hlen;
		}
#endif
	} else
		ip->ip_len -= hlen;

#ifdef IPDIVERT
	/*
	 * Divert or tee packet to the divert protocol if required.
	 */
	if (divert_info != 0) {
		struct mbuf *clone = NULL;

		/* Clone packet if we're doing a 'tee' */
		if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
			clone = m_dup(m, M_DONTWAIT);

		/* Restore packet header fields to original values */
		ip->ip_len += hlen;
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);

		/* Deliver packet to divert input routine */
		divert_packet(m, 1, divert_info & 0xffff,
		    args.divert_rule);
		ipstat.ips_delivered++;

		/* If 'tee', continue with original packet */
		if (clone == NULL)
			return;
		m = clone;
		ip = mtod(m, struct ip *);
		ip->ip_len += hlen;
		/*
		 * Jump backwards to complete processing of the
		 * packet.  But first clear divert_info to avoid
		 * entering this block again.
		 * We do not need to clear args.divert_rule
		 * or args.next_hop as they will not be used.
		 */
		divert_info = 0;
		goto pass;
	}
#endif

#ifdef IPSEC
	/*
	 * enforce IPsec policy checking if we are seeing last header.
	 * note that we do not visit this with protocols with pcb layer
	 * code - like udp/tcp/raw ip.
	 */
	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
	    ipsec4_in_reject(m, NULL)) {
		ipsecstat.in_polvio++;
		goto bad;
	}
#endif
#if FAST_IPSEC
	/*
	 * enforce IPsec policy checking if we are seeing last header.
	 * note that we do not visit this with protocols with pcb layer
	 * code - like udp/tcp/raw ip.
	 */
	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
		/*
		 * Check if the packet has already had IPsec processing
		 * done.  If so, then just pass it along.  This tag gets
		 * set during AH, ESP, etc. input handling, before the
		 * packet is returned to the ip input queue for delivery.
		 */
		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
		s = splnet();
		if (mtag != NULL) {
			tdbi = (struct tdb_ident *)(mtag + 1);
			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
		} else {
			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
						   IP_FORWARDING, &error);
		}
		if (sp != NULL) {
			/*
			 * Check security policy against packet attributes.
			 */
			error = ipsec_in_reject(sp, m);
			KEY_FREESP(&sp);
		} else {
			/* XXX error stat??? */
			error = EINVAL;
			DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
			goto bad;
		}
		splx(s);
		if (error)
			goto bad;
	}
#endif /* FAST_IPSEC */

	/*
	 * Switch out to protocol's input routine.
	 */
	ipstat.ips_delivered++;
	if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
		/* TCP needs IPFORWARD info if available */
		struct m_hdr tag;

		tag.mh_type = MT_TAG;
		tag.mh_flags = PACKET_TAG_IPFORWARD;
		tag.mh_data = (caddr_t)args.next_hop;
		tag.mh_next = m;

		(*inetsw[ip_protox[ip->ip_p]].pr_input)(
			(struct mbuf *)&tag, hlen);
	} else
		(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
	return;
bad:
	m_freem(m);
}

/*
 * IP software interrupt routine - to go away sometime soon
 */
static void
ipintr(void)
{
	struct mbuf *m;

	while (1) {
		IF_DEQUEUE(&ipintrq, m);
		if (m == 0)
			return;
		ip_input(m);
	}
}

/*
 * Take incoming datagram fragment and try to reassemble it into
 * whole datagram.  If a chain for reassembly of this datagram already
 * exists, then it is given as fp; otherwise have to make a chain.
 *
 * When IPDIVERT enabled, keep additional state with each packet that
 * tells us if we need to divert or tee the packet we're building.
 * In particular, *divinfo includes the port and TEE flag,
 * *divert_rule is the number of the matching rule.
 */
static struct mbuf *
ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
	u_int32_t *divinfo, u_int16_t *divert_rule)
{
	struct ip *ip = mtod(m, struct ip *);
	register struct mbuf *p, *q, *nq;
	struct mbuf *t;
	int hlen = ip->ip_hl << 2;
	int i, next;

	/*
	 * Presence of header sizes in mbufs
	 * would confuse code below.
	 */
	m->m_data += hlen;
	m->m_len -= hlen;

	/*
	 * If first fragment to arrive, create a reassembly queue.
	 */
	if (fp == 0) {
		/*
		 * Enforce upper bound on number of fragmented packets
		 * for which we attempt reassembly;
		 * If maxfrag is 0, never accept fragments.
		 * If maxfrag is -1, accept all fragments without limitation.
		 */
		if ((ip_maxfragpackets >= 0) &&
		    (ip_nfragpackets >= ip_maxfragpackets))
			goto dropfrag;
		ip_nfragpackets++;
		/* The ipq itself lives in an MT_FTABLE mbuf. */
		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
			goto dropfrag;
		fp = mtod(t, struct ipq *);
#ifdef MAC
		mac_init_ipq(fp);
		mac_create_ipq(m, fp);
#endif
		TAILQ_INSERT_HEAD(head, fp, ipq_list);
		nipq++;
		fp->ipq_ttl = IPFRAGTTL;
		fp->ipq_p = ip->ip_p;
		fp->ipq_id = ip->ip_id;
		fp->ipq_src = ip->ip_src;
		fp->ipq_dst = ip->ip_dst;
		fp->ipq_frags = m;
		m->m_nextpkt = NULL;
#ifdef IPDIVERT
		fp->ipq_div_info = 0;
		fp->ipq_div_cookie = 0;
#endif
		goto inserted;
	} else {
#ifdef MAC
		mac_update_ipq(m, fp);
#endif
	}

	/* Each queued fragment keeps its ip header pointer in m_pkthdr.header. */
#define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))

	/*
	 * Find a segment which begins after this one does.
	 */
	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
		if (GETIP(q)->ip_off > ip->ip_off)
			break;

	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already.  If so, drop the data from the incoming
	 * segment.  If it provides all of our data, drop us, otherwise
	 * stick new segment in the proper place.
	 *
	 * If some of the data is dropped from the preceding
	 * segment, then it's checksum is invalidated.
	 */
	if (p) {
		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
		if (i > 0) {
			if (i >= ip->ip_len)
				goto dropfrag;
			m_adj(m, i);
			m->m_pkthdr.csum_flags = 0;
			ip->ip_off += i;
			ip->ip_len -= i;
		}
		m->m_nextpkt = p->m_nextpkt;
		p->m_nextpkt = m;
	} else {
		m->m_nextpkt = fp->ipq_frags;
		fp->ipq_frags = m;
	}

	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
	     q = nq) {
		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
		if (i < GETIP(q)->ip_len) {
			GETIP(q)->ip_len -= i;
			GETIP(q)->ip_off += i;
			m_adj(q, i);
			q->m_pkthdr.csum_flags = 0;
			break;
		}
		nq = q->m_nextpkt;
		m->m_nextpkt = nq;
		m_freem(q);
	}

inserted:

#ifdef IPDIVERT
	/*
	 * Transfer firewall instructions to the fragment structure.
	 * Only trust info in the fragment at offset 0.
	 */
	if (ip->ip_off == 0) {
		fp->ipq_div_info = *divinfo;
		fp->ipq_div_cookie = *divert_rule;
	}
	*divinfo = 0;
	*divert_rule = 0;
#endif

	/*
	 * Check for complete reassembly.
	 */
	next = 0;
	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
		if (GETIP(q)->ip_off != next)
			return (0);
		next += GETIP(q)->ip_len;
	}
	/* Make sure the last packet didn't have the IP_MF flag */
	if (p->m_flags & M_FRAG)
		return (0);

	/*
	 * Reassembly is complete.  Make sure the packet is a sane size.
	 */
	q = fp->ipq_frags;
	ip = GETIP(q);
	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
		ipstat.ips_toolong++;
		ip_freef(head, fp);
		return (0);
	}

	/*
	 * Concatenate fragments.
	 */
	m = q;
	t = m->m_next;
	m->m_next = 0;
	m_cat(m, t);
	nq = q->m_nextpkt;
	q->m_nextpkt = 0;
	for (q = nq; q != NULL; q = nq) {
		nq = q->m_nextpkt;
		q->m_nextpkt = NULL;
		/* Merge hardware checksum state across all fragments. */
		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
		m_cat(m, q);
	}

#ifdef MAC
	mac_create_datagram_from_ipq(fp, m);
	mac_destroy_ipq(fp);
#endif

#ifdef IPDIVERT
	/*
	 * Extract firewall instructions from the fragment structure.
	 */
	*divinfo = fp->ipq_div_info;
	*divert_rule = fp->ipq_div_cookie;
#endif

	/*
	 * Create header for new ip packet by
	 * modifying header of first packet;
	 * dequeue and discard fragment reassembly header.
	 * Make header visible.
	 */
	ip->ip_len = next;
	ip->ip_src = fp->ipq_src;
	ip->ip_dst = fp->ipq_dst;
	TAILQ_REMOVE(head, fp, ipq_list);
	nipq--;
	(void) m_free(dtom(fp));
	ip_nfragpackets--;
	m->m_len += (ip->ip_hl << 2);
	m->m_data -= (ip->ip_hl << 2);
	/* some debugging cruft by sklower, below, will go away soon */
	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
		m_fixhdr(m);
	return (m);

dropfrag:
#ifdef IPDIVERT
	*divinfo = 0;
	*divert_rule = 0;
#endif
	ipstat.ips_fragdropped++;
	m_freem(m);
	return (0);

#undef GETIP
}

/*
 * Free a fragment reassembly header and all
 * associated datagrams.
 */
static void
ip_freef(fhp, fp)
	struct ipqhead *fhp;
	struct ipq *fp;
{
	register struct mbuf *q;

	while (fp->ipq_frags) {
		q = fp->ipq_frags;
		fp->ipq_frags = q->m_nextpkt;
		m_freem(q);
	}
	TAILQ_REMOVE(fhp, fp, ipq_list);
	(void) m_free(dtom(fp));
	ip_nfragpackets--;
	nipq--;
}

/*
 * IP timer processing;
 * if a timer expires on a reassembly
 * queue, discard it.
 */
void
ip_slowtimo()
{
	register struct ipq *fp;
	int s = splnet();
	int i;

	for (i = 0; i < IPREASS_NHASH; i++) {
		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
			struct ipq *fpp;

			/* Advance before freeing: ip_freef invalidates fpp. */
			fpp = fp;
			fp = TAILQ_NEXT(fp, ipq_list);
			if(--fpp->ipq_ttl == 0) {
				ipstat.ips_fragtimeout++;
				ip_freef(&ipq[i], fpp);
			}
		}
	}
	/*
	 * If we are over the maximum number of fragments
	 * (due to the limit being lowered), drain off
	 * enough to get down to the new limit.
	 */
	for (i = 0; i < IPREASS_NHASH; i++) {
		if (ip_maxfragpackets >= 0) {
			while (ip_nfragpackets > ip_maxfragpackets &&
				!TAILQ_EMPTY(&ipq[i])) {
				ipstat.ips_fragdropped++;
				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
			}
		}
	}
	ipflow_slowtimo();
	splx(s);
}

/*
 * Drain off all datagram fragments.
 */
void
ip_drain()
{
	int     i;

	for (i = 0; i < IPREASS_NHASH; i++) {
		while(!TAILQ_EMPTY(&ipq[i])) {
			ipstat.ips_fragdropped++;
			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
		}
	}
	in_rtqdrain();
}

/*
 * Do option processing on a datagram,
 * possibly discarding it if bad options are encountered,
 * or forwarding it if source-routed.
 * The pass argument is used when operating in the IPSTEALTH
 * mode to tell what options to process:
 * [LS]SRR (pass 0) or the others (pass 1).
 * The reason for as many as two passes is that when doing IPSTEALTH,
 * non-routing options should be processed only if the packet is for us.
 * Returns 1 if packet has been forwarded/freed,
 * 0 if the packet should be processed further.
 */
static int
ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop)
{
	struct ip *ip = mtod(m, struct ip *);
	u_char *cp;
	struct in_ifaddr *ia;
	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
	struct in_addr *sin, dst;
	n_time ntime;
	struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };

	dst = ip->ip_dst;
	cp = (u_char *)(ip + 1);
	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[IPOPT_OPTVAL];
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP)
			optlen = 1;
		else {
			/* Validate the option length against what remains. */
			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			optlen = cp[IPOPT_OLEN];
			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
		}
		switch (opt) {

		default:
			break;

		/*
		 * Source routing with record.
		 * Find interface with current destination address.
		 * If none on this machine then drop if strictly routed,
		 * or do nothing if loosely routed.
		 * Record interface address and bring up next address
		 * component.  If strictly routed make sure next
		 * address is on directly accessible net.
		 */
		case IPOPT_LSRR:
		case IPOPT_SSRR:
#ifdef IPSTEALTH
			if (ipstealth && pass > 0)
				break;
#endif
			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			ipaddr.sin_addr = ip->ip_dst;
			ia = (struct in_ifaddr *)
				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
			if (ia == 0) {
				if (opt == IPOPT_SSRR) {
					type = ICMP_UNREACH;
					code = ICMP_UNREACH_SRCFAIL;
					goto bad;
				}
				if (!ip_dosourceroute)
					goto nosourcerouting;
				/*
				 * Loose routing, and not at next destination
				 * yet; nothing to do except forward.
				 */
				break;
			}
			off--;			/* 0 origin */
			if (off > optlen - (int)sizeof(struct in_addr)) {
				/*
				 * End of source route.  Should be for us.
				 */
				if (!ip_acceptsourceroute)
					goto nosourcerouting;
				save_rte(cp, ip->ip_src);
				break;
			}
#ifdef IPSTEALTH
			if (ipstealth)
				goto dropit;
#endif
			if (!ip_dosourceroute) {
				if (ipforwarding) {
					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
					/*
					 * Acting as a router, so generate ICMP
					 */
nosourcerouting:
					strcpy(buf, inet_ntoa(ip->ip_dst));
					log(LOG_WARNING,
					    "attempted source route from %s to %s\n",
					    inet_ntoa(ip->ip_src), buf);
					type = ICMP_UNREACH;
					code = ICMP_UNREACH_SRCFAIL;
					goto bad;
				} else {
					/*
					 * Not acting as a router, so silently drop.
					 */
#ifdef IPSTEALTH
dropit:
#endif
					ipstat.ips_cantforward++;
					m_freem(m);
					return (1);
				}
			}

			/*
			 * locate outgoing interface
			 */
			(void)memcpy(&ipaddr.sin_addr, cp + off,
			    sizeof(ipaddr.sin_addr));

			if (opt == IPOPT_SSRR) {
#define	INA	struct in_ifaddr *
#define	SA	struct sockaddr *
			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
			} else
				ia = ip_rtaddr(ipaddr.sin_addr, &ipforward_rt);
			if (ia == 0) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_SRCFAIL;
				goto bad;
			}
			ip->ip_dst = ipaddr.sin_addr;
			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
			    sizeof(struct in_addr));
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			/*
			 * Let ip_intr's mcast routing check handle mcast pkts
			 */
			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
			break;

		case IPOPT_RR:
#ifdef IPSTEALTH
			if (ipstealth && pass == 0)
				break;
#endif
			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			/*
			 * If no space remains, ignore.
			 */
			off--;			/* 0 origin */
			if (off > optlen - (int)sizeof(struct in_addr))
				break;
			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
			    sizeof(ipaddr.sin_addr));
			/*
			 * locate outgoing interface; if we're the destination,
			 * use the incoming interface (should be same).
			 */
			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
			    (ia = ip_rtaddr(ipaddr.sin_addr, &ipforward_rt)) == 0) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_HOST;
				goto bad;
			}
			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
			    sizeof(struct in_addr));
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			break;

		case IPOPT_TS:
#ifdef IPSTEALTH
			if (ipstealth && pass == 0)
				break;
#endif
			code = cp - (u_char *)ip;
			if (optlen < 4 || optlen > 40) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < 5) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if (off > optlen - (int)sizeof(int32_t)) {
				/* Option full: bump the overflow counter. */
				cp[IPOPT_OFFSET + 1] += (1 << 4);
				if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
					goto bad;
				}
				break;
			}
			off--;				/* 0 origin */
			sin = (struct in_addr *)(cp + off);
			switch (cp[IPOPT_OFFSET + 1] & 0x0f) {

			case IPOPT_TS_TSONLY:
				break;

			case IPOPT_TS_TSANDADDR:
				if (off + sizeof(n_time) +
				    sizeof(struct in_addr) > optlen) {
					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
					goto bad;
				}
				ipaddr.sin_addr = dst;
				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
							    m->m_pkthdr.rcvif);
				if (ia == 0)
					continue;
				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
				    sizeof(struct in_addr));
				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
				off += sizeof(struct in_addr);
				break;

			case IPOPT_TS_PRESPEC:
				if (off + sizeof(n_time) +
				    sizeof(struct in_addr) > optlen) {
					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
					goto bad;
				}
				(void)memcpy(&ipaddr.sin_addr, sin,
				    sizeof(struct in_addr));
				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
					continue;
				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
				off += sizeof(struct in_addr);
				break;

			default:
				code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
				goto bad;
			}
			ntime = iptime();
			(void)memcpy(cp + off, &ntime, sizeof(n_time));
			cp[IPOPT_OFFSET] += sizeof(n_time);
		}
	}
	if (forward && ipforwarding) {
		ip_forward(m, 1, next_hop);
		return (1);
	}
	return (0);
bad:
	icmp_error(m, type, code, 0, 0);
	ipstat.ips_badoptions++;
	return (1);
}

/*
 * Given address of next destination (final or next hop),
 * return
internet address info of interface to be used to get there.
 */
struct in_ifaddr *
ip_rtaddr(dst, rt)
	struct in_addr dst;
	struct route *rt;
{
	register struct sockaddr_in *sin;

	sin = (struct sockaddr_in *)&rt->ro_dst;

	/* Reuse the cached route only if it is up and for the same dst. */
	if (rt->ro_rt == 0 || !(rt->ro_rt->rt_flags & RTF_UP) ||
	    dst.s_addr != sin->sin_addr.s_addr) {
		if (rt->ro_rt) {
			RTFREE(rt->ro_rt);
			rt->ro_rt = 0;
		}
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = dst;

		rtalloc_ign(rt, RTF_PRCLONING);
	}
	if (rt->ro_rt == 0)
		return ((struct in_ifaddr *)0);
	return (ifatoia(rt->ro_rt->rt_ifa));
}

/*
 * Save incoming source route for use in replies,
 * to be picked up later by ip_srcroute if the receiver is interested.
 */
static void
save_rte(option, dst)
	u_char *option;
	struct in_addr dst;
{
	unsigned olen;

	olen = option[IPOPT_OLEN];
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf("save_rte: olen %d\n", olen);
#endif
	/* Silently ignore routes too long for the static ip_srcrt buffer. */
	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
		return;
	bcopy(option, ip_srcrt.srcopt, olen);
	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
	ip_srcrt.dst = dst;
}

/*
 * Retrieve incoming source route for use in replies,
 * in the same form used by setsockopt.
 * The first hop is placed before the options, will be removed later.
 */
struct mbuf *
ip_srcroute()
{
	register struct in_addr *p, *q;
	register struct mbuf *m;

	if (ip_nhops == 0)
		return ((struct mbuf *)0);
	m = m_get(M_DONTWAIT, MT_HEADER);
	if (m == 0)
		return ((struct mbuf *)0);

#define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))

	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
	    OPTSIZ;
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
#endif

	/*
	 * First save first hop for return route
	 */
	p = &ip_srcrt.route[ip_nhops - 1];
	*(mtod(m, struct in_addr *)) = *p--;
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
#endif

	/*
	 * Copy option fields and padding (nop) to mbuf.
	 */
	ip_srcrt.nop = IPOPT_NOP;
	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
	    &ip_srcrt.nop, OPTSIZ);
	q = (struct in_addr *)(mtod(m, caddr_t) +
	    sizeof(struct in_addr) + OPTSIZ);
#undef OPTSIZ
	/*
	 * Record return path as an IP source route,
	 * reversing the path (pointers are now aligned).
	 */
	while (p >= ip_srcrt.route) {
#ifdef DIAGNOSTIC
		if (ipprintfs)
			printf(" %lx", (u_long)ntohl(q->s_addr));
#endif
		*q++ = *p--;
	}
	/*
	 * Last hop goes to final destination.
	 */
	*q = ip_srcrt.dst;
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf(" %lx\n", (u_long)ntohl(q->s_addr));
#endif
	return (m);
}

/*
 * Strip out IP options, at higher
 * level protocol in the kernel.
 * Second argument is buffer to which options
 * will be moved, and return value is their length.
 * XXX should be deleted; last arg currently ignored.
 */
void
ip_stripoptions(m, mopt)
	register struct mbuf *m;
	struct mbuf *mopt;
{
	register int i;
	struct ip *ip = mtod(m, struct ip *);
	register caddr_t opts;
	int olen;

	olen = (ip->ip_hl << 2) - sizeof (struct ip);
	opts = (caddr_t)(ip + 1);
	i = m->m_len - (sizeof (struct ip) + olen);
	/* Slide the payload down over the options. */
	bcopy(opts + olen, opts, (unsigned)i);
	m->m_len -= olen;
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len -= olen;
	ip->ip_v = IPVERSION;
	ip->ip_hl = sizeof(struct ip) >> 2;
}

/* Map PRC_* control codes to errno values. */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		0,		0,
	ENOPROTOOPT,	ECONNREFUSED
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advancing
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.
 */
static void
ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
{
	struct ip *ip = mtod(m, struct ip *);
	struct rtentry *rt;
	int error, type = 0, code = 0;
	struct mbuf *mcopy;
	n_long dest;
	struct in_addr pkt_dst;
	struct ifnet *destifp;
#if defined(IPSEC) || defined(FAST_IPSEC)
	struct ifnet dummyifp;
#endif

	dest = 0;
	/*
	 * Cache the destination address of the packet; this may be
	 * changed by use of 'ipfw fwd'.
	 */
	pkt_dst = next_hop ?
next_hop->sin_addr : ip->ip_dst; #ifdef DIAGNOSTIC if (ipprintfs) printf("forward: src %lx dst %lx ttl %x\n", (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr, ip->ip_ttl); #endif if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) { ipstat.ips_cantforward++; m_freem(m); return; } #ifdef IPSTEALTH if (!ipstealth) { #endif if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0); return; } #ifdef IPSTEALTH } #endif if (ip_rtaddr(pkt_dst, &ipforward_rt) == 0) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0); return; } else rt = ipforward_rt.ro_rt; /* * Save the IP header and at most 8 bytes of the payload, * in case we need to generate an ICMP message to the src. * * XXX this can be optimized a lot by saving the data in a local * buffer on the stack (72 bytes at most), and only allocating the * mbuf if really necessary. The vast majority of the packets * are forwarded without having to send an ICMP back (either * because unnecessary, or because rate limited), so we are * really we are wasting a lot of work here. * * We don't use m_copy() because it might return a reference * to a shared cluster. Both this function and ip_output() * assume exclusive access to the IP header in `m', so any * data in a cluster may change before we reach icmp_error(). */ MGET(mcopy, M_DONTWAIT, m->m_type); + if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) { + /* + * It's probably ok if the pkthdr dup fails (because + * the deep copy of the tag chain failed), but for now + * be conservative and just discard the copy since + * code below may some day want the tags. + */ + m_free(mcopy); + mcopy = NULL; + } if (mcopy != NULL) { - M_COPY_PKTHDR(mcopy, m); mcopy->m_len = imin((ip->ip_hl << 2) + 8, (int)ip->ip_len); m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); #ifdef MAC /* * XXXMAC: This will eventually become an explicit * labeling point. 
		 */
		mac_create_mbuf_from_mbuf(m, mcopy);
#endif
	}

#ifdef IPSTEALTH
	if (!ipstealth) {
#endif
		ip->ip_ttl -= IPTTLDEC;
#ifdef IPSTEALTH
	}
#endif

	/*
	 * If forwarding packet using same interface that it came in on,
	 * perhaps should send a redirect to sender to shortcut a hop.
	 * Only send redirect if source is sending directly to us,
	 * and if packet was not source routed (or has any options).
	 * Also, don't send redirect if forwarding using a default route
	 * or a route modified by a redirect.
	 */
	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
	    ipsendredirects && !srcrt && !next_hop) {
#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
		u_long src = ntohl(ip->ip_src.s_addr);

		if (RTA(rt) &&
		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
		    if (rt->rt_flags & RTF_GATEWAY)
			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
		    else
			dest = pkt_dst.s_addr;
		    /* Router requirements says to only send host redirects */
		    type = ICMP_REDIRECT;
		    code = ICMP_REDIRECT_HOST;
#ifdef DIAGNOSTIC
		    if (ipprintfs)
		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
#endif
		}
	}

    {
	struct m_hdr tag;

	if (next_hop) {
		/* Pass IPFORWARD info if available */

		tag.mh_type = MT_TAG;
		tag.mh_flags = PACKET_TAG_IPFORWARD;
		tag.mh_data = (caddr_t)next_hop;
		tag.mh_next = m;
		m = (struct mbuf *)&tag;
	}
	error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
			  IP_FORWARDING, 0, NULL);
    }
	if (error)
		ipstat.ips_cantforward++;
	else {
		ipstat.ips_forward++;
		if (type)
			ipstat.ips_redirectsent++;
		else {
			if (mcopy) {
				ipflow_create(&ipforward_rt, mcopy);
				m_freem(mcopy);
			}
			return;
		}
	}
	if (mcopy == NULL)
		return;
	destifp = NULL;

	/* Translate the ip_output() error into an ICMP type/code pair. */
	switch (error) {

	case 0:				/* forwarded, but need redirect */
		/* type, code set above */
		break;

	case ENETUNREACH:		/* shouldn't happen, checked above */
	case EHOSTUNREACH:
	case ENETDOWN:
	case EHOSTDOWN:
	default:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_HOST;
		break;

	case EMSGSIZE:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_NEEDFRAG;
#ifdef IPSEC /* * If the packet is routed over IPsec tunnel, tell the * originator the tunnel MTU. * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz * XXX quickhack!!! */ if (ipforward_rt.ro_rt) { struct secpolicy *sp = NULL; int ipsecerror; int ipsechdr; struct route *ro; sp = ipsec4_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND, IP_FORWARDING, &ipsecerror); if (sp == NULL) destifp = ipforward_rt.ro_rt->rt_ifp; else { /* count IPsec header size */ ipsechdr = ipsec4_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, NULL); /* * find the correct route for outer IPv4 * header, compute tunnel MTU. * * XXX BUG ALERT * The "dummyifp" code relies upon the fact * that icmp_error() touches only ifp->if_mtu. */ /*XXX*/ destifp = NULL; if (sp->req != NULL && sp->req->sav != NULL && sp->req->sav->sah != NULL) { ro = &sp->req->sav->sah->sa_route; if (ro->ro_rt && ro->ro_rt->rt_ifp) { dummyifp.if_mtu = ro->ro_rt->rt_ifp->if_mtu; dummyifp.if_mtu -= ipsechdr; destifp = &dummyifp; } } key_freesp(sp); } } #elif FAST_IPSEC /* * If the packet is routed over IPsec tunnel, tell the * originator the tunnel MTU. * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz * XXX quickhack!!! */ if (ipforward_rt.ro_rt) { struct secpolicy *sp = NULL; int ipsecerror; int ipsechdr; struct route *ro; sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND, IP_FORWARDING, &ipsecerror); if (sp == NULL) destifp = ipforward_rt.ro_rt->rt_ifp; else { /* count IPsec header size */ ipsechdr = ipsec4_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, NULL); /* * find the correct route for outer IPv4 * header, compute tunnel MTU. * * XXX BUG ALERT * The "dummyifp" code relies upon the fact * that icmp_error() touches only ifp->if_mtu. 
*/ /*XXX*/ destifp = NULL; if (sp->req != NULL && sp->req->sav != NULL && sp->req->sav->sah != NULL) { ro = &sp->req->sav->sah->sa_route; if (ro->ro_rt && ro->ro_rt->rt_ifp) { dummyifp.if_mtu = ro->ro_rt->rt_ifp->if_mtu; dummyifp.if_mtu -= ipsechdr; destifp = &dummyifp; } } KEY_FREESP(&sp); } } #else /* !IPSEC && !FAST_IPSEC */ if (ipforward_rt.ro_rt) destifp = ipforward_rt.ro_rt->rt_ifp; #endif /*IPSEC*/ ipstat.ips_cantfrag++; break; case ENOBUFS: /* * A router should not generate ICMP_SOURCEQUENCH as * required in RFC1812 Requirements for IP Version 4 Routers. * Source quench could be a big problem under DoS attacks, * or if the underlying interface is rate-limited. * Those who need source quench packets may re-enable them * via the net.inet.ip.sendsourcequench sysctl. */ if (ip_sendsourcequench == 0) { m_freem(mcopy); return; } else { type = ICMP_SOURCEQUENCH; code = 0; } break; case EACCES: /* ipfw denied packet */ m_freem(mcopy); return; } icmp_error(mcopy, type, code, dest, destifp); } void ip_savecontrol(inp, mp, ip, m) register struct inpcb *inp; register struct mbuf **mp; register struct ip *ip; register struct mbuf *m; { if (inp->inp_socket->so_options & SO_TIMESTAMP) { struct timeval tv; microtime(&tv); *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), SCM_TIMESTAMP, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVDSTADDR) { *mp = sbcreatecontrol((caddr_t) &ip->ip_dst, sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #ifdef notyet /* XXX * Moving these out of udp_input() made them even more broken * than they already were. 
*/ /* options were tossed already */ if (inp->inp_flags & INP_RECVOPTS) { *mp = sbcreatecontrol((caddr_t) opts_deleted_above, sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } /* ip_srcroute doesn't do what we want here, need to fix */ if (inp->inp_flags & INP_RECVRETOPTS) { *mp = sbcreatecontrol((caddr_t) ip_srcroute(), sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #endif if (inp->inp_flags & INP_RECVIF) { struct ifnet *ifp; struct sdlbuf { struct sockaddr_dl sdl; u_char pad[32]; } sdlbuf; struct sockaddr_dl *sdp; struct sockaddr_dl *sdl2 = &sdlbuf.sdl; if (((ifp = m->m_pkthdr.rcvif)) && ( ifp->if_index && (ifp->if_index <= if_index))) { sdp = (struct sockaddr_dl *) (ifaddr_byindex(ifp->if_index)->ifa_addr); /* * Change our mind and don't try copy. */ if ((sdp->sdl_family != AF_LINK) || (sdp->sdl_len > sizeof(sdlbuf))) { goto makedummy; } bcopy(sdp, sdl2, sdp->sdl_len); } else { makedummy: sdl2->sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]); sdl2->sdl_family = AF_LINK; sdl2->sdl_index = 0; sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; } *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len, IP_RECVIF, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } } /* * XXX these routines are called from the upper part of the kernel. * They need to be locked when we remove Giant. * * They could also be moved to ip_mroute.c, since all the RSVP * handling is done there already. */ static int ip_rsvp_on; struct socket *ip_rsvpd; int ip_rsvp_init(struct socket *so) { if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) return EOPNOTSUPP; if (ip_rsvpd != NULL) return EADDRINUSE; ip_rsvpd = so; /* * This may seem silly, but we need to be sure we don't over-increment * the RSVP counter, in case something slips up. 
*/ if (!ip_rsvp_on) { ip_rsvp_on = 1; rsvp_on++; } return 0; } int ip_rsvp_done(void) { ip_rsvpd = NULL; /* * This may seem silly, but we need to be sure we don't over-decrement * the RSVP counter, in case something slips up. */ if (ip_rsvp_on) { ip_rsvp_on = 0; rsvp_on--; } return 0; } void rsvp_input(struct mbuf *m, int off) /* XXX must fixup manually */ { if (rsvp_input_p) { /* call the real one if loaded */ rsvp_input_p(m, off); return; } /* Can still get packets with rsvp_on = 0 if there is a local member * of the group to which the RSVP packet is addressed. But in this * case we want to throw the packet away. */ if (!rsvp_on) { m_freem(m); return; } if (ip_rsvpd != NULL) { rip_input(m, off); return; } /* Drop the packet */ m_freem(m); } Index: head/sys/netinet6/esp_input.c =================================================================== --- head/sys/netinet6/esp_input.c (revision 108465) +++ head/sys/netinet6/esp_input.c (revision 108466) @@ -1,986 +1,985 @@ /* $FreeBSD$ */ /* $KAME: esp_input.c,v 1.62 2002/01/07 11:39:57 kjc Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * RFC1827/2406 Encapsulated Security Payload. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #ifdef INET6 #include #include #include #include #include #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #include #include #include #define IPLEN_FLIPPED #define ESPMAXLEN \ (sizeof(struct esp) < sizeof(struct newesp) \ ? sizeof(struct newesp) : sizeof(struct esp)) #ifdef INET extern struct protosw inetsw[]; void esp4_input(m, off) struct mbuf *m; int off; { struct ip *ip; struct esp *esp; struct esptail esptail; u_int32_t spi; struct secasvar *sav = NULL; size_t taillen; u_int16_t nxt; const struct esp_algorithm *algo; int ivlen; size_t hlen; size_t esplen; int proto; /* sanity check for alignment. 
*/ if (off % 4 != 0 || m->m_pkthdr.len % 4 != 0) { ipseclog((LOG_ERR, "IPv4 ESP input: packet alignment problem " "(off=%d, pktlen=%d)\n", off, m->m_pkthdr.len)); ipsecstat.in_inval++; goto bad; } if (m->m_len < off + ESPMAXLEN) { m = m_pullup(m, off + ESPMAXLEN); if (!m) { ipseclog((LOG_DEBUG, "IPv4 ESP input: can't pullup in esp4_input\n")); ipsecstat.in_inval++; goto bad; } } ip = mtod(m, struct ip *); proto = ip->ip_p; esp = (struct esp *)(((u_int8_t *)ip) + off); #ifdef _IP_VHL hlen = IP_VHL_HL(ip->ip_vhl) << 2; #else hlen = ip->ip_hl << 2; #endif /* find the sassoc. */ spi = esp->esp_spi; if ((sav = key_allocsa(AF_INET, (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst, IPPROTO_ESP, spi)) == 0) { ipseclog((LOG_WARNING, "IPv4 ESP input: no key association found for spi %u\n", (u_int32_t)ntohl(spi))); ipsecstat.in_nosa++; goto bad; } KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp4_input called to allocate SA:%p\n", sav)); if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) { ipseclog((LOG_DEBUG, "IPv4 ESP input: non-mature/dying SA found for spi %u\n", (u_int32_t)ntohl(spi))); ipsecstat.in_badspi++; goto bad; } algo = esp_algorithm_lookup(sav->alg_enc); if (!algo) { ipseclog((LOG_DEBUG, "IPv4 ESP input: " "unsupported encryption algorithm for spi %u\n", (u_int32_t)ntohl(spi))); ipsecstat.in_badspi++; goto bad; } /* check if we have proper ivlen information */ ivlen = sav->ivlen; if (ivlen < 0) { ipseclog((LOG_ERR, "inproper ivlen in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); ipsecstat.in_inval++; goto bad; } if (!((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay && (sav->alg_auth && sav->key_auth))) goto noreplaycheck; if (sav->alg_auth == SADB_X_AALG_NULL || sav->alg_auth == SADB_AALG_NONE) goto noreplaycheck; /* * check for sequence number. 
*/ if (ipsec_chkreplay(ntohl(((struct newesp *)esp)->esp_seq), sav)) ; /* okey */ else { ipsecstat.in_espreplay++; ipseclog((LOG_WARNING, "replay packet in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); goto bad; } /* check ICV */ { u_char sum0[AH_MAXSUMSIZE]; u_char sum[AH_MAXSUMSIZE]; const struct ah_algorithm *sumalgo; size_t siz; sumalgo = ah_algorithm_lookup(sav->alg_auth); if (!sumalgo) goto noreplaycheck; siz = (((*sumalgo->sumsiz)(sav) + 3) & ~(4 - 1)); if (m->m_pkthdr.len < off + ESPMAXLEN + siz) { ipsecstat.in_inval++; goto bad; } if (AH_MAXSUMSIZE < siz) { ipseclog((LOG_DEBUG, "internal error: AH_MAXSUMSIZE must be larger than %lu\n", (u_long)siz)); ipsecstat.in_inval++; goto bad; } m_copydata(m, m->m_pkthdr.len - siz, siz, &sum0[0]); if (esp_auth(m, off, m->m_pkthdr.len - off - siz, sav, sum)) { ipseclog((LOG_WARNING, "auth fail in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); ipsecstat.in_espauthfail++; goto bad; } if (bcmp(sum0, sum, siz) != 0) { ipseclog((LOG_WARNING, "auth fail in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); ipsecstat.in_espauthfail++; goto bad; } /* strip off the authentication data */ m_adj(m, -siz); ip = mtod(m, struct ip *); #ifdef IPLEN_FLIPPED ip->ip_len = ip->ip_len - siz; #else ip->ip_len = htons(ntohs(ip->ip_len) - siz); #endif m->m_flags |= M_AUTHIPDGM; ipsecstat.in_espauthsucc++; } /* * update sequence number. */ if ((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay) { if (ipsec_updatereplay(ntohl(((struct newesp *)esp)->esp_seq), sav)) { ipsecstat.in_espreplay++; goto bad; } } noreplaycheck: /* process main esp header. 
*/ if (sav->flags & SADB_X_EXT_OLD) { /* RFC 1827 */ esplen = sizeof(struct esp); } else { /* RFC 2406 */ if (sav->flags & SADB_X_EXT_DERIV) esplen = sizeof(struct esp); else esplen = sizeof(struct newesp); } if (m->m_pkthdr.len < off + esplen + ivlen + sizeof(esptail)) { ipseclog((LOG_WARNING, "IPv4 ESP input: packet too short\n")); ipsecstat.in_inval++; goto bad; } if (m->m_len < off + esplen + ivlen) { m = m_pullup(m, off + esplen + ivlen); if (!m) { ipseclog((LOG_DEBUG, "IPv4 ESP input: can't pullup in esp4_input\n")); ipsecstat.in_inval++; goto bad; } } /* * pre-compute and cache intermediate key */ if (esp_schedule(algo, sav) != 0) { ipsecstat.in_inval++; goto bad; } /* * decrypt the packet. */ if (!algo->decrypt) panic("internal error: no decrypt function"); if ((*algo->decrypt)(m, off, sav, algo, ivlen)) { /* m is already freed */ m = NULL; ipseclog((LOG_ERR, "decrypt fail in IPv4 ESP input: %s\n", ipsec_logsastr(sav))); ipsecstat.in_inval++; goto bad; } ipsecstat.in_esphist[sav->alg_enc]++; m->m_flags |= M_DECRYPTED; /* * find the trailer of the ESP. */ m_copydata(m, m->m_pkthdr.len - sizeof(esptail), sizeof(esptail), (caddr_t)&esptail); nxt = esptail.esp_nxt; taillen = esptail.esp_padlen + sizeof(esptail); if (m->m_pkthdr.len < taillen || m->m_pkthdr.len - taillen < hlen) { /* ? */ ipseclog((LOG_WARNING, "bad pad length in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); ipsecstat.in_inval++; goto bad; } /* strip off the trailing pad area. */ m_adj(m, -taillen); #ifdef IPLEN_FLIPPED ip->ip_len = ip->ip_len - taillen; #else ip->ip_len = htons(ntohs(ip->ip_len) - taillen); #endif /* was it transmitted over the IPsec tunnel SA? */ if (ipsec4_tunnel_validate(m, off + esplen + ivlen, nxt, sav)) { /* * strip off all the headers that precedes ESP header. * IP4 xx ESP IP4' payload -> IP4' payload * * XXX more sanity checks * XXX relationship with gif? 
*/ u_int8_t tos; tos = ip->ip_tos; m_adj(m, off + esplen + ivlen); if (m->m_len < sizeof(*ip)) { m = m_pullup(m, sizeof(*ip)); if (!m) { ipsecstat.in_inval++; goto bad; } } ip = mtod(m, struct ip *); /* ECN consideration. */ ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos); if (!key_checktunnelsanity(sav, AF_INET, (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) { ipseclog((LOG_ERR, "ipsec tunnel address mismatch " "in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); ipsecstat.in_inval++; goto bad; } key_sa_recordxfer(sav, m); if (ipsec_addhist(m, IPPROTO_ESP, spi) != 0 || ipsec_addhist(m, IPPROTO_IPV4, 0) != 0) { ipsecstat.in_nomem++; goto bad; } if (! IF_HANDOFF(&ipintrq, m, NULL)) { ipsecstat.in_inval++; m = NULL; goto bad; } m = NULL; schednetisr(NETISR_IP); /* can be skipped but to make sure */ nxt = IPPROTO_DONE; } else { /* * strip off ESP header and IV. * even in m_pulldown case, we need to strip off ESP so that * we can always compute checksum for AH correctly. 
*/ size_t stripsiz; stripsiz = esplen + ivlen; ip = mtod(m, struct ip *); ovbcopy((caddr_t)ip, (caddr_t)(((u_char *)ip) + stripsiz), off); m->m_data += stripsiz; m->m_len -= stripsiz; m->m_pkthdr.len -= stripsiz; ip = mtod(m, struct ip *); #ifdef IPLEN_FLIPPED ip->ip_len = ip->ip_len - stripsiz; #else ip->ip_len = htons(ntohs(ip->ip_len) - stripsiz); #endif ip->ip_p = nxt; key_sa_recordxfer(sav, m); if (ipsec_addhist(m, IPPROTO_ESP, spi) != 0) { ipsecstat.in_nomem++; goto bad; } if (nxt != IPPROTO_DONE) { if ((inetsw[ip_protox[nxt]].pr_flags & PR_LASTHDR) != 0 && ipsec4_in_reject(m, NULL)) { ipsecstat.in_polvio++; goto bad; } (*inetsw[ip_protox[nxt]].pr_input)(m, off); } else m_freem(m); m = NULL; } if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp4_input call free SA:%p\n", sav)); key_freesav(sav); } ipsecstat.in_success++; return; bad: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp4_input call free SA:%p\n", sav)); key_freesav(sav); } if (m) m_freem(m); return; } #endif /* INET */ #ifdef INET6 int esp6_input(mp, offp, proto) struct mbuf **mp; int *offp, proto; { struct mbuf *m = *mp; int off = *offp; struct ip6_hdr *ip6; struct esp *esp; struct esptail esptail; u_int32_t spi; struct secasvar *sav = NULL; size_t taillen; u_int16_t nxt; const struct esp_algorithm *algo; int ivlen; size_t esplen; /* sanity check for alignment. 
*/ if (off % 4 != 0 || m->m_pkthdr.len % 4 != 0) { ipseclog((LOG_ERR, "IPv6 ESP input: packet alignment problem " "(off=%d, pktlen=%d)\n", off, m->m_pkthdr.len)); ipsec6stat.in_inval++; goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, ESPMAXLEN, IPPROTO_DONE); esp = (struct esp *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(esp, struct esp *, m, off, ESPMAXLEN); if (esp == NULL) { ipsec6stat.in_inval++; return IPPROTO_DONE; } #endif ip6 = mtod(m, struct ip6_hdr *); if (ntohs(ip6->ip6_plen) == 0) { ipseclog((LOG_ERR, "IPv6 ESP input: " "ESP with IPv6 jumbogram is not supported.\n")); ipsec6stat.in_inval++; goto bad; } /* find the sassoc. */ spi = esp->esp_spi; if ((sav = key_allocsa(AF_INET6, (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst, IPPROTO_ESP, spi)) == 0) { ipseclog((LOG_WARNING, "IPv6 ESP input: no key association found for spi %u\n", (u_int32_t)ntohl(spi))); ipsec6stat.in_nosa++; goto bad; } KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp6_input called to allocate SA:%p\n", sav)); if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) { ipseclog((LOG_DEBUG, "IPv6 ESP input: non-mature/dying SA found for spi %u\n", (u_int32_t)ntohl(spi))); ipsec6stat.in_badspi++; goto bad; } algo = esp_algorithm_lookup(sav->alg_enc); if (!algo) { ipseclog((LOG_DEBUG, "IPv6 ESP input: " "unsupported encryption algorithm for spi %u\n", (u_int32_t)ntohl(spi))); ipsec6stat.in_badspi++; goto bad; } /* check if we have proper ivlen information */ ivlen = sav->ivlen; if (ivlen < 0) { ipseclog((LOG_ERR, "inproper ivlen in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); ipsec6stat.in_badspi++; goto bad; } if (!((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay && (sav->alg_auth && sav->key_auth))) goto noreplaycheck; if (sav->alg_auth == SADB_X_AALG_NULL || sav->alg_auth == SADB_AALG_NONE) goto noreplaycheck; /* * check for sequence number. 
*/ if (ipsec_chkreplay(ntohl(((struct newesp *)esp)->esp_seq), sav)) ; /* okey */ else { ipsec6stat.in_espreplay++; ipseclog((LOG_WARNING, "replay packet in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); goto bad; } /* check ICV */ { u_char sum0[AH_MAXSUMSIZE]; u_char sum[AH_MAXSUMSIZE]; const struct ah_algorithm *sumalgo; size_t siz; sumalgo = ah_algorithm_lookup(sav->alg_auth); if (!sumalgo) goto noreplaycheck; siz = (((*sumalgo->sumsiz)(sav) + 3) & ~(4 - 1)); if (m->m_pkthdr.len < off + ESPMAXLEN + siz) { ipsecstat.in_inval++; goto bad; } if (AH_MAXSUMSIZE < siz) { ipseclog((LOG_DEBUG, "internal error: AH_MAXSUMSIZE must be larger than %lu\n", (u_long)siz)); ipsec6stat.in_inval++; goto bad; } m_copydata(m, m->m_pkthdr.len - siz, siz, &sum0[0]); if (esp_auth(m, off, m->m_pkthdr.len - off - siz, sav, sum)) { ipseclog((LOG_WARNING, "auth fail in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); ipsec6stat.in_espauthfail++; goto bad; } if (bcmp(sum0, sum, siz) != 0) { ipseclog((LOG_WARNING, "auth fail in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); ipsec6stat.in_espauthfail++; goto bad; } /* strip off the authentication data */ m_adj(m, -siz); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - siz); m->m_flags |= M_AUTHIPDGM; ipsec6stat.in_espauthsucc++; } /* * update sequence number. */ if ((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay) { if (ipsec_updatereplay(ntohl(((struct newesp *)esp)->esp_seq), sav)) { ipsec6stat.in_espreplay++; goto bad; } } noreplaycheck: /* process main esp header. 
*/ if (sav->flags & SADB_X_EXT_OLD) { /* RFC 1827 */ esplen = sizeof(struct esp); } else { /* RFC 2406 */ if (sav->flags & SADB_X_EXT_DERIV) esplen = sizeof(struct esp); else esplen = sizeof(struct newesp); } if (m->m_pkthdr.len < off + esplen + ivlen + sizeof(esptail)) { ipseclog((LOG_WARNING, "IPv6 ESP input: packet too short\n")); ipsec6stat.in_inval++; goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, esplen + ivlen, IPPROTO_DONE); /* XXX */ #else IP6_EXTHDR_GET(esp, struct esp *, m, off, esplen + ivlen); if (esp == NULL) { ipsec6stat.in_inval++; m = NULL; goto bad; } #endif ip6 = mtod(m, struct ip6_hdr *); /* set it again just in case */ /* * pre-compute and cache intermediate key */ if (esp_schedule(algo, sav) != 0) { ipsec6stat.in_inval++; goto bad; } /* * decrypt the packet. */ if (!algo->decrypt) panic("internal error: no decrypt function"); if ((*algo->decrypt)(m, off, sav, algo, ivlen)) { /* m is already freed */ m = NULL; ipseclog((LOG_ERR, "decrypt fail in IPv6 ESP input: %s\n", ipsec_logsastr(sav))); ipsec6stat.in_inval++; goto bad; } ipsec6stat.in_esphist[sav->alg_enc]++; m->m_flags |= M_DECRYPTED; /* * find the trailer of the ESP. */ m_copydata(m, m->m_pkthdr.len - sizeof(esptail), sizeof(esptail), (caddr_t)&esptail); nxt = esptail.esp_nxt; taillen = esptail.esp_padlen + sizeof(esptail); if (m->m_pkthdr.len < taillen || m->m_pkthdr.len - taillen < sizeof(struct ip6_hdr)) { /* ? */ ipseclog((LOG_WARNING, "bad pad length in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); ipsec6stat.in_inval++; goto bad; } /* strip off the trailing pad area. */ m_adj(m, -taillen); ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - taillen); /* was it transmitted over the IPsec tunnel SA? */ if (ipsec6_tunnel_validate(m, off + esplen + ivlen, nxt, sav)) { /* * strip off all the headers that precedes ESP header. * IP6 xx ESP IP6' payload -> IP6' payload * * XXX more sanity checks * XXX relationship with gif? 
*/ u_int32_t flowinfo; /* net endian */ flowinfo = ip6->ip6_flow; m_adj(m, off + esplen + ivlen); if (m->m_len < sizeof(*ip6)) { #ifndef PULLDOWN_TEST /* * m_pullup is prohibited in KAME IPv6 input processing * but there's no other way! */ #else /* okay to pullup in m_pulldown style */ #endif m = m_pullup(m, sizeof(*ip6)); if (!m) { ipsec6stat.in_inval++; goto bad; } } ip6 = mtod(m, struct ip6_hdr *); /* ECN consideration. */ ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow); if (!key_checktunnelsanity(sav, AF_INET6, (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) { ipseclog((LOG_ERR, "ipsec tunnel address mismatch " "in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); ipsec6stat.in_inval++; goto bad; } key_sa_recordxfer(sav, m); if (ipsec_addhist(m, IPPROTO_ESP, spi) != 0 || ipsec_addhist(m, IPPROTO_IPV6, 0) != 0) { ipsec6stat.in_nomem++; goto bad; } if (! IF_HANDOFF(&ip6intrq, m, NULL)) { ipsec6stat.in_inval++; m = NULL; goto bad; } m = NULL; schednetisr(NETISR_IPV6); /* can be skipped but to make sure */ nxt = IPPROTO_DONE; } else { /* * strip off ESP header and IV. * even in m_pulldown case, we need to strip off ESP so that * we can always compute checksum for AH correctly. */ size_t stripsiz; char *prvnxtp; /* * Set the next header field of the previous header correctly. 
*/ prvnxtp = ip6_get_prevhdr(m, off); /* XXX */ *prvnxtp = nxt; stripsiz = esplen + ivlen; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len >= stripsiz + off) { ovbcopy((caddr_t)ip6, ((caddr_t)ip6) + stripsiz, off); m->m_data += stripsiz; m->m_len -= stripsiz; m->m_pkthdr.len -= stripsiz; } else { /* * this comes with no copy if the boundary is on * cluster */ struct mbuf *n; n = m_split(m, off, M_DONTWAIT); if (n == NULL) { /* m is retained by m_split */ goto bad; } m_adj(n, stripsiz); m_cat(m, n); /* m_cat does not update m_pkthdr.len */ m->m_pkthdr.len += n->m_pkthdr.len; } #ifndef PULLDOWN_TEST /* * KAME requires that the packet to be contiguous on the * mbuf. We need to make that sure. * this kind of code should be avoided. * XXX other conditions to avoid running this part? */ if (m->m_len != m->m_pkthdr.len) { struct mbuf *n = NULL; int maxlen; MGETHDR(n, M_DONTWAIT, MT_HEADER); maxlen = MHLEN; if (n) - M_COPY_PKTHDR(n, m); + M_MOVE_PKTHDR(n, m); if (n && m->m_pkthdr.len > maxlen) { MCLGET(n, M_DONTWAIT); maxlen = MCLBYTES; if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } if (!n) { printf("esp6_input: mbuf allocation failed\n"); goto bad; } if (m->m_pkthdr.len <= maxlen) { m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t)); n->m_len = m->m_pkthdr.len; n->m_pkthdr.len = m->m_pkthdr.len; n->m_next = NULL; m_freem(m); } else { m_copydata(m, 0, maxlen, mtod(n, caddr_t)); n->m_len = maxlen; n->m_pkthdr.len = m->m_pkthdr.len; n->m_next = m; m_adj(m, maxlen); - m->m_flags &= ~M_PKTHDR; } m = n; } #endif ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - stripsiz); key_sa_recordxfer(sav, m); if (ipsec_addhist(m, IPPROTO_ESP, spi) != 0) { ipsec6stat.in_nomem++; goto bad; } } *offp = off; *mp = m; if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp6_input call free SA:%p\n", sav)); key_freesav(sav); } ipsec6stat.in_success++; return nxt; bad: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp6_input call free SA:%p\n", 
sav)); key_freesav(sav); } if (m) m_freem(m); return IPPROTO_DONE; } void esp6_ctlinput(cmd, sa, d) int cmd; struct sockaddr *sa; void *d; { const struct newesp *espp; struct newesp esp; struct ip6ctlparam *ip6cp = NULL, ip6cp1; struct secasvar *sav; struct ip6_hdr *ip6; struct mbuf *m; int off; struct sockaddr_in6 *sa6_src, *sa6_dst; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; } else { m = NULL; ip6 = NULL; off = 0; /* calm gcc */ } if (ip6) { /* * Notify the error to all possible sockets via pfctlinput2. * Since the upper layer information (such as protocol type, * source and destination ports) is embedded in the encrypted * data and might have been cut, we can't directly call * an upper layer ctlinput function. However, the pcbnotify * function will consider source and destination addresses * as well as the flow info value, and may be able to find * some PCB that should be notified. * Although pfctlinput2 will call esp6_ctlinput(), there is * no possibility of an infinite loop of function calls, * because we don't pass the inner IPv6 header. */ bzero(&ip6cp1, sizeof(ip6cp1)); ip6cp1.ip6c_src = ip6cp->ip6c_src; pfctlinput2(cmd, sa, (void *)&ip6cp1); /* * Then go to special cases that need ESP header information. * XXX: We assume that when ip6 is non NULL, * M and OFF are valid. */ /* check if we can safely examine src and dst ports */ if (m->m_pkthdr.len < off + sizeof(esp)) return; if (m->m_len < off + sizeof(esp)) { /* * this should be rare case, * so we compromise on this copy... 
*/ m_copydata(m, off, sizeof(esp), (caddr_t)&esp); espp = &esp; } else espp = (struct newesp*)(mtod(m, caddr_t) + off); if (cmd == PRC_MSGSIZE) { int valid = 0; /* * Check to see if we have a valid SA corresponding to * the address in the ICMP message payload. */ sa6_src = ip6cp->ip6c_src; sa6_dst = (struct sockaddr_in6 *)sa; sav = key_allocsa(AF_INET6, (caddr_t)&sa6_src->sin6_addr, (caddr_t)&sa6_dst->sin6_addr, IPPROTO_ESP, espp->esp_spi); if (sav) { if (sav->state == SADB_SASTATE_MATURE || sav->state == SADB_SASTATE_DYING) valid++; key_freesav(sav); } /* XXX Further validation? */ /* * Depending on the value of "valid" and routing table * size (mtudisc_{hi,lo}wat), we will: * - recalcurate the new MTU and create the * corresponding routing entry, or * - ignore the MTU change notification. */ icmp6_mtudisc_update((struct ip6ctlparam *)d, valid); } } else { /* we normally notify any pcb here */ } } #endif /* INET6 */ Index: head/sys/netinet6/icmp6.c =================================================================== --- head/sys/netinet6/icmp6.c (revision 108465) +++ head/sys/netinet6/icmp6.c (revision 108466) @@ -1,2874 +1,2883 @@ /* $FreeBSD$ */ /* $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif #ifdef FAST_IPSEC #include #include #define IPSEC #endif #include #ifdef HAVE_NRL_INPCB /* inpcb members */ #define in6pcb inpcb #define in6p_laddr inp_laddr6 #define in6p_faddr inp_faddr6 #define in6p_icmp6filt inp_icmp6filt #define in6p_route inp_route #define in6p_socket inp_socket #define in6p_flags inp_flags #define in6p_moptions inp_moptions6 #define in6p_outputopts inp_outputopts6 #define in6p_ip6 inp_ipv6 #define in6p_flowinfo inp_flowinfo #define in6p_sp inp_sp #define in6p_next inp_next #define in6p_prev inp_prev /* macro names */ #define sotoin6pcb sotoinpcb /* function names */ #define in6_pcbdetach in_pcbdetach #define in6_rtchange in_rtchange /* * for KAME src sync over BSD*'s. XXX: FreeBSD (>=3) are VERY different from * others... 
*/ #define in6p_ip6_nxt inp_ipv6.ip6_nxt #endif extern struct domain inet6domain; struct icmp6stat icmp6stat; extern struct inpcbhead ripcb; extern int icmp6errppslim; static int icmp6errpps_count = 0; static struct timeval icmp6errppslim_last; extern int icmp6_nodeinfo; static void icmp6_errcount __P((struct icmp6errstat *, int, int)); static int icmp6_rip6_input __P((struct mbuf **, int)); static int icmp6_ratelimit __P((const struct in6_addr *, const int, const int)); static const char *icmp6_redirect_diag __P((struct in6_addr *, struct in6_addr *, struct in6_addr *)); #define HAVE_PPSRATECHECK #ifndef HAVE_PPSRATECHECK static int ppsratecheck __P((struct timeval *, int *, int)); #endif static struct mbuf *ni6_input __P((struct mbuf *, int)); static struct mbuf *ni6_nametodns __P((const char *, int, int)); static int ni6_dnsmatch __P((const char *, int, const char *, int)); static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *, struct ifnet **, char *)); static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int)); static int icmp6_notify_error __P((struct mbuf *, int, int, int)); #ifdef COMPAT_RFC1885 static struct route_in6 icmp6_reflect_rt; #endif void icmp6_init() { mld6_init(); } static void icmp6_errcount(stat, type, code) struct icmp6errstat *stat; int type, code; { switch (type) { case ICMP6_DST_UNREACH: switch (code) { case ICMP6_DST_UNREACH_NOROUTE: stat->icp6errs_dst_unreach_noroute++; return; case ICMP6_DST_UNREACH_ADMIN: stat->icp6errs_dst_unreach_admin++; return; case ICMP6_DST_UNREACH_BEYONDSCOPE: stat->icp6errs_dst_unreach_beyondscope++; return; case ICMP6_DST_UNREACH_ADDR: stat->icp6errs_dst_unreach_addr++; return; case ICMP6_DST_UNREACH_NOPORT: stat->icp6errs_dst_unreach_noport++; return; } break; case ICMP6_PACKET_TOO_BIG: stat->icp6errs_packet_too_big++; return; case ICMP6_TIME_EXCEEDED: switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: stat->icp6errs_time_exceed_transit++; return; case 
ICMP6_TIME_EXCEED_REASSEMBLY: stat->icp6errs_time_exceed_reassembly++; return; } break; case ICMP6_PARAM_PROB: switch (code) { case ICMP6_PARAMPROB_HEADER: stat->icp6errs_paramprob_header++; return; case ICMP6_PARAMPROB_NEXTHEADER: stat->icp6errs_paramprob_nextheader++; return; case ICMP6_PARAMPROB_OPTION: stat->icp6errs_paramprob_option++; return; } break; case ND_REDIRECT: stat->icp6errs_redirect++; return; } stat->icp6errs_unknown++; } /* * Generate an error packet of type error in response to bad IP6 packet. */ void icmp6_error(m, type, code, param) struct mbuf *m; int type, code, param; { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; u_int preplen; int off; int nxt; icmp6stat.icp6s_error++; /* count per-type-code statistics */ icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code); #ifdef M_DECRYPTED /*not openbsd*/ if (m->m_flags & M_DECRYPTED) { icmp6stat.icp6s_canterror++; goto freeit; } #endif #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif oip6 = mtod(m, struct ip6_hdr *); /* * Multicast destination check. For unrecognized option errors, * this check has already done in ip6_unknown_opt(), so we can * check only for other errors. */ if ((m->m_flags & (M_BCAST|M_MCAST) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && (type != ICMP6_PACKET_TOO_BIG && (type != ICMP6_PARAM_PROB || code != ICMP6_PARAMPROB_OPTION))) goto freeit; /* Source address check. XXX: the case of anycast source? */ if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) goto freeit; /* * If we are about to send ICMPv6 against ICMPv6 error/redirect, * don't do it. 
*/ nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off >= 0 && nxt == IPPROTO_ICMPV6) { struct icmp6_hdr *icp; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, sizeof(*icp)); if (icp == NULL) { icmp6stat.icp6s_tooshort++; return; } #endif if (icp->icmp6_type < ICMP6_ECHO_REQUEST || icp->icmp6_type == ND_REDIRECT) { /* * ICMPv6 error * Special case: for redirect (which is * informational) we must not send icmp6 error. */ icmp6stat.icp6s_canterror++; goto freeit; } else { /* ICMPv6 informational - send the error */ } } else { /* non-ICMPv6 - send the error */ } oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ /* Finally, do rate limitation check. */ if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { icmp6stat.icp6s_toofreq++; goto freeit; } /* * OK, ICMP6 can be generated. */ if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); M_PREPEND(m, preplen, M_DONTWAIT); if (m && m->m_len < preplen) m = m_pullup(m, preplen); if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); return; } nip6 = mtod(m, struct ip6_hdr *); nip6->ip6_src = oip6->ip6_src; nip6->ip6_dst = oip6->ip6_dst; if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src)) oip6->ip6_src.s6_addr16[1] = 0; if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst)) oip6->ip6_dst.s6_addr16[1] = 0; icmp6 = (struct icmp6_hdr *)(nip6 + 1); icmp6->icmp6_type = type; icmp6->icmp6_code = code; icmp6->icmp6_pptr = htonl((u_int32_t)param); /* * icmp6_reflect() is designed to be in the input path. * icmp6_error() can be called from both input and outut path, * and if we are in output path rcvif could contain bogus value. * clear m->m_pkthdr.rcvif for safety, we should have enough scope * information in ip header (nip6). 
*/ m->m_pkthdr.rcvif = NULL; icmp6stat.icp6s_outhist[type]++; icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ return; freeit: /* * If we can't tell wheter or not we can generate ICMP6, free it. */ m_freem(m); } /* * Process a received ICMP6 message. */ int icmp6_input(mp, offp, proto) struct mbuf **mp; int *offp, proto; { struct mbuf *m = *mp, *n; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; int off = *offp; int icmp6len = m->m_pkthdr.len - *offp; int code, sum, noff; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); /* m might change if M_LOOP. So, call mtod after this */ #endif /* * Locate icmp6 structure in mbuf, and check * that not corrupted and of at least minimum length */ ip6 = mtod(m, struct ip6_hdr *); if (icmp6len < sizeof(struct icmp6_hdr)) { icmp6stat.icp6s_tooshort++; goto freeit; } /* * calculate the checksum */ #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return IPPROTO_DONE; } #endif code = icmp6->icmp6_code; if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { nd6log((LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n", icmp6->icmp6_type, sum, ip6_sprintf(&ip6->ip6_src))); icmp6stat.icp6s_checksum++; goto freeit; } if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { /* * Deliver very specific ICMP6 type only. * This is important to deilver TOOBIG. Otherwise PMTUD * will not work. 
*/ switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: case ICMP6_PACKET_TOO_BIG: case ICMP6_TIME_EXCEEDED: break; default: goto freeit; } } icmp6stat.icp6s_inhist[icmp6->icmp6_type]++; icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ break; case ICMP6_DST_UNREACH_ADDR: code = PRC_HOSTDEAD; break; #ifdef COMPAT_RFC1885 case ICMP6_DST_UNREACH_NOTNEIGHBOR: code = PRC_UNREACH_SRCFAIL; break; #else case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; break; #endif case ICMP6_DST_UNREACH_NOPORT: code = PRC_UNREACH_PORT; break; default: goto badcode; } goto deliver; break; case ICMP6_PACKET_TOO_BIG: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig); if (code != 0) goto badcode; code = PRC_MSGSIZE; /* * Updating the path MTU will be done after examining * intermediate extension headers. 
*/ goto deliver; break; case ICMP6_TIME_EXCEEDED: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed); switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: case ICMP6_TIME_EXCEED_REASSEMBLY: code += PRC_TIMXCEED_INTRANS; break; default: goto badcode; } goto deliver; break; case ICMP6_PARAM_PROB: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob); switch (code) { case ICMP6_PARAMPROB_NEXTHEADER: code = PRC_UNREACH_PROTOCOL; break; case ICMP6_PARAMPROB_HEADER: case ICMP6_PARAMPROB_OPTION: code = PRC_PARAMPROB; break; default: goto badcode; } goto deliver; break; case ICMP6_ECHO_REQUEST: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo); if (code != 0) goto badcode; if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { /* Give up remote */ break; } if ((n->m_flags & M_EXT) != 0 || n->m_len < off + sizeof(struct icmp6_hdr)) { struct mbuf *n0 = n; const int maxlen = sizeof(*nip6) + sizeof(*nicmp6); /* * Prepare an internal mbuf. m_pullup() doesn't * always copy the length we specified. */ if (maxlen >= MCLBYTES) { /* Give up remote */ m_freem(n0); break; } MGETHDR(n, M_DONTWAIT, n0->m_type); if (n && maxlen >= MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } if (n == NULL) { /* Give up remote */ m_freem(n0); break; } - M_COPY_PKTHDR(n, n0); + M_MOVE_PKTHDR(n, n0); /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); noff = sizeof(struct ip6_hdr); n->m_pkthdr.len = n->m_len = noff + sizeof(struct icmp6_hdr); /* * Adjust mbuf. ip6_plen will be adjusted in * ip6_output(). 
*/ m_adj(n0, off + sizeof(struct icmp6_hdr)); n->m_pkthdr.len += n0->m_pkthdr.len; n->m_next = n0; - n0->m_flags &= ~M_PKTHDR; } else { nip6 = mtod(n, struct ip6_hdr *); nicmp6 = (struct icmp6_hdr *)((caddr_t)nip6 + off); noff = off; } nicmp6->icmp6_type = ICMP6_ECHO_REPLY; nicmp6->icmp6_code = 0; if (n) { icmp6stat.icp6s_reflect++; icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++; icmp6_reflect(n, noff); } break; case ICMP6_ECHO_REPLY: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply); if (code != 0) goto badcode; break; case MLD_LISTENER_QUERY: case MLD_LISTENER_REPORT: if (icmp6len < sizeof(struct mld_hdr)) goto badlen; if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */ icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery); else icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport); if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ mld6_input(m, off); m = NULL; goto freeit; } mld6_input(n, off); /* m stays. */ break; case MLD_LISTENER_DONE: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone); if (icmp6len < sizeof(struct mld_hdr)) /* necessary? */ goto badlen; break; /* nothing to be done in kernel */ case MLD_MTRACE_RESP: case MLD_MTRACE: /* XXX: these two are experimental. not officially defind. */ /* XXX: per-interface statistics? 
*/ break; /* just pass it to applications */ case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ { enum { WRU, FQDN } mode; if (!icmp6_nodeinfo) break; if (icmp6len == sizeof(struct icmp6_hdr) + 4) mode = WRU; else if (icmp6len >= sizeof(struct icmp6_nodeinfo)) mode = FQDN; else goto badlen; #define hostnamelen strlen(hostname) if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), IPPROTO_DONE); #endif n = m_copy(m, 0, M_COPYALL); if (n) n = ni6_input(n, off); /* XXX meaningless if n == NULL */ noff = sizeof(struct ip6_hdr); } else { u_char *p; int maxlen, maxhlen; if ((icmp6_nodeinfo & 5) != 5) break; if (code != 0) goto badcode; maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4; if (maxlen >= MCLBYTES) { /* Give up remote */ break; } MGETHDR(n, M_DONTWAIT, m->m_type); if (n && maxlen > MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } + if (!m_dup_pkthdr(n, m, M_DONTWAIT)) { + /* + * Previous code did a blind M_COPY_PKTHDR + * and said "just for rcvif". If true, then + * we could tolerate the dup failing (due to + * the deep copy of the tag chain). For now + * be conservative and just fail. + */ + m_free(n); + n = NULL; + } if (n == NULL) { /* Give up remote */ break; } n->m_pkthdr.rcvif = NULL; n->m_len = 0; maxhlen = M_TRAILINGSPACE(n) - maxlen; if (maxhlen > hostnamelen) maxhlen = hostnamelen; /* * Copy IPv6 and ICMPv6 only. 
*/ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); p = (u_char *)(nicmp6 + 1); bzero(p, 4); bcopy(hostname, p + 4, maxhlen); /* meaningless TTL */ noff = sizeof(struct ip6_hdr); - M_COPY_PKTHDR(n, m); /* just for rcvif */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + 4 + maxhlen; nicmp6->icmp6_type = ICMP6_WRUREPLY; nicmp6->icmp6_code = 0; } #undef hostnamelen if (n) { icmp6stat.icp6s_reflect++; icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++; icmp6_reflect(n, noff); } break; } case ICMP6_WRUREPLY: if (code != 0) goto badcode; break; case ND_ROUTER_SOLICIT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_rs_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_rs_input(n, off, icmp6len); /* m stays. */ break; case ND_ROUTER_ADVERT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_ra_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_ra_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_SOLICIT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_ns_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_ns_input(n, off, icmp6len); /* m stays. 
*/ break; case ND_NEIGHBOR_ADVERT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_na_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_na_input(n, off, icmp6len); /* m stays. */ break; case ND_REDIRECT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ icmp6_redirect_input(m, off); m = NULL; goto freeit; } icmp6_redirect_input(n, off); /* m stays. */ break; case ICMP6_ROUTER_RENUMBERING: if (code != ICMP6_ROUTER_RENUMBERING_COMMAND && code != ICMP6_ROUTER_RENUMBERING_RESULT) goto badcode; if (icmp6len < sizeof(struct icmp6_router_renum)) goto badlen; break; default: nd6log((LOG_DEBUG, "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst), m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0)); if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { /* ICMPv6 error: MUST deliver it by spec... */ code = PRC_NCMDS; /* deliver */ } else { /* ICMPv6 informational: MUST not deliver */ break; } deliver: if (icmp6_notify_error(m, off, icmp6len, code)) { /* In this case, m should've been freed. 
*/ return(IPPROTO_DONE); } break; badcode: icmp6stat.icp6s_badcode++; break; badlen: icmp6stat.icp6s_badlen++; break; } /* deliver the packet to appropriate sockets */ icmp6_rip6_input(&m, *offp); return IPPROTO_DONE; freeit: m_freem(m); return IPPROTO_DONE; } static int icmp6_notify_error(m, off, icmp6len, code) struct mbuf *m; int off, icmp6len; { struct icmp6_hdr *icmp6; struct ip6_hdr *eip6; u_int32_t notifymtu; struct sockaddr_in6 icmp6src, icmp6dst; if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { icmp6stat.icp6s_tooshort++; goto freeit; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); /* Detect the upper level protocol */ { void (*ctlfunc) __P((int, struct sockaddr *, void *)); u_int8_t nxt = eip6->ip6_nxt; int eoff = off + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); struct ip6ctlparam ip6cp; struct in6_addr *finaldst = NULL; int icmp6type = icmp6->icmp6_type; struct ip6_frag *fh; struct ip6_rthdr *rth; struct ip6_rthdr0 *rth0; int rthlen; while (1) { /* XXX: should avoid infinite loop explicitly? */ struct ip6_ext *eh; switch (nxt) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: case IPPROTO_AH: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_ext), -1); eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(eh, struct ip6_ext *, m, eoff, sizeof(*eh)); if (eh == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif if (nxt == IPPROTO_AH) eoff += (eh->ip6e_len + 2) << 2; else eoff += (eh->ip6e_len + 1) << 3; nxt = eh->ip6e_nxt; break; case IPPROTO_ROUTING: /* * When the erroneous packet contains a * routing header, we should examine the * header to determine the final destination. 
* Otherwise, we can't properly update * information that depends on the final * destination (e.g. path MTU). */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); rth = (struct ip6_rthdr *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, eoff, sizeof(*rth)); if (rth == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif rthlen = (rth->ip6r_len + 1) << 3; /* * XXX: currently there is no * officially defined type other * than type-0. * Note that if the segment left field * is 0, all intermediate hops must * have been passed. */ if (rth->ip6r_segleft && rth->ip6r_type == IPV6_RTHDR_TYPE_0) { int hops; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); rth0 = (struct ip6_rthdr0 *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth0, struct ip6_rthdr0 *, m, eoff, rthlen); if (rth0 == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif /* just ignore a bogus header */ if ((rth0->ip6r0_len % 2) == 0 && (hops = rth0->ip6r0_len/2)) finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); } eoff += rthlen; nxt = rth->ip6r_nxt; break; case IPPROTO_FRAGMENT: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_frag), -1); fh = (struct ip6_frag *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(fh, struct ip6_frag *, m, eoff, sizeof(*fh)); if (fh == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif /* * Data after a fragment header is meaningless * unless it is the first fragment, but * we'll go to the notify label for path MTU * discovery. */ if (fh->ip6f_offlg & IP6F_OFF_MASK) goto notify; eoff += sizeof(struct ip6_frag); nxt = fh->ip6f_nxt; break; default: /* * This case includes ESP and the No Next * Header. In such cases going to the notify * label does not have any meaning * (i.e. ctlfunc will be NULL), but we go * anyway since we might have to update * path MTU information. 
*/ goto notify; } } notify: #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); bzero(&icmp6dst, sizeof(icmp6dst)); icmp6dst.sin6_len = sizeof(struct sockaddr_in6); icmp6dst.sin6_family = AF_INET6; if (finaldst == NULL) icmp6dst.sin6_addr = eip6->ip6_dst; else icmp6dst.sin6_addr = *finaldst; icmp6dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &icmp6dst.sin6_addr); #ifndef SCOPEDROUTING if (in6_embedscope(&icmp6dst.sin6_addr, &icmp6dst, NULL, NULL)) { /* should be impossbile */ nd6log((LOG_DEBUG, "icmp6_notify_error: in6_embedscope failed\n")); goto freeit; } #endif /* * retrieve parameters from the inner IPv6 header, and convert * them into sockaddr structures. */ bzero(&icmp6src, sizeof(icmp6src)); icmp6src.sin6_len = sizeof(struct sockaddr_in6); icmp6src.sin6_family = AF_INET6; icmp6src.sin6_addr = eip6->ip6_src; icmp6src.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &icmp6src.sin6_addr); #ifndef SCOPEDROUTING if (in6_embedscope(&icmp6src.sin6_addr, &icmp6src, NULL, NULL)) { /* should be impossbile */ nd6log((LOG_DEBUG, "icmp6_notify_error: in6_embedscope failed\n")); goto freeit; } #endif icmp6src.sin6_flowinfo = (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); if (finaldst == NULL) finaldst = &eip6->ip6_dst; ip6cp.ip6c_m = m; ip6cp.ip6c_icmp6 = icmp6; ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); ip6cp.ip6c_off = eoff; ip6cp.ip6c_finaldst = finaldst; ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)¬ifymtu; icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ } ctlfunc = (void (*) __P((int, struct sockaddr *, void *))) (inet6sw[ip6_protox[nxt]].pr_ctlinput); if (ctlfunc) { (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, &ip6cp); 
} } return(0); freeit: m_freem(m); return(-1); } void icmp6_mtudisc_update(ip6cp, validated) struct ip6ctlparam *ip6cp; int validated; { struct in6_addr *dst = ip6cp->ip6c_finaldst; struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ u_int mtu = ntohl(icmp6->icmp6_mtu); struct rtentry *rt = NULL; struct sockaddr_in6 sin6; if (!validated) return; bzero(&sin6, sizeof(sin6)); sin6.sin6_family = PF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *dst; /* XXX normally, this won't happen */ if (IN6_IS_ADDR_LINKLOCAL(dst)) { sin6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); } /* sin6.sin6_scope_id = XXX: should be set if DST is a scoped addr */ rt = rtalloc1((struct sockaddr *)&sin6, 0, RTF_CLONING | RTF_PRCLONING); if (rt && (rt->rt_flags & RTF_HOST) && !(rt->rt_rmx.rmx_locks & RTV_MTU)) { if (mtu < IPV6_MMTU) { /* xxx */ rt->rt_rmx.rmx_locks |= RTV_MTU; } else if (mtu < rt->rt_ifp->if_mtu && rt->rt_rmx.rmx_mtu > mtu) { icmp6stat.icp6s_pmtuchg++; rt->rt_rmx.rmx_mtu = mtu; } } if (rt) { /* XXX: need braces to avoid conflict with else in RTFREE. */ RTFREE(rt); } } /* * Process a Node Information Query packet, based on * draft-ietf-ipngwg-icmp-name-lookups-07. 
* * Spec incompatibilities: * - IPv6 Subject address handling * - IPv4 Subject address handling support missing * - Proxy reply (answer even if it's not for me) * - joins NI group address at in6_ifattach() time only, does not cope * with hostname changes by sethostname(3) */ #define hostnamelen strlen(hostname) static struct mbuf * ni6_input(m, off) struct mbuf *m; int off; { struct icmp6_nodeinfo *ni6, *nni6; struct mbuf *n = NULL; u_int16_t qtype; int subjlen; int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); struct ni_reply_fqdn *fqdn; int addrs; /* for NI_QTYPE_NODEADDR */ struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ struct sockaddr_in6 sin6; /* double meaning; ip6_dst and subjectaddr */ struct sockaddr_in6 sin6_d; /* XXX: we should retrieve this from m_aux */ struct ip6_hdr *ip6; int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ char *subj = NULL; struct in6_ifaddr *ia6 = NULL; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); if (ni6 == NULL) { /* m is already reclaimed */ return NULL; } #endif /* * Validate IPv6 destination address. * * The Responder must discard the Query without further processing * unless it is one of the Responder's unicast or anycast addresses, or * a link-local scope multicast address which the Responder has joined. * [icmp-name-lookups-07, Section 4.] 
*/ bzero(&sin6, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); bcopy(&ip6->ip6_dst, &sin6.sin6_addr, sizeof(sin6.sin6_addr)); /* XXX scopeid */ if ((ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)&sin6)) != NULL) { /* unicast/anycast, fine */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (icmp6_nodeinfo & 4) == 0) { nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); goto bad; } } else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr)) ; /* link-local multicast, fine */ else goto bad; /* validate query Subject field. */ qtype = ntohs(ni6->ni_qtype); subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); switch (qtype) { case NI_QTYPE_NOOP: case NI_QTYPE_SUPTYPES: /* 07 draft */ if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) break; /* FALLTHROUGH */ case NI_QTYPE_FQDN: case NI_QTYPE_NODEADDR: switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: #if ICMP6_NI_SUBJ_IPV6 != 0 case 0: #endif /* * backward compatibility - try to accept 03 draft * format, where no Subject is present. */ if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && subjlen == 0) { oldfqdn++; break; } #if ICMP6_NI_SUBJ_IPV6 != 0 if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) goto bad; #endif if (subjlen != sizeof(sin6.sin6_addr)) goto bad; /* * Validate Subject address. * * Not sure what exactly "address belongs to the node" * means in the spec, is it just unicast, or what? * * At this moment we consider Subject address as * "belong to the node" if the Subject address equals * to the IPv6 destination address; validation for * IPv6 destination address should have done enough * check for us. * * We do not do proxy at this moment. */ /* m_pulldown instead of copy? 
*/ m_copydata(m, off + sizeof(struct icmp6_nodeinfo), subjlen, (caddr_t)&sin6.sin6_addr); sin6.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &sin6.sin6_addr); #ifndef SCOPEDROUTING in6_embedscope(&sin6.sin6_addr, &sin6, NULL, NULL); #endif bzero(&sin6_d, sizeof(sin6_d)); sin6_d.sin6_family = AF_INET6; /* not used, actually */ sin6_d.sin6_len = sizeof(sin6_d); /* ditto */ sin6_d.sin6_addr = ip6->ip6_dst; sin6_d.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &ip6->ip6_dst); #ifndef SCOPEDROUTING in6_embedscope(&sin6_d.sin6_addr, &sin6_d, NULL, NULL); #endif subj = (char *)&sin6; if (SA6_ARE_ADDR_EQUAL(&sin6, &sin6_d)) break; /* * XXX if we are to allow other cases, we should really * be careful about scope here. * basically, we should disallow queries toward IPv6 * destination X with subject Y, if scope(X) > scope(Y). * if we allow scope(X) > scope(Y), it will result in * information leakage across scope boundary. */ goto bad; case ICMP6_NI_SUBJ_FQDN: /* * Validate Subject name with gethostname(3). * * The behavior may need some debate, since: * - we are not sure if the node has FQDN as * hostname (returned by gethostname(3)). * - the code does wildcard match for truncated names. * however, we are not sure if we want to perform * wildcard match, if gethostname(3) side has * truncated hostname. */ n = ni6_nametodns(hostname, hostnamelen, 0); if (!n || n->m_next || n->m_len == 0) goto bad; IP6_EXTHDR_GET(subj, char *, m, off + sizeof(struct icmp6_nodeinfo), subjlen); if (subj == NULL) goto bad; if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), n->m_len)) { goto bad; } m_freem(n); n = NULL; break; case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ default: goto bad; } break; } /* refuse based on configuration. XXX ICMP6_NI_REFUSED? 
*/ switch (qtype) { case NI_QTYPE_FQDN: if ((icmp6_nodeinfo & 1) == 0) goto bad; break; case NI_QTYPE_NODEADDR: if ((icmp6_nodeinfo & 2) == 0) goto bad; break; } /* guess reply length */ switch (qtype) { case NI_QTYPE_NOOP: break; /* no reply data */ case NI_QTYPE_SUPTYPES: replylen += sizeof(u_int32_t); break; case NI_QTYPE_FQDN: /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); break; case NI_QTYPE_NODEADDR: addrs = ni6_addrs(ni6, m, &ifp, subj); if ((replylen += addrs * (sizeof(struct in6_addr) + sizeof(u_int32_t))) > MCLBYTES) replylen = MCLBYTES; /* XXX: will truncate pkt later */ break; default: /* * XXX: We must return a reply with the ICMP6 code * `unknown Qtype' in this case. However we regard the case * as an FQDN query for backward compatibility. * Older versions set a random value to this field, * so it rarely varies in the defined qtypes. * But the mechanism is not reliable... * maybe we should obsolete older versions. */ qtype = NI_QTYPE_FQDN; /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); oldfqdn++; break; } /* allocate an mbuf to reply. */ MGETHDR(n, M_DONTWAIT, m->m_type); if (n == NULL) { m_freem(m); return(NULL); } - M_COPY_PKTHDR(n, m); /* just for recvif */ + M_MOVE_PKTHDR(n, m); /* just for recvif */ if (replylen > MHLEN) { if (replylen > MCLBYTES) { /* * XXX: should we try to allocate more? But MCLBYTES * is probably much larger than IPV6_MMTU... 
*/ goto bad; } MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { goto bad; } } n->m_pkthdr.len = n->m_len = replylen; /* copy mbuf header and IPv6 + Node Information base headers */ bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); /* qtype dependent procedure */ switch (qtype) { case NI_QTYPE_NOOP: nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = 0; break; case NI_QTYPE_SUPTYPES: { u_int32_t v; nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = htons(0x0000); /* raw bitmap */ /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ v = (u_int32_t)htonl(0x0000000f); bcopy(&v, nni6 + 1, sizeof(u_int32_t)); break; } case NI_QTYPE_FQDN: nni6->ni_code = ICMP6_NI_SUCCESS; fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); nni6->ni_flags = 0; /* XXX: meaningless TTL */ fqdn->ni_fqdn_ttl = 0; /* ditto. */ /* * XXX do we really have FQDN in variable "hostname"? */ n->m_next = ni6_nametodns(hostname, hostnamelen, oldfqdn); if (n->m_next == NULL) goto bad; /* XXX we assume that n->m_next is not a chain */ if (n->m_next->m_next != NULL) goto bad; n->m_pkthdr.len += n->m_next->m_len; break; case NI_QTYPE_NODEADDR: { int lenlim, copied; nni6->ni_code = ICMP6_NI_SUCCESS; n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); lenlim = M_TRAILINGSPACE(n); copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); /* XXX: reset mbuf length */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo) + copied; break; } default: break; /* XXX impossible! */ } nni6->ni_type = ICMP6_NI_REPLY; m_freem(m); return(n); bad: m_freem(m); if (n) m_freem(n); return(NULL); } #undef hostnamelen /* * make a mbuf with DNS-encoded string. no compression support. 
* * XXX names with less than 2 dots (like "foo" or "foo.section") will be * treated as truncated name (two \0 at the end). this is a wild guess. */ static struct mbuf * ni6_nametodns(name, namelen, old) const char *name; int namelen; int old; /* return pascal string if non-zero */ { struct mbuf *m; char *cp, *ep; const char *p, *q; int i, len, nterm; if (old) len = namelen + 1; else len = MCLBYTES; /* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */ MGET(m, M_DONTWAIT, MT_DATA); if (m && len > MLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) goto fail; } if (!m) goto fail; m->m_next = NULL; if (old) { m->m_len = len; *mtod(m, char *) = namelen; bcopy(name, mtod(m, char *) + 1, namelen); return m; } else { m->m_len = 0; cp = mtod(m, char *); ep = mtod(m, char *) + M_TRAILINGSPACE(m); /* if not certain about my name, return empty buffer */ if (namelen == 0) return m; /* * guess if it looks like shortened hostname, or FQDN. * shortened hostname needs two trailing "\0". */ i = 0; for (p = name; p < name + namelen; p++) { if (*p && *p == '.') i++; } if (i < 2) nterm = 2; else nterm = 1; p = name; while (cp < ep && p < name + namelen) { i = 0; for (q = p; q < name + namelen && *q && *q != '.'; q++) i++; /* result does not fit into mbuf */ if (cp + i + 1 >= ep) goto fail; /* * DNS label length restriction, RFC1035 page 8. * "i == 0" case is included here to avoid returning * 0-length label on "foo..bar". */ if (i <= 0 || i >= 64) goto fail; *cp++ = i; bcopy(p, cp, i); cp += i; p = q; if (p < name + namelen && *p == '.') p++; } /* termination */ if (cp + nterm >= ep) goto fail; while (nterm-- > 0) *cp++ = '\0'; m->m_len = cp - mtod(m, char *); return m; } panic("should not reach here"); /* NOTREACHED */ fail: if (m) m_freem(m); return NULL; } /* * check if two DNS-encoded string matches. takes care of truncated * form (with \0\0 at the end). no compression support. 
 * XXX upper/lowercase match (see RFC2065)
 *
 * Returns 1 on match, 0 otherwise.  A name terminated early by "\0"
 * (truncated form) matches any name sharing that prefix.
 */
static int
ni6_dnsmatch(a, alen, b, blen)
	const char *a;
	int alen;
	const char *b;
	int blen;
{
	const char *a0, *b0;
	int l;

	/* simplest case - need validation? */
	if (alen == blen && bcmp(a, b, alen) == 0)
		return 1;

	a0 = a;
	b0 = b;

	/* termination is mandatory */
	if (alen < 2 || blen < 2)
		return 0;
	if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0')
		return 0;
	alen--;
	blen--;

	/* walk both encodings label by label */
	while (a - a0 < alen && b - b0 < blen) {
		if (a - a0 + 1 > alen || b - b0 + 1 > blen)
			return 0;

		/* length bytes with the high bit set would be pointers */
		if ((signed char)a[0] < 0 || (signed char)b[0] < 0)
			return 0;
		/* we don't support compression yet */
		if (a[0] >= 64 || b[0] >= 64)
			return 0;

		/* truncated case */
		if (a[0] == 0 && a - a0 == alen - 1)
			return 1;
		if (b[0] == 0 && b - b0 == blen - 1)
			return 1;
		if (a[0] == 0 || b[0] == 0)
			return 0;

		if (a[0] != b[0])
			return 0;
		l = a[0];
		if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen)
			return 0;
		if (bcmp(a + 1, b + 1, l) != 0)
			return 0;

		a += 1 + l;
		b += 1 + l;
	}

	/* match only if both names were consumed completely */
	if (a - a0 == alen && b - b0 == blen)
		return 1;
	else
		return 0;
}

/*
 * calculate the number of addresses to be returned in the node info reply.
 *
 * Walks every interface (under the IFNET read lock) counting the IPv6
 * addresses that pass the query's scope/anycast/temporary filters.  If
 * the query named a specific subject address and an interface owning it
 * is found, only that interface's count is returned and *ifpp is set so
 * ni6_store_addrs() can restrict itself to it.
 */
static int
ni6_addrs(ni6, m, ifpp, subj)
	struct icmp6_nodeinfo *ni6;
	struct mbuf *m;
	struct ifnet **ifpp;
	char *subj;
{
	struct ifnet *ifp;
	struct in6_ifaddr *ifa6;
	struct ifaddr *ifa;
	struct sockaddr_in6 *subj_ip6 = NULL; /* XXX pedant */
	int addrs = 0, addrsofif, iffound = 0;
	int niflags = ni6->ni_flags;

	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
		switch (ni6->ni_code) {
		case ICMP6_NI_SUBJ_IPV6:
			if (subj == NULL) /* must be impossible... */
				return(0);
			subj_ip6 = (struct sockaddr_in6 *)subj;
			break;
		default:
			/*
			 * XXX: we only support IPv6 subject address for
			 * this Qtype.
			 */
			return(0);
		}
	}

	IFNET_RLOCK();
	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
	{
		addrsofif = 0;
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
		{
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			ifa6 = (struct in6_ifaddr *)ifa;

			if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
			    IN6_ARE_ADDR_EQUAL(&subj_ip6->sin6_addr,
					       &ifa6->ia_addr.sin6_addr))
				iffound = 1;

			/*
			 * IPv4-mapped addresses can only be returned by a
			 * Node Information proxy, since they represent
			 * addresses of IPv4-only nodes, which perforce do
			 * not implement this protocol.
			 * [icmp-name-lookups-07, Section 5.4]
			 * So we don't support NI_NODEADDR_FLAG_COMPAT in
			 * this function at this moment.
			 */

			/* What do we have to do about ::1? */
			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
			case IPV6_ADDR_SCOPE_LINKLOCAL:
				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
					continue;
				break;
			case IPV6_ADDR_SCOPE_SITELOCAL:
				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
					continue;
				break;
			case IPV6_ADDR_SCOPE_GLOBAL:
				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
					continue;
				break;
			default:
				continue;
			}

			/*
			 * check if anycast is okay.
			 * XXX: just experimental. not in the spec.
			 */
			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
				continue; /* we need only unicast addresses */
			/* temporary addresses only if sysctl bit 2 allows */
			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
			    (icmp6_nodeinfo & 4) == 0) {
				continue;
			}
			addrsofif++; /* count the address */
		}
		if (iffound) {
			/* subject address owned here: report this IF only */
			*ifpp = ifp;
			IFNET_RUNLOCK();
			return(addrsofif);
		}

		addrs += addrsofif;
	}
	IFNET_RUNLOCK();

	return(addrs);
}

/*
 * Copy (address, TTL) pairs into the reply following nni6, honouring the
 * same filters as ni6_addrs().  Preferred addresses are emitted before
 * deprecated ones (second pass via the "again" label).  Returns the byte
 * count copied; sets NI_NODEADDR_FLAG_TRUNCATE when resid runs out.
 */
static int
ni6_store_addrs(ni6, nni6, ifp0, resid)
	struct icmp6_nodeinfo *ni6, *nni6;
	struct ifnet *ifp0;
	int resid;
{
	struct ifnet *ifp = ifp0 ?
	    ifp0 : TAILQ_FIRST(&ifnet);
	struct in6_ifaddr *ifa6;
	struct ifaddr *ifa;
	struct ifnet *ifp_dep = NULL;
	int copied = 0, allow_deprecated = 0;
	u_char *cp = (u_char *)(nni6 + 1);
	int niflags = ni6->ni_flags;
	u_int32_t ltime;

	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
		return(0);	/* needless to copy */

	IFNET_RLOCK();
  again:

	for (; ifp; ifp = TAILQ_NEXT(ifp, if_list))
	{
		for (ifa = ifp->if_addrlist.tqh_first; ifa;
		     ifa = ifa->ifa_list.tqe_next)
		{
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			ifa6 = (struct in6_ifaddr *)ifa;

			if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
			    allow_deprecated == 0) {
				/*
				 * prefererred address should be put before
				 * deprecated addresses.
				 */

				/* record the interface for later search */
				if (ifp_dep == NULL)
					ifp_dep = ifp;

				continue;
			}
			else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
				 allow_deprecated != 0)
				continue; /* we now collect deprecated addrs */

			/* What do we have to do about ::1? */
			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
			case IPV6_ADDR_SCOPE_LINKLOCAL:
				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
					continue;
				break;
			case IPV6_ADDR_SCOPE_SITELOCAL:
				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
					continue;
				break;
			case IPV6_ADDR_SCOPE_GLOBAL:
				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
					continue;
				break;
			default:
				continue;
			}

			/*
			 * check if anycast is okay.
			 * XXX: just experimental. not in the spec.
			 */
			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
				continue;
			/* temporary addresses only if sysctl bit 2 allows */
			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
			    (icmp6_nodeinfo & 4) == 0) {
				continue;
			}

			/* now we can copy the address */
			if (resid < sizeof(struct in6_addr) +
			    sizeof(u_int32_t)) {
				/*
				 * We give up much more copy.
				 * Set the truncate flag and return.
				 */
				nni6->ni_flags |=
					NI_NODEADDR_FLAG_TRUNCATE;
				IFNET_RUNLOCK();
				return(copied);
			}

			/*
			 * Set the TTL of the address.
			 * The TTL value should be one of the following
			 * according to the specification:
			 *
			 * 1. The remaining lifetime of a DHCP lease on the
			 *    address, or
			 * 2. The remaining Valid Lifetime of a prefix from
			 *    which the address was derived through Stateless
			 *    Autoconfiguration.
			 *
			 * Note that we currently do not support stateful
			 * address configuration by DHCPv6, so the former
			 * case can't happen.
			 */
			if (ifa6->ia6_lifetime.ia6t_expire == 0)
				ltime = ND6_INFINITE_LIFETIME;
			else {
				if (ifa6->ia6_lifetime.ia6t_expire >
				    time_second)
					ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second);
				else
					ltime = 0;
			}

			/*
			 * NOTE(review): the original extraction garbled this
			 * call to "bcopy(<ime, ..." via an "&lt;" HTML entity;
			 * restored to the declared local "ltime".
			 */
			bcopy(&ltime, cp, sizeof(u_int32_t));
			cp += sizeof(u_int32_t);

			/* copy the address itself */
			bcopy(&ifa6->ia_addr.sin6_addr, cp,
			      sizeof(struct in6_addr));
			/* XXX: KAME link-local hack; remove ifindex */
			if (IN6_IS_ADDR_LINKLOCAL(&ifa6->ia_addr.sin6_addr))
				((struct in6_addr *)cp)->s6_addr16[1] = 0;
			cp += sizeof(struct in6_addr);

			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
			copied += (sizeof(struct in6_addr) + sizeof(u_int32_t));
		}
		if (ifp0)	/* we need search only on the specified IF */
			break;
	}

	if (allow_deprecated == 0 && ifp_dep != NULL) {
		/* second pass: now emit the deprecated addresses */
		ifp = ifp_dep;
		allow_deprecated = 1;

		goto again;
	}

	IFNET_RUNLOCK();

	return(copied);
}

/*
 * XXX almost dup'ed code with rip6_input.
 *
 * Deliver an inbound ICMPv6 packet to every matching raw-IPv6 socket,
 * honouring each socket's ICMP6 filter.  The last matching pcb receives
 * the original mbuf; earlier matches get m_copy()s.  Always consumes m.
 */
static int
icmp6_rip6_input(mp, off)
	struct	mbuf **mp;
	int	off;
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct in6pcb *in6p;
	struct in6pcb *last = NULL;
	struct sockaddr_in6 rip6src;
	struct icmp6_hdr *icmp6;
	struct mbuf *opts = NULL;

#ifndef PULLDOWN_TEST
	/* this is assumed to be safe. */
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
#else
	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
	if (icmp6 == NULL) {
		/* m is already reclaimed */
		return IPPROTO_DONE;
	}
#endif

	bzero(&rip6src, sizeof(rip6src));
	rip6src.sin6_len = sizeof(struct sockaddr_in6);
	rip6src.sin6_family = AF_INET6;
	/* KAME hack: recover scopeid */
	(void)in6_recoverscope(&rip6src, &ip6->ip6_src, m->m_pkthdr.rcvif);

	LIST_FOREACH(in6p, &ripcb, inp_list)
	{
		if ((in6p->inp_vflag & INP_IPV6) == 0)
			continue;
#ifdef HAVE_NRL_INPCB
		if (!(in6p->in6p_flags & INP_IPV6))
			continue;
#endif
		if (in6p->in6p_ip6_nxt != IPPROTO_ICMPV6)
			continue;
		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
			continue;
		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
			continue;
		if (in6p->in6p_icmp6filt
		    && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
				 in6p->in6p_icmp6filt))
			continue;
		if (last) {
			struct	mbuf *n;
			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
				if (last->in6p_flags & IN6P_CONTROLOPTS)
					ip6_savecontrol(last, &opts, ip6, n);
				/* strip intermediate headers */
				m_adj(n, off);
				if (sbappendaddr(&last->in6p_socket->so_rcv,
						 (struct sockaddr *)&rip6src,
						 n, opts) == 0) {
					/* should notify about lost packet */
					m_freem(n);
					if (opts) {
						m_freem(opts);
					}
				} else
					sorwakeup(last->in6p_socket);
				opts = NULL;
			}
		}
		last = in6p;
	}
	if (last) {
		if (last->in6p_flags & IN6P_CONTROLOPTS)
			ip6_savecontrol(last, &opts, ip6, m);
		/* strip intermediate headers */
		m_adj(m, off);
		if (sbappendaddr(&last->in6p_socket->so_rcv,
				(struct sockaddr *)&rip6src, m, opts) == 0) {
			m_freem(m);
			if (opts)
				m_freem(opts);
		} else
			sorwakeup(last->in6p_socket);
	} else {
		m_freem(m);
		ip6stat.ip6s_delivered--;
	}
	return IPPROTO_DONE;
}

/*
 * Reflect the ip6 packet back to the source.
 * OFF points to the icmp6 header, counted from the top of the mbuf.
 *
 * Strips any extension headers between the IPv6 header and the ICMPv6
 * header, swaps src/dst, selects a source address, recomputes the
 * checksum and transmits.  Consumes m on all paths.
 */
void
icmp6_reflect(m, off)
	struct	mbuf *m;
	size_t off;
{
	struct ip6_hdr *ip6;
	struct icmp6_hdr *icmp6;
	struct in6_ifaddr *ia;
	struct in6_addr t, *src = 0;
	int plen;
	int type, code;
	struct ifnet *outif = NULL;
	struct sockaddr_in6 sa6_src, sa6_dst;
#ifdef COMPAT_RFC1885
	int mtu = IPV6_MMTU;
	struct sockaddr_in6 *sin6 = &icmp6_reflect_rt.ro_dst;
#endif

	/* too short to reflect */
	if (off < sizeof(struct ip6_hdr)) {
		nd6log((LOG_DEBUG,
		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
		    __FILE__, __LINE__));
		goto bad;
	}

	/*
	 * If there are extra headers between IPv6 and ICMPv6, strip
	 * off that header first.
	 */
#ifdef DIAGNOSTIC
	if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
		panic("assumption failed in icmp6_reflect");
#endif
	if (off > sizeof(struct ip6_hdr)) {
		size_t l;
		struct ip6_hdr nip6;

		l = off - sizeof(struct ip6_hdr);
		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
		m_adj(m, l);
		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
		if (m->m_len < l) {
			if ((m = m_pullup(m, l)) == NULL)
				return;
		}
		bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
	} else /* off == sizeof(struct ip6_hdr) */ {
		size_t l;
		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
		if (m->m_len < l) {
			if ((m = m_pullup(m, l)) == NULL)
				return;
		}
	}
	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
	type = icmp6->icmp6_type; /* keep type for statistics */
	code = icmp6->icmp6_code; /* ditto. */
	t = ip6->ip6_dst;

	/*
	 * ip6_input() drops a packet if its src is multicast.
	 * So, the src is never multicast.
	 */
	ip6->ip6_dst = ip6->ip6_src;

	/*
	 * XXX: make sure to embed scope zone information, using
	 * already embedded IDs or the received interface (if any).
	 * Note that rcvif may be NULL.
	 * TODO: scoped routing case (XXX).
	 */
	bzero(&sa6_src, sizeof(sa6_src));
	sa6_src.sin6_family = AF_INET6;
	sa6_src.sin6_len = sizeof(sa6_src);
	sa6_src.sin6_addr = ip6->ip6_dst;
	in6_recoverscope(&sa6_src, &ip6->ip6_dst, m->m_pkthdr.rcvif);
	in6_embedscope(&ip6->ip6_dst, &sa6_src, NULL, NULL);
	bzero(&sa6_dst, sizeof(sa6_dst));
	sa6_dst.sin6_family = AF_INET6;
	sa6_dst.sin6_len = sizeof(sa6_dst);
	sa6_dst.sin6_addr = t;
	in6_recoverscope(&sa6_dst, &t, m->m_pkthdr.rcvif);
	in6_embedscope(&t, &sa6_dst, NULL, NULL);

#ifdef COMPAT_RFC1885
	/*
	 * xxx guess MTU
	 * RFC 1885 requires that echo reply should be truncated if it
	 * does not fit in with (return) path MTU, but the description was
	 * removed in the new spec.
	 */
	if (icmp6_reflect_rt.ro_rt == 0 ||
	    ! (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &ip6->ip6_dst))) {
		if (icmp6_reflect_rt.ro_rt) {
			RTFREE(icmp6_reflect_rt.ro_rt);
			icmp6_reflect_rt.ro_rt = 0;
		}
		bzero(sin6, sizeof(*sin6));
		sin6->sin6_family = PF_INET6;
		sin6->sin6_len = sizeof(struct sockaddr_in6);
		sin6->sin6_addr = ip6->ip6_dst;

		rtalloc_ign((struct route *)&icmp6_reflect_rt.ro_rt,
			    RTF_PRCLONING);
	}

	if (icmp6_reflect_rt.ro_rt == 0)
		goto bad;

	if ((icmp6_reflect_rt.ro_rt->rt_flags & RTF_HOST)
	    && mtu < icmp6_reflect_rt.ro_rt->rt_ifp->if_mtu)
		mtu = icmp6_reflect_rt.ro_rt->rt_rmx.rmx_mtu;

	if (mtu < m->m_pkthdr.len) {
		plen -= (m->m_pkthdr.len - mtu);
		m_adj(m, mtu - m->m_pkthdr.len);
	}
#endif
	/*
	 * If the incoming packet was addressed directly to us(i.e. unicast),
	 * use dst as the src for the reply.
	 * The IN6_IFF_NOTREADY case would be VERY rare, but is possible
	 * (for example) when we encounter an error while forwarding procedure
	 * destined to a duplicated address of ours.
	 */
	for (ia = in6_ifaddr; ia; ia = ia->ia_next)
		if (IN6_ARE_ADDR_EQUAL(&t, &ia->ia_addr.sin6_addr) &&
		    (ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
			src = &t;
			break;
		}
	if (ia == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) {
		/*
		 * This is the case if the dst is our link-local address
		 * and the sender is also ourselves.
		 */
		src = &t;
	}

	if (src == 0) {
		int e;
		struct route_in6 ro;

		/*
		 * This case matches to multicasts, our anycast, or unicasts
		 * that we do not own. Select a source address based on the
		 * source address of the erroneous packet.
		 */
		bzero(&ro, sizeof(ro));
		src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &e);
		if (ro.ro_rt)
			RTFREE(ro.ro_rt); /* XXX: we could use this */
		if (src == NULL) {
			nd6log((LOG_DEBUG,
			    "icmp6_reflect: source can't be determined: "
			    "dst=%s, error=%d\n",
			    ip6_sprintf(&sa6_src.sin6_addr), e));
			goto bad;
		}
	}

	ip6->ip6_src = *src;

	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	if (m->m_pkthdr.rcvif) {
		/* XXX: This may not be the outgoing interface */
		ip6->ip6_hlim = nd_ifinfo[m->m_pkthdr.rcvif->if_index].chlim;
	} else
		ip6->ip6_hlim = ip6_defhlim;

	icmp6->icmp6_cksum = 0;
	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
					sizeof(struct ip6_hdr), plen);

	/*
	 * XXX option handling
	 */

	m->m_flags &= ~(M_BCAST|M_MCAST);
#ifdef COMPAT_RFC1885
	ip6_output(m, NULL, &icmp6_reflect_rt, 0, NULL, &outif, NULL);
#else
	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
#endif
	if (outif)
		icmp6_ifoutstat_inc(outif, type, code);

	return;

 bad:
	m_freem(m);
	return;
}

/* drive the MLD fast timers */
void
icmp6_fasttimo()
{

	mld6_fasttimeo();
}

/*
 * Format a "(src=... dst=... tgt=...)" triple for redirect diagnostics.
 * Returns a pointer to a static buffer (not re-entrant).
 */
static const char *
icmp6_redirect_diag(src6, dst6, tgt6)
	struct in6_addr *src6;
	struct in6_addr *dst6;
	struct in6_addr *tgt6;
{
	static char buf[1024];
	snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)",
		ip6_sprintf(src6), ip6_sprintf(dst6), ip6_sprintf(tgt6));
	return buf;
}

/*
 * Validate and apply an inbound ND Redirect (RFC 2461 8.1/8.3),
 * updating the neighbor cache and the routing table.  Consumes m.
 */
void
icmp6_redirect_input(m, off)
	struct mbuf *m;
	int off;
{
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct nd_redirect *nd_rd;
	int icmp6len = ntohs(ip6->ip6_plen);
	char *lladdr = NULL;
	int lladdrlen = 0;
	u_char *redirhdr = NULL;
	int redirhdrlen = 0;
	struct rtentry *rt = NULL;
	int is_router;
	int is_onlink;
	struct in6_addr src6 = ip6->ip6_src;
	struct in6_addr redtgt6;
	struct in6_addr
	    reddst6;
	union nd_opts ndopts;

	if (!m || !ifp)
		return;

	/* XXX if we are router, we don't update route by icmp6 redirect */
	if (ip6_forwarding)
		goto freeit;
	if (!icmp6_rediraccept)
		goto freeit;

#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, icmp6len,);
	nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);
#else
	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
	if (nd_rd == NULL) {
		icmp6stat.icp6s_tooshort++;
		return;
	}
#endif
	redtgt6 = nd_rd->nd_rd_target;
	reddst6 = nd_rd->nd_rd_dst;

	/* KAME scope hack: embed the receiving ifindex in link-locals */
	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
		redtgt6.s6_addr16[1] = htons(ifp->if_index);
	if (IN6_IS_ADDR_LINKLOCAL(&reddst6))
		reddst6.s6_addr16[1] = htons(ifp->if_index);

	/* validation */
	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
		nd6log((LOG_ERR,
			"ICMP6 redirect sent from %s rejected; "
			"must be from linklocal\n", ip6_sprintf(&src6)));
		goto bad;
	}
	if (ip6->ip6_hlim != 255) {
		nd6log((LOG_ERR,
			"ICMP6 redirect sent from %s rejected; "
			"hlim=%d (must be 255)\n",
			ip6_sprintf(&src6), ip6->ip6_hlim));
		goto bad;
	}
    {
	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
	struct sockaddr_in6 sin6;
	struct in6_addr *gw6;

	bzero(&sin6, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
	rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
	if (rt) {
		if (rt->rt_gateway == NULL ||
		    rt->rt_gateway->sa_family != AF_INET6) {
			nd6log((LOG_ERR,
			    "ICMP6 redirect rejected; no route "
			    "with inet6 gateway found for redirect dst: %s\n",
			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
			RTFREE(rt);
			goto bad;
		}

		gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
		if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
			nd6log((LOG_ERR,
				"ICMP6 redirect rejected; "
				"not equal to gw-for-src=%s (must be same): "
				"%s\n",
				ip6_sprintf(gw6),
				icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
			RTFREE(rt);
			goto bad;
		}
	} else {
		nd6log((LOG_ERR,
			"ICMP6 redirect rejected; "
			"no route found for redirect dst: %s\n",
			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}
	RTFREE(rt);
	rt = NULL;
    }
	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
		nd6log((LOG_ERR,
			"ICMP6 redirect rejected; "
			"redirect dst must be unicast: %s\n",
			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}

	is_router = is_onlink = 0;
	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
		is_router = 1;	/* router case */
	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
		is_onlink = 1;	/* on-link destination case */
	if (!is_router && !is_onlink) {
		nd6log((LOG_ERR,
			"ICMP6 redirect rejected; "
			"neither router case nor onlink case: %s\n",
			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}
	/* validation passed */

	icmp6len -= sizeof(*nd_rd);
	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
	if (nd6_options(&ndopts) < 0) {
		nd6log((LOG_INFO, "icmp6_redirect_input: "
			"invalid ND option, rejected: %s\n",
			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		/* nd6_options have incremented stats */
		goto freeit;
	}

	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
	}

	if (ndopts.nd_opts_rh) {
		redirhdrlen = ndopts.nd_opts_rh->nd_opt_rh_len;
		redirhdr = (u_char *)(ndopts.nd_opts_rh + 1); /* xxx */
	}

	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
		nd6log((LOG_INFO,
			"icmp6_redirect_input: lladdrlen mismatch for %s "
			"(if %d, icmp6 packet %d): %s\n",
			ip6_sprintf(&redtgt6), ifp->if_addrlen, lladdrlen - 2,
			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}

	/* RFC 2461 8.3 */
	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
			 is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);

	if (!is_onlink) {	/* better router case. perform rtredirect. */
		/* perform rtredirect */
		struct sockaddr_in6 sdst;
		struct sockaddr_in6 sgw;
		struct sockaddr_in6 ssrc;

		bzero(&sdst, sizeof(sdst));
		bzero(&sgw, sizeof(sgw));
		bzero(&ssrc, sizeof(ssrc));
		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
			sizeof(struct sockaddr_in6);
		bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
		rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw,
			   (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST,
			   (struct sockaddr *)&ssrc,
			   (struct rtentry **)NULL);
	}
	/* finally update cached route in each socket via pfctlinput */
    {
	struct sockaddr_in6 sdst;

	bzero(&sdst, sizeof(sdst));
	sdst.sin6_family = AF_INET6;
	sdst.sin6_len = sizeof(struct sockaddr_in6);
	bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
	pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
#ifdef IPSEC
	key_sa_routechange((struct sockaddr *)&sdst);
#endif
    }

 freeit:
	m_freem(m);
	return;

 bad:
	icmp6stat.icp6s_badredirect++;
	m_freem(m);
}

/*
 * Build and transmit an ND Redirect for packet m0 being forwarded via
 * route rt (RFC 2461 8.2).  Attaches target link-layer address and
 * redirected-header options when room permits.  Consumes m0.
 */
void
icmp6_redirect_output(m0, rt)
	struct mbuf *m0;
	struct rtentry *rt;
{
	struct ifnet *ifp;	/* my outgoing interface */
	struct in6_addr *ifp_ll6;
	struct in6_addr *router_ll6;
	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
	struct mbuf *m = NULL;	/* newly allocated one */
	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
	struct nd_redirect *nd_rd;
	size_t maxlen;
	u_char *p;
	struct ifnet *outif = NULL;
	struct sockaddr_in6 src_sa;

	icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);

	/* if we are not router, we don't send icmp6 redirect */
	if (!ip6_forwarding || ip6_accept_rtadv)
		goto fail;

	/* sanity check */
	if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp))
		goto fail;

	/*
	 * Address check:
	 *  the source address must identify a neighbor, and
	 *  the destination address must not be a multicast address
	 *  [RFC 2461, sec 8.2]
	 */
	sip6 = mtod(m0, struct
	    ip6_hdr *);
	bzero(&src_sa, sizeof(src_sa));
	src_sa.sin6_family = AF_INET6;
	src_sa.sin6_len = sizeof(src_sa);
	src_sa.sin6_addr = sip6->ip6_src;
	/* we don't currently use sin6_scope_id, but eventually use it */
	src_sa.sin6_scope_id = in6_addr2scopeid(ifp, &sip6->ip6_src);
	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
		goto fail;
	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
		goto fail;	/* what should we do here? */

	/* rate limit */
	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
		goto fail;

	/*
	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
	 * we almost always ask for an mbuf cluster for simplicity.
	 * (MHLEN < IPV6_MMTU is almost always true)
	 */
#if IPV6_MMTU >= MCLBYTES
# error assumption failed about IPV6_MMTU and MCLBYTES
#endif
	MGETHDR(m, M_DONTWAIT, MT_HEADER);
	if (m && IPV6_MMTU >= MHLEN)
		MCLGET(m, M_DONTWAIT);
	if (!m)
		goto fail;
	m->m_pkthdr.rcvif = NULL;
	m->m_len = 0;
	maxlen = M_TRAILINGSPACE(m);
	maxlen = min(IPV6_MMTU, maxlen);
	/* just for safety */
	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
		goto fail;
	}

	{
		/* get ip6 linklocal address for ifp(my outgoing interface). */
		struct in6_ifaddr *ia;
		if ((ia = in6ifa_ifpforlinklocal(ifp,
						 IN6_IFF_NOTREADY|
						 IN6_IFF_ANYCAST)) == NULL)
			goto fail;
		ifp_ll6 = &ia->ia_addr.sin6_addr;
	}

	/* get ip6 linklocal address for the router. */
	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
		struct sockaddr_in6 *sin6;
		sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
		router_ll6 = &sin6->sin6_addr;
		if (!IN6_IS_ADDR_LINKLOCAL(router_ll6))
			router_ll6 = (struct in6_addr *)NULL;
	} else
		router_ll6 = (struct in6_addr *)NULL;

	/* ip6 */
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	/* ip6->ip6_plen will be set later */
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_hlim = 255;
	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));

	/* ND Redirect */
	nd_rd = (struct nd_redirect *)(ip6 + 1);
	nd_rd->nd_rd_type = ND_REDIRECT;
	nd_rd->nd_rd_code = 0;
	nd_rd->nd_rd_reserved = 0;
	if (rt->rt_flags & RTF_GATEWAY) {
		/*
		 * nd_rd->nd_rd_target must be a link-local address in
		 * better router cases.
		 */
		if (!router_ll6)
			goto fail;
		bcopy(router_ll6, &nd_rd->nd_rd_target,
		      sizeof(nd_rd->nd_rd_target));
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
		      sizeof(nd_rd->nd_rd_dst));
	} else {
		/* make sure redtgt == reddst */
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
		      sizeof(nd_rd->nd_rd_target));
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
		      sizeof(nd_rd->nd_rd_dst));
	}

	p = (u_char *)(nd_rd + 1);

	if (!router_ll6)
		goto nolladdropt;

	{
		/* target lladdr option */
		struct rtentry *rt_router = NULL;
		int len;
		struct sockaddr_dl *sdl;
		struct nd_opt_hdr *nd_opt;
		char *lladdr;

		rt_router = nd6_lookup(router_ll6, 0, ifp);
		if (!rt_router)
			goto nolladdropt;
		len = sizeof(*nd_opt) + ifp->if_addrlen;
		len = (len + 7) & ~7;	/* round by 8 */
		/* safety check */
		if (len + (p - (u_char *)ip6) > maxlen)
			goto nolladdropt;
		if (!(rt_router->rt_flags & RTF_GATEWAY) &&
		    (rt_router->rt_flags & RTF_LLINFO) &&
		    (rt_router->rt_gateway->sa_family == AF_LINK) &&
		    (sdl = (struct sockaddr_dl *)rt_router->rt_gateway) &&
		    sdl->sdl_alen) {
			nd_opt = (struct nd_opt_hdr *)p;
			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
			nd_opt->nd_opt_len = len >> 3;
			lladdr = (char *)(nd_opt + 1);
			bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
			p += len;
		}
	}
  nolladdropt:;

	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;

	/* just to be safe */
#ifdef M_DECRYPTED	/*not openbsd*/
	if (m0->m_flags & M_DECRYPTED)
		goto noredhdropt;
#endif
	if (p - (u_char *)ip6 > maxlen)
		goto noredhdropt;

	{
		/* redirected header option */
		int len;
		struct nd_opt_rd_hdr *nd_opt_rh;

		/*
		 * compute the maximum size for icmp6 redirect header option.
		 * XXX room for auth header?
		 */
		len = maxlen - (p - (u_char *)ip6);
		len &= ~7;

		/* This is just for simplicity. */
		if (m0->m_pkthdr.len != m0->m_len) {
			if (m0->m_next) {
				m_freem(m0->m_next);
				m0->m_next = NULL;
			}
			m0->m_pkthdr.len = m0->m_len;
		}

		/*
		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
		 * about padding/truncate rule for the original IP packet.
		 * From the discussion on IPv6imp in Feb 1999, the consensus was:
		 * - "attach as much as possible" is the goal
		 * - pad if not aligned (original size can be guessed by original
		 *   ip6 header)
		 * Following code adds the padding if it is simple enough,
		 * and truncates if not.
		 */
		if (m0->m_next || m0->m_pkthdr.len != m0->m_len)
			panic("assumption failed in %s:%d\n", __FILE__,
			      __LINE__);

		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
			/* not enough room, truncate */
			m0->m_pkthdr.len = m0->m_len = len -
				sizeof(*nd_opt_rh);
		} else {
			/* enough room, pad or truncate */
			size_t extra;

			extra = m0->m_pkthdr.len % 8;
			if (extra) {
				/* pad if easy enough, truncate if not */
				if (8 - extra <= M_TRAILINGSPACE(m0)) {
					/* pad */
					m0->m_len += (8 - extra);
					m0->m_pkthdr.len += (8 - extra);
				} else {
					/* truncate */
					m0->m_pkthdr.len -= extra;
					m0->m_len -= extra;
				}
			}
			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
			m0->m_pkthdr.len = m0->m_len = len -
				sizeof(*nd_opt_rh);
		}

		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
		bzero(nd_opt_rh, sizeof(*nd_opt_rh));
		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
		nd_opt_rh->nd_opt_rh_len = len >> 3;
		p += sizeof(*nd_opt_rh);
		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;

		/* connect m0 to m */
		m->m_next = m0;
		m->m_pkthdr.len = m->m_len + m0->m_len;
	}
  noredhdropt:;

	/* XXX: clear embedded link-local scope before transmission */
	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_src))
		sip6->ip6_src.s6_addr16[1] = 0;
	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_dst))
		sip6->ip6_dst.s6_addr16[1] = 0;
#if 0
	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
		ip6->ip6_src.s6_addr16[1] = 0;
	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))
		ip6->ip6_dst.s6_addr16[1] = 0;
#endif
	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_target))
		nd_rd->nd_rd_target.s6_addr16[1] = 0;
	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_dst))
		nd_rd->nd_rd_dst.s6_addr16[1] = 0;

	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));

	nd_rd->nd_rd_cksum = 0;
	nd_rd->nd_rd_cksum
		= in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen));

	/* send the packet to outside... */
	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
	if (outif) {
		icmp6_ifstat_inc(outif, ifs6_out_msg);
		icmp6_ifstat_inc(outif, ifs6_out_redirect);
	}
	icmp6stat.icp6s_outhist[ND_REDIRECT]++;

	return;

fail:
	if (m)
		m_freem(m);
	if (m0)
		m_freem(m0);
}

#ifdef HAVE_NRL_INPCB
#define sotoin6pcb	sotoinpcb
#define in6pcb		inpcb
#define in6p_icmp6filt	inp_icmp6filt
#endif
/*
 * ICMPv6 socket option processing.
 *
 * Handles ICMP6_FILTER get/set on a raw ICMPv6 socket; anything else
 * yields ENOPROTOOPT (or EINVAL for a non-ICMPv6 level).
 */
int
icmp6_ctloutput(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error = 0;
	int optlen;
	struct inpcb *inp = sotoinpcb(so);
	int level, op, optname;

	if (sopt) {
		level = sopt->sopt_level;
		op = sopt->sopt_dir;
		optname = sopt->sopt_name;
		optlen = sopt->sopt_valsize;
	} else
		level = op = optname = optlen = 0;

	if (level != IPPROTO_ICMPV6) {
		return EINVAL;
	}

	switch (op) {
	case PRCO_SETOPT:
		switch (optname) {
		case ICMP6_FILTER:
		    {
			struct icmp6_filter *p;

			if (optlen != sizeof(*p)) {
				error = EMSGSIZE;
				break;
			}
			if (inp->in6p_icmp6filt == NULL) {
				error = EINVAL;
				break;
			}
			error = sooptcopyin(sopt, inp->in6p_icmp6filt, optlen,
				optlen);
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	case PRCO_GETOPT:
		switch (optname) {
		case ICMP6_FILTER:
		    {
			if (inp->in6p_icmp6filt == NULL) {
				error = EINVAL;
				break;
			}
			error = sooptcopyout(sopt, inp->in6p_icmp6filt,
				sizeof(struct icmp6_filter));
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;
	}

	return(error);
}
#ifdef HAVE_NRL_INPCB
#undef sotoin6pcb
#undef in6pcb
#undef in6p_icmp6filt
#endif

#ifndef HAVE_PPSRATECHECK
#ifndef timersub
#define	timersub(tvp, uvp, vvp)						\
	do {								\
		(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;		\
		(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;	\
		if ((vvp)->tv_usec < 0) {				\
			(vvp)->tv_sec--;				\
			(vvp)->tv_usec += 1000000;			\
		}							\
	} while (0)
#endif

/*
 * ppsratecheck(): packets (or events) per second limitation.
 *
 * Returns non-zero when the event may proceed (under maxpps for the
 * current one-second window, or maxpps < 0 meaning "no limit"), zero
 * when it should be suppressed.  *curpps counts events in the window.
 */
static int
ppsratecheck(lasttime, curpps, maxpps)
	struct timeval *lasttime;
	int *curpps;
	int maxpps;	/* maximum pps allowed */
{
	struct timeval tv, delta;
	int s, rv;

	s = splclock();
	microtime(&tv);
	splx(s);
	timersub(&tv, lasttime, &delta);

	/*
	 * Check for 0,0 so that the message will be seen at least once.
	 * If more than one second has passed since the last update of
	 * lasttime, reset the counter.
	 *
	 * We do increment *curpps even in *curpps < maxpps case, as some may
	 * try to use *curpps for stat purposes as well.
	 */
	if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) ||
	    delta.tv_sec >= 1) {
		*lasttime = tv;
		*curpps = 0;
		rv = 1;
	} else if (maxpps < 0)
		rv = 1;
	else if (*curpps < maxpps)
		rv = 1;
	else
		rv = 0;

#if 1 /* DIAGNOSTIC? */
	/* be careful about wrap-around */
	if (*curpps + 1 > *curpps)
		*curpps = *curpps + 1;
#else
	/*
	 * assume that there's not too many calls to this function.
	 * not sure if the assumption holds, as it depends on *caller's*
	 * behavior, not the behavior of this function.
	 * IMHO it is wrong to make assumption on the caller's behavior,
	 * so the above #if is #if 1, not #ifdef DIAGNOSTIC.
	 */
	*curpps = *curpps + 1;
#endif

	return (rv);
}
#endif

/*
 * Perform rate limit check.
 * Returns 0 if it is okay to send the icmp6 packet.
 * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
 * limitation.
 *
 * XXX per-destination/type check necessary?
*/ static int icmp6_ratelimit(dst, type, code) const struct in6_addr *dst; /* not used at this moment */ const int type; /* not used at this moment */ const int code; /* not used at this moment */ { int ret; ret = 0; /* okay to send */ /* PPS limit */ if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count, icmp6errppslim)) { /* The packet is subject to rate limit */ ret++; } return ret; } Index: head/sys/netinet6/ip6_input.c =================================================================== --- head/sys/netinet6/ip6_input.c (revision 108465) +++ head/sys/netinet6/ip6_input.c (revision 108466) @@ -1,1663 +1,1663 @@ /* $FreeBSD$ */ /* $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
 */

#include "opt_ip6fw.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_pfil_hooks.h"

/*
 * NOTE(review): the angle-bracket header names in the #include lines below
 * appear to have been stripped during text extraction; only the bare
 * #include tokens survive.  Restore the header names from the original
 * revision (r108466) before compiling -- TODO confirm against upstream.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef PFIL_HOOKS
#include
#endif
#include
#include
#ifdef INET
#include
#include
#endif /* INET */
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef IPSEC
#include
#ifdef INET6
#include
#endif
#endif
#ifdef FAST_IPSEC
#include
#include
#define IPSEC
#endif /* FAST_IPSEC */
#include
#include
#include

/* protocol switch owned by the INET6 domain (defined in in6_proto.c) */
extern struct domain inet6domain;

/* maps an IPv6 next-header value to an index into inet6sw[] */
u_char ip6_protox[IPPROTO_MAX];
static int ip6qmaxlen = IFQ_MAXLEN;
struct in6_ifaddr *in6_ifaddr;

extern struct callout in6_tmpaddrtimer_ch;

int ip6_forward_srcrt;			/* XXX */
int ip6_sourcecheck;			/* XXX */
int ip6_sourcecheck_interval;		/* XXX */
int ip6_ours_check_algorithm;

/* firewall hooks */
ip6_fw_chk_t *ip6_fw_chk_ptr;
ip6_fw_ctl_t *ip6_fw_ctl_ptr;
int ip6_fw_enable = 1;

struct ip6stat ip6stat;

static void ip6_init2 __P((void *));
static struct ip6aux *ip6_setdstifaddr __P((struct mbuf *, struct in6_ifaddr *));
static int ip6_hopopts_input __P((u_int32_t *, u_int32_t *, struct mbuf **, int *));
#ifdef PULLDOWN_TEST
static struct mbuf *ip6_pullexthdr __P((struct mbuf *, size_t, int));
#endif

/*
 * IP6 initialization: fill in IP6 protocol switch table.
 * All protocols not implemented in kernel go to raw IP6 protocol handler.
 */
void
ip6_init()
{
	struct ip6protosw *pr;
	int i;
	struct timeval tv;

#ifdef DIAGNOSTIC
	if (sizeof(struct protosw) != sizeof(struct ip6protosw))
		panic("sizeof(protosw) != sizeof(ip6protosw)");
#endif
	pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
	if (pr == 0)
		panic("ip6_init");
	/* default every next-header slot to the raw IPv6 handler... */
	for (i = 0; i < IPPROTO_MAX; i++)
		ip6_protox[i] = pr - inet6sw;
	/* ...then point each implemented protocol at its own switch entry. */
	for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
	    pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
		if (pr->pr_domain->dom_family == PF_INET6 &&
		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
			ip6_protox[pr->pr_protocol] = pr - inet6sw;
	ip6intrq.ifq_maxlen = ip6qmaxlen;
	mtx_init(&ip6intrq.ifq_mtx, "ip6_inq", NULL, MTX_DEF);
	ip6intrq_present = 1;
	register_netisr(NETISR_IPV6, ip6intr);
	nd6_init();
	frag6_init();
	/*
	 * in many cases, random() here does NOT return random number
	 * as initialization during bootstrap time occur in fixed order.
	 */
	microtime(&tv);
	ip6_flow_seq = random() ^ tv.tv_usec;
	microtime(&tv);
	ip6_desync_factor = (random() ^ tv.tv_usec) % MAX_TEMP_DESYNC_FACTOR;
}

/*
 * Second-stage initialization: runs after route_init() (see SYSINIT below),
 * attaches the loopback interface and starts the ND6/router-renumbering/
 * temporary-address timers.
 */
static void
ip6_init2(dummy)
	void *dummy;
{
	/*
	 * to route local address of p2p link to loopback,
	 * assign loopback address first.
	 */
	in6_ifattach(&loif[0], NULL);

	/* nd6_timer_init */
	callout_init(&nd6_timer_ch, 0);
	callout_reset(&nd6_timer_ch, hz, nd6_timer, NULL);

	/* router renumbering prefix list maintenance */
	callout_init(&in6_rr_timer_ch, 0);
	callout_reset(&in6_rr_timer_ch, hz, in6_rr_timer, NULL);

	/* timer for regeneranation of temporary addresses randomize ID */
	callout_reset(&in6_tmpaddrtimer_ch,
		      (ip6_temp_preferred_lifetime - ip6_desync_factor -
		       ip6_temp_regen_advance) * hz,
		      in6_tmpaddrtimer, NULL);
}

/* cheat */
/* This must be after route_init(), which is now SI_ORDER_THIRD */
SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);

/*
 * IP6 input interrupt handling. Just pass the packet to ip6_input.
*/ void ip6intr() { int s; struct mbuf *m; for (;;) { s = splimp(); IF_DEQUEUE(&ip6intrq, m); splx(s); if (m == 0) return; ip6_input(m); } } extern struct route_in6 ip6_forward_rt; void ip6_input(m) struct mbuf *m; { struct ip6_hdr *ip6; int off = sizeof(struct ip6_hdr), nest; u_int32_t plen; u_int32_t rtalert = ~0; int nxt, ours = 0; struct ifnet *deliverifp = NULL; #ifdef PFIL_HOOKS struct packet_filter_hook *pfh; struct mbuf *m0; int rv; #endif /* PFIL_HOOKS */ #ifdef IPSEC /* * should the inner packet be considered authentic? * see comment in ah4_input(). */ if (m) { m->m_flags &= ~M_AUTHIPHDR; m->m_flags &= ~M_AUTHIPDGM; } #endif /* * make sure we don't have onion peering information into m_aux. */ ip6_delaux(m); /* * mbuf statistics */ if (m->m_flags & M_EXT) { if (m->m_next) ip6stat.ip6s_mext2m++; else ip6stat.ip6s_mext1++; } else { #define M2MMAX (sizeof(ip6stat.ip6s_m2m)/sizeof(ip6stat.ip6s_m2m[0])) if (m->m_next) { if (m->m_flags & M_LOOP) { ip6stat.ip6s_m2m[loif[0].if_index]++; /* XXX */ } else if (m->m_pkthdr.rcvif->if_index < M2MMAX) ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++; else ip6stat.ip6s_m2m[0]++; } else ip6stat.ip6s_m1++; #undef M2MMAX } in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive); ip6stat.ip6s_total++; #ifndef PULLDOWN_TEST /* * L2 bridge code and some other code can return mbuf chain * that does not conform to KAME requirement. too bad. * XXX: fails to join if interface MTU > MCLBYTES. jumbogram? 
*/ if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { struct mbuf *n; MGETHDR(n, M_DONTWAIT, MT_HEADER); if (n) - M_COPY_PKTHDR(n, m); + M_MOVE_PKTHDR(n, m); if (n && m->m_pkthdr.len > MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_freem(n); n = NULL; } } if (n == NULL) { m_freem(m); return; /*ENOBUFS*/ } m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t)); n->m_len = m->m_pkthdr.len; m_freem(m); m = n; } IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /*nothing*/); #endif if (m->m_len < sizeof(struct ip6_hdr)) { struct ifnet *inifp; inifp = m->m_pkthdr.rcvif; if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) { ip6stat.ip6s_toosmall++; in6_ifstat_inc(inifp, ifs6_in_hdrerr); return; } } ip6 = mtod(m, struct ip6_hdr *); if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { ip6stat.ip6s_badvers++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); goto bad; } #ifdef PFIL_HOOKS /* * Run through list of hooks for input packets. If there are any * filters which require that additional packets in the flow are * not fast-forwarded, they must clear the M_CANFASTFWD flag. * Note that filters must _never_ set this flag, as another filter * in the list may have previously cleared it. */ m0 = m; pfh = pfil_hook_get(PFIL_IN, &inet6sw[ip6_protox[IPPROTO_IPV6]].pr_pfh); for (; pfh; pfh = pfh->pfil_link.tqe_next) if (pfh->pfil_func) { rv = pfh->pfil_func(ip6, sizeof(*ip6), m->m_pkthdr.rcvif, 0, &m0); if (rv) return; m = m0; if (m == NULL) return; ip6 = mtod(m, struct ip6_hdr *); } #endif /* PFIL_HOOKS */ ip6stat.ip6s_nxthist[ip6->ip6_nxt]++; /* * Check with the firewall... */ if (ip6_fw_enable && ip6_fw_chk_ptr) { u_short port = 0; /* If ipfw says divert, we have to just drop packet */ /* use port as a dummy argument */ if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) { m_freem(m); m = NULL; } if (!m) return; } /* * Check against address spoofing/corruption. 
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) { /* * XXX: "badscope" is not very suitable for a multicast source. */ ip6stat.ip6s_badscope++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) || IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst)) && (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { ip6stat.ip6s_badscope++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } /* * The following check is not documented in specs. A malicious * party may be able to use IPv4 mapped addr to confuse tcp/udp stack * and bypass security checks (act as if it was from 127.0.0.1 by using * IPv6 src ::ffff:127.0.0.1). Be cautious. * * This check chokes if we are in an SIIT cloud. As none of BSDs * support IPv4-less kernel compilation, we cannot support SIIT * environment at all. So, it makes more sense for us to reject any * malicious packets for non-SIIT environment, than try to do a * partical support for SIIT environment. */ if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { ip6stat.ip6s_badscope++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } #if 0 /* * Reject packets with IPv4 compatible addresses (auto tunnel). * * The code forbids auto tunnel relay case in RFC1933 (the check is * stronger than RFC1933). We may want to re-enable it if mech-xx * is revised to forbid relaying case. 
*/ if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) || IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) { ip6stat.ip6s_badscope++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } #endif /* drop packets if interface ID portion is already filled */ if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src) && ip6->ip6_src.s6_addr16[1]) { ip6stat.ip6s_badscope++; goto bad; } if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) && ip6->ip6_dst.s6_addr16[1]) { ip6stat.ip6s_badscope++; goto bad; } } if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) ip6->ip6_src.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) ip6->ip6_dst.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); #if 0 /* this case seems to be unnecessary. (jinmei, 20010401) */ /* * We use rt->rt_ifp to determine if the address is ours or not. * If rt_ifp is lo0, the address is ours. * The problem here is, rt->rt_ifp for fe80::%lo0/64 is set to lo0, * so any address under fe80::%lo0/64 will be mistakenly considered * local. The special case is supplied to handle the case properly * by actually looking at interface addresses * (using in6ifa_ifpwithaddr). */ if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) != 0 && IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) { if (!in6ifa_ifpwithaddr(m->m_pkthdr.rcvif, &ip6->ip6_dst)) { icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0); /* m is already freed */ return; } ours = 1; deliverifp = m->m_pkthdr.rcvif; goto hbhcheck; } #endif /* * Multicast check */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct in6_multi *in6m = 0; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast); /* * See if we belong to the destination multicast group on the * arrival interface. 
*/ IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m); if (in6m) ours = 1; else if (!ip6_mrouter) { ip6stat.ip6s_notmember++; ip6stat.ip6s_cantforward++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); goto bad; } deliverifp = m->m_pkthdr.rcvif; goto hbhcheck; } /* * Unicast check */ switch (ip6_ours_check_algorithm) { default: /* * XXX: I intentionally broke our indentation rule here, * since this switch-case is just for measurement and * therefore should soon be removed. */ if (ip6_forward_rt.ro_rt != NULL && (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &((struct sockaddr_in6 *)(&ip6_forward_rt.ro_dst))->sin6_addr)) ip6stat.ip6s_forward_cachehit++; else { struct sockaddr_in6 *dst6; if (ip6_forward_rt.ro_rt) { /* route is down or destination is different */ ip6stat.ip6s_forward_cachemiss++; RTFREE(ip6_forward_rt.ro_rt); ip6_forward_rt.ro_rt = 0; } bzero(&ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6)); dst6 = (struct sockaddr_in6 *)&ip6_forward_rt.ro_dst; dst6->sin6_len = sizeof(struct sockaddr_in6); dst6->sin6_family = AF_INET6; dst6->sin6_addr = ip6->ip6_dst; #ifdef SCOPEDROUTING ip6_forward_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &ip6->ip6_dst); #endif rtalloc_ign((struct route *)&ip6_forward_rt, RTF_PRCLONING); } #define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key)) /* * Accept the packet if the forwarding interface to the destination * according to the routing table is the loopback interface, * unless the associated route has a gateway. * Note that this approach causes to accept a packet if there is a * route to the loopback interface for the destination of the packet. * But we think it's even useful in some situations, e.g. when using * a special daemon which wants to intercept the packet. * * XXX: some OSes automatically make a cloned route for the destination * of an outgoing packet. 
If the outgoing interface of the packet * is a loopback one, the kernel would consider the packet to be * accepted, even if we have no such address assinged on the interface. * We check the cloned flag of the route entry to reject such cases, * assuming that route entries for our own addresses are not made by * cloning (it should be true because in6_addloop explicitly installs * the host route). However, we might have to do an explicit check * while it would be less efficient. Or, should we rather install a * reject route for such a case? */ if (ip6_forward_rt.ro_rt && (ip6_forward_rt.ro_rt->rt_flags & (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && #ifdef RTF_WASCLONED !(ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) && #endif #ifdef RTF_CLONED !(ip6_forward_rt.ro_rt->rt_flags & RTF_CLONED) && #endif #if 0 /* * The check below is redundant since the comparison of * the destination and the key of the rtentry has * already done through looking up the routing table. */ IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &rt6_key(ip6_forward_rt.ro_rt)->sin6_addr) #endif ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) { struct in6_ifaddr *ia6 = (struct in6_ifaddr *)ip6_forward_rt.ro_rt->rt_ifa; /* * record address information into m_aux. */ (void)ip6_setdstifaddr(m, ia6); /* * packets to a tentative, duplicated, or somehow invalid * address must not be accepted. */ if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { /* this address is ready */ ours = 1; deliverifp = ia6->ia_ifp; /* correct? */ /* Count the packet in the ip address stats */ ia6->ia_ifa.if_ipackets++; ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; goto hbhcheck; } else { /* address is not ready, so discard the packet. 
*/ nd6log((LOG_INFO, "ip6_input: packet to an unready address %s->%s\n", ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst))); goto bad; } } } /* XXX indentation (see above) */ /* * FAITH(Firewall Aided Internet Translator) */ if (ip6_keepfaith) { if (ip6_forward_rt.ro_rt && ip6_forward_rt.ro_rt->rt_ifp && ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_FAITH) { /* XXX do we need more sanity checks? */ ours = 1; deliverifp = ip6_forward_rt.ro_rt->rt_ifp; /* faith */ goto hbhcheck; } } /* * Now there is no reason to process the packet if it's not our own * and we're not a router. */ if (!ip6_forwarding) { ip6stat.ip6s_cantforward++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); goto bad; } hbhcheck: /* * record address information into m_aux, if we don't have one yet. * note that we are unable to record it, if the address is not listed * as our interface address (e.g. multicast addresses, addresses * within FAITH prefixes and such). */ if (deliverifp && !ip6_getdstifaddr(m)) { struct in6_ifaddr *ia6; ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst); if (ia6) { if (!ip6_setdstifaddr(m, ia6)) { /* * XXX maybe we should drop the packet here, * as we could not provide enough information * to the upper layers. */ } } } /* * Process Hop-by-Hop options header if it's contained. * m may be modified in ip6_hopopts_input(). * If a JumboPayload option is included, plen will also be modified. */ plen = (u_int32_t)ntohs(ip6->ip6_plen); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { struct ip6_hbh *hbh; if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) { #if 0 /*touches NULL pointer*/ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); #endif return; /* m have already been freed */ } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* * if the payload length field is 0 and the next header field * indicates Hop-by-Hop Options header, then a Jumbo Payload * option MUST be included. 
*/ if (ip6->ip6_plen == 0 && plen == 0) { /* * Note that if a valid jumbo payload option is * contained, ip6_hoptops_input() must set a valid * (non-zero) payload length to the variable plen. */ ip6stat.ip6s_badoptions++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); return; } #ifndef PULLDOWN_TEST /* ip6_hopopts_input() ensures that mbuf is contiguous */ hbh = (struct ip6_hbh *)(ip6 + 1); #else IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); if (hbh == NULL) { ip6stat.ip6s_tooshort++; return; } #endif nxt = hbh->ip6h_nxt; /* * accept the packet if a router alert option is included * and we act as an IPv6 router. */ if (rtalert != ~0 && ip6_forwarding) ours = 1; } else nxt = ip6->ip6_nxt; /* * Check that the amount of data in the buffers * is as at least much as the IPv6 header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) { ip6stat.ip6s_tooshort++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); goto bad; } if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) { if (m->m_len == m->m_pkthdr.len) { m->m_len = sizeof(struct ip6_hdr) + plen; m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; } else m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len); } /* * Forward if desirable. */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If we are acting as a multicast router, all * incoming multicast packets are passed to the * kernel-level multicast forwarding function. * The packet is returned (relatively) intact; if * ip6_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. 
*/ if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) { ip6stat.ip6s_cantforward++; m_freem(m); return; } if (!ours) { m_freem(m); return; } } else if (!ours) { ip6_forward(m, 0); return; } ip6 = mtod(m, struct ip6_hdr *); /* * Malicious party may be able to use IPv4 mapped addr to confuse * tcp/udp stack and bypass security checks (act as if it was from * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious. * * For SIIT end node behavior, you may want to disable the check. * However, you will become vulnerable to attacks using IPv4 mapped * source. */ if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { ip6stat.ip6s_badscope++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } /* * Tell launch routine the next header */ ip6stat.ip6s_delivered++; in6_ifstat_inc(deliverifp, ifs6_in_deliver); nest = 0; while (nxt != IPPROTO_DONE) { if (ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) { ip6stat.ip6s_toomanyhdr++; goto bad; } /* * protection against faulty packet - there should be * more sanity checks in header chain processing. */ if (m->m_pkthdr.len < off) { ip6stat.ip6s_tooshort++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); goto bad; } #if 0 /* * do we need to do it for every header? yeah, other * functions can play with it (like re-allocate and copy). */ mhist = ip6_addaux(m); if (mhist && M_TRAILINGSPACE(mhist) >= sizeof(nxt)) { hist = mtod(mhist, caddr_t) + mhist->m_len; bcopy(&nxt, hist, sizeof(nxt)); mhist->m_len += sizeof(nxt); } else { ip6stat.ip6s_toomanyhdr++; goto bad; } #endif #ifdef IPSEC /* * enforce IPsec policy checking if we are seeing last header. * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. 
*/ if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 && ipsec6_in_reject(m, NULL)) { ipsec6stat.in_polvio++; goto bad; } #endif nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } return; bad: m_freem(m); } /* * set/grab in6_ifaddr correspond to IPv6 destination address. * XXX backward compatibility wrapper */ static struct ip6aux * ip6_setdstifaddr(m, ia6) struct mbuf *m; struct in6_ifaddr *ia6; { struct ip6aux *n; n = ip6_addaux(m); if (n) n->ip6a_dstia6 = ia6; return n; /* NULL if failed to set */ } struct in6_ifaddr * ip6_getdstifaddr(m) struct mbuf *m; { struct ip6aux *n; n = ip6_findaux(m); if (n) return n->ip6a_dstia6; else return NULL; } /* * Hop-by-Hop options header processing. If a valid jumbo payload option is * included, the real payload length will be stored in plenp. */ static int ip6_hopopts_input(plenp, rtalertp, mp, offp) u_int32_t *plenp; u_int32_t *rtalertp; /* XXX: should be stored more smart way */ struct mbuf **mp; int *offp; { struct mbuf *m = *mp; int off = *offp, hbhlen; struct ip6_hbh *hbh; u_int8_t *opt; /* validation of the length of the header */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); hbhlen = (hbh->ip6h_len + 1) << 3; IP6_EXTHDR_CHECK(m, off, hbhlen, -1); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); if (hbh == NULL) { ip6stat.ip6s_tooshort++; return -1; } hbhlen = (hbh->ip6h_len + 1) << 3; IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), hbhlen); if (hbh == NULL) { ip6stat.ip6s_tooshort++; return -1; } #endif off += hbhlen; hbhlen -= sizeof(struct ip6_hbh); opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh); if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh), hbhlen, rtalertp, plenp) < 0) return(-1); *offp = off; *mp = m; return(0); } /* * Search header for all Hop-by-hop options and process each option. 
 * This function is separate from ip6_hopopts_input() in order to
 * handle a case where the sending node itself process its hop-by-hop
 * options header. In such a case, the function is called from ip6_output().
 *
 * The function assumes that hbh header is located right after the IPv6 header
 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
 * opthead + hbhlen is located in continuous memory region.
 */
int
ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
	struct mbuf *m;
	u_int8_t *opthead;
	int hbhlen;
	u_int32_t *rtalertp;
	u_int32_t *plenp;
{
	struct ip6_hdr *ip6;
	int optlen = 0;
	u_int8_t *opt = opthead;
	u_int16_t rtalert_val;
	u_int32_t jumboplen;
	const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);

	/* walk the TLV-encoded options; optlen advances opt each iteration */
	for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
		switch (*opt) {
		case IP6OPT_PAD1:
			/* Pad1 is a single zero octet with no length field */
			optlen = 1;
			break;
		case IP6OPT_PADN:
			if (hbhlen < IP6OPT_MINLEN) {
				ip6stat.ip6s_toosmall++;
				goto bad;
			}
			optlen = *(opt + 1) + 2;
			break;
		case IP6OPT_RTALERT:
			/* XXX may need check for alignment */
			if (hbhlen < IP6OPT_RTALERT_LEN) {
				ip6stat.ip6s_toosmall++;
				goto bad;
			}
			if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
				/* XXX stat */
				icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 1 - opthead);
				return(-1);
			}
			optlen = IP6OPT_RTALERT_LEN;
			/* value may be unaligned in the packet; bcopy it out */
			bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
			*rtalertp = ntohs(rtalert_val);
			break;
		case IP6OPT_JUMBO:
			/* XXX may need check for alignment */
			if (hbhlen < IP6OPT_JUMBO_LEN) {
				ip6stat.ip6s_toosmall++;
				goto bad;
			}
			if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
				/* XXX stat */
				icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 1 - opthead);
				return(-1);
			}
			optlen = IP6OPT_JUMBO_LEN;

			/*
			 * IPv6 packets that have non 0 payload length
			 * must not contain a jumbo payload option.
			 */
			ip6 = mtod(m, struct ip6_hdr *);
			if (ip6->ip6_plen) {
				ip6stat.ip6s_badoptions++;
				icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt - opthead);
				return(-1);
			}

			/*
			 * We may see jumbolen in unaligned location, so
			 * we'd need to perform bcopy().
			 */
			bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
			jumboplen = (u_int32_t)htonl(jumboplen);

#if 1
			/*
			 * if there are multiple jumbo payload options,
			 * *plenp will be non-zero and the packet will be
			 * rejected.
			 * the behavior may need some debate in ipngwg -
			 * multiple options does not make sense, however,
			 * there's no explicit mention in specification.
			 */
			if (*plenp != 0) {
				ip6stat.ip6s_badoptions++;
				icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 2 - opthead);
				return(-1);
			}
#endif

			/*
			 * jumbo payload length must be larger than 65535.
			 */
			if (jumboplen <= IPV6_MAXPACKET) {
				ip6stat.ip6s_badoptions++;
				icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 2 - opthead);
				return(-1);
			}
			*plenp = jumboplen;

			break;
		default:		/* unknown option */
			if (hbhlen < IP6OPT_MINLEN) {
				ip6stat.ip6s_toosmall++;
				goto bad;
			}
			optlen = ip6_unknown_opt(opt, m,
			    erroff + opt - opthead);
			if (optlen == -1)
				return(-1);
			optlen += 2;
			break;
		}
	}

	return(0);

  bad:
	m_freem(m);
	return(-1);
}

/*
 * Unknown option processing.
 * The third argument `off' is the offset from the IPv6 header to the option,
 * which is necessary if the IPv6 header the and option header and IPv6 header
 * is not continuous in order to return an ICMPv6 error.
*/ int ip6_unknown_opt(optp, m, off) u_int8_t *optp; struct mbuf *m; int off; { struct ip6_hdr *ip6; switch (IP6OPT_TYPE(*optp)) { case IP6OPT_TYPE_SKIP: /* ignore the option */ return((int)*(optp + 1)); case IP6OPT_TYPE_DISCARD: /* silently discard */ m_freem(m); return(-1); case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */ ip6stat.ip6s_badoptions++; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); return(-1); case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */ ip6stat.ip6s_badoptions++; ip6 = mtod(m, struct ip6_hdr *); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || (m->m_flags & (M_BCAST|M_MCAST))) m_freem(m); else icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); return(-1); } m_freem(m); /* XXX: NOTREACHED */ return(-1); } /* * Create the "control" list for this pcb. * The function will not modify mbuf chain at all. * * with KAME mbuf chain restriction: * The routine will be called from upper layer handlers like tcp6_input(). * Thus the routine assumes that the caller (tcp6_input) have already * called IP6_EXTHDR_CHECK() and all the extension headers are located in the * very first mbuf on the mbuf chain. */ void ip6_savecontrol(in6p, mp, ip6, m) struct inpcb *in6p; struct mbuf **mp; struct ip6_hdr *ip6; struct mbuf *m; { #if __FreeBSD_version >= 500000 struct thread *td = curthread; /* XXX */ #else struct proc *td = curproc; /* XXX */ #endif int privileged = 0; int rthdr_exist = 0; if (td && !suser(td)) privileged++; #ifdef SO_TIMESTAMP if ((in6p->in6p_socket->so_options & SO_TIMESTAMP) != 0) { struct timeval tv; microtime(&tv); *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), SCM_TIMESTAMP, SOL_SOCKET); if (*mp) { mp = &(*mp)->m_next; } } #endif /* RFC 2292 sec. 
5 */ if ((in6p->in6p_flags & IN6P_PKTINFO) != 0) { struct in6_pktinfo pi6; bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr)); if (IN6_IS_SCOPE_LINKLOCAL(&pi6.ipi6_addr)) pi6.ipi6_addr.s6_addr16[1] = 0; pi6.ipi6_ifindex = (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0; *mp = sbcreatecontrol((caddr_t) &pi6, sizeof(struct in6_pktinfo), IPV6_PKTINFO, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } if ((in6p->in6p_flags & IN6P_HOPLIMIT) != 0) { int hlim = ip6->ip6_hlim & 0xff; *mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int), IPV6_HOPLIMIT, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } /* * IPV6_HOPOPTS socket option. We require super-user privilege * for the option, but it might be too strict, since there might * be some hop-by-hop options which can be returned to normal user. * See RFC 2292 section 6. */ if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0 && privileged) { /* * Check if a hop-by-hop options header is contatined in the * received packet, and if so, store the options as ancillary * data. Note that a hop-by-hop options header must be * just after the IPv6 header, which fact is assured through * the IPv6 input processing. */ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { struct ip6_hbh *hbh; int hbhlen = 0; #ifdef PULLDOWN_TEST struct mbuf *ext; #endif #ifndef PULLDOWN_TEST hbh = (struct ip6_hbh *)(ip6 + 1); hbhlen = (hbh->ip6h_len + 1) << 3; #else ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr), ip6->ip6_nxt); if (ext == NULL) { ip6stat.ip6s_tooshort++; return; } hbh = mtod(ext, struct ip6_hbh *); hbhlen = (hbh->ip6h_len + 1) << 3; if (hbhlen != ext->m_len) { m_freem(ext); ip6stat.ip6s_tooshort++; return; } #endif /* * XXX: We copy whole the header even if a jumbo * payload option is included, which option is to * be removed before returning in the RFC 2292. * Note: this constraint is removed in 2292bis. 
*/ *mp = sbcreatecontrol((caddr_t)hbh, hbhlen, IPV6_HOPOPTS, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; #ifdef PULLDOWN_TEST m_freem(ext); #endif } } /* IPV6_DSTOPTS and IPV6_RTHDR socket options */ if ((in6p->in6p_flags & (IN6P_DSTOPTS | IN6P_RTHDRDSTOPTS)) != 0) { int proto, off, nxt; /* * go through the header chain to see if a routing header is * contained in the packet. We need this information to store * destination options headers (if any) properly. * XXX: performance issue. We should record this info when * processing extension headers in incoming routine. * (todo) use m_aux? */ proto = IPPROTO_IPV6; off = 0; nxt = -1; while (1) { int newoff; newoff = ip6_nexthdr(m, off, proto, &nxt); if (newoff < 0) break; if (newoff < off) /* invalid, check for safety */ break; if ((proto = nxt) == IPPROTO_ROUTING) { rthdr_exist = 1; break; } off = newoff; } } if ((in6p->in6p_flags & (IN6P_RTHDR | IN6P_DSTOPTS | IN6P_RTHDRDSTOPTS)) != 0) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr); /* * Search for destination options headers or routing * header(s) through the header chain, and stores each * header as ancillary data. * Note that the order of the headers remains in * the chain of ancillary data. */ while (1) { /* is explicit loop prevention necessary? */ struct ip6_ext *ip6e = NULL; int elen; #ifdef PULLDOWN_TEST struct mbuf *ext = NULL; #endif /* * if it is not an extension header, don't try to * pull it from the chain. */ switch (nxt) { case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? 
*/ break; default: goto loopend; } #ifndef PULLDOWN_TEST if (off + sizeof(*ip6e) > m->m_len) goto loopend; ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); if (nxt == IPPROTO_AH) elen = (ip6e->ip6e_len + 2) << 2; else elen = (ip6e->ip6e_len + 1) << 3; if (off + elen > m->m_len) goto loopend; #else ext = ip6_pullexthdr(m, off, nxt); if (ext == NULL) { ip6stat.ip6s_tooshort++; return; } ip6e = mtod(ext, struct ip6_ext *); if (nxt == IPPROTO_AH) elen = (ip6e->ip6e_len + 2) << 2; else elen = (ip6e->ip6e_len + 1) << 3; if (elen != ext->m_len) { m_freem(ext); ip6stat.ip6s_tooshort++; return; } #endif switch (nxt) { case IPPROTO_DSTOPTS: if ((in6p->in6p_flags & IN6P_DSTOPTS) == 0) break; /* * We also require super-user privilege for * the option. * See the comments on IN6_HOPOPTS. */ if (!privileged) break; *mp = sbcreatecontrol((caddr_t)ip6e, elen, IPV6_DSTOPTS, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; break; case IPPROTO_ROUTING: if (!in6p->in6p_flags & IN6P_RTHDR) break; *mp = sbcreatecontrol((caddr_t)ip6e, elen, IPV6_RTHDR, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; break; case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? */ break; default: /* * other cases have been filtered in the above. * none will visit this case. here we supply * the code just in case (nxt overwritten or * other cases). */ #ifdef PULLDOWN_TEST m_freem(ext); #endif goto loopend; } /* proceed with the next header. */ off += elen; nxt = ip6e->ip6e_nxt; ip6e = NULL; #ifdef PULLDOWN_TEST m_freem(ext); ext = NULL; #endif } loopend: ; } } #ifdef PULLDOWN_TEST /* * pull single extension header from mbuf chain. returns single mbuf that * contains the result, or NULL on error. */ static struct mbuf * ip6_pullexthdr(m, off, nxt) struct mbuf *m; size_t off; int nxt; { struct ip6_ext ip6e; size_t elen; struct mbuf *n; #ifdef DIAGNOSTIC switch (nxt) { case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? 
 */
		break;
	default:
		printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
	}
#endif

	/* Read the generic extension-header prefix to learn its length. */
	m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
	if (nxt == IPPROTO_AH)
		elen = (ip6e.ip6e_len + 2) << 2;	/* AH length unit is 4 bytes */
	else
		elen = (ip6e.ip6e_len + 1) << 3;	/* others: 8-byte units */

	/* Grab an mbuf; upgrade to a cluster if the header won't fit in MLEN. */
	MGET(n, M_DONTWAIT, MT_DATA);
	if (n && elen >= MLEN) {
		MCLGET(n, M_DONTWAIT);
		if ((n->m_flags & M_EXT) == 0) {
			m_free(n);
			n = NULL;
		}
	}
	if (!n)
		return NULL;

	n->m_len = 0;
	/*
	 * NOTE(review): ">=" rejects a header that would fit exactly into
	 * the trailing space; ">" would suffice.  Conservative, not a bug —
	 * confirm before changing.
	 */
	if (elen >= M_TRAILINGSPACE(n)) {
		m_free(n);
		return NULL;
	}

	/* Copy the whole extension header into the single fresh mbuf. */
	m_copydata(m, off, elen, mtod(n, caddr_t));
	n->m_len = elen;
	return n;
}
#endif

/*
 * Get pointer to the previous header followed by the header
 * currently processed.
 * XXX: This function supposes that
 *	M includes all headers,
 *	the next header field and the header length field of each header
 *	are valid, and
 *	the sum of each header length equals to OFF.
 * Because of these assumptions, this function must be called very
 * carefully.  Moreover, it will not be used in the near future when
 * we develop `neater' mechanism to process extension headers.
 */
char *
ip6_get_prevhdr(m, off)
	struct mbuf *m;
	int off;
{
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);

	/*
	 * If OFF is the end of the fixed IPv6 header, the "previous" next
	 * header field is the one inside the IPv6 header itself.
	 * NOTE(review): returns &ip6->ip6_nxt (u_int8_t *) through a char *
	 * return type — relies on an implicit pointer conversion; most
	 * compilers only warn.  Verify callers treat it as a byte pointer.
	 */
	if (off == sizeof(struct ip6_hdr))
		return(&ip6->ip6_nxt);
	else {
		int len, nxt;
		struct ip6_ext *ip6e = NULL;

		/*
		 * Walk the extension-header chain, accumulating each
		 * header's length, until we arrive at OFF; ip6e then
		 * points at the header immediately preceding OFF.
		 */
		nxt = ip6->ip6_nxt;
		len = sizeof(struct ip6_hdr);
		while (len < off) {
			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len);

			switch (nxt) {
			case IPPROTO_FRAGMENT:
				/* fragment header has a fixed size */
				len += sizeof(struct ip6_frag);
				break;
			case IPPROTO_AH:
				/* AH length field counts 4-byte units */
				len += (ip6e->ip6e_len + 2) << 2;
				break;
			default:
				/* generic extension headers: 8-byte units */
				len += (ip6e->ip6e_len + 1) << 3;
				break;
			}
			nxt = ip6e->ip6e_nxt;
		}
		if (ip6e)
			return(&ip6e->ip6e_nxt);
		else
			return NULL;	/* off <= sizeof(ip6_hdr): nothing walked */
	}
}

/*
 * get next header offset.  m will be retained.
 */
int
ip6_nexthdr(m, off, proto, nxtp)
	struct mbuf *m;
	int off;
	int proto;
	int *nxtp;
{
	struct ip6_hdr ip6;
	struct ip6_ext ip6e;
	struct ip6_frag fh;

	/* just in case */
	if (m == NULL)
		panic("ip6_nexthdr: m == NULL");
	if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off)
		return -1;

	/*
	 * Parse the header identified by PROTO at offset OFF; on success
	 * return the offset just past it and store its next-header value
	 * in *nxtp (if nxtp is non-NULL).  Return -1 when the chain cannot
	 * be parsed further.  Each case bounds-checks against the packet
	 * length before copying header bytes out of the chain.
	 */
	switch (proto) {
	case IPPROTO_IPV6:
		if (m->m_pkthdr.len < off + sizeof(ip6))
			return -1;
		m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
		if (nxtp)
			*nxtp = ip6.ip6_nxt;
		off += sizeof(ip6);
		return off;

	case IPPROTO_FRAGMENT:
		/*
		 * terminate parsing if it is not the first fragment,
		 * it does not make sense to parse through it.
		 */
		if (m->m_pkthdr.len < off + sizeof(fh))
			return -1;
		m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
		/* IP6F_OFF_MASK == 0 means this is the first fragment */
		if ((ntohs(fh.ip6f_offlg) & IP6F_OFF_MASK) != 0)
			return -1;
		if (nxtp)
			*nxtp = fh.ip6f_nxt;
		off += sizeof(struct ip6_frag);
		return off;

	case IPPROTO_AH:
		if (m->m_pkthdr.len < off + sizeof(ip6e))
			return -1;
		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
		if (nxtp)
			*nxtp = ip6e.ip6e_nxt;
		/* AH length field counts 4-byte units, excluding first 2 */
		off += (ip6e.ip6e_len + 2) << 2;
		return off;

	case IPPROTO_HOPOPTS:
	case IPPROTO_ROUTING:
	case IPPROTO_DSTOPTS:
		if (m->m_pkthdr.len < off + sizeof(ip6e))
			return -1;
		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
		if (nxtp)
			*nxtp = ip6e.ip6e_nxt;
		/* generic extension headers: 8-byte units */
		off += (ip6e.ip6e_len + 1) << 3;
		return off;

	case IPPROTO_NONE:
	case IPPROTO_ESP:
	case IPPROTO_IPCOMP:
		/* give up */
		return -1;

	default:
		/* an upper-layer protocol: nothing more to walk */
		return -1;
	}

	return -1;	/* not reached */
}

/*
 * get offset for the last header in the chain.  m will be kept untainted.
 */
int
ip6_lasthdr(m, off, proto, nxtp)
	struct mbuf *m;
	int off;
	int proto;
	int *nxtp;
{
	int newoff;
	int nxt;

	/* caller may not care about the last next-header value */
	if (!nxtp) {
		nxt = -1;
		nxtp = &nxt;
	}
	/*
	 * Repeatedly advance with ip6_nexthdr() until it refuses to go
	 * further; OFF then points at the last (upper-layer) header.
	 * Decreasing or stalled offsets are treated as malformed input.
	 */
	while (1) {
		newoff = ip6_nexthdr(m, off, proto, nxtp);
		if (newoff < 0)
			return off;
		else if (newoff < off)
			return -1;	/* invalid */
		else if (newoff == off)
			return newoff;

		off = newoff;
		proto = *nxtp;
	}
}

/*
 * Attach (or reuse) the IPv6-input aux tag on mbuf M and return a pointer
 * to its zeroed payload, or NULL if tag allocation failed.
 */
struct ip6aux *
ip6_addaux(m)
	struct mbuf *m;
{
	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);

	if (!tag) {
		tag = m_tag_get(PACKET_TAG_IPV6_INPUT,
				sizeof (struct ip6aux),
				M_DONTWAIT);
		if (tag)
			m_tag_prepend(m, tag);
	}
	/* payload lives immediately after the tag header; reset it */
	if (tag)
		bzero(tag+1, sizeof (struct ip6aux));
	return tag ? (struct ip6aux*)(tag+1) : NULL;
}

/* Return M's IPv6-input aux payload if present, else NULL (no allocation). */
struct ip6aux *
ip6_findaux(m)
	struct mbuf *m;
{
	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);

	return tag ? (struct ip6aux*)(tag+1) : NULL;
}

/* Remove M's IPv6-input aux tag, if any. */
void
ip6_delaux(m)
	struct mbuf *m;
{
	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);

	if (tag)
		m_tag_delete(m, tag);
}

/*
 * System control for IP6
 */

u_char	inet6ctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		0,		0,
	ENOPROTOOPT
};
Index: head/sys/netinet6/ip6_output.c
===================================================================
--- head/sys/netinet6/ip6_output.c	(revision 108465)
+++ head/sys/netinet6/ip6_output.c	(revision 108466)
@@ -1,2616 +1,2616 @@
/*	$FreeBSD$	*/
/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include "opt_ip6fw.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_pfil_hooks.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PFIL_HOOKS #include #endif #include #include #include #include #include #include #include #include #ifdef IPSEC #include #ifdef INET6 #include #endif #include #endif /* IPSEC */ #ifdef FAST_IPSEC #include #include #include #endif /* FAST_IPSEC */ #include #include #include static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options"); struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *sopt)); static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *)); static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **)); static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int)); static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int, struct ip6_frag **)); static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t)); static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *)); /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_rmx.rmx_mtu. 
*/ int ip6_output(m0, opt, ro, flags, im6o, ifpp, inp) struct mbuf *m0; struct ip6_pktopts *opt; struct route_in6 *ro; int flags; struct ip6_moptions *im6o; struct ifnet **ifpp; /* XXX: just for statistics */ struct inpcb *inp; { struct ip6_hdr *ip6, *mhip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; int hlen, tlen, len, off; struct route_in6 ip6route; struct sockaddr_in6 *dst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr finaldst; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int needipsec = 0; #ifdef PFIL_HOOKS struct packet_filter_hook *pfh; struct mbuf *m1; int rv; #endif /* PFIL_HOOKS */ #ifdef IPSEC int needipsectun = 0; struct secpolicy *sp = NULL; struct socket *so = inp ? inp->inp_socket : NULL; ip6 = mtod(m, struct ip6_hdr *); #endif /* IPSEC */ #ifdef FAST_IPSEC int needipsectun = 0; struct secpolicy *sp = NULL; ip6 = mtod(m, struct ip6_hdr *); #endif /* FAST_IPSEC */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (0) bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } #ifdef IPSEC /* get a security policy for this packet */ if (so == NULL) sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error); else sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); if (sp == NULL) { ipsec6stat.out_inval++; goto freehdrs; } error = 0; /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: /* * This packet is just discarded. 
*/ ipsec6stat.out_polvio++; goto freehdrs; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ needipsec = 0; break; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* acquire a policy */ error = key_spdacquire(sp); goto freehdrs; } needipsec = 1; break; case IPSEC_POLICY_ENTRUST: default: printf("ip6_output: Invalid policy found. %d\n", sp->policy); } #endif /* IPSEC */ #ifdef FAST_IPSEC /* get a security policy for this packet */ if (inp == NULL) sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error); else sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error); if (sp == NULL) { newipsecstat.ips_out_inval++; goto freehdrs; } error = 0; /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: /* * This packet is just discarded. */ newipsecstat.ips_out_polvio++; goto freehdrs; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ needipsec = 0; break; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* acquire a policy */ error = key_spdacquire(sp); goto freehdrs; } needipsec = 1; break; case IPSEC_POLICY_ENTRUST: default: printf("ip6_output: Invalid policy found. %d\n", sp->policy); } #endif /* FAST_IPSEC */ /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here. do that later. */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If we need IPsec, or there is at least one extension header, * separate IP6 header from the payload. 
*/ if ((needipsec || optlen) && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. * Here we have, on "m" * IPv6 payload * and we insert headers accordingly. Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ { u_char *nexthdrp = &ip6->ip6_nxt; struct mbuf *mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (0) /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). 
*/ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); #if defined(IPSEC) || defined(FAST_IPSEC) if (!needipsec) goto skip_ipsec2; /* * pointers after IPsec headers are not valid any more. * other pointers need a great care too. * (IPsec routines should not mangle mbufs prior to AH/ESP) */ exthdrs.ip6e_dest2 = NULL; { struct ip6_rthdr *rh = NULL; int segleft_org = 0; struct ipsec_output_state state; if (exthdrs.ip6e_rthdr) { rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *); segleft_org = rh->ip6r_segleft; rh->ip6r_segleft = 0; } bzero(&state, sizeof(state)); state.m = m; error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags, &needipsectun); m = state.m; if (error) { /* mbuf is already reclaimed in ipsec6_output_trans. */ m = NULL; switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip6_output (ipsec): error code %d\n", error); /* fall through */ case ENOENT: /* don't show these error codes to the user */ error = 0; break; } goto bad; } if (exthdrs.ip6e_rthdr) { /* ah6_output doesn't modify mbuf chain */ rh->ip6r_segleft = segleft_org; } } skip_ipsec2:; #endif } /* * If there is a routing header, replace destination address field * with the first hop of the routing header. */ if (exthdrs.ip6e_rthdr) { struct ip6_rthdr *rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *)); struct ip6_rthdr0 *rh0; finaldst = ip6->ip6_dst; switch (rh->ip6r_type) { case IPV6_RTHDR_TYPE_0: rh0 = (struct ip6_rthdr0 *)rh; ip6->ip6_dst = rh0->ip6r0_addr[0]; bcopy((caddr_t)&rh0->ip6r0_addr[1], (caddr_t)&rh0->ip6r0_addr[0], sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1) ); rh0->ip6r0_addr[rh0->ip6r0_segleft - 1] = finaldst; break; default: /* is it possible? 
*/ error = EINVAL; goto bad; } } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_DADOUTPUT) == 0) { error = EOPNOTSUPP; ip6stat.ip6s_badscope++; goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; ip6stat.ip6s_badscope++; goto bad; } ip6stat.ip6s_localout++; /* * Route packet. */ if (ro == 0) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; /* * If there is a cached route, * check that it is to the same destination * and is still up. If not, free it and try again. */ if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || dst->sin6_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == 0) { bzero(dst, sizeof(*dst)); dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_addr = ip6->ip6_dst; #ifdef SCOPEDROUTING /* XXX: sin6_scope_id should already be fixed at this point */ if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) dst->sin6_scope_id = ntohs(dst->sin6_addr.s6_addr16[1]); #endif } #if defined(IPSEC) || defined(FAST_IPSEC) if (needipsec && needipsectun) { struct ipsec_output_state state; /* * All the extension headers will become inaccessible * (since they can be encrypted). * Don't panic, we need no more updates to extension headers * on inner IPv6 packet (since they are now encapsulated). * * IPv6 [ESP|AH] IPv6 [extension headers] payload */ bzero(&exthdrs, sizeof(exthdrs)); exthdrs.ip6e_ip6 = m; bzero(&state, sizeof(state)); state.m = m; state.ro = (struct route *)ro; state.dst = (struct sockaddr *)dst; error = ipsec6_output_tunnel(&state, sp, flags); m = state.m; ro = (struct route_in6 *)state.ro; dst = (struct sockaddr_in6 *)state.dst; if (error) { /* mbuf is already reclaimed in ipsec6_output_tunnel. 
*/ m0 = m = NULL; m = NULL; switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip6_output (ipsec): error code %d\n", error); /* fall through */ case ENOENT: /* don't show these error codes to the user */ error = 0; break; } goto bad; } exthdrs.ip6e_ip6 = m; } #endif /* IPSEC */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* Unicast */ #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* xxx * interface selection comes here * if an interface is specified from an upper layer, * ifp must point it. */ if (ro->ro_rt == 0) { /* * non-bsdi always clone routes, if parent is * PRF_CLONING. */ rtalloc((struct route *)ro); } if (ro->ro_rt == 0) { ip6stat.ip6s_noroute++; error = EHOSTUNREACH; /* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */ goto bad; } ia = ifatoia6(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway; m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ in6_ifstat_inc(ifp, ifs6_out_request); /* * Check if the outgoing interface conflicts with * the interface specified by ifi6_ifindex (if specified). * Note that loopback interface is always okay. * (this may happen when we are sending a packet to one of * our own addresses.) 
*/ if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex) { if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) { ip6stat.ip6s_noroute++; in6_ifstat_inc(ifp, ifs6_out_discard); error = EHOSTUNREACH; goto bad; } } if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; } else { /* Multicast */ struct in6_multi *in6m; m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; /* * See if the caller provided any multicast options */ ifp = NULL; if (im6o != NULL) { ip6->ip6_hlim = im6o->im6o_multicast_hlim; if (im6o->im6o_multicast_ifp != NULL) ifp = im6o->im6o_multicast_ifp; } else ip6->ip6_hlim = ip6_defmcasthlim; /* * See if the caller provided the outgoing interface * as an ancillary data. * Boundary check for ifindex is assumed to be already done. */ if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex) ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex); /* * If the destination is a node-local scope multicast, * the packet should be loop-backed only. */ if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) { /* * If the outgoing interface is already specified, * it should be a loopback interface. */ if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) { ip6stat.ip6s_badscope++; error = ENETUNREACH; /* XXX: better error? */ /* XXX correct ifp? */ in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } else { ifp = &loif[0]; } } if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; /* * If caller did not provide an interface lookup a * default in the routing table. This is either a * default for the speicfied group (i.e. a host * route), or a multicast default (a route for the * ``net'' ff00::/8). 
*/ if (ifp == NULL) { if (ro->ro_rt == 0) { ro->ro_rt = rtalloc1((struct sockaddr *) &ro->ro_dst, 0, 0UL); } if (ro->ro_rt == 0) { ip6stat.ip6s_noroute++; error = EHOSTUNREACH; /* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */ goto bad; } ia = ifatoia6(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; } if ((flags & IPV6_FORWARDING) == 0) in6_ifstat_inc(ifp, ifs6_out_request); in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { ip6stat.ip6s_noroute++; in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m); if (in6m != NULL && (im6o == NULL || im6o->im6o_multicast_loop)) { /* * If we belong to the destination multicast group * on the outgoing interface, and the caller did not * forbid loopback, loop back a copy. */ ip6_mloopback(ifp, m, dst); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) { m_freem(m); goto done; } } /* * Fill the outgoing inteface to tell the upper layer * to increment per-interface statistics. 
*/ if (ifpp) *ifpp = ifp; /* * Determine path MTU. */ if (ro_pmtu != ro) { /* The first hop and the final destination may differ. */ struct sockaddr_in6 *sin6_fin = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (ro_pmtu->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr, &finaldst))) { RTFREE(ro_pmtu->ro_rt); ro_pmtu->ro_rt = (struct rtentry *)0; } if (ro_pmtu->ro_rt == 0) { bzero(sin6_fin, sizeof(*sin6_fin)); sin6_fin->sin6_family = AF_INET6; sin6_fin->sin6_len = sizeof(struct sockaddr_in6); sin6_fin->sin6_addr = finaldst; rtalloc((struct route *)ro_pmtu); } } if (ro_pmtu->ro_rt != NULL) { u_int32_t ifmtu = nd_ifinfo[ifp->if_index].linkmtu; mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; if (mtu > ifmtu || mtu == 0) { /* * The MTU on the route is larger than the MTU on * the interface! This shouldn't happen, unless the * MTU of the interface has been changed after the * interface was brought up. Change the MTU in the * route to match the interface MTU (as long as the * field isn't locked). * * if MTU on the route is 0, we need to fix the MTU. * this case happens with path MTU discovery timeouts. */ mtu = ifmtu; if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */ } } else { mtu = nd_ifinfo[ifp->if_index].linkmtu; } /* * advanced API (IPV6_USE_MIN_MTU) overrides mtu setting */ if ((flags & IPV6_MINMTU) != 0 && mtu > IPV6_MMTU) mtu = IPV6_MMTU; /* Fake scoped addresses */ if ((ifp->if_flags & IFF_LOOPBACK) != 0) { /* * If source or destination address is a scoped address, and * the packet is going to be sent to a loopback interface, * we should keep the original interface. */ /* * XXX: this is a very experimental and temporary solution. * We eventually have sockaddr_in6 and use the sin6_scope_id * field of the structure here. * We rely on the consistency between two scope zone ids * of source and destination, which should already be assured. 
* Larger scopes than link will be supported in the future. */ origifp = NULL; if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1])); else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1])); /* * XXX: origifp can be NULL even in those two cases above. * For example, if we remove the (only) link-local address * from the loopback interface, and try to send a link-local * address without link-id information. Then the source * address is ::1, and the destination address is the * link-local address with its s6_addr16[1] being zero. * What is worse, if the packet goes to the loopback interface * by a default rejected route, the null pointer would be * passed to looutput, and the kernel would hang. * The following last resort would prevent such disaster. */ if (origifp == NULL) origifp = ifp; } else origifp = ifp; #ifndef SCOPEDROUTING /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); #endif /* * Check with the firewall... */ if (ip6_fw_enable && ip6_fw_chk_ptr) { u_short port = 0; m->m_pkthdr.rcvif = NULL; /* XXX */ /* If ipfw says divert, we have to just drop packet */ if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) { m_freem(m); goto done; } if (!m) { error = EACCES; goto done; } } /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) 
*/ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy1; /* XXX unused */ u_int32_t dummy2; /* XXX unused */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not continuous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * continuous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy1, &dummy2) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } #ifdef PFIL_HOOKS /* * Run through list of hooks for output packets. */ m1 = m; pfh = pfil_hook_get(PFIL_OUT, &inet6sw[ip6_protox[IPPROTO_IPV6]].pr_pfh); for (; pfh; pfh = pfh->pfil_link.tqe_next) if (pfh->pfil_func) { rv = pfh->pfil_func(ip6, sizeof(*ip6), ifp, 1, &m1); if (rv) { error = EHOSTUNREACH; goto done; } m = m1; if (m == NULL) goto done; ip6 = mtod(m, struct ip6_hdr *); } #endif /* PFIL_HOOKS */ /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. */ tlen = m->m_pkthdr.len; if (tlen <= mtu #ifdef notyet /* * On any link that cannot convey a 1280-octet packet in one piece, * link-specific fragmentation and reassembly must be provided at * a layer below IPv6. [RFC 2460, sec.5] * Thus if the interface has ability of link-level fragmentation, * we can just send the packet even if the packet size is * larger than the link's MTU. * XXX: IFF_FRAGMENTABLE (or such) flag has not been defined yet... */ || ifp->if_flags & IFF_FRAGMENTABLE #endif ) { /* Record statistics for this interface address. 
*/ if (ia && !(flags & IPV6_FORWARDING)) { ia->ia_ifa.if_opackets++; ia->ia_ifa.if_obytes += m->m_pkthdr.len; } #ifdef IPSEC /* clean ipsec history once it goes out of the node */ ipsec_delaux(m); #endif error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); goto done; } else if (mtu < IPV6_MMTU) { /* * note that path MTU is never less than IPV6_MMTU * (see icmp6_input). */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { struct mbuf **mnext, *m_frgpart; struct ip6_frag *ip6f; u_int32_t id = htonl(ip6_id++); u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } mnext = &m->m_nextpkt; /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto * chain. 
*/ m0 = m; for (off = hlen; off < tlen; off += len) { MGETHDR(m, M_DONTWAIT, MT_HEADER); if (!m) { error = ENOBUFS; ip6stat.ip6s_odropped++; goto sendorfree; } m->m_pkthdr.rcvif = NULL; m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { ip6stat.ip6s_odropped++; goto sendorfree; } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + len >= tlen) len = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(len + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copy(m0, off, len)) == 0) { error = ENOBUFS; ip6stat.ip6s_odropped++; goto sendorfree; } m_cat(m, m_frgpart); m->m_pkthdr.len = len + hlen + sizeof(*ip6f); m->m_pkthdr.rcvif = (struct ifnet *)0; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; ip6stat.ip6s_ofragments++; in6_ifstat_inc(ifp, ifs6_out_fragcreat); } in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbages. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. 
*/
			/* Account the fragment to the source address, then
			 * hand it to ND6 for link-layer resolution/output. */
			if (ia) {
				ia->ia_ifa.if_opackets++;
				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
			}
#ifdef IPSEC
			/* clean ipsec history once it goes out of the node */
			ipsec_delaux(m);
#endif
			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
		} else
			m_freem(m);	/* an earlier fragment failed; drop the rest */
	}

	if (error == 0)
		ip6stat.ip6s_fragmented++;

done:
	/* Release any route we allocated locally (never a caller-supplied one). */
	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
		RTFREE(ro->ro_rt);
	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
		RTFREE(ro_pmtu->ro_rt);
	}
#ifdef IPSEC
	if (sp != NULL)
		key_freesp(sp);
#endif /* IPSEC */
#ifdef FAST_IPSEC
	if (sp != NULL)
		KEY_FREESP(&sp);
#endif /* FAST_IPSEC */
	return(error);

freehdrs:
	/* Error before the extension headers were linked into the packet:
	 * free each one individually, then fall into the generic drop path. */
	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
	m_freem(exthdrs.ip6e_dest1);
	m_freem(exthdrs.ip6e_rthdr);
	m_freem(exthdrs.ip6e_dest2);
	/* fall through */
bad:
	m_freem(m);
	goto done;
}

/*
 * Copy hlen bytes of header at hdr into a freshly allocated mbuf
 * (a cluster if hlen > MLEN) and return it through *mp.
 * A NULL hdr allocates the mbuf without filling it.
 * Returns 0 on success, ENOBUFS on allocation failure or hlen > MCLBYTES.
 */
static int
ip6_copyexthdr(mp, hdr, hlen)
	struct mbuf **mp;
	caddr_t hdr;
	int hlen;
{
	struct mbuf *m;

	if (hlen > MCLBYTES)
		return(ENOBUFS); /* XXX */

	MGET(m, M_DONTWAIT, MT_DATA);
	if (!m)
		return(ENOBUFS);

	if (hlen > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return(ENOBUFS);
		}
	}
	m->m_len = hlen;
	if (hdr)
		bcopy(hdr, mtod(m, caddr_t), hlen);

	*mp = m;
	return(0);
}

/*
 * Insert jumbo payload option (RFC 2675 style, per the IP6OPT_JUMBO code
 * point) carrying the 32-bit payload length plen into the outgoing
 * hop-by-hop options header in exthdrs, creating or growing that header
 * as needed.  Returns 0 on success, ENOBUFS on allocation failure.
 */
static int
ip6_insert_jumboopt(exthdrs, plen)
	struct ip6_exthdrs *exthdrs;
	u_int32_t plen;
{
	struct mbuf *mopt;
	u_char *optbuf;
	u_int32_t v;

#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */

	/*
	 * If there is no hop-by-hop options header, allocate new one.
	 * If there is one but it doesn't have enough space to store the
	 * jumbo payload option, allocate a cluster to store the whole options.
	 * Otherwise, use it to store the options.
	 */
	if (exthdrs->ip6e_hbh == 0) {
		MGET(mopt, M_DONTWAIT, MT_DATA);
		if (mopt == 0)
			return(ENOBUFS);
		mopt->m_len = JUMBOOPTLEN;
		optbuf = mtod(mopt, u_char *);
		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
		exthdrs->ip6e_hbh = mopt;
	} else {
		struct ip6_hbh *hbh;

		mopt = exthdrs->ip6e_hbh;
		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
			/*
			 * XXX assumption:
			 * - exthdrs->ip6e_hbh is not referenced from places
			 *   other than exthdrs.
			 * - exthdrs->ip6e_hbh is not an mbuf chain.
			 */
			int oldoptlen = mopt->m_len;
			struct mbuf *n;

			/*
			 * XXX: give up if the whole (new) hbh header does
			 * not fit even in an mbuf cluster.
			 */
			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
				return(ENOBUFS);

			/*
			 * As a consequence, we must always prepare a cluster
			 * at this point.
			 */
			MGET(n, M_DONTWAIT, MT_DATA);
			if (n) {
				MCLGET(n, M_DONTWAIT);
				if ((n->m_flags & M_EXT) == 0) {
					m_freem(n);
					n = NULL;
				}
			}
			if (!n)
				return(ENOBUFS);
			/* Copy the old options and append the new ones. */
			n->m_len = oldoptlen + JUMBOOPTLEN;
			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
			    oldoptlen);
			optbuf = mtod(n, caddr_t) + oldoptlen;
			m_freem(mopt);
			mopt = exthdrs->ip6e_hbh = n;
		} else {
			optbuf = mtod(mopt, u_char *) + mopt->m_len;
			mopt->m_len += JUMBOOPTLEN;
		}
		optbuf[0] = IP6OPT_PADN;
		optbuf[1] = 1;

		/*
		 * Adjust the header length according to the pad and
		 * the jumbo payload option.
		 */
		hbh = mtod(mopt, struct ip6_hbh *);
		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
	}

	/* fill in the option. */
	optbuf[2] = IP6OPT_JUMBO;
	optbuf[3] = 4;
	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
	bcopy(&v, &optbuf[4], sizeof(u_int32_t));

	/* finally, adjust the packet header length */
	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;

	return(0);
#undef JUMBOOPTLEN
}

/*
 * Insert fragment header and copy unfragmentable header portions.
*/
/*
 * m0 is the original packet (its chain past the IPv6 header holds the
 * unfragmentable part of length hlen); m is the first mbuf of the new
 * fragment being built.  Copies the unfragmentable extension headers
 * from m0 onto m and appends a fragment header, returned via *frghdrp
 * for the caller to fill in.  Returns 0 on success, ENOBUFS on
 * allocation failure.
 * NOTE(review): only the trailing-space branch bumps m->m_pkthdr.len
 * here; the caller appears to set the final pkthdr length itself —
 * confirm against ip6_output's fragmentation loop.
 */
static int
ip6_insertfraghdr(m0, m, hlen, frghdrp)
	struct mbuf *m0, *m;
	int hlen;
	struct ip6_frag **frghdrp;
{
	struct mbuf *n, *mlast;

	if (hlen > sizeof(struct ip6_hdr)) {
		/* There are extension headers beyond the basic IPv6 header;
		 * copy them (read-only copy is fine) onto the fragment. */
		n = m_copym(m0, sizeof(struct ip6_hdr),
		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
		if (n == 0)
			return(ENOBUFS);
		m->m_next = n;
	} else
		n = m;

	/* Search for the last mbuf of unfragmentable part. */
	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
		;

	if ((mlast->m_flags & M_EXT) == 0 &&
	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
		/* use the trailing space of the last mbuf
		 * for the fragment hdr */
		*frghdrp =
			(struct ip6_frag *)(mtod(mlast, caddr_t) +
					    mlast->m_len);
		mlast->m_len += sizeof(struct ip6_frag);
		m->m_pkthdr.len += sizeof(struct ip6_frag);
	} else {
		/* allocate a new mbuf for the fragment header */
		struct mbuf *mfrg;

		MGET(mfrg, M_DONTWAIT, MT_DATA);
		if (mfrg == 0)
			return(ENOBUFS);
		mfrg->m_len = sizeof(struct ip6_frag);
		*frghdrp = mtod(mfrg, struct ip6_frag *);
		mlast->m_next = mfrg;
	}

	return(0);
}

/*
 * IP6 socket option processing.
 */
int
ip6_ctloutput(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int privileged;
	struct inpcb *in6p = sotoinpcb(so);
	int error, optval;
	int level, op, optname;
	int optlen;
	struct thread *td;

	if (sopt) {
		level = sopt->sopt_level;
		op = sopt->sopt_dir;
		optname = sopt->sopt_name;
		optlen = sopt->sopt_valsize;
		td = sopt->sopt_td;
	} else {
		panic("ip6_ctloutput: arg soopt is NULL");
	}
	error = optval = 0;
	/* privileged == 1 only when a thread is present and suser() passes */
	privileged = (td == 0 || suser(td)) ? 0 : 1;

	if (level == IPPROTO_IPV6) {
		switch (op) {

		case SOPT_SET:
			switch (optname) {
			case IPV6_PKTOPTIONS:
			{
				struct mbuf *m;

				error = soopt_getm(sopt, &m); /* XXX */
				if (error != 0)
					break;
				error = soopt_mcopyin(sopt, m); /* XXX */
				if (error != 0)
					break;
				error = ip6_pcbopts(&in6p->in6p_outputopts,
						    m, so, sopt);
				m_freem(m); /* XXX */
				break;
			}

			/*
			 * Use of some Hop-by-Hop options or some
			 * Destination options, might require special
			 * privilege.
That is, normal applications * (without special privilege) might be forbidden * from setting certain options in outgoing packets, * and might never see certain options in received * packets. [RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel. */ case IPV6_UNICAST_HOPS: case IPV6_CHECKSUM: case IPV6_FAITH: case IPV6_V6ONLY: if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->in6p_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ if (optval) \ in6p->in6p_flags |= (bit); \ else \ in6p->in6p_flags &= ~(bit); \ } while (0) #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0) case IPV6_CHECKSUM: in6p->in6p_cksum = optval; break; case IPV6_FAITH: OPTSET(IN6P_FAITH); break; case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. */ if (in6p->in6p_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->in6p_vflag &= ~INP_IPV4; else in6p->in6p_vflag |= INP_IPV4; break; } break; case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_PKTINFO: OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: OPTSET(IN6P_HOPLIMIT); break; case IPV6_HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. 
*/ if (!privileged) return(EPERM); OPTSET(IN6P_HOPOPTS); break; case IPV6_DSTOPTS: if (!privileged) return(EPERM); OPTSET(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_RTHDR: OPTSET(IN6P_RTHDR); break; } break; #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: { struct mbuf *m; if (sopt->sopt_valsize > MLEN) { error = EMSGSIZE; break; } /* XXX */ MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); if (m == 0) { error = ENOBUFS; break; } m->m_len = sopt->sopt_valsize; error = sooptcopyin(sopt, mtod(m, char *), m->m_len, m->m_len); error = ip6_setmoptions(sopt->sopt_name, &in6p->in6p_moptions, m); (void)m_free(m); } break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->in6p_flags &= ~(IN6P_LOWPORT); in6p->in6p_flags &= ~(IN6P_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->in6p_flags &= ~(IN6P_LOWPORT); in6p->in6p_flags |= IN6P_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->in6p_flags &= ~(IN6P_HIGHPORT); in6p->in6p_flags |= IN6P_LOWPORT; break; default: error = EINVAL; break; } break; #if defined(IPSEC) || defined(FAST_IPSEC) case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec6_set_policy(in6p, optname, req, len, privileged); m_freem(m); } break; #endif /* KAME IPSEC */ case IPV6_FW_ADD: case IPV6_FW_DEL: case IPV6_FW_FLUSH: case IPV6_FW_ZERO: { struct mbuf *m; struct mbuf **mp = &m; if (ip6_fw_ctl_ptr == NULL) return EINVAL; /* XXX */ if ((error = soopt_getm(sopt, &m)) != 0) break; /* XXX */ if ((error = soopt_mcopyin(sopt, m)) != 0) break; error = (*ip6_fw_ctl_ptr)(optname, mp); m = *mp; } break; default: error = ENOPROTOOPT; break; } break; 
case SOPT_GET: switch (optname) { case IPV6_PKTOPTIONS: if (in6p->in6p_options) { struct mbuf *m; m = m_copym(in6p->in6p_options, 0, M_COPYALL, M_TRYWAIT); error = soopt_mcopyout(sopt, m); if (error == 0) m_freem(m); } else sopt->sopt_valsize = 0; break; case IPV6_UNICAST_HOPS: case IPV6_CHECKSUM: case IPV6_FAITH: case IPV6_V6ONLY: case IPV6_PORTRANGE: switch (optname) { case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_CHECKSUM: optval = in6p->in6p_cksum; break; case IPV6_FAITH: optval = OPTBIT(IN6P_FAITH); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->in6p_flags; if (flags & IN6P_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & IN6P_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: if (optname == IPV6_HOPOPTS || optname == IPV6_DSTOPTS || !privileged) return(EPERM); switch (optname) { case IPV6_PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_HOPOPTS: if (!privileged) return(EPERM); optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_DSTOPTS: if (!privileged) return(EPERM); optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: { struct mbuf *m; error = ip6_getmoptions(sopt->sopt_name, in6p->in6p_moptions, &m); if (error == 0) error = sooptcopyout(sopt, mtod(m, char *), m->m_len); m_freem(m); } break; #if defined(IPSEC) || defined(FAST_IPSEC) case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m = NULL; struct mbuf **mp = &m; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; 
error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec6_get_policy(in6p, req, len, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /*XXX*/ if (error == 0 && m) m_freem(m); break; } #endif /* KAME IPSEC */ case IPV6_FW_GET: { struct mbuf *m; struct mbuf **mp = &m; if (ip6_fw_ctl_ptr == NULL) { return EINVAL; } error = (*ip6_fw_ctl_ptr)(optname, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0 && m) m_freem(m); } break; default: error = ENOPROTOOPT; break; } break; } } else { error = EINVAL; } return(error); } /* * Set up IP6 options in pcb for insertion in output packets or * specifying behavior of outgoing packets. */ static int ip6_pcbopts(pktopt, m, so, sopt) struct ip6_pktopts **pktopt; struct mbuf *m; struct socket *so; struct sockopt *sopt; { struct ip6_pktopts *opt = *pktopt; int error = 0; struct thread *td = sopt->sopt_td; int priv = 0; /* turn off any old options. */ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, 1, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return(0); } /* set options specified by user. */ if (td && !suser(td)) priv = 1; if ((error = ip6_setpktoptions(m, opt, priv, 1)) != 0) { ip6_clearpktopts(opt, 1, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return(error); } *pktopt = opt; return(0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. 
*/
void
init_ip6pktopts(opt)
	struct ip6_pktopts *opt;
{
	bzero(opt, sizeof(*opt));
	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
}

/*
 * Clear packet options.  needfree selects whether the option buffers
 * themselves are freed or merely unlinked.
 * NOTE(review): every branch below tests the same (optname == -1)
 * condition, i.e. only the "clear everything" case currently does any
 * work — presumably per-option clearing was planned; confirm intent
 * before relying on optname != -1 behavior.
 */
void
ip6_clearpktopts(pktopt, needfree, optname)
	struct ip6_pktopts *pktopt;
	int needfree, optname;
{
	if (pktopt == NULL)
		return;

	if (optname == -1) {
		if (needfree && pktopt->ip6po_pktinfo)
			free(pktopt->ip6po_pktinfo, M_IP6OPT);
		pktopt->ip6po_pktinfo = NULL;
	}
	if (optname == -1)
		pktopt->ip6po_hlim = -1;
	if (optname == -1) {
		if (needfree && pktopt->ip6po_nexthop)
			free(pktopt->ip6po_nexthop, M_IP6OPT);
		pktopt->ip6po_nexthop = NULL;
	}
	if (optname == -1) {
		if (needfree && pktopt->ip6po_hbh)
			free(pktopt->ip6po_hbh, M_IP6OPT);
		pktopt->ip6po_hbh = NULL;
	}
	if (optname == -1) {
		if (needfree && pktopt->ip6po_dest1)
			free(pktopt->ip6po_dest1, M_IP6OPT);
		pktopt->ip6po_dest1 = NULL;
	}
	if (optname == -1) {
		if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
		/* the cached route for the routing header goes with it */
		if (pktopt->ip6po_route.ro_rt) {
			RTFREE(pktopt->ip6po_route.ro_rt);
			pktopt->ip6po_route.ro_rt = NULL;
		}
	}
	if (optname == -1) {
		if (needfree && pktopt->ip6po_dest2)
			free(pktopt->ip6po_dest2, M_IP6OPT);
		pktopt->ip6po_dest2 = NULL;
	}
}

/*
 * Deep-copy one extension-header option from src to dst; length is taken
 * from the header's own ip6e_len field.  Jumps to bad on M_NOWAIT
 * allocation failure.
 */
#define PKTOPT_EXTHDRCPY(type) \
do {\
	if (src->type) {\
		int hlen =\
			(((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
		dst->type = malloc(hlen, M_IP6OPT, canwait);\
		if (dst->type == NULL && canwait == M_NOWAIT)\
			goto bad;\
		bcopy(src->type, dst->type, hlen);\
	}\
} while (0)

struct ip6_pktopts *
ip6_copypktopts(src, canwait)
	struct ip6_pktopts *src;
	int canwait;
{
	struct ip6_pktopts *dst;

	if (src == NULL) {
		/* NOTE(review): message names the wrong function; this is
		 * ip6_copypktopts, not ip6_clearpktopts — candidate fix. */
		printf("ip6_clearpktopts: invalid argument\n");
		return(NULL);
	}

	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
	if (dst == NULL && canwait == M_NOWAIT)
		return (NULL);
	bzero(dst, sizeof(*dst));

	dst->ip6po_hlim = src->ip6po_hlim;
	if (src->ip6po_pktinfo) {
		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
					    M_IP6OPT, canwait);
		if (dst->ip6po_pktinfo == NULL
&& canwait == M_NOWAIT) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); PKTOPT_EXTHDRCPY(ip6po_dest2); PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ return(dst); bad: if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT); if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT); if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT); if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT); if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT); if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT); free(dst, M_IP6OPT); return(NULL); } #undef PKTOPT_EXTHDRCPY void ip6_freepcbopts(pktopt) struct ip6_pktopts *pktopt; { if (pktopt == NULL) return; ip6_clearpktopts(pktopt, 1, -1); free(pktopt, M_IP6OPT); } /* * Set the IP6 multicast options in response to user setsockopt(). */ static int ip6_setmoptions(optname, im6op, m) int optname; struct ip6_moptions **im6op; struct mbuf *m; { int error = 0; u_int loop, ifindex; struct ipv6_mreq *mreq; struct ifnet *ifp; struct ip6_moptions *im6o = *im6op; struct route_in6 ro; struct sockaddr_in6 *dst; struct in6_multi_mship *imm; struct thread *td = curthread; /* XXX */ if (im6o == NULL) { /* * No multicast option buffer attached to the pcb; * allocate one and initialize to default values. */ im6o = (struct ip6_moptions *) malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK); if (im6o == NULL) return(ENOBUFS); *im6op = im6o; im6o->im6o_multicast_ifp = NULL; im6o->im6o_multicast_hlim = ip6_defmcasthlim; im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP; LIST_INIT(&im6o->im6o_memberships); } switch (optname) { case IPV6_MULTICAST_IF: /* * Select the interface for outgoing multicast packets. 
*/ if (m == NULL || m->m_len != sizeof(u_int)) { error = EINVAL; break; } bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex)); if (ifindex < 0 || if_index < ifindex) { error = ENXIO; /* XXX EINVAL? */ break; } ifp = ifnet_byindex(ifindex); if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; break; } im6o->im6o_multicast_ifp = ifp; break; case IPV6_MULTICAST_HOPS: { /* * Set the IP6 hoplimit for outgoing multicast packets. */ int optval; if (m == NULL || m->m_len != sizeof(int)) { error = EINVAL; break; } bcopy(mtod(m, u_int *), &optval, sizeof(optval)); if (optval < -1 || optval >= 256) error = EINVAL; else if (optval == -1) im6o->im6o_multicast_hlim = ip6_defmcasthlim; else im6o->im6o_multicast_hlim = optval; break; } case IPV6_MULTICAST_LOOP: /* * Set the loopback flag for outgoing multicast packets. * Must be zero or one. */ if (m == NULL || m->m_len != sizeof(u_int)) { error = EINVAL; break; } bcopy(mtod(m, u_int *), &loop, sizeof(loop)); if (loop > 1) { error = EINVAL; break; } im6o->im6o_multicast_loop = loop; break; case IPV6_JOIN_GROUP: /* * Add a multicast group membership. * Group must be a valid IP6 multicast address. */ if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { error = EINVAL; break; } mreq = mtod(m, struct ipv6_mreq *); if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { /* * We use the unspecified address to specify to accept * all multicast addresses. Only super user is allowed * to do this. */ if (suser(td)) { error = EACCES; break; } } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { error = EINVAL; break; } /* * If the interface is specified, validate it. */ if (mreq->ipv6mr_interface < 0 || if_index < mreq->ipv6mr_interface) { error = ENXIO; /* XXX EINVAL? */ break; } /* * If no interface was explicitly specified, choose an * appropriate one according to the given multicast address. 
*/ if (mreq->ipv6mr_interface == 0) { /* * If the multicast address is in node-local scope, * the interface should be a loopback interface. * Otherwise, look up the routing table for the * address, and choose the outgoing interface. * XXX: is it a good approach? */ if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) { ifp = &loif[0]; } else { ro.ro_rt = NULL; dst = (struct sockaddr_in6 *)&ro.ro_dst; bzero(dst, sizeof(*dst)); dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = mreq->ipv6mr_multiaddr; rtalloc((struct route *)&ro); if (ro.ro_rt == NULL) { error = EADDRNOTAVAIL; break; } ifp = ro.ro_rt->rt_ifp; rtfree(ro.ro_rt); } } else ifp = ifnet_byindex(mreq->ipv6mr_interface); /* * See if we found an interface, and confirm that it * supports multicast */ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; break; } /* * Put interface index into the multicast address, * if the address has link-local scope. */ if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) { mreq->ipv6mr_multiaddr.s6_addr16[1] = htons(mreq->ipv6mr_interface); } /* * See if the membership already exists. */ for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = imm->i6mm_chain.le_next) if (imm->i6mm_maddr->in6m_ifp == ifp && IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, &mreq->ipv6mr_multiaddr)) break; if (imm != NULL) { error = EADDRINUSE; break; } /* * Everything looks good; add a new record to the multicast * address list for the given interface. */ imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK); if (imm == NULL) { error = ENOBUFS; break; } if ((imm->i6mm_maddr = in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) { free(imm, M_IPMADDR); break; } LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); break; case IPV6_LEAVE_GROUP: /* * Drop a multicast group membership. * Group must be a valid IP6 multicast address. 
*/ if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { error = EINVAL; break; } mreq = mtod(m, struct ipv6_mreq *); if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { if (suser(td)) { error = EACCES; break; } } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { error = EINVAL; break; } /* * If an interface address was specified, get a pointer * to its ifnet structure. */ if (mreq->ipv6mr_interface < 0 || if_index < mreq->ipv6mr_interface) { error = ENXIO; /* XXX EINVAL? */ break; } ifp = ifnet_byindex(mreq->ipv6mr_interface); /* * Put interface index into the multicast address, * if the address has link-local scope. */ if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) { mreq->ipv6mr_multiaddr.s6_addr16[1] = htons(mreq->ipv6mr_interface); } /* * Find the membership in the membership list. */ for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = imm->i6mm_chain.le_next) { if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) && IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, &mreq->ipv6mr_multiaddr)) break; } if (imm == NULL) { /* Unable to resolve interface */ error = EADDRNOTAVAIL; break; } /* * Give up the multicast address record to which the * membership points. */ LIST_REMOVE(imm, i6mm_chain); in6_delmulti(imm->i6mm_maddr); free(imm, M_IPMADDR); break; default: error = EOPNOTSUPP; break; } /* * If all options have default values, no need to keep the mbuf. */ if (im6o->im6o_multicast_ifp == NULL && im6o->im6o_multicast_hlim == ip6_defmcasthlim && im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP && im6o->im6o_memberships.lh_first == NULL) { free(*im6op, M_IPMOPTS); *im6op = NULL; } return(error); } /* * Return the IP6 multicast options in response to user getsockopt(). 
*/ static int ip6_getmoptions(optname, im6o, mp) int optname; struct ip6_moptions *im6o; struct mbuf **mp; { u_int *hlim, *loop, *ifindex; *mp = m_get(M_TRYWAIT, MT_HEADER); /* XXX */ switch (optname) { case IPV6_MULTICAST_IF: ifindex = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) *ifindex = 0; else *ifindex = im6o->im6o_multicast_ifp->if_index; return(0); case IPV6_MULTICAST_HOPS: hlim = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL) *hlim = ip6_defmcasthlim; else *hlim = im6o->im6o_multicast_hlim; return(0); case IPV6_MULTICAST_LOOP: loop = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL) *loop = ip6_defmcasthlim; else *loop = im6o->im6o_multicast_loop; return(0); default: return(EOPNOTSUPP); } } /* * Discard the IP6 multicast options. */ void ip6_freemoptions(im6o) struct ip6_moptions *im6o; { struct in6_multi_mship *imm; if (im6o == NULL) return; while ((imm = im6o->im6o_memberships.lh_first) != NULL) { LIST_REMOVE(imm, i6mm_chain); if (imm->i6mm_maddr) in6_delmulti(imm->i6mm_maddr); free(imm, M_IPMADDR); } free(im6o, M_IPMOPTS); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktoptions(control, opt, priv, needcopy) struct mbuf *control; struct ip6_pktopts *opt; int priv, needcopy; { struct cmsghdr *cm = 0; if (control == 0 || opt == 0) return(EINVAL); init_ip6pktopts(opt); /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. 
*/ if (control->m_next) return(EINVAL); for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return(EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; /* * XXX should check if RFC2292 API is mixed with 2292bis API */ switch (cm->cmsg_type) { case IPV6_PKTINFO: if (cm->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo))) return(EINVAL); if (needcopy) { /* XXX: Is it really WAITOK? */ opt->ip6po_pktinfo = malloc(sizeof(struct in6_pktinfo), M_IP6OPT, M_WAITOK); bcopy(CMSG_DATA(cm), opt->ip6po_pktinfo, sizeof(struct in6_pktinfo)); } else opt->ip6po_pktinfo = (struct in6_pktinfo *)CMSG_DATA(cm); if (opt->ip6po_pktinfo->ipi6_ifindex && IN6_IS_ADDR_LINKLOCAL(&opt->ip6po_pktinfo->ipi6_addr)) opt->ip6po_pktinfo->ipi6_addr.s6_addr16[1] = htons(opt->ip6po_pktinfo->ipi6_ifindex); if (opt->ip6po_pktinfo->ipi6_ifindex > if_index || opt->ip6po_pktinfo->ipi6_ifindex < 0) { return(ENXIO); } /* * Check if the requested source address is indeed a * unicast address assigned to the node, and can be * used as the packet's source address. 
*/ if (!IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) { struct in6_ifaddr *ia6; struct sockaddr_in6 sin6; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(sin6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = opt->ip6po_pktinfo->ipi6_addr; ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(sin6tosa(&sin6)); if (ia6 == NULL || (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) != 0) return(EADDRNOTAVAIL); } break; case IPV6_HOPLIMIT: if (cm->cmsg_len != CMSG_LEN(sizeof(int))) return(EINVAL); opt->ip6po_hlim = *(int *)CMSG_DATA(cm); if (opt->ip6po_hlim < -1 || opt->ip6po_hlim > 255) return(EINVAL); break; case IPV6_NEXTHOP: if (!priv) return(EPERM); if (cm->cmsg_len < sizeof(u_char) || /* check if cmsg_len is large enough for sa_len */ cm->cmsg_len < CMSG_LEN(*CMSG_DATA(cm))) return(EINVAL); if (needcopy) { opt->ip6po_nexthop = malloc(*CMSG_DATA(cm), M_IP6OPT, M_WAITOK); bcopy(CMSG_DATA(cm), opt->ip6po_nexthop, *CMSG_DATA(cm)); } else opt->ip6po_nexthop = (struct sockaddr *)CMSG_DATA(cm); break; case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_hbh))) return(EINVAL); hbh = (struct ip6_hbh *)CMSG_DATA(cm); hbhlen = (hbh->ip6h_len + 1) << 3; if (cm->cmsg_len != CMSG_LEN(hbhlen)) return(EINVAL); if (needcopy) { opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK); bcopy(hbh, opt->ip6po_hbh, hbhlen); } else opt->ip6po_hbh = hbh; break; } case IPV6_DSTOPTS: { struct ip6_dest *dest, **newdest; int destlen; if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_dest))) return(EINVAL); dest = (struct ip6_dest *)CMSG_DATA(cm); destlen = (dest->ip6d_len + 1) << 3; if (cm->cmsg_len != CMSG_LEN(destlen)) return(EINVAL); /* * The old advacned API is ambiguous on this * point. Our approach is to determine the * position based according to the existence * of a routing header. 
Note, however, that * this depends on the order of the extension * headers in the ancillary data; the 1st part * of the destination options header must * appear before the routing header in the * ancillary data, too. * RFC2292bis solved the ambiguity by * introducing separate cmsg types. */ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; if (needcopy) { *newdest = malloc(destlen, M_IP6OPT, M_WAITOK); bcopy(dest, *newdest, destlen); } else *newdest = dest; break; } case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_rthdr))) return(EINVAL); rth = (struct ip6_rthdr *)CMSG_DATA(cm); rthlen = (rth->ip6r_len + 1) << 3; if (cm->cmsg_len != CMSG_LEN(rthlen)) return(EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: /* must contain one addr */ if (rth->ip6r_len == 0) return(EINVAL); /* length must be even */ if (rth->ip6r_len % 2) return(EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return(EINVAL); break; default: return(EINVAL); /* not supported */ } if (needcopy) { opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK); bcopy(rth, opt->ip6po_rthdr, rthlen); } else opt->ip6po_rthdr = rth; break; } default: return(ENOPROTOOPT); } } return(0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(ifp, m, dst) struct ifnet *ifp; struct mbuf *m; struct sockaddr_in6 *dst; { struct mbuf *copym; struct ip6_hdr *ip6; copym = m_copy(m, 0, M_COPYALL); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. 
*/ if ((copym->m_flags & M_EXT) != 0 || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } #ifdef DIAGNOSTIC if (copym->m_len < sizeof(*ip6)) { m_freem(copym); return; } #endif ip6 = mtod(copym, struct ip6_hdr *); #ifndef SCOPEDROUTING /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); #endif (void)if_simloop(ifp, copym, dst->sin6_family, 0); } /* * Chop IPv6 header off from the payload. */ static int ip6_splithdr(m, exthdrs) struct mbuf *m; struct ip6_exthdrs *exthdrs; { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { MGETHDR(mh, M_DONTWAIT, MT_HEADER); if (mh == 0) { m_freem(m); return ENOBUFS; } - M_COPY_PKTHDR(mh, m); + M_MOVE_PKTHDR(mh, m); MH_ALIGN(mh, sizeof(*ip6)); m->m_flags &= ~M_PKTHDR; m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(in6p) struct in6pcb *in6p; { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? 
(((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } Index: head/sys/netinet6/ipsec.c =================================================================== --- head/sys/netinet6/ipsec.c (revision 108465) +++ head/sys/netinet6/ipsec.c (revision 108466) @@ -1,3502 +1,3496 @@ /* $FreeBSD$ */ /* $KAME: ipsec.c,v 1.103 2001/05/24 07:14:18 sakane Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPsec controller part. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #ifdef IPSEC_ESP #include #ifdef INET6 #include #endif #endif #include #ifdef INET6 #include #endif #include #include #include #include #include #ifdef IPSEC_DEBUG int ipsec_debug = 1; #else int ipsec_debug = 0; #endif struct ipsecstat ipsecstat; int ip4_ah_cleartos = 1; int ip4_ah_offsetmask = 0; /* maybe IP_DF? */ int ip4_ipsec_dfbit = 0; /* DF bit on encap. 
0: clear 1: set 2: copy */ int ip4_esp_trans_deflev = IPSEC_LEVEL_USE; int ip4_esp_net_deflev = IPSEC_LEVEL_USE; int ip4_ah_trans_deflev = IPSEC_LEVEL_USE; int ip4_ah_net_deflev = IPSEC_LEVEL_USE; struct secpolicy ip4_def_policy; int ip4_ipsec_ecn = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ int ip4_esp_randpad = -1; #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet_ipsec); #ifdef INET6 SYSCTL_DECL(_net_inet6_ipsec6); #endif #endif /* net.inet.ipsec */ SYSCTL_STRUCT(_net_inet_ipsec, IPSECCTL_STATS, stats, CTLFLAG_RD, &ipsecstat, ipsecstat, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, CTLFLAG_RW, &ip4_def_policy.policy, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_RW, &ip4_esp_trans_deflev, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev, CTLFLAG_RW, &ip4_esp_net_deflev, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev, CTLFLAG_RW, &ip4_ah_trans_deflev, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, CTLFLAG_RW, &ip4_ah_net_deflev, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_CLEARTOS, ah_cleartos, CTLFLAG_RW, &ip4_ah_cleartos, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_OFFSETMASK, ah_offsetmask, CTLFLAG_RW, &ip4_ah_offsetmask, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DFBIT, dfbit, CTLFLAG_RW, &ip4_ipsec_dfbit, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ECN, ecn, CTLFLAG_RW, &ip4_ipsec_ecn, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEBUG, debug, CTLFLAG_RW, &ipsec_debug, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ESP_RANDPAD, esp_randpad, CTLFLAG_RW, &ip4_esp_randpad, 0, ""); #ifdef INET6 struct ipsecstat ipsec6stat; int ip6_esp_trans_deflev = IPSEC_LEVEL_USE; int ip6_esp_net_deflev = IPSEC_LEVEL_USE; int ip6_ah_trans_deflev = IPSEC_LEVEL_USE; int ip6_ah_net_deflev = IPSEC_LEVEL_USE; struct secpolicy ip6_def_policy; int ip6_ipsec_ecn = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ int ip6_esp_randpad = 
-1; /* net.inet6.ipsec6 */ SYSCTL_STRUCT(_net_inet6_ipsec6, IPSECCTL_STATS, stats, CTLFLAG_RD, &ipsec6stat, ipsecstat, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY, def_policy, CTLFLAG_RW, &ip6_def_policy.policy, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_RW, &ip6_esp_trans_deflev, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev, CTLFLAG_RW, &ip6_esp_net_deflev, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev, CTLFLAG_RW, &ip6_ah_trans_deflev, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, CTLFLAG_RW, &ip6_ah_net_deflev, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ECN, ecn, CTLFLAG_RW, &ip6_ipsec_ecn, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG, debug, CTLFLAG_RW, &ipsec_debug, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ESP_RANDPAD, esp_randpad, CTLFLAG_RW, &ip6_esp_randpad, 0, ""); #endif /* INET6 */ static int ipsec_setspidx_mbuf __P((struct secpolicyindex *, u_int, u_int, struct mbuf *, int)); static int ipsec4_setspidx_inpcb __P((struct mbuf *, struct inpcb *pcb)); #ifdef INET6 static int ipsec6_setspidx_in6pcb __P((struct mbuf *, struct in6pcb *pcb)); #endif static int ipsec_setspidx __P((struct mbuf *, struct secpolicyindex *, int)); static void ipsec4_get_ulp __P((struct mbuf *m, struct secpolicyindex *, int)); static int ipsec4_setspidx_ipaddr __P((struct mbuf *, struct secpolicyindex *)); #ifdef INET6 static void ipsec6_get_ulp __P((struct mbuf *m, struct secpolicyindex *, int)); static int ipsec6_setspidx_ipaddr __P((struct mbuf *, struct secpolicyindex *)); #endif static struct inpcbpolicy *ipsec_newpcbpolicy __P((void)); static void ipsec_delpcbpolicy __P((struct inpcbpolicy *)); static struct secpolicy *ipsec_deepcopy_policy __P((struct secpolicy *src)); static int ipsec_set_policy __P((struct secpolicy **pcb_sp, int optname, caddr_t request, size_t len, int priv)); static int 
ipsec_get_policy __P((struct secpolicy *pcb_sp, struct mbuf **mp)); static void vshiftl __P((unsigned char *, int, int)); static int ipsec_in_reject __P((struct secpolicy *, struct mbuf *)); static size_t ipsec_hdrsiz __P((struct secpolicy *)); #ifdef INET static struct mbuf *ipsec4_splithdr __P((struct mbuf *)); #endif #ifdef INET6 static struct mbuf *ipsec6_splithdr __P((struct mbuf *)); #endif #ifdef INET static int ipsec4_encapsulate __P((struct mbuf *, struct secasvar *)); #endif #ifdef INET6 static int ipsec6_encapsulate __P((struct mbuf *, struct secasvar *)); #endif /* * For OUTBOUND packet having a socket. Searching SPD for packet, * and return a pointer to SP. * OUT: NULL: no apropreate SP found, the following value is set to error. * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. * others : error occured. * others: a pointer to SP * * NOTE: IPv6 mapped adddress concern is implemented here. */ struct secpolicy * ipsec4_getpolicybysock(m, dir, so, error) struct mbuf *m; u_int dir; struct socket *so; int *error; { struct inpcbpolicy *pcbsp = NULL; struct secpolicy *currsp = NULL; /* policy on socket */ struct secpolicy *kernsp = NULL; /* policy on kernel */ /* sanity check */ if (m == NULL || so == NULL || error == NULL) panic("ipsec4_getpolicybysock: NULL pointer was passed.\n"); switch (so->so_proto->pr_domain->dom_family) { case AF_INET: /* set spidx in pcb */ *error = ipsec4_setspidx_inpcb(m, sotoinpcb(so)); break; #ifdef INET6 case AF_INET6: /* set spidx in pcb */ *error = ipsec6_setspidx_in6pcb(m, sotoin6pcb(so)); break; #endif default: panic("ipsec4_getpolicybysock: unsupported address family\n"); } if (*error) return NULL; switch (so->so_proto->pr_domain->dom_family) { case AF_INET: pcbsp = sotoinpcb(so)->inp_sp; break; #ifdef INET6 case AF_INET6: pcbsp = sotoin6pcb(so)->in6p_sp; break; #endif } /* sanity check */ if (pcbsp == NULL) panic("ipsec4_getpolicybysock: pcbsp is NULL.\n"); switch (dir) { case 
IPSEC_DIR_INBOUND: currsp = pcbsp->sp_in; break; case IPSEC_DIR_OUTBOUND: currsp = pcbsp->sp_out; break; default: panic("ipsec4_getpolicybysock: illegal direction.\n"); } /* sanity check */ if (currsp == NULL) panic("ipsec4_getpolicybysock: currsp is NULL.\n"); /* when privilieged socket */ if (pcbsp->priv) { switch (currsp->policy) { case IPSEC_POLICY_BYPASS: currsp->refcnt++; *error = 0; return currsp; case IPSEC_POLICY_ENTRUST: /* look for a policy in SPD */ kernsp = key_allocsp(&currsp->spidx, dir); /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec4_getpolicybysock called " "to allocate SP:%p\n", kernsp)); *error = 0; return kernsp; } /* no SP found */ if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD && ip4_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", ip4_def_policy.policy, IPSEC_POLICY_NONE)); ip4_def_policy.policy = IPSEC_POLICY_NONE; } ip4_def_policy.refcnt++; *error = 0; return &ip4_def_policy; case IPSEC_POLICY_IPSEC: currsp->refcnt++; *error = 0; return currsp; default: ipseclog((LOG_ERR, "ipsec4_getpolicybysock: " "Invalid policy for PCB %d\n", currsp->policy)); *error = EINVAL; return NULL; } /* NOTREACHED */ } /* when non-privilieged socket */ /* look for a policy in SPD */ kernsp = key_allocsp(&currsp->spidx, dir); /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec4_getpolicybysock called " "to allocate SP:%p\n", kernsp)); *error = 0; return kernsp; } /* no SP found */ switch (currsp->policy) { case IPSEC_POLICY_BYPASS: ipseclog((LOG_ERR, "ipsec4_getpolicybysock: " "Illegal policy for non-priviliged defined %d\n", currsp->policy)); *error = EINVAL; return NULL; case IPSEC_POLICY_ENTRUST: if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD && ip4_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", ip4_def_policy.policy, IPSEC_POLICY_NONE)); ip4_def_policy.policy = 
IPSEC_POLICY_NONE; } ip4_def_policy.refcnt++; *error = 0; return &ip4_def_policy; case IPSEC_POLICY_IPSEC: currsp->refcnt++; *error = 0; return currsp; default: ipseclog((LOG_ERR, "ipsec4_getpolicybysock: " "Invalid policy for PCB %d\n", currsp->policy)); *error = EINVAL; return NULL; } /* NOTREACHED */ } /* * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet, * and return a pointer to SP. * OUT: positive: a pointer to the entry for security policy leaf matched. * NULL: no apropreate SP found, the following value is set to error. * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. * others : error occured. */ struct secpolicy * ipsec4_getpolicybyaddr(m, dir, flag, error) struct mbuf *m; u_int dir; int flag; int *error; { struct secpolicy *sp = NULL; /* sanity check */ if (m == NULL || error == NULL) panic("ipsec4_getpolicybyaddr: NULL pointer was passed.\n"); { struct secpolicyindex spidx; bzero(&spidx, sizeof(spidx)); /* make a index to look for a policy */ *error = ipsec_setspidx_mbuf(&spidx, dir, AF_INET, m, (flag & IP_FORWARDING) ? 0 : 1); if (*error != 0) return NULL; sp = key_allocsp(&spidx, dir); } /* SP found */ if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec4_getpolicybyaddr called " "to allocate SP:%p\n", sp)); *error = 0; return sp; } /* no SP found */ if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD && ip4_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy:%d->%d\n", ip4_def_policy.policy, IPSEC_POLICY_NONE)); ip4_def_policy.policy = IPSEC_POLICY_NONE; } ip4_def_policy.refcnt++; *error = 0; return &ip4_def_policy; } #ifdef INET6 /* * For OUTBOUND packet having a socket. Searching SPD for packet, * and return a pointer to SP. * OUT: NULL: no apropreate SP found, the following value is set to error. * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. * others : error occured. 
* others: a pointer to SP */ struct secpolicy * ipsec6_getpolicybysock(m, dir, so, error) struct mbuf *m; u_int dir; struct socket *so; int *error; { struct inpcbpolicy *pcbsp = NULL; struct secpolicy *currsp = NULL; /* policy on socket */ struct secpolicy *kernsp = NULL; /* policy on kernel */ /* sanity check */ if (m == NULL || so == NULL || error == NULL) panic("ipsec6_getpolicybysock: NULL pointer was passed.\n"); #ifdef DIAGNOSTIC if (so->so_proto->pr_domain->dom_family != AF_INET6) panic("ipsec6_getpolicybysock: socket domain != inet6\n"); #endif /* set spidx in pcb */ ipsec6_setspidx_in6pcb(m, sotoin6pcb(so)); pcbsp = sotoin6pcb(so)->in6p_sp; /* sanity check */ if (pcbsp == NULL) panic("ipsec6_getpolicybysock: pcbsp is NULL.\n"); switch (dir) { case IPSEC_DIR_INBOUND: currsp = pcbsp->sp_in; break; case IPSEC_DIR_OUTBOUND: currsp = pcbsp->sp_out; break; default: panic("ipsec6_getpolicybysock: illegal direction.\n"); } /* sanity check */ if (currsp == NULL) panic("ipsec6_getpolicybysock: currsp is NULL.\n"); /* when privilieged socket */ if (pcbsp->priv) { switch (currsp->policy) { case IPSEC_POLICY_BYPASS: currsp->refcnt++; *error = 0; return currsp; case IPSEC_POLICY_ENTRUST: /* look for a policy in SPD */ kernsp = key_allocsp(&currsp->spidx, dir); /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec6_getpolicybysock called " "to allocate SP:%p\n", kernsp)); *error = 0; return kernsp; } /* no SP found */ if (ip6_def_policy.policy != IPSEC_POLICY_DISCARD && ip6_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", ip6_def_policy.policy, IPSEC_POLICY_NONE)); ip6_def_policy.policy = IPSEC_POLICY_NONE; } ip6_def_policy.refcnt++; *error = 0; return &ip6_def_policy; case IPSEC_POLICY_IPSEC: currsp->refcnt++; *error = 0; return currsp; default: ipseclog((LOG_ERR, "ipsec6_getpolicybysock: " "Invalid policy for PCB %d\n", currsp->policy)); *error = EINVAL; return NULL; } /* NOTREACHED */ 
} /* when non-privilieged socket */ /* look for a policy in SPD */ kernsp = key_allocsp(&currsp->spidx, dir); /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec6_getpolicybysock called " "to allocate SP:%p\n", kernsp)); *error = 0; return kernsp; } /* no SP found */ switch (currsp->policy) { case IPSEC_POLICY_BYPASS: ipseclog((LOG_ERR, "ipsec6_getpolicybysock: " "Illegal policy for non-priviliged defined %d\n", currsp->policy)); *error = EINVAL; return NULL; case IPSEC_POLICY_ENTRUST: if (ip6_def_policy.policy != IPSEC_POLICY_DISCARD && ip6_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", ip6_def_policy.policy, IPSEC_POLICY_NONE)); ip6_def_policy.policy = IPSEC_POLICY_NONE; } ip6_def_policy.refcnt++; *error = 0; return &ip6_def_policy; case IPSEC_POLICY_IPSEC: currsp->refcnt++; *error = 0; return currsp; default: ipseclog((LOG_ERR, "ipsec6_policybysock: Invalid policy for PCB %d\n", currsp->policy)); *error = EINVAL; return NULL; } /* NOTREACHED */ } /* * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet, * and return a pointer to SP. * `flag' means that packet is to be forwarded whether or not. * flag = 1: forwad * OUT: positive: a pointer to the entry for security policy leaf matched. * NULL: no apropreate SP found, the following value is set to error. * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. * others : error occured. */ #ifndef IP_FORWARDING #define IP_FORWARDING 1 #endif struct secpolicy * ipsec6_getpolicybyaddr(m, dir, flag, error) struct mbuf *m; u_int dir; int flag; int *error; { struct secpolicy *sp = NULL; /* sanity check */ if (m == NULL || error == NULL) panic("ipsec6_getpolicybyaddr: NULL pointer was passed.\n"); { struct secpolicyindex spidx; bzero(&spidx, sizeof(spidx)); /* make a index to look for a policy */ *error = ipsec_setspidx_mbuf(&spidx, dir, AF_INET6, m, (flag & IP_FORWARDING) ? 
0 : 1); if (*error != 0) return NULL; sp = key_allocsp(&spidx, dir); } /* SP found */ if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec6_getpolicybyaddr called " "to allocate SP:%p\n", sp)); *error = 0; return sp; } /* no SP found */ if (ip6_def_policy.policy != IPSEC_POLICY_DISCARD && ip6_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", ip6_def_policy.policy, IPSEC_POLICY_NONE)); ip6_def_policy.policy = IPSEC_POLICY_NONE; } ip6_def_policy.refcnt++; *error = 0; return &ip6_def_policy; } #endif /* INET6 */ /* * set IP address into spidx from mbuf. * When Forwarding packet and ICMP echo reply, this function is used. * * IN: get the followings from mbuf. * protocol family, src, dst, next protocol * OUT: * 0: success. * other: failure, and set errno. */ int ipsec_setspidx_mbuf(spidx, dir, family, m, needport) struct secpolicyindex *spidx; u_int dir, family; struct mbuf *m; int needport; { int error; /* sanity check */ if (spidx == NULL || m == NULL) panic("ipsec_setspidx_mbuf: NULL pointer was passed.\n"); bzero(spidx, sizeof(*spidx)); error = ipsec_setspidx(m, spidx, needport); if (error) goto bad; spidx->dir = dir; return 0; bad: /* XXX initialize */ bzero(spidx, sizeof(*spidx)); return EINVAL; } static int ipsec4_setspidx_inpcb(m, pcb) struct mbuf *m; struct inpcb *pcb; { struct secpolicyindex *spidx; int error; /* sanity check */ if (pcb == NULL) panic("ipsec4_setspidx_inpcb: no PCB found.\n"); if (pcb->inp_sp == NULL) panic("ipsec4_setspidx_inpcb: no inp_sp found.\n"); if (pcb->inp_sp->sp_out == NULL || pcb->inp_sp->sp_in == NULL) panic("ipsec4_setspidx_inpcb: no sp_in/out found.\n"); bzero(&pcb->inp_sp->sp_in->spidx, sizeof(*spidx)); bzero(&pcb->inp_sp->sp_out->spidx, sizeof(*spidx)); spidx = &pcb->inp_sp->sp_in->spidx; error = ipsec_setspidx(m, spidx, 1); if (error) goto bad; spidx->dir = IPSEC_DIR_INBOUND; spidx = &pcb->inp_sp->sp_out->spidx; error = ipsec_setspidx(m, spidx, 1); if (error) 
goto bad; spidx->dir = IPSEC_DIR_OUTBOUND; return 0; bad: bzero(&pcb->inp_sp->sp_in->spidx, sizeof(*spidx)); bzero(&pcb->inp_sp->sp_out->spidx, sizeof(*spidx)); return error; } #ifdef INET6 static int ipsec6_setspidx_in6pcb(m, pcb) struct mbuf *m; struct in6pcb *pcb; { struct secpolicyindex *spidx; int error; /* sanity check */ if (pcb == NULL) panic("ipsec6_setspidx_in6pcb: no PCB found.\n"); if (pcb->in6p_sp == NULL) panic("ipsec6_setspidx_in6pcb: no in6p_sp found.\n"); if (pcb->in6p_sp->sp_out == NULL || pcb->in6p_sp->sp_in == NULL) panic("ipsec6_setspidx_in6pcb: no sp_in/out found.\n"); bzero(&pcb->in6p_sp->sp_in->spidx, sizeof(*spidx)); bzero(&pcb->in6p_sp->sp_out->spidx, sizeof(*spidx)); spidx = &pcb->in6p_sp->sp_in->spidx; error = ipsec_setspidx(m, spidx, 1); if (error) goto bad; spidx->dir = IPSEC_DIR_INBOUND; spidx = &pcb->in6p_sp->sp_out->spidx; error = ipsec_setspidx(m, spidx, 1); if (error) goto bad; spidx->dir = IPSEC_DIR_OUTBOUND; return 0; bad: bzero(&pcb->in6p_sp->sp_in->spidx, sizeof(*spidx)); bzero(&pcb->in6p_sp->sp_out->spidx, sizeof(*spidx)); return error; } #endif /* * configure security policy index (src/dst/proto/sport/dport) * by looking at the content of mbuf. * the caller is responsible for error recovery (like clearing up spidx). */ static int ipsec_setspidx(m, spidx, needport) struct mbuf *m; struct secpolicyindex *spidx; int needport; { struct ip *ip = NULL; struct ip ipbuf; u_int v; struct mbuf *n; int len; int error; if (m == NULL) panic("ipsec_setspidx: m == 0 passed.\n"); /* * validate m->m_pkthdr.len. we see incorrect length if we * mistakenly call this function with inconsistent mbuf chain * (like 4.4BSD tcp/udp processing). XXX should we panic here? 
*/ len = 0; for (n = m; n; n = n->m_next) len += n->m_len; if (m->m_pkthdr.len != len) { KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_setspidx: " "total of m_len(%d) != pkthdr.len(%d), " "ignored.\n", len, m->m_pkthdr.len)); return EINVAL; } if (m->m_pkthdr.len < sizeof(struct ip)) { KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_setspidx: " "pkthdr.len(%d) < sizeof(struct ip), ignored.\n", m->m_pkthdr.len)); return EINVAL; } if (m->m_len >= sizeof(*ip)) ip = mtod(m, struct ip *); else { m_copydata(m, 0, sizeof(ipbuf), (caddr_t)&ipbuf); ip = &ipbuf; } #ifdef _IP_VHL v = _IP_VHL_V(ip->ip_vhl); #else v = ip->ip_v; #endif switch (v) { case 4: error = ipsec4_setspidx_ipaddr(m, spidx); if (error) return error; ipsec4_get_ulp(m, spidx, needport); return 0; #ifdef INET6 case 6: if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) { KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_setspidx: " "pkthdr.len(%d) < sizeof(struct ip6_hdr), " "ignored.\n", m->m_pkthdr.len)); return EINVAL; } error = ipsec6_setspidx_ipaddr(m, spidx); if (error) return error; ipsec6_get_ulp(m, spidx, needport); return 0; #endif default: KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_setspidx: " "unknown IP version %u, ignored.\n", v)); return EINVAL; } } static void ipsec4_get_ulp(m, spidx, needport) struct mbuf *m; struct secpolicyindex *spidx; int needport; { struct ip ip; struct ip6_ext ip6e; u_int8_t nxt; int off; struct tcphdr th; struct udphdr uh; /* sanity check */ if (m == NULL) panic("ipsec4_get_ulp: NULL pointer was passed.\n"); if (m->m_pkthdr.len < sizeof(ip)) panic("ipsec4_get_ulp: too short\n"); /* set default */ spidx->ul_proto = IPSEC_ULPROTO_ANY; ((struct sockaddr_in *)&spidx->src)->sin_port = IPSEC_PORT_ANY; ((struct sockaddr_in *)&spidx->dst)->sin_port = IPSEC_PORT_ANY; m_copydata(m, 0, sizeof(ip), (caddr_t)&ip); /* ip_input() flips it into host endian XXX need more checking */ if (ip.ip_off & (IP_MF | IP_OFFMASK)) return; nxt = ip.ip_p; #ifdef _IP_VHL off = _IP_VHL_HL(ip->ip_vhl) << 2; #else off = 
ip.ip_hl << 2; #endif while (off < m->m_pkthdr.len) { switch (nxt) { case IPPROTO_TCP: spidx->ul_proto = nxt; if (!needport) return; if (off + sizeof(struct tcphdr) > m->m_pkthdr.len) return; m_copydata(m, off, sizeof(th), (caddr_t)&th); ((struct sockaddr_in *)&spidx->src)->sin_port = th.th_sport; ((struct sockaddr_in *)&spidx->dst)->sin_port = th.th_dport; return; case IPPROTO_UDP: spidx->ul_proto = nxt; if (!needport) return; if (off + sizeof(struct udphdr) > m->m_pkthdr.len) return; m_copydata(m, off, sizeof(uh), (caddr_t)&uh); ((struct sockaddr_in *)&spidx->src)->sin_port = uh.uh_sport; ((struct sockaddr_in *)&spidx->dst)->sin_port = uh.uh_dport; return; case IPPROTO_AH: if (m->m_pkthdr.len > off + sizeof(ip6e)) return; m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); off += (ip6e.ip6e_len + 2) << 2; nxt = ip6e.ip6e_nxt; break; case IPPROTO_ICMP: default: /* XXX intermediate headers??? */ spidx->ul_proto = nxt; return; } } } /* assumes that m is sane */ static int ipsec4_setspidx_ipaddr(m, spidx) struct mbuf *m; struct secpolicyindex *spidx; { struct ip *ip = NULL; struct ip ipbuf; struct sockaddr_in *sin; if (m->m_len >= sizeof(*ip)) ip = mtod(m, struct ip *); else { m_copydata(m, 0, sizeof(ipbuf), (caddr_t)&ipbuf); ip = &ipbuf; } sin = (struct sockaddr_in *)&spidx->src; bzero(sin, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(struct sockaddr_in); bcopy(&ip->ip_src, &sin->sin_addr, sizeof(ip->ip_src)); spidx->prefs = sizeof(struct in_addr) << 3; sin = (struct sockaddr_in *)&spidx->dst; bzero(sin, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(struct sockaddr_in); bcopy(&ip->ip_dst, &sin->sin_addr, sizeof(ip->ip_dst)); spidx->prefd = sizeof(struct in_addr) << 3; return 0; } #ifdef INET6 static void ipsec6_get_ulp(m, spidx, needport) struct mbuf *m; struct secpolicyindex *spidx; int needport; { int off, nxt; struct tcphdr th; struct udphdr uh; /* sanity check */ if (m == NULL) panic("ipsec6_get_ulp: NULL pointer was 
passed.\n"); KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec6_get_ulp:\n"); kdebug_mbuf(m)); /* set default */ spidx->ul_proto = IPSEC_ULPROTO_ANY; ((struct sockaddr_in6 *)&spidx->src)->sin6_port = IPSEC_PORT_ANY; ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = IPSEC_PORT_ANY; nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off < 0 || m->m_pkthdr.len < off) return; switch (nxt) { case IPPROTO_TCP: spidx->ul_proto = nxt; if (!needport) break; if (off + sizeof(struct tcphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(th), (caddr_t)&th); ((struct sockaddr_in6 *)&spidx->src)->sin6_port = th.th_sport; ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = th.th_dport; break; case IPPROTO_UDP: spidx->ul_proto = nxt; if (!needport) break; if (off + sizeof(struct udphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(uh), (caddr_t)&uh); ((struct sockaddr_in6 *)&spidx->src)->sin6_port = uh.uh_sport; ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = uh.uh_dport; break; case IPPROTO_ICMPV6: default: /* XXX intermediate headers??? 
*/ spidx->ul_proto = nxt; break; } } /* assumes that m is sane */ static int ipsec6_setspidx_ipaddr(m, spidx) struct mbuf *m; struct secpolicyindex *spidx; { struct ip6_hdr *ip6 = NULL; struct ip6_hdr ip6buf; struct sockaddr_in6 *sin6; if (m->m_len >= sizeof(*ip6)) ip6 = mtod(m, struct ip6_hdr *); else { m_copydata(m, 0, sizeof(ip6buf), (caddr_t)&ip6buf); ip6 = &ip6buf; } sin6 = (struct sockaddr_in6 *)&spidx->src; bzero(sin6, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(struct sockaddr_in6); bcopy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(ip6->ip6_src)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); } spidx->prefs = sizeof(struct in6_addr) << 3; sin6 = (struct sockaddr_in6 *)&spidx->dst; bzero(sin6, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(struct sockaddr_in6); bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(ip6->ip6_dst)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); } spidx->prefd = sizeof(struct in6_addr) << 3; return 0; } #endif static struct inpcbpolicy * ipsec_newpcbpolicy() { struct inpcbpolicy *p; p = (struct inpcbpolicy *)malloc(sizeof(*p), M_SECA, M_NOWAIT); return p; } static void ipsec_delpcbpolicy(p) struct inpcbpolicy *p; { free(p, M_SECA); } /* initialize policy in PCB */ int ipsec_init_policy(so, pcb_sp) struct socket *so; struct inpcbpolicy **pcb_sp; { struct inpcbpolicy *new; /* sanity check. 
*/ if (so == NULL || pcb_sp == NULL) panic("ipsec_init_policy: NULL pointer was passed.\n"); new = ipsec_newpcbpolicy(); if (new == NULL) { ipseclog((LOG_DEBUG, "ipsec_init_policy: No more memory.\n")); return ENOBUFS; } bzero(new, sizeof(*new)); if (so->so_cred != 0 && so->so_cred->cr_uid == 0) new->priv = 1; else new->priv = 0; if ((new->sp_in = key_newsp()) == NULL) { ipsec_delpcbpolicy(new); return ENOBUFS; } new->sp_in->state = IPSEC_SPSTATE_ALIVE; new->sp_in->policy = IPSEC_POLICY_ENTRUST; if ((new->sp_out = key_newsp()) == NULL) { key_freesp(new->sp_in); ipsec_delpcbpolicy(new); return ENOBUFS; } new->sp_out->state = IPSEC_SPSTATE_ALIVE; new->sp_out->policy = IPSEC_POLICY_ENTRUST; *pcb_sp = new; return 0; } /* copy old ipsec policy into new */ int ipsec_copy_policy(old, new) struct inpcbpolicy *old, *new; { struct secpolicy *sp; sp = ipsec_deepcopy_policy(old->sp_in); if (sp) { key_freesp(new->sp_in); new->sp_in = sp; } else return ENOBUFS; sp = ipsec_deepcopy_policy(old->sp_out); if (sp) { key_freesp(new->sp_out); new->sp_out = sp; } else return ENOBUFS; new->priv = old->priv; return 0; } /* deep-copy a policy in PCB */ static struct secpolicy * ipsec_deepcopy_policy(src) struct secpolicy *src; { struct ipsecrequest *newchain = NULL; struct ipsecrequest *p; struct ipsecrequest **q; struct ipsecrequest *r; struct secpolicy *dst; dst = key_newsp(); if (src == NULL || dst == NULL) return NULL; /* * deep-copy IPsec request chain. This is required since struct * ipsecrequest is not reference counted. 
*/ q = &newchain; for (p = src->req; p; p = p->next) { *q = (struct ipsecrequest *)malloc(sizeof(struct ipsecrequest), M_SECA, M_NOWAIT); if (*q == NULL) goto fail; bzero(*q, sizeof(**q)); (*q)->next = NULL; (*q)->saidx.proto = p->saidx.proto; (*q)->saidx.mode = p->saidx.mode; (*q)->level = p->level; (*q)->saidx.reqid = p->saidx.reqid; bcopy(&p->saidx.src, &(*q)->saidx.src, sizeof((*q)->saidx.src)); bcopy(&p->saidx.dst, &(*q)->saidx.dst, sizeof((*q)->saidx.dst)); (*q)->sav = NULL; (*q)->sp = dst; q = &((*q)->next); } dst->req = newchain; dst->state = src->state; dst->policy = src->policy; /* do not touch the refcnt fields */ return dst; fail: for (p = newchain; p; p = r) { r = p->next; free(p, M_SECA); p = NULL; } return NULL; } /* set policy and ipsec request if present. */ static int ipsec_set_policy(pcb_sp, optname, request, len, priv) struct secpolicy **pcb_sp; int optname; caddr_t request; size_t len; int priv; { struct sadb_x_policy *xpl; struct secpolicy *newsp = NULL; int error; /* sanity check. */ if (pcb_sp == NULL || *pcb_sp == NULL || request == NULL) return EINVAL; if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)request; KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_set_policy: passed policy\n"); kdebug_sadb_x_policy((struct sadb_ext *)xpl)); /* check policy type */ /* ipsec_set_policy() accepts IPSEC, ENTRUST and BYPASS. 
*/ if (xpl->sadb_x_policy_type == IPSEC_POLICY_DISCARD || xpl->sadb_x_policy_type == IPSEC_POLICY_NONE) return EINVAL; /* check privileged socket */ if (priv == 0 && xpl->sadb_x_policy_type == IPSEC_POLICY_BYPASS) return EACCES; /* allocation new SP entry */ if ((newsp = key_msg2sp(xpl, len, &error)) == NULL) return error; newsp->state = IPSEC_SPSTATE_ALIVE; /* clear old SP and set new SP */ key_freesp(*pcb_sp); *pcb_sp = newsp; KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_set_policy: new policy\n"); kdebug_secpolicy(newsp)); return 0; } static int ipsec_get_policy(pcb_sp, mp) struct secpolicy *pcb_sp; struct mbuf **mp; { /* sanity check. */ if (pcb_sp == NULL || mp == NULL) return EINVAL; *mp = key_sp2msg(pcb_sp); if (!*mp) { ipseclog((LOG_DEBUG, "ipsec_get_policy: No more memory.\n")); return ENOBUFS; } (*mp)->m_type = MT_DATA; KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_get_policy:\n"); kdebug_mbuf(*mp)); return 0; } int ipsec4_set_policy(inp, optname, request, len, priv) struct inpcb *inp; int optname; caddr_t request; size_t len; int priv; { struct sadb_x_policy *xpl; struct secpolicy **pcb_sp; /* sanity check. */ if (inp == NULL || request == NULL) return EINVAL; if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)request; /* select direction */ switch (xpl->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: pcb_sp = &inp->inp_sp->sp_in; break; case IPSEC_DIR_OUTBOUND: pcb_sp = &inp->inp_sp->sp_out; break; default: ipseclog((LOG_ERR, "ipsec4_set_policy: invalid direction=%u\n", xpl->sadb_x_policy_dir)); return EINVAL; } return ipsec_set_policy(pcb_sp, optname, request, len, priv); } int ipsec4_get_policy(inp, request, len, mp) struct inpcb *inp; caddr_t request; size_t len; struct mbuf **mp; { struct sadb_x_policy *xpl; struct secpolicy *pcb_sp; /* sanity check. 
*/ if (inp == NULL || request == NULL || mp == NULL) return EINVAL; if (inp->inp_sp == NULL) panic("policy in PCB is NULL\n"); if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)request; /* select direction */ switch (xpl->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: pcb_sp = inp->inp_sp->sp_in; break; case IPSEC_DIR_OUTBOUND: pcb_sp = inp->inp_sp->sp_out; break; default: ipseclog((LOG_ERR, "ipsec4_set_policy: invalid direction=%u\n", xpl->sadb_x_policy_dir)); return EINVAL; } return ipsec_get_policy(pcb_sp, mp); } /* delete policy in PCB */ int ipsec4_delete_pcbpolicy(inp) struct inpcb *inp; { /* sanity check. */ if (inp == NULL) panic("ipsec4_delete_pcbpolicy: NULL pointer was passed.\n"); if (inp->inp_sp == NULL) return 0; if (inp->inp_sp->sp_in != NULL) { key_freesp(inp->inp_sp->sp_in); inp->inp_sp->sp_in = NULL; } if (inp->inp_sp->sp_out != NULL) { key_freesp(inp->inp_sp->sp_out); inp->inp_sp->sp_out = NULL; } ipsec_delpcbpolicy(inp->inp_sp); inp->inp_sp = NULL; return 0; } #ifdef INET6 int ipsec6_set_policy(in6p, optname, request, len, priv) struct in6pcb *in6p; int optname; caddr_t request; size_t len; int priv; { struct sadb_x_policy *xpl; struct secpolicy **pcb_sp; /* sanity check. */ if (in6p == NULL || request == NULL) return EINVAL; if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)request; /* select direction */ switch (xpl->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: pcb_sp = &in6p->in6p_sp->sp_in; break; case IPSEC_DIR_OUTBOUND: pcb_sp = &in6p->in6p_sp->sp_out; break; default: ipseclog((LOG_ERR, "ipsec6_set_policy: invalid direction=%u\n", xpl->sadb_x_policy_dir)); return EINVAL; } return ipsec_set_policy(pcb_sp, optname, request, len, priv); } int ipsec6_get_policy(in6p, request, len, mp) struct in6pcb *in6p; caddr_t request; size_t len; struct mbuf **mp; { struct sadb_x_policy *xpl; struct secpolicy *pcb_sp; /* sanity check. 
*/ if (in6p == NULL || request == NULL || mp == NULL) return EINVAL; if (in6p->in6p_sp == NULL) panic("policy in PCB is NULL\n"); if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)request; /* select direction */ switch (xpl->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: pcb_sp = in6p->in6p_sp->sp_in; break; case IPSEC_DIR_OUTBOUND: pcb_sp = in6p->in6p_sp->sp_out; break; default: ipseclog((LOG_ERR, "ipsec6_set_policy: invalid direction=%u\n", xpl->sadb_x_policy_dir)); return EINVAL; } return ipsec_get_policy(pcb_sp, mp); } int ipsec6_delete_pcbpolicy(in6p) struct in6pcb *in6p; { /* sanity check. */ if (in6p == NULL) panic("ipsec6_delete_pcbpolicy: NULL pointer was passed.\n"); if (in6p->in6p_sp == NULL) return 0; if (in6p->in6p_sp->sp_in != NULL) { key_freesp(in6p->in6p_sp->sp_in); in6p->in6p_sp->sp_in = NULL; } if (in6p->in6p_sp->sp_out != NULL) { key_freesp(in6p->in6p_sp->sp_out); in6p->in6p_sp->sp_out = NULL; } ipsec_delpcbpolicy(in6p->in6p_sp); in6p->in6p_sp = NULL; return 0; } #endif /* * return current level. * Either IPSEC_LEVEL_USE or IPSEC_LEVEL_REQUIRE are always returned. */ u_int ipsec_get_reqlevel(isr) struct ipsecrequest *isr; { u_int level = 0; u_int esp_trans_deflev, esp_net_deflev, ah_trans_deflev, ah_net_deflev; /* sanity check */ if (isr == NULL || isr->sp == NULL) panic("ipsec_get_reqlevel: NULL pointer is passed.\n"); if (((struct sockaddr *)&isr->sp->spidx.src)->sa_family != ((struct sockaddr *)&isr->sp->spidx.dst)->sa_family) panic("ipsec_get_reqlevel: family mismatched.\n"); /* XXX note that we have ipseclog() expanded here - code sync issue */ #define IPSEC_CHECK_DEFAULT(lev) \ (((lev) != IPSEC_LEVEL_USE && (lev) != IPSEC_LEVEL_REQUIRE \ && (lev) != IPSEC_LEVEL_UNIQUE) \ ? (ipsec_debug \ ? 
log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\ (lev), IPSEC_LEVEL_REQUIRE) \ : 0), \ (lev) = IPSEC_LEVEL_REQUIRE, \ (lev) \ : (lev)) /* set default level */ switch (((struct sockaddr *)&isr->sp->spidx.src)->sa_family) { #ifdef INET case AF_INET: esp_trans_deflev = IPSEC_CHECK_DEFAULT(ip4_esp_trans_deflev); esp_net_deflev = IPSEC_CHECK_DEFAULT(ip4_esp_net_deflev); ah_trans_deflev = IPSEC_CHECK_DEFAULT(ip4_ah_trans_deflev); ah_net_deflev = IPSEC_CHECK_DEFAULT(ip4_ah_net_deflev); break; #endif #ifdef INET6 case AF_INET6: esp_trans_deflev = IPSEC_CHECK_DEFAULT(ip6_esp_trans_deflev); esp_net_deflev = IPSEC_CHECK_DEFAULT(ip6_esp_net_deflev); ah_trans_deflev = IPSEC_CHECK_DEFAULT(ip6_ah_trans_deflev); ah_net_deflev = IPSEC_CHECK_DEFAULT(ip6_ah_net_deflev); break; #endif /* INET6 */ default: panic("key_get_reqlevel: Unknown family. %d\n", ((struct sockaddr *)&isr->sp->spidx.src)->sa_family); } #undef IPSEC_CHECK_DEFAULT /* set level */ switch (isr->level) { case IPSEC_LEVEL_DEFAULT: switch (isr->saidx.proto) { case IPPROTO_ESP: if (isr->saidx.mode == IPSEC_MODE_TUNNEL) level = esp_net_deflev; else level = esp_trans_deflev; break; case IPPROTO_AH: if (isr->saidx.mode == IPSEC_MODE_TUNNEL) level = ah_net_deflev; else level = ah_trans_deflev; case IPPROTO_IPCOMP: /* * we don't really care, as IPcomp document says that * we shouldn't compress small packets */ level = IPSEC_LEVEL_USE; break; default: panic("ipsec_get_reqlevel: " "Illegal protocol defined %u\n", isr->saidx.proto); } break; case IPSEC_LEVEL_USE: case IPSEC_LEVEL_REQUIRE: level = isr->level; break; case IPSEC_LEVEL_UNIQUE: level = IPSEC_LEVEL_REQUIRE; break; default: panic("ipsec_get_reqlevel: Illegal IPsec level %u\n", isr->level); } return level; } /* * Check AH/ESP integrity. 
* OUT: * 0: valid * 1: invalid */ static int ipsec_in_reject(sp, m) struct secpolicy *sp; struct mbuf *m; { struct ipsecrequest *isr; u_int level; int need_auth, need_conf, need_icv; KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec_in_reject: using SP\n"); kdebug_secpolicy(sp)); /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: return 1; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: return 0; case IPSEC_POLICY_IPSEC: break; case IPSEC_POLICY_ENTRUST: default: panic("ipsec_hdrsiz: Invalid policy found. %d\n", sp->policy); } need_auth = 0; need_conf = 0; need_icv = 0; /* XXX should compare policy against ipsec header history */ for (isr = sp->req; isr != NULL; isr = isr->next) { /* get current level */ level = ipsec_get_reqlevel(isr); switch (isr->saidx.proto) { case IPPROTO_ESP: if (level == IPSEC_LEVEL_REQUIRE) { need_conf++; if (isr->sav != NULL && isr->sav->flags == SADB_X_EXT_NONE && isr->sav->alg_auth != SADB_AALG_NONE) need_icv++; } break; case IPPROTO_AH: if (level == IPSEC_LEVEL_REQUIRE) { need_auth++; need_icv++; } break; case IPPROTO_IPCOMP: /* * we don't really care, as IPcomp document says that * we shouldn't compress small packets, IPComp policy * should always be treated as being in "use" level. */ break; } } KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_in_reject: auth:%d conf:%d icv:%d m_flags:%x\n", need_auth, need_conf, need_icv, m->m_flags)); if ((need_conf && !(m->m_flags & M_DECRYPTED)) || (!need_auth && need_icv && !(m->m_flags & M_AUTHIPDGM)) || (need_auth && !(m->m_flags & M_AUTHIPHDR))) return 1; return 0; } /* * Check AH/ESP integrity. * This function is called from tcp_input(), udp_input(), * and {ah,esp}4_input for tunnel mode */ int ipsec4_in_reject_so(m, so) struct mbuf *m; struct socket *so; { struct secpolicy *sp = NULL; int error; int result; /* sanity check */ if (m == NULL) return 0; /* XXX should be panic ? */ /* get SP for this packet. 
* When we are called from ip_forward(), we call * ipsec4_getpolicybyaddr() with IP_FORWARDING flag. */ if (so == NULL) sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error); else sp = ipsec4_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error); if (sp == NULL) return 0; /* XXX should be panic ? * -> No, there may be error. */ result = ipsec_in_reject(sp, m); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec4_in_reject_so call free SP:%p\n", sp)); key_freesp(sp); return result; } int ipsec4_in_reject(m, inp) struct mbuf *m; struct inpcb *inp; { if (inp == NULL) return ipsec4_in_reject_so(m, NULL); if (inp->inp_socket) return ipsec4_in_reject_so(m, inp->inp_socket); else panic("ipsec4_in_reject: invalid inpcb/socket"); } #ifdef INET6 /* * Check AH/ESP integrity. * This function is called from tcp6_input(), udp6_input(), * and {ah,esp}6_input for tunnel mode */ int ipsec6_in_reject_so(m, so) struct mbuf *m; struct socket *so; { struct secpolicy *sp = NULL; int error; int result; /* sanity check */ if (m == NULL) return 0; /* XXX should be panic ? */ /* get SP for this packet. * When we are called from ip_forward(), we call * ipsec6_getpolicybyaddr() with IP_FORWARDING flag. */ if (so == NULL) sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error); else sp = ipsec6_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error); if (sp == NULL) return 0; /* XXX should be panic ? */ result = ipsec_in_reject(sp, m); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec6_in_reject_so call free SP:%p\n", sp)); key_freesp(sp); return result; } int ipsec6_in_reject(m, in6p) struct mbuf *m; struct in6pcb *in6p; { if (in6p == NULL) return ipsec6_in_reject_so(m, NULL); if (in6p->in6p_socket) return ipsec6_in_reject_so(m, in6p->in6p_socket); else panic("ipsec6_in_reject: invalid in6p/socket"); } #endif /* * compute the byte size to be occupied by IPsec header. * in case it is tunneled, it includes the size of outer IP header. 
* NOTE: SP passed is free in this function. */ static size_t ipsec_hdrsiz(sp) struct secpolicy *sp; { struct ipsecrequest *isr; size_t siz, clen; KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec_hdrsiz: using SP\n"); kdebug_secpolicy(sp)); /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: return 0; case IPSEC_POLICY_IPSEC: break; case IPSEC_POLICY_ENTRUST: default: panic("ipsec_hdrsiz: Invalid policy found. %d\n", sp->policy); } siz = 0; for (isr = sp->req; isr != NULL; isr = isr->next) { clen = 0; switch (isr->saidx.proto) { case IPPROTO_ESP: #ifdef IPSEC_ESP clen = esp_hdrsiz(isr); #else clen = 0; /* XXX */ #endif break; case IPPROTO_AH: clen = ah_hdrsiz(isr); break; case IPPROTO_IPCOMP: clen = sizeof(struct ipcomp); break; } if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { switch (((struct sockaddr *)&isr->saidx.dst)->sa_family) { case AF_INET: clen += sizeof(struct ip); break; #ifdef INET6 case AF_INET6: clen += sizeof(struct ip6_hdr); break; #endif default: ipseclog((LOG_ERR, "ipsec_hdrsiz: " "unknown AF %d in IPsec tunnel SA\n", ((struct sockaddr *)&isr->saidx.dst)->sa_family)); break; } } siz += clen; } return siz; } /* This function is called from ip_forward() and ipsec4_hdrsize_tcp(). */ size_t ipsec4_hdrsiz(m, dir, inp) struct mbuf *m; u_int dir; struct inpcb *inp; { struct secpolicy *sp = NULL; int error; size_t size; /* sanity check */ if (m == NULL) return 0; /* XXX should be panic ? */ if (inp != NULL && inp->inp_socket == NULL) panic("ipsec4_hdrsize: why is socket NULL but there is PCB."); /* get SP for this packet. * When we are called from ip_forward(), we call * ipsec4_getpolicybyaddr() with IP_FORWARDING flag. */ if (inp == NULL) sp = ipsec4_getpolicybyaddr(m, dir, IP_FORWARDING, &error); else sp = ipsec4_getpolicybysock(m, dir, inp->inp_socket, &error); if (sp == NULL) return 0; /* XXX should be panic ? 
*/ size = ipsec_hdrsiz(sp); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec4_hdrsiz call free SP:%p\n", sp)); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec4_hdrsiz: size:%lu.\n", (unsigned long)size)); key_freesp(sp); return size; } #ifdef INET6 /* This function is called from ipsec6_hdrsize_tcp(), * and maybe from ip6_forward.() */ size_t ipsec6_hdrsiz(m, dir, in6p) struct mbuf *m; u_int dir; struct in6pcb *in6p; { struct secpolicy *sp = NULL; int error; size_t size; /* sanity check */ if (m == NULL) return 0; /* XXX shoud be panic ? */ if (in6p != NULL && in6p->in6p_socket == NULL) panic("ipsec6_hdrsize: why is socket NULL but there is PCB."); /* get SP for this packet */ /* XXX Is it right to call with IP_FORWARDING. */ if (in6p == NULL) sp = ipsec6_getpolicybyaddr(m, dir, IP_FORWARDING, &error); else sp = ipsec6_getpolicybysock(m, dir, in6p->in6p_socket, &error); if (sp == NULL) return 0; size = ipsec_hdrsiz(sp); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec6_hdrsiz call free SP:%p\n", sp)); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_hdrsiz: size:%lu.\n", (unsigned long)size)); key_freesp(sp); return size; } #endif /* INET6 */ #ifdef INET /* * encapsulate for ipsec tunnel. * ip->ip_src must be fixed later on. */ static int ipsec4_encapsulate(m, sav) struct mbuf *m; struct secasvar *sav; { struct ip *oip; struct ip *ip; size_t hlen; size_t plen; /* can't tunnel between different AFs */ if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family || ((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET) { m_freem(m); return EINVAL; } #if 0 /* XXX if the dst is myself, perform nothing. 
*/ if (key_ismyaddr((struct sockaddr *)&sav->sah->saidx.dst)) { m_freem(m); return EINVAL; } #endif if (m->m_len < sizeof(*ip)) panic("ipsec4_encapsulate: assumption failed (first mbuf length)"); ip = mtod(m, struct ip *); #ifdef _IP_VHL hlen = _IP_VHL_HL(ip->ip_vhl) << 2; #else hlen = ip->ip_hl << 2; #endif if (m->m_len != hlen) panic("ipsec4_encapsulate: assumption failed (first mbuf length)"); /* generate header checksum */ ip->ip_sum = 0; #ifdef _IP_VHL if (ip->ip_vhl == IP_VHL_BORING) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(m, hlen); #else ip->ip_sum = in_cksum(m, hlen); #endif plen = m->m_pkthdr.len; /* * grow the mbuf to accomodate the new IPv4 header. * NOTE: IPv4 options will never be copied. */ if (M_LEADINGSPACE(m->m_next) < hlen) { struct mbuf *n; MGET(n, M_DONTWAIT, MT_DATA); if (!n) { m_freem(m); return ENOBUFS; } n->m_len = hlen; n->m_next = m->m_next; m->m_next = n; m->m_pkthdr.len += hlen; oip = mtod(n, struct ip *); } else { m->m_next->m_len += hlen; m->m_next->m_data -= hlen; m->m_pkthdr.len += hlen; oip = mtod(m->m_next, struct ip *); } ip = mtod(m, struct ip *); ovbcopy((caddr_t)ip, (caddr_t)oip, hlen); m->m_len = sizeof(struct ip); m->m_pkthdr.len -= (hlen - sizeof(struct ip)); /* construct new IPv4 header. see RFC 2401 5.1.2.1 */ /* ECN consideration. 
*/ ip_ecn_ingress(ip4_ipsec_ecn, &ip->ip_tos, &oip->ip_tos); #ifdef _IP_VHL ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2); #else ip->ip_hl = sizeof(struct ip) >> 2; #endif ip->ip_off &= htons(~IP_OFFMASK); ip->ip_off &= htons(~IP_MF); switch (ip4_ipsec_dfbit) { case 0: /* clear DF bit */ ip->ip_off &= htons(~IP_DF); break; case 1: /* set DF bit */ ip->ip_off |= htons(IP_DF); break; default: /* copy DF bit */ break; } ip->ip_p = IPPROTO_IPIP; if (plen + sizeof(struct ip) < IP_MAXPACKET) ip->ip_len = htons(plen + sizeof(struct ip)); else { ipseclog((LOG_ERR, "IPv4 ipsec: size exceeds limit: " "leave ip_len as is (invalid packet)\n")); } #ifdef RANDOM_IP_ID ip->ip_id = ip_randomid(); #else ip->ip_id = htons(ip_id++); #endif bcopy(&((struct sockaddr_in *)&sav->sah->saidx.src)->sin_addr, &ip->ip_src, sizeof(ip->ip_src)); bcopy(&((struct sockaddr_in *)&sav->sah->saidx.dst)->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)); ip->ip_ttl = IPDEFTTL; /* XXX Should ip_src be updated later ? */ return 0; } #endif /* INET */ #ifdef INET6 static int ipsec6_encapsulate(m, sav) struct mbuf *m; struct secasvar *sav; { struct ip6_hdr *oip6; struct ip6_hdr *ip6; size_t plen; /* can't tunnel between different AFs */ if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family || ((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET6) { m_freem(m); return EINVAL; } #if 0 /* XXX if the dst is myself, perform nothing. */ if (key_ismyaddr((struct sockaddr *)&sav->sah->saidx.dst)) { m_freem(m); return EINVAL; } #endif plen = m->m_pkthdr.len; /* * grow the mbuf to accomodate the new IPv6 header. 
*/ if (m->m_len != sizeof(struct ip6_hdr)) panic("ipsec6_encapsulate: assumption failed (first mbuf length)"); if (M_LEADINGSPACE(m->m_next) < sizeof(struct ip6_hdr)) { struct mbuf *n; MGET(n, M_DONTWAIT, MT_DATA); if (!n) { m_freem(m); return ENOBUFS; } n->m_len = sizeof(struct ip6_hdr); n->m_next = m->m_next; m->m_next = n; m->m_pkthdr.len += sizeof(struct ip6_hdr); oip6 = mtod(n, struct ip6_hdr *); } else { m->m_next->m_len += sizeof(struct ip6_hdr); m->m_next->m_data -= sizeof(struct ip6_hdr); m->m_pkthdr.len += sizeof(struct ip6_hdr); oip6 = mtod(m->m_next, struct ip6_hdr *); } ip6 = mtod(m, struct ip6_hdr *); ovbcopy((caddr_t)ip6, (caddr_t)oip6, sizeof(struct ip6_hdr)); /* Fake link-local scope-class addresses */ if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src)) oip6->ip6_src.s6_addr16[1] = 0; if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst)) oip6->ip6_dst.s6_addr16[1] = 0; /* construct new IPv6 header. see RFC 2401 5.1.2.2 */ /* ECN consideration. */ ip6_ecn_ingress(ip6_ipsec_ecn, &ip6->ip6_flow, &oip6->ip6_flow); if (plen < IPV6_MAXPACKET - sizeof(struct ip6_hdr)) ip6->ip6_plen = htons(plen); else { /* ip6->ip6_plen will be updated in ip6_output() */ } ip6->ip6_nxt = IPPROTO_IPV6; bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.src)->sin6_addr, &ip6->ip6_src, sizeof(ip6->ip6_src)); bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.dst)->sin6_addr, &ip6->ip6_dst, sizeof(ip6->ip6_dst)); ip6->ip6_hlim = IPV6_DEFHLIM; /* XXX Should ip6_src be updated later ? */ return 0; } #endif /* INET6 */ /* * Check the variable replay window. * ipsec_chkreplay() performs replay check before ICV verification. * ipsec_updatereplay() updates replay bitmap. This must be called after * ICV verification (it also performs replay check, which is usually done * beforehand). * 0 (zero) is returned if packet disallowed, 1 if packet permitted. * * based on RFC 2401. 
*/ int ipsec_chkreplay(seq, sav) u_int32_t seq; struct secasvar *sav; { const struct secreplay *replay; u_int32_t diff; int fr; u_int32_t wsizeb; /* constant: bits of window size */ int frlast; /* constant: last frame */ /* sanity check */ if (sav == NULL) panic("ipsec_chkreplay: NULL pointer was passed.\n"); replay = sav->replay; if (replay->wsize == 0) return 1; /* no need to check replay. */ /* constant */ frlast = replay->wsize - 1; wsizeb = replay->wsize << 3; /* sequence number of 0 is invalid */ if (seq == 0) return 0; /* first time is always okay */ if (replay->count == 0) return 1; if (seq > replay->lastseq) { /* larger sequences are okay */ return 1; } else { /* seq is equal or less than lastseq. */ diff = replay->lastseq - seq; /* over range to check, i.e. too old or wrapped */ if (diff >= wsizeb) return 0; fr = frlast - diff / 8; /* this packet already seen ? */ if ((replay->bitmap)[fr] & (1 << (diff % 8))) return 0; /* out of order but good */ return 1; } } /* * check replay counter whether to update or not. * OUT: 0: OK * 1: NG */ int ipsec_updatereplay(seq, sav) u_int32_t seq; struct secasvar *sav; { struct secreplay *replay; u_int32_t diff; int fr; u_int32_t wsizeb; /* constant: bits of window size */ int frlast; /* constant: last frame */ /* sanity check */ if (sav == NULL) panic("ipsec_chkreplay: NULL pointer was passed.\n"); replay = sav->replay; if (replay->wsize == 0) goto ok; /* no need to check replay. */ /* constant */ frlast = replay->wsize - 1; wsizeb = replay->wsize << 3; /* sequence number of 0 is invalid */ if (seq == 0) return 1; /* first time */ if (replay->count == 0) { replay->lastseq = seq; bzero(replay->bitmap, replay->wsize); (replay->bitmap)[frlast] = 1; goto ok; } if (seq > replay->lastseq) { /* seq is larger than lastseq. 
*/ diff = seq - replay->lastseq; /* new larger sequence number */ if (diff < wsizeb) { /* In window */ /* set bit for this packet */ vshiftl(replay->bitmap, diff, replay->wsize); (replay->bitmap)[frlast] |= 1; } else { /* this packet has a "way larger" */ bzero(replay->bitmap, replay->wsize); (replay->bitmap)[frlast] = 1; } replay->lastseq = seq; /* larger is good */ } else { /* seq is equal or less than lastseq. */ diff = replay->lastseq - seq; /* over range to check, i.e. too old or wrapped */ if (diff >= wsizeb) return 1; fr = frlast - diff / 8; /* this packet already seen ? */ if ((replay->bitmap)[fr] & (1 << (diff % 8))) return 1; /* mark as seen */ (replay->bitmap)[fr] |= (1 << (diff % 8)); /* out of order but good */ } ok: if (replay->count == ~0) { /* set overflow flag */ replay->overflow++; /* don't increment, no more packets accepted */ if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0) return 1; ipseclog((LOG_WARNING, "replay counter made %d cycle. %s\n", replay->overflow, ipsec_logsastr(sav))); } replay->count++; return 0; } /* * shift variable length buffer to left. * IN: bitmap: pointer to the buffer * nbit: the number of to shift. * wsize: buffer size (bytes). */ static void vshiftl(bitmap, nbit, wsize) unsigned char *bitmap; int nbit, wsize; { int s, j, i; unsigned char over; for (j = 0; j < nbit; j += 8) { s = (nbit - j < 8) ? 
(nbit - j): 8; bitmap[0] <<= s; for (i = 1; i < wsize; i++) { over = (bitmap[i] >> (8 - s)); bitmap[i] <<= s; bitmap[i-1] |= over; } } return; } const char * ipsec4_logpacketstr(ip, spi) struct ip *ip; u_int32_t spi; { static char buf[256]; char *p; u_int8_t *s, *d; s = (u_int8_t *)(&ip->ip_src); d = (u_int8_t *)(&ip->ip_dst); p = buf; snprintf(buf, sizeof(buf), "packet(SPI=%u ", (u_int32_t)ntohl(spi)); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), "src=%u.%u.%u.%u", s[0], s[1], s[2], s[3]); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), " dst=%u.%u.%u.%u", d[0], d[1], d[2], d[3]); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), ")"); return buf; } #ifdef INET6 const char * ipsec6_logpacketstr(ip6, spi) struct ip6_hdr *ip6; u_int32_t spi; { static char buf[256]; char *p; p = buf; snprintf(buf, sizeof(buf), "packet(SPI=%u ", (u_int32_t)ntohl(spi)); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), "src=%s", ip6_sprintf(&ip6->ip6_src)); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), " dst=%s", ip6_sprintf(&ip6->ip6_dst)); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), ")"); return buf; } #endif /* INET6 */ const char * ipsec_logsastr(sav) struct secasvar *sav; { static char buf[256]; char *p; struct secasindex *saidx = &sav->sah->saidx; /* validity check */ if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family) panic("ipsec_logsastr: family mismatched.\n"); p = buf; snprintf(buf, sizeof(buf), "SA(SPI=%u ", (u_int32_t)ntohl(sav->spi)); while (p && *p) p++; if (((struct sockaddr *)&saidx->src)->sa_family == AF_INET) { u_int8_t *s, *d; s = (u_int8_t *)&((struct sockaddr_in *)&saidx->src)->sin_addr; d = (u_int8_t *)&((struct sockaddr_in *)&saidx->dst)->sin_addr; snprintf(p, sizeof(buf) - (p - buf), "src=%d.%d.%d.%d dst=%d.%d.%d.%d", s[0], s[1], s[2], s[3], d[0], d[1], d[2], d[3]); } #ifdef INET6 else if (((struct sockaddr *)&saidx->src)->sa_family == 
AF_INET6) { snprintf(p, sizeof(buf) - (p - buf), "src=%s", ip6_sprintf(&((struct sockaddr_in6 *)&saidx->src)->sin6_addr)); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), " dst=%s", ip6_sprintf(&((struct sockaddr_in6 *)&saidx->dst)->sin6_addr)); } #endif while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), ")"); return buf; } void ipsec_dumpmbuf(m) struct mbuf *m; { int totlen; int i; u_char *p; totlen = 0; printf("---\n"); while (m) { p = mtod(m, u_char *); for (i = 0; i < m->m_len; i++) { printf("%02x ", p[i]); totlen++; if (totlen % 16 == 0) printf("\n"); } m = m->m_next; } if (totlen % 16 != 0) printf("\n"); printf("---\n"); } #ifdef INET /* * IPsec output logic for IPv4. */ int ipsec4_output(state, sp, flags) struct ipsec_output_state *state; struct secpolicy *sp; int flags; { struct ip *ip = NULL; struct ipsecrequest *isr = NULL; struct secasindex saidx; int s; int error; struct sockaddr_in *dst4; struct sockaddr_in *sin; if (!state) panic("state == NULL in ipsec4_output"); if (!state->m) panic("state->m == NULL in ipsec4_output"); if (!state->ro) panic("state->ro == NULL in ipsec4_output"); if (!state->dst) panic("state->dst == NULL in ipsec4_output"); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec4_output: applyed SP\n"); kdebug_secpolicy(sp)); for (isr = sp->req; isr != NULL; isr = isr->next) { #if 0 /* give up to check restriction of transport mode */ /* XXX but should be checked somewhere */ /* * some of the IPsec operation must be performed only in * originating case. 
*/ if (isr->saidx.mode == IPSEC_MODE_TRANSPORT && (flags & IP_FORWARDING)) continue; #endif /* make SA index for search proper SA */ ip = mtod(state->m, struct ip *); bcopy(&isr->saidx, &saidx, sizeof(saidx)); saidx.mode = isr->saidx.mode; saidx.reqid = isr->saidx.reqid; sin = (struct sockaddr_in *)&saidx.src; if (sin->sin_len == 0) { sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; bcopy(&ip->ip_src, &sin->sin_addr, sizeof(sin->sin_addr)); } sin = (struct sockaddr_in *)&saidx.dst; if (sin->sin_len == 0) { sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; bcopy(&ip->ip_dst, &sin->sin_addr, sizeof(sin->sin_addr)); } if ((error = key_checkrequest(isr, &saidx)) != 0) { /* * IPsec processing is required, but no SA found. * I assume that key_acquire() had been called * to get/establish the SA. Here I discard * this packet because it is responsibility for * upper layer to retransmit the packet. */ ipsecstat.out_nosa++; goto bad; } /* validity check */ if (isr->sav == NULL) { switch (ipsec_get_reqlevel(isr)) { case IPSEC_LEVEL_USE: continue; case IPSEC_LEVEL_REQUIRE: /* must be not reached here. */ panic("ipsec4_output: no SA found, but required."); } } /* * If there is no valid SA, we give up to process any * more. In such a case, the SA's status is changed * from DYING to DEAD after allocating. If a packet * send to the receiver by dead SA, the receiver can * not decode a packet because SA has been dead. */ if (isr->sav->state != SADB_SASTATE_MATURE && isr->sav->state != SADB_SASTATE_DYING) { ipsecstat.out_nosa++; error = EINVAL; goto bad; } /* * There may be the case that SA status will be changed when * we are refering to one. So calling splsoftnet(). */ s = splnet(); if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* * build IPsec tunnel. 
*/ /* XXX should be processed with other familiy */ if (((struct sockaddr *)&isr->sav->sah->saidx.src)->sa_family != AF_INET) { ipseclog((LOG_ERR, "ipsec4_output: " "family mismatched between inner and outer spi=%u\n", (u_int32_t)ntohl(isr->sav->spi))); splx(s); error = EAFNOSUPPORT; goto bad; } state->m = ipsec4_splithdr(state->m); if (!state->m) { splx(s); error = ENOMEM; goto bad; } error = ipsec4_encapsulate(state->m, isr->sav); splx(s); if (error) { state->m = NULL; goto bad; } ip = mtod(state->m, struct ip *); state->ro = &isr->sav->sah->sa_route; state->dst = (struct sockaddr *)&state->ro->ro_dst; dst4 = (struct sockaddr_in *)state->dst; if (state->ro->ro_rt && ((state->ro->ro_rt->rt_flags & RTF_UP) == 0 || dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) { RTFREE(state->ro->ro_rt); state->ro->ro_rt = NULL; } if (state->ro->ro_rt == 0) { dst4->sin_family = AF_INET; dst4->sin_len = sizeof(*dst4); dst4->sin_addr = ip->ip_dst; rtalloc(state->ro); } if (state->ro->ro_rt == 0) { ipstat.ips_noroute++; error = EHOSTUNREACH; goto bad; } /* adjust state->dst if tunnel endpoint is offlink */ if (state->ro->ro_rt->rt_flags & RTF_GATEWAY) { state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway; dst4 = (struct sockaddr_in *)state->dst; } } else splx(s); state->m = ipsec4_splithdr(state->m); if (!state->m) { error = ENOMEM; goto bad; } switch (isr->saidx.proto) { case IPPROTO_ESP: #ifdef IPSEC_ESP if ((error = esp4_output(state->m, isr)) != 0) { state->m = NULL; goto bad; } break; #else m_freem(state->m); state->m = NULL; error = EINVAL; goto bad; #endif case IPPROTO_AH: if ((error = ah4_output(state->m, isr)) != 0) { state->m = NULL; goto bad; } break; case IPPROTO_IPCOMP: if ((error = ipcomp4_output(state->m, isr)) != 0) { state->m = NULL; goto bad; } break; default: ipseclog((LOG_ERR, "ipsec4_output: unknown ipsec protocol %d\n", isr->saidx.proto)); m_freem(state->m); state->m = NULL; error = EINVAL; goto bad; } if (state->m == 0) { error = ENOMEM; goto bad; } ip = 
mtod(state->m, struct ip *); } return 0; bad: m_freem(state->m); state->m = NULL; return error; } #endif #ifdef INET6 /* * IPsec output logic for IPv6, transport mode. */ int ipsec6_output_trans(state, nexthdrp, mprev, sp, flags, tun) struct ipsec_output_state *state; u_char *nexthdrp; struct mbuf *mprev; struct secpolicy *sp; int flags; int *tun; { struct ip6_hdr *ip6; struct ipsecrequest *isr = NULL; struct secasindex saidx; int error = 0; int plen; struct sockaddr_in6 *sin6; if (!state) panic("state == NULL in ipsec6_output_trans"); if (!state->m) panic("state->m == NULL in ipsec6_output_trans"); if (!nexthdrp) panic("nexthdrp == NULL in ipsec6_output_trans"); if (!mprev) panic("mprev == NULL in ipsec6_output_trans"); if (!sp) panic("sp == NULL in ipsec6_output_trans"); if (!tun) panic("tun == NULL in ipsec6_output_trans"); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_output_trans: applyed SP\n"); kdebug_secpolicy(sp)); *tun = 0; for (isr = sp->req; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* the rest will be handled by ipsec6_output_tunnel() */ break; } /* make SA index for search proper SA */ ip6 = mtod(state->m, struct ip6_hdr *); bcopy(&isr->saidx, &saidx, sizeof(saidx)); saidx.mode = isr->saidx.mode; saidx.reqid = isr->saidx.reqid; sin6 = (struct sockaddr_in6 *)&saidx.src; if (sin6->sin6_len == 0) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; bcopy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(ip6->ip6_src)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); } } sin6 = (struct sockaddr_in6 *)&saidx.dst; if (sin6->sin6_len == 0) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(ip6->ip6_dst)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { /* fix scope id for comparing SPD */ 
sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); } } if (key_checkrequest(isr, &saidx) == ENOENT) { /* * IPsec processing is required, but no SA found. * I assume that key_acquire() had been called * to get/establish the SA. Here I discard * this packet because it is responsibility for * upper layer to retransmit the packet. */ ipsec6stat.out_nosa++; error = ENOENT; /* * Notify the fact that the packet is discarded * to ourselves. I believe this is better than * just silently discarding. (jinmei@kame.net) * XXX: should we restrict the error to TCP packets? * XXX: should we directly notify sockets via * pfctlinputs? */ icmp6_error(state->m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN, 0); state->m = NULL; /* icmp6_error freed the mbuf */ goto bad; } /* validity check */ if (isr->sav == NULL) { switch (ipsec_get_reqlevel(isr)) { case IPSEC_LEVEL_USE: continue; case IPSEC_LEVEL_REQUIRE: /* must be not reached here. */ panic("ipsec6_output_trans: no SA found, but required."); } } /* * If there is no valid SA, we give up to process. * see same place at ipsec4_output(). 
*/ if (isr->sav->state != SADB_SASTATE_MATURE && isr->sav->state != SADB_SASTATE_DYING) { ipsec6stat.out_nosa++; error = EINVAL; goto bad; } switch (isr->saidx.proto) { case IPPROTO_ESP: #ifdef IPSEC_ESP error = esp6_output(state->m, nexthdrp, mprev->m_next, isr); #else m_freem(state->m); error = EINVAL; #endif break; case IPPROTO_AH: error = ah6_output(state->m, nexthdrp, mprev->m_next, isr); break; case IPPROTO_IPCOMP: error = ipcomp6_output(state->m, nexthdrp, mprev->m_next, isr); break; default: ipseclog((LOG_ERR, "ipsec6_output_trans: " "unknown ipsec protocol %d\n", isr->saidx.proto)); m_freem(state->m); ipsec6stat.out_inval++; error = EINVAL; break; } if (error) { state->m = NULL; goto bad; } plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr); if (plen > IPV6_MAXPACKET) { ipseclog((LOG_ERR, "ipsec6_output_trans: " "IPsec with IPv6 jumbogram is not supported\n")); ipsec6stat.out_inval++; error = EINVAL; /* XXX */ goto bad; } ip6 = mtod(state->m, struct ip6_hdr *); ip6->ip6_plen = htons(plen); } /* if we have more to go, we need a tunnel mode processing */ if (isr != NULL) *tun = 1; return 0; bad: m_freem(state->m); state->m = NULL; return error; } /* * IPsec output logic for IPv6, tunnel mode. */ int ipsec6_output_tunnel(state, sp, flags) struct ipsec_output_state *state; struct secpolicy *sp; int flags; { struct ip6_hdr *ip6; struct ipsecrequest *isr = NULL; struct secasindex saidx; int error = 0; int plen; struct sockaddr_in6* dst6; int s; if (!state) panic("state == NULL in ipsec6_output_tunnel"); if (!state->m) panic("state->m == NULL in ipsec6_output_tunnel"); if (!sp) panic("sp == NULL in ipsec6_output_tunnel"); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_output_tunnel: applyed SP\n"); kdebug_secpolicy(sp)); /* * transport mode ipsec (before the 1st tunnel mode) is already * processed by ipsec6_output_trans(). 
*/ for (isr = sp->req; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_TUNNEL) break; } for (/* already initialized */; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* When tunnel mode, SA peers must be specified. */ bcopy(&isr->saidx, &saidx, sizeof(saidx)); } else { /* make SA index to look for a proper SA */ struct sockaddr_in6 *sin6; bzero(&saidx, sizeof(saidx)); saidx.proto = isr->saidx.proto; saidx.mode = isr->saidx.mode; saidx.reqid = isr->saidx.reqid; ip6 = mtod(state->m, struct ip6_hdr *); sin6 = (struct sockaddr_in6 *)&saidx.src; if (sin6->sin6_len == 0) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; bcopy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(ip6->ip6_src)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); } } sin6 = (struct sockaddr_in6 *)&saidx.dst; if (sin6->sin6_len == 0) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(ip6->ip6_dst)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); } } } if (key_checkrequest(isr, &saidx) == ENOENT) { /* * IPsec processing is required, but no SA found. * I assume that key_acquire() had been called * to get/establish the SA. Here I discard * this packet because it is responsibility for * upper layer to retransmit the packet. */ ipsec6stat.out_nosa++; error = ENOENT; goto bad; } /* validity check */ if (isr->sav == NULL) { switch (ipsec_get_reqlevel(isr)) { case IPSEC_LEVEL_USE: continue; case IPSEC_LEVEL_REQUIRE: /* must be not reached here. */ panic("ipsec6_output_tunnel: no SA found, but required."); } } /* * If there is no valid SA, we give up to process. * see same place at ipsec4_output(). 
*/ if (isr->sav->state != SADB_SASTATE_MATURE && isr->sav->state != SADB_SASTATE_DYING) { ipsec6stat.out_nosa++; error = EINVAL; goto bad; } /* * There may be the case that SA status will be changed when * we are refering to one. So calling splsoftnet(). */ s = splnet(); if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* * build IPsec tunnel. */ /* XXX should be processed with other familiy */ if (((struct sockaddr *)&isr->sav->sah->saidx.src)->sa_family != AF_INET6) { ipseclog((LOG_ERR, "ipsec6_output_tunnel: " "family mismatched between inner and outer, spi=%u\n", (u_int32_t)ntohl(isr->sav->spi))); splx(s); ipsec6stat.out_inval++; error = EAFNOSUPPORT; goto bad; } state->m = ipsec6_splithdr(state->m); if (!state->m) { splx(s); ipsec6stat.out_nomem++; error = ENOMEM; goto bad; } error = ipsec6_encapsulate(state->m, isr->sav); splx(s); if (error) { state->m = 0; goto bad; } ip6 = mtod(state->m, struct ip6_hdr *); state->ro = &isr->sav->sah->sa_route; state->dst = (struct sockaddr *)&state->ro->ro_dst; dst6 = (struct sockaddr_in6 *)state->dst; if (state->ro->ro_rt && ((state->ro->ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) { RTFREE(state->ro->ro_rt); state->ro->ro_rt = NULL; } if (state->ro->ro_rt == 0) { bzero(dst6, sizeof(*dst6)); dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = ip6->ip6_dst; rtalloc(state->ro); } if (state->ro->ro_rt == 0) { ip6stat.ip6s_noroute++; ipsec6stat.out_noroute++; error = EHOSTUNREACH; goto bad; } /* adjust state->dst if tunnel endpoint is offlink */ if (state->ro->ro_rt->rt_flags & RTF_GATEWAY) { state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway; dst6 = (struct sockaddr_in6 *)state->dst; } } else splx(s); state->m = ipsec6_splithdr(state->m); if (!state->m) { ipsec6stat.out_nomem++; error = ENOMEM; goto bad; } ip6 = mtod(state->m, struct ip6_hdr *); switch (isr->saidx.proto) { case IPPROTO_ESP: #ifdef IPSEC_ESP error = esp6_output(state->m, 
&ip6->ip6_nxt, state->m->m_next, isr); #else m_freem(state->m); error = EINVAL; #endif break; case IPPROTO_AH: error = ah6_output(state->m, &ip6->ip6_nxt, state->m->m_next, isr); break; case IPPROTO_IPCOMP: /* XXX code should be here */ /* FALLTHROUGH */ default: ipseclog((LOG_ERR, "ipsec6_output_tunnel: " "unknown ipsec protocol %d\n", isr->saidx.proto)); m_freem(state->m); ipsec6stat.out_inval++; error = EINVAL; break; } if (error) { state->m = NULL; goto bad; } plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr); if (plen > IPV6_MAXPACKET) { ipseclog((LOG_ERR, "ipsec6_output_tunnel: " "IPsec with IPv6 jumbogram is not supported\n")); ipsec6stat.out_inval++; error = EINVAL; /* XXX */ goto bad; } ip6 = mtod(state->m, struct ip6_hdr *); ip6->ip6_plen = htons(plen); } return 0; bad: m_freem(state->m); state->m = NULL; return error; } #endif /* INET6 */ #ifdef INET /* * Chop IP header and option off from the payload. */ static struct mbuf * ipsec4_splithdr(m) struct mbuf *m; { struct mbuf *mh; struct ip *ip; int hlen; if (m->m_len < sizeof(struct ip)) panic("ipsec4_splithdr: first mbuf too short"); ip = mtod(m, struct ip *); #ifdef _IP_VHL hlen = _IP_VHL_HL(ip->ip_vhl) << 2; #else hlen = ip->ip_hl << 2; #endif if (m->m_len > hlen) { MGETHDR(mh, M_DONTWAIT, MT_HEADER); if (!mh) { m_freem(m); return NULL; } - M_COPY_PKTHDR(mh, m); + M_MOVE_PKTHDR(mh, m); MH_ALIGN(mh, hlen); m->m_flags &= ~M_PKTHDR; m->m_len -= hlen; m->m_data += hlen; mh->m_next = m; m = mh; m->m_len = hlen; bcopy((caddr_t)ip, mtod(m, caddr_t), hlen); } else if (m->m_len < hlen) { m = m_pullup(m, hlen); if (!m) return NULL; } return m; } #endif #ifdef INET6 static struct mbuf * ipsec6_splithdr(m) struct mbuf *m; { struct mbuf *mh; struct ip6_hdr *ip6; int hlen; if (m->m_len < sizeof(struct ip6_hdr)) panic("ipsec6_splithdr: first mbuf too short"); ip6 = mtod(m, struct ip6_hdr *); hlen = sizeof(struct ip6_hdr); if (m->m_len > hlen) { MGETHDR(mh, M_DONTWAIT, MT_HEADER); if (!mh) { m_freem(m); return 
NULL; } - M_COPY_PKTHDR(mh, m); + M_MOVE_PKTHDR(mh, m); MH_ALIGN(mh, hlen); m->m_flags &= ~M_PKTHDR; m->m_len -= hlen; m->m_data += hlen; mh->m_next = m; m = mh; m->m_len = hlen; bcopy((caddr_t)ip6, mtod(m, caddr_t), hlen); } else if (m->m_len < hlen) { m = m_pullup(m, hlen); if (!m) return NULL; } return m; } #endif /* validate inbound IPsec tunnel packet. */ int ipsec4_tunnel_validate(m, off, nxt0, sav) struct mbuf *m; /* no pullup permitted, m->m_len >= ip */ int off; u_int nxt0; struct secasvar *sav; { u_int8_t nxt = nxt0 & 0xff; struct sockaddr_in *sin; struct sockaddr_in osrc, odst, isrc, idst; int hlen; struct secpolicy *sp; struct ip *oip; #ifdef DIAGNOSTIC if (m->m_len < sizeof(struct ip)) panic("too short mbuf on ipsec4_tunnel_validate"); #endif if (nxt != IPPROTO_IPV4) return 0; if (m->m_pkthdr.len < off + sizeof(struct ip)) return 0; /* do not decapsulate if the SA is for transport mode only */ if (sav->sah->saidx.mode == IPSEC_MODE_TRANSPORT) return 0; oip = mtod(m, struct ip *); #ifdef _IP_VHL hlen = _IP_VHL_HL(oip->ip_vhl) << 2; #else hlen = oip->ip_hl << 2; #endif if (hlen != sizeof(struct ip)) return 0; /* AF_INET6 should be supported, but at this moment we don't. 
*/ sin = (struct sockaddr_in *)&sav->sah->saidx.dst; if (sin->sin_family != AF_INET) return 0; if (bcmp(&oip->ip_dst, &sin->sin_addr, sizeof(oip->ip_dst)) != 0) return 0; /* XXX slow */ bzero(&osrc, sizeof(osrc)); bzero(&odst, sizeof(odst)); bzero(&isrc, sizeof(isrc)); bzero(&idst, sizeof(idst)); osrc.sin_family = odst.sin_family = isrc.sin_family = idst.sin_family = AF_INET; osrc.sin_len = odst.sin_len = isrc.sin_len = idst.sin_len = sizeof(struct sockaddr_in); osrc.sin_addr = oip->ip_src; odst.sin_addr = oip->ip_dst; m_copydata(m, off + offsetof(struct ip, ip_src), sizeof(isrc.sin_addr), (caddr_t)&isrc.sin_addr); m_copydata(m, off + offsetof(struct ip, ip_dst), sizeof(idst.sin_addr), (caddr_t)&idst.sin_addr); /* * RFC2401 5.2.1 (b): (assume that we are using tunnel mode) * - if the inner destination is multicast address, there can be * multiple permissible inner source address. implementation * may want to skip verification of inner source address against * SPD selector. * - if the inner protocol is ICMP, the packet may be an error report * from routers on the other side of the VPN cloud (R in the * following diagram). in this case, we cannot verify inner source * address against SPD selector. * me -- gw === gw -- R -- you * * we consider the first bullet to be users responsibility on SPD entry * configuration (if you need to encrypt multicast traffic, set * the source range of SPD selector to 0.0.0.0/0, or have explicit * address ranges for possible senders). * the second bullet is not taken care of (yet). * * therefore, we do not do anything special about inner source. */ sp = key_gettunnel((struct sockaddr *)&osrc, (struct sockaddr *)&odst, (struct sockaddr *)&isrc, (struct sockaddr *)&idst); if (!sp) return 0; key_freesp(sp); return 1; } #ifdef INET6 /* validate inbound IPsec tunnel packet. 
*/ int ipsec6_tunnel_validate(m, off, nxt0, sav) struct mbuf *m; /* no pullup permitted, m->m_len >= ip */ int off; u_int nxt0; struct secasvar *sav; { u_int8_t nxt = nxt0 & 0xff; struct sockaddr_in6 *sin6; struct sockaddr_in6 osrc, odst, isrc, idst; struct secpolicy *sp; struct ip6_hdr *oip6; #ifdef DIAGNOSTIC if (m->m_len < sizeof(struct ip6_hdr)) panic("too short mbuf on ipsec6_tunnel_validate"); #endif if (nxt != IPPROTO_IPV6) return 0; if (m->m_pkthdr.len < off + sizeof(struct ip6_hdr)) return 0; /* do not decapsulate if the SA is for transport mode only */ if (sav->sah->saidx.mode == IPSEC_MODE_TRANSPORT) return 0; oip6 = mtod(m, struct ip6_hdr *); /* AF_INET should be supported, but at this moment we don't. */ sin6 = (struct sockaddr_in6 *)&sav->sah->saidx.dst; if (sin6->sin6_family != AF_INET6) return 0; if (!IN6_ARE_ADDR_EQUAL(&oip6->ip6_dst, &sin6->sin6_addr)) return 0; /* XXX slow */ bzero(&osrc, sizeof(osrc)); bzero(&odst, sizeof(odst)); bzero(&isrc, sizeof(isrc)); bzero(&idst, sizeof(idst)); osrc.sin6_family = odst.sin6_family = isrc.sin6_family = idst.sin6_family = AF_INET6; osrc.sin6_len = odst.sin6_len = isrc.sin6_len = idst.sin6_len = sizeof(struct sockaddr_in6); osrc.sin6_addr = oip6->ip6_src; odst.sin6_addr = oip6->ip6_dst; m_copydata(m, off + offsetof(struct ip6_hdr, ip6_src), sizeof(isrc.sin6_addr), (caddr_t)&isrc.sin6_addr); m_copydata(m, off + offsetof(struct ip6_hdr, ip6_dst), sizeof(idst.sin6_addr), (caddr_t)&idst.sin6_addr); /* * regarding to inner source address validation, see a long comment * in ipsec4_tunnel_validate. */ sp = key_gettunnel((struct sockaddr *)&osrc, (struct sockaddr *)&odst, (struct sockaddr *)&isrc, (struct sockaddr *)&idst); /* * when there is no suitable inbound policy for the packet of the ipsec * tunnel mode, the kernel never decapsulate the tunneled packet * as the ipsec tunnel mode even when the system wide policy is "none". * then the kernel leaves the generic tunnel module to process this * packet. 
if there is no rule of the generic tunnel, the packet * is rejected and the statistics will be counted up. */ if (!sp) return 0; key_freesp(sp); return 1; } #endif /* * Make a mbuf chain for encryption. * If the original mbuf chain contains a mbuf with a cluster, * allocate a new cluster and copy the data to the new cluster. * XXX: this hack is inefficient, but is necessary to handle cases * of TCP retransmission... */ struct mbuf * ipsec_copypkt(m) struct mbuf *m; { struct mbuf *n, **mpp, *mnew; for (n = m, mpp = &m; n; n = n->m_next) { if (n->m_flags & M_EXT) { /* * Make a copy only if there are more than one * references to the cluster. * XXX: is this approach effective? */ if (n->m_ext.ext_type != EXT_CLUSTER || MEXT_IS_REF(n)) { int remain, copied; struct mbuf *mm; if (n->m_flags & M_PKTHDR) { MGETHDR(mnew, M_DONTWAIT, MT_HEADER); if (mnew == NULL) goto fail; - mnew->m_pkthdr = n->m_pkthdr; -#if 0 - if (n->m_pkthdr.aux) { - mnew->m_pkthdr.aux = - m_copym(n->m_pkthdr.aux, - 0, M_COPYALL, M_DONTWAIT); + if (!m_dup_pkthdr(mnew, n, M_DONTWAIT)) { + m_free(mnew); + goto fail; } -#endif - M_COPY_PKTHDR(mnew, n); - mnew->m_flags = n->m_flags & M_COPYFLAGS; } else { MGET(mnew, M_DONTWAIT, MT_DATA); if (mnew == NULL) goto fail; } mnew->m_len = 0; mm = mnew; /* * Copy data. If we don't have enough space to * store the whole data, allocate a cluster * or additional mbufs. * XXX: we don't use m_copyback(), since the * function does not use clusters and thus is * inefficient. */ remain = n->m_len; copied = 0; while (1) { int len; struct mbuf *mn; if (remain <= (mm->m_flags & M_PKTHDR ? MHLEN : MLEN)) len = remain; else { /* allocate a cluster */ MCLGET(mm, M_DONTWAIT); if (!(mm->m_flags & M_EXT)) { m_free(mm); goto fail; } len = remain < MCLBYTES ? remain : MCLBYTES; } bcopy(n->m_data + copied, mm->m_data, len); copied += len; remain -= len; mm->m_len = len; if (remain <= 0) /* completed? 
*/ break; /* need another mbuf */ MGETHDR(mn, M_DONTWAIT, MT_HEADER); if (mn == NULL) goto fail; mn->m_pkthdr.rcvif = NULL; mm->m_next = mn; mm = mn; } /* adjust chain */ mm->m_next = m_free(n); n = mm; *mpp = mnew; mpp = &n->m_next; continue; } } *mpp = n; mpp = &n->m_next; } return(m); fail: m_freem(m); return(NULL); } void ipsec_delaux(m) struct mbuf *m; { struct m_tag *tag; while ((tag = m_tag_find(m, PACKET_TAG_IPSEC_HISTORY, NULL)) != NULL) m_tag_delete(m, tag); } int ipsec_addhist(m, proto, spi) struct mbuf *m; int proto; u_int32_t spi; { struct m_tag *tag; struct ipsec_history *p; tag = m_tag_get(PACKET_TAG_IPSEC_HISTORY, sizeof (struct ipsec_history), M_NOWAIT); if (tag == NULL) return ENOBUFS; p = (struct ipsec_history *)(tag+1); bzero(p, sizeof(*p)); p->ih_proto = proto; p->ih_spi = spi; m_tag_prepend(m, tag); return 0; } struct ipsec_history * ipsec_gethist(m, lenp) struct mbuf *m; int *lenp; { struct m_tag *tag; tag = m_tag_find(m, PACKET_TAG_IPSEC_HISTORY, NULL); if (tag == NULL) return NULL; /* XXX NB: noone uses this so fake it */ if (lenp) *lenp = sizeof (struct ipsec_history); return ((struct ipsec_history *)(tag+1)); } Index: head/sys/netipsec/ipsec_mbuf.c =================================================================== --- head/sys/netipsec/ipsec_mbuf.c (revision 108465) +++ head/sys/netipsec/ipsec_mbuf.c (revision 108466) @@ -1,422 +1,422 @@ /* $FreeBSD$ */ /* * IPsec-specific mbuf routines. */ #include "opt_param.h" #include #include #include #include #include #include #include extern struct mbuf *m_getptr(struct mbuf *, int, int *); /* * Create a writable copy of the mbuf chain. While doing this * we compact the chain with a goal of producing a chain with * at most two mbufs. The second mbuf in this chain is likely * to be a cluster. The primary purpose of this work is to create * a writable packet for encryption, compression, etc. 
The * secondary goal is to linearize the data so the data can be * passed to crypto hardware in the most efficient manner possible. */ struct mbuf * m_clone(struct mbuf *m0) { struct mbuf *m, *mprev; KASSERT(m0 != NULL, ("m_clone: null mbuf")); mprev = NULL; for (m = m0; m != NULL; m = mprev->m_next) { /* * Regular mbufs are ignored unless there's a cluster * in front of it that we can use to coalesce. We do * the latter mainly so later clusters can be coalesced * also w/o having to handle them specially (i.e. convert * mbuf+cluster -> cluster). This optimization is heavily * influenced by the assumption that we're running over * Ethernet where MCBYTES is large enough that the max * packet size will permit lots of coalescing into a * single cluster. This in turn permits efficient * crypto operations, especially when using hardware. */ if ((m->m_flags & M_EXT) == 0) { if (mprev && (mprev->m_flags & M_EXT) && m->m_len <= M_TRAILINGSPACE(mprev)) { /* XXX: this ignores mbuf types */ memcpy(mtod(mprev, caddr_t) + mprev->m_len, mtod(m, caddr_t), m->m_len); mprev->m_len += m->m_len; mprev->m_next = m->m_next; /* unlink from chain */ m_free(m); /* reclaim mbuf */ newipsecstat.ips_mbcoalesced++; } else { mprev = m; } continue; } /* * Cluster'd mbufs are left alone (for now). */ if (!MEXT_IS_REF(m)) { mprev = m; continue; } /* * Not writable, replace with a copy or coalesce with * the previous mbuf if possible (since we have to copy * it anyway, we try to reduce the number of mbufs and * clusters so that future work is easier). */ /* XXX why can M_PKTHDR be set past the first mbuf? */ KASSERT(m->m_flags & M_EXT, ("m_clone: m_flags 0x%x", m->m_flags)); /* NB: we only coalesce into a cluster */ if (mprev == NULL || (mprev->m_flags & M_EXT) == 0 || m->m_len > M_TRAILINGSPACE(mprev)) { struct mbuf *n; /* * Allocate a new page, copy the data to the front * and release the reference to the old page. 
*/ if (mprev == NULL && (m->m_flags & M_PKTHDR)) { /* * NB: if a packet header is present we * must allocate the mbuf separately from * the cluster 'cuz M_COPY_PKTHDR will * smash the data pointer and drop the * M_EXT marker. */ MGETHDR(n, M_DONTWAIT, m->m_type); if (n == NULL) { m_freem(m0); return (NULL); } - M_COPY_PKTHDR(n, m); + M_MOVE_PKTHDR(n, m); MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_free(n); m_freem(m0); return (NULL); } } else { n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); if (n == NULL) { m_freem(m0); return (NULL); } } memcpy(mtod(n, caddr_t), mtod(m, caddr_t), m->m_len); n->m_len = m->m_len; n->m_next = m->m_next; if (mprev == NULL) m0 = n; /* new head of chain */ else mprev->m_next = n; /* replace old mbuf */ m_free(m); /* release old mbuf */ mprev = n; newipsecstat.ips_clcopied++; } else { /* XXX: this ignores mbuf types */ memcpy(mtod(mprev, caddr_t) + mprev->m_len, mtod(m, caddr_t), m->m_len); mprev->m_len += m->m_len; mprev->m_next = m->m_next; /* unlink from chain */ m_free(m); /* reclaim mbuf */ newipsecstat.ips_clcoalesced++; } } return (m0); } /* * Make space for a new header of length hlen at offset off * in the packet. When doing this we allocate new mbufs only * when absolutely necessary. The mbuf where the new header * is to go is returned together with an offset into the mbuf. * If NULL is returned then the mbuf chain may have been modified; * the caller is assumed to always free the chain. */ struct mbuf * m_makespace(struct mbuf *m0, int skip, int hlen, int *off) { struct mbuf *m; unsigned remain; KASSERT(m0 != NULL, ("m_dmakespace: null mbuf")); KASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen)); for (m = m0; m && skip > m->m_len; m = m->m_next) skip -= m->m_len; if (m == NULL) return (NULL); /* * At this point skip is the offset into the mbuf m * where the new header should be placed. Figure out * if there's space to insert the new header. If so, * and copying the remainder makese sense then do so. 
* Otherwise insert a new mbuf in the chain, splitting * the contents of m as needed. */ remain = m->m_len - skip; /* data to move */ if (hlen > M_TRAILINGSPACE(m)) { struct mbuf *n; /* XXX code doesn't handle clusters XXX */ KASSERT(remain < MLEN, ("m_makespace: remainder too big: %u", remain)); /* * Not enough space in m, split the contents * of m, inserting new mbufs as required. * * NB: this ignores mbuf types. */ MGET(n, M_DONTWAIT, MT_DATA); if (n == NULL) return (NULL); n->m_next = m->m_next; /* splice new mbuf */ m->m_next = n; newipsecstat.ips_mbinserted++; if (hlen <= M_TRAILINGSPACE(m) + remain) { /* * New header fits in the old mbuf if we copy * the remainder; just do the copy to the new * mbuf and we're good to go. */ memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + skip, remain); n->m_len = remain; m->m_len = skip + hlen; *off = skip; } else { /* * No space in the old mbuf for the new header. * Make space in the new mbuf and check the * remainder'd data fits too. If not then we * must allocate an additional mbuf (yech). */ n->m_len = 0; if (remain + hlen > M_TRAILINGSPACE(n)) { struct mbuf *n2; MGET(n2, M_DONTWAIT, MT_DATA); /* NB: new mbuf is on chain, let caller free */ if (n2 == NULL) return (NULL); n2->m_len = 0; memcpy(mtod(n2, caddr_t), mtod(m, caddr_t) + skip, remain); n2->m_len = remain; /* splice in second mbuf */ n2->m_next = n->m_next; n->m_next = n2; newipsecstat.ips_mbinserted++; } else { memcpy(mtod(n, caddr_t) + hlen, mtod(m, caddr_t) + skip, remain); n->m_len += remain; } m->m_len -= remain; n->m_len += hlen; m = n; /* header is at front ... */ *off = 0; /* ... of new mbuf */ } } else { /* * Copy the remainder to the back of the mbuf * so there's space to write the new header. */ /* XXX can this be memcpy? does it handle overlap? 
*/ ovbcopy(mtod(m, caddr_t) + skip, mtod(m, caddr_t) + skip + hlen, remain); m->m_len += hlen; *off = skip; } m0->m_pkthdr.len += hlen; /* adjust packet length */ return m; } /* * m_pad(m, n) pads with bytes at the end. The packet header * length is updated, and a pointer to the first byte of the padding * (which is guaranteed to be all in one mbuf) is returned. */ caddr_t m_pad(struct mbuf *m, int n) { register struct mbuf *m0, *m1; register int len, pad; caddr_t retval; if (n <= 0) { /* No stupid arguments. */ DPRINTF(("m_pad: pad length invalid (%d)\n", n)); m_freem(m); return NULL; } len = m->m_pkthdr.len; pad = n; m0 = m; while (m0->m_len < len) { KASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/ len -= m0->m_len; m0 = m0->m_next; } if (m0->m_len != len) { DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n", m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len)); m_freem(m); return NULL; } /* Check for zero-length trailing mbufs, and find the last one. */ for (m1 = m0; m1->m_next; m1 = m1->m_next) { if (m1->m_next->m_len != 0) { DPRINTF(("m_pad: length mismatch (should be %d " "instead of %d)\n", m->m_pkthdr.len, m->m_pkthdr.len + m1->m_next->m_len)); m_freem(m); return NULL; } m0 = m1->m_next; } if (pad > M_TRAILINGSPACE(m0)) { /* Add an mbuf to the chain. */ MGET(m1, M_DONTWAIT, MT_DATA); if (m1 == 0) { m_freem(m0); DPRINTF(("m_pad: unable to get extra mbuf\n")); return NULL; } m0->m_next = m1; m0 = m1; m0->m_len = 0; } retval = m0->m_data + m0->m_len; m0->m_len += pad; m->m_pkthdr.len += pad; return retval; } /* * Remove hlen data at offset skip in the packet. This is used by * the protocols strip protocol headers and associated data (e.g. IV, * authenticator) on input. */ int m_striphdr(struct mbuf *m, int skip, int hlen) { struct mbuf *m1; int roff; /* Find beginning of header */ m1 = m_getptr(m, skip, &roff); if (m1 == NULL) return (EINVAL); /* Remove the header and associated data from the mbuf. 
*/ if (roff == 0) { /* The header was at the beginning of the mbuf */ newipsecstat.ips_input_front++; m_adj(m1, hlen); if ((m1->m_flags & M_PKTHDR) == 0) m->m_pkthdr.len -= hlen; } else if (roff + hlen >= m1->m_len) { struct mbuf *mo; /* * Part or all of the header is at the end of this mbuf, * so first let's remove the remainder of the header from * the beginning of the remainder of the mbuf chain, if any. */ newipsecstat.ips_input_end++; if (roff + hlen > m1->m_len) { /* Adjust the next mbuf by the remainder */ m_adj(m1->m_next, roff + hlen - m1->m_len); /* The second mbuf is guaranteed not to have a pkthdr... */ m->m_pkthdr.len -= (roff + hlen - m1->m_len); } /* Now, let's unlink the mbuf chain for a second...*/ mo = m1->m_next; m1->m_next = NULL; /* ...and trim the end of the first part of the chain...sick */ m_adj(m1, -(m1->m_len - roff)); if ((m1->m_flags & M_PKTHDR) == 0) m->m_pkthdr.len -= (m1->m_len - roff); /* Finally, let's relink */ m1->m_next = mo; } else { /* * The header lies in the "middle" of the mbuf; copy * the remainder of the mbuf down over the header. */ newipsecstat.ips_input_middle++; bcopy(mtod(m1, u_char *) + roff + hlen, mtod(m1, u_char *) + roff, m1->m_len - (roff + hlen)); m1->m_len -= hlen; m->m_pkthdr.len -= hlen; } return (0); } /* * Diagnostic routine to check mbuf alignment as required by the * crypto device drivers (that use DMA). */ void m_checkalignment(const char* where, struct mbuf *m0, int off, int len) { int roff; struct mbuf *m = m_getptr(m0, off, &roff); caddr_t addr; if (m == NULL) return; printf("%s (off %u len %u): ", where, off, len); addr = mtod(m, caddr_t) + roff; do { int mlen; if (((uintptr_t) addr) & 3) { printf("addr misaligned %p,", addr); break; } mlen = m->m_len; if (mlen > len) mlen = len; len -= mlen; if (len && (mlen & 3)) { printf("len mismatch %u,", mlen); break; } m = m->m_next; addr = m ? 
mtod(m, caddr_t) : NULL; } while (m && len > 0); for (m = m0; m; m = m->m_next) printf(" [%p:%u]", mtod(m, caddr_t), m->m_len); printf("\n"); } Index: head/sys/netipsec/ipsec_output.c =================================================================== --- head/sys/netipsec/ipsec_output.c (revision 108465) +++ head/sys/netipsec/ipsec_output.c (revision 108466) @@ -1,737 +1,737 @@ /* $FreeBSD$ */ /* $KAME: ipsec.c,v 1.103 2001/05/24 07:14:18 sakane Exp $ */ /* * IPsec output processing. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include int ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) { struct tdb_ident *tdbi; struct m_tag *mtag; struct secasvar *sav; struct secasindex *saidx; int error; #if 0 SPLASSERT(net, "ipsec_process_done"); #endif KASSERT(m != NULL, ("ipsec_process_done: null mbuf")); KASSERT(isr != NULL, ("ipsec_process_done: null ISR")); sav = isr->sav; KASSERT(sav != NULL, ("ipsec_process_done: null SA")); KASSERT(sav->sah != NULL, ("ipsec_process_done: null SAH")); saidx = &sav->sah->saidx; switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: /* Fix the header length, for AH processing. */ mtod(m, struct ip *)->ip_len = htons(m->m_pkthdr.len); break; #endif /* INET */ #ifdef INET6 case AF_INET6: /* Fix the header length, for AH processing. */ if (m->m_pkthdr.len < sizeof (struct ip6_hdr)) { error = ENXIO; goto bad; } if (m->m_pkthdr.len - sizeof (struct ip6_hdr) > IPV6_MAXPACKET) { /* No jumbogram support. 
*/ error = ENXIO; /*?*/ goto bad; } mtod(m, struct ip6_hdr *)->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); break; #endif /* INET6 */ default: DPRINTF(("ipsec_process_done: unknown protocol family %u\n", saidx->dst.sa.sa_family)); error = ENXIO; goto bad; } /* * Add a record of what we've done or what needs to be done to the * packet. */ mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, sizeof(struct tdb_ident), M_NOWAIT); if (mtag == NULL) { DPRINTF(("ipsec_process_done: could not get packet tag\n")); error = ENOMEM; goto bad; } tdbi = (struct tdb_ident *)(mtag + 1); tdbi->dst = saidx->dst; tdbi->proto = saidx->proto; tdbi->spi = sav->spi; m_tag_prepend(m, mtag); /* * If there's another (bundled) SA to apply, do so. * Note that this puts a burden on the kernel stack size. * If this is a problem we'll need to introduce a queue * to set the packet on so we can unwind the stack before * doing further processing. */ if (isr->next) { newipsecstat.ips_out_bundlesa++; return ipsec4_process_packet(m, isr->next, 0, 0); } /* * We're done with IPsec processing, transmit the packet using the * appropriate network protocol (IP or IPv6). SPD lookup will be * performed again there. */ switch (saidx->dst.sa.sa_family) { #ifdef INET struct ip *ip; case AF_INET: ip = mtod(m, struct ip *); ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); return ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL); #endif /* INET */ #ifdef INET6 case AF_INET6: /* * We don't need massage, IPv6 header fields are always in * net endian. */ return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); #endif /* INET6 */ } panic("ipsec_process_done"); bad: m_freem(m); KEY_FREESAV(&sav); return (error); } static struct ipsecrequest * ipsec_nextisr( struct mbuf *m, struct ipsecrequest *isr, int af, struct secasindex *saidx, int *error ) { #define IPSEC_OSTAT(x,y,z) (isr->saidx.proto == IPPROTO_ESP ? (x)++ : \ isr->saidx.proto == IPPROTO_AH ? 
(y)++ : (z)++) struct secasvar *sav; #if 0 SPLASSERT(net, "ipsec_nextisr"); #endif KASSERT(af == AF_INET || af == AF_INET6, ("ipsec_nextisr: invalid address family %u", af)); again: /* * Craft SA index to search for proper SA. Note that * we only fillin unspecified SA peers for transport * mode; for tunnel mode they must already be filled in. */ *saidx = isr->saidx; if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { /* Fillin unspecified SA peers only for transport mode */ if (af == AF_INET) { struct sockaddr_in *sin; struct ip *ip = mtod(m, struct ip *); if (saidx->src.sa.sa_len == 0) { sin = &saidx->src.sin; sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; sin->sin_addr = ip->ip_src; } if (saidx->dst.sa.sa_len == 0) { sin = &saidx->dst.sin; sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; sin->sin_addr = ip->ip_dst; } } else { struct sockaddr_in6 *sin6; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); if (saidx->src.sin6.sin6_len == 0) { sin6 = (struct sockaddr_in6 *)&saidx->src; sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; sin6->sin6_addr = ip6->ip6_src; if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); } } if (saidx->dst.sin6.sin6_len == 0) { sin6 = (struct sockaddr_in6 *)&saidx->dst; sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; sin6->sin6_addr = ip6->ip6_dst; if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); } } } } /* * Lookup SA and validate it. */ *error = key_checkrequest(isr, saidx); if (*error != 0) { /* * IPsec processing is required, but no SA found. * I assume that key_acquire() had been called * to get/establish the SA. 
Here I discard * this packet because it is responsibility for * upper layer to retransmit the packet. */ newipsecstat.ips_out_nosa++; goto bad; } sav = isr->sav; if (sav == NULL) { /* XXX valid return */ KASSERT(ipsec_get_reqlevel(isr) == IPSEC_LEVEL_USE, ("ipsec_nextisr: no SA found, but required; level %u", ipsec_get_reqlevel(isr))); isr = isr->next; if (isr == NULL) { /*XXXstatistic??*/ *error = EINVAL; /*XXX*/ return isr; } goto again; } /* * Check system global policy controls. */ if ((isr->saidx.proto == IPPROTO_ESP && !esp_enable) || (isr->saidx.proto == IPPROTO_AH && !ah_enable) || (isr->saidx.proto == IPPROTO_IPCOMP && !ipcomp_enable)) { DPRINTF(("ipsec_nextisr: IPsec outbound packet dropped due" " to policy (check your sysctls)\n")); IPSEC_OSTAT(espstat.esps_pdrops, ahstat.ahs_pdrops, ipcompstat.ipcomps_pdrops); *error = EHOSTUNREACH; goto bad; } /* * Sanity check the SA contents for the caller * before they invoke the xform output method. */ if (sav->tdb_xform == NULL) { DPRINTF(("ipsec_nextisr: no transform for SA\n")); IPSEC_OSTAT(espstat.esps_noxform, ahstat.ahs_noxform, ipcompstat.ipcomps_noxform); *error = EHOSTUNREACH; goto bad; } return isr; bad: KASSERT(*error != 0, ("ipsec_nextisr: error return w/ no error code")); return NULL; #undef IPSEC_OSTAT } #ifdef INET /* * IPsec output logic for IPv4. */ int ipsec4_process_packet( struct mbuf *m, struct ipsecrequest *isr, int flags, int tunalready) { struct secasindex saidx; struct secasvar *sav; struct ip *ip; int s, error, i, off; KASSERT(m != NULL, ("ipsec4_process_packet: null mbuf")); KASSERT(isr != NULL, ("ipsec4_process_packet: null isr")); s = splnet(); /* insure SA contents don't change */ isr = ipsec_nextisr(m, isr, AF_INET, &saidx, &error); if (isr == NULL) goto bad; sav = isr->sav; if (!tunalready) { union sockaddr_union *dst = &sav->sah->saidx.dst; int setdf; /* * Collect IP_DF state from the outer header. 
*/ if (dst->sa.sa_family == AF_INET) { if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { error = ENOBUFS; goto bad; } ip = mtod(m, struct ip *); /* Honor system-wide control of how to handle IP_DF */ switch (ip4_ipsec_dfbit) { case 0: /* clear in outer header */ case 1: /* set in outer header */ setdf = ip4_ipsec_dfbit; break; default: /* propagate to outer header */ setdf = ntohs(ip->ip_off & IP_DF); break; } } else { ip = NULL; /* keep compiler happy */ setdf = 0; } /* Do the appropriate encapsulation, if necessary */ if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ dst->sa.sa_family != AF_INET || /* PF mismatch */ #if 0 (sav->flags & SADB_X_SAFLAGS_TUNNEL) || /* Tunnel requ'd */ sav->tdb_xform->xf_type == XF_IP4 || /* ditto */ #endif (dst->sa.sa_family == AF_INET && /* Proxy */ dst->sin.sin_addr.s_addr != INADDR_ANY && dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) { struct mbuf *mp; /* Fix IPv4 header checksum and length */ if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { error = ENOBUFS; goto bad; } ip = mtod(m, struct ip *); ip->ip_len = htons(m->m_pkthdr.len); ip->ip_sum = 0; #ifdef _IP_VHL if (ip->ip_vhl == IP_VHL_BORING) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(m, _IP_VHL_HL(ip->ip_vhl) << 2); #else ip->ip_sum = in_cksum(m, ip->ip_hl << 2); #endif /* Encapsulate the packet */ error = ipip_output(m, isr, &mp, 0, 0); if (mp == NULL && !error) { /* Should never happen. */ DPRINTF(("ipsec4_process_packet: ipip_output " "returns no mbuf and no error!")); error = EFAULT; } if (error) { if (mp) m_freem(mp); goto bad; } m = mp, mp = NULL; /* * ipip_output clears IP_DF in the new header. If * we need to propagate IP_DF from the outer header, * then we have to do it here. * * XXX shouldn't assume what ipip_output does. 
*/ if (dst->sa.sa_family == AF_INET && setdf) { if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { error = ENOBUFS; goto bad; } ip = mtod(m, struct ip *); ip->ip_off = ntohs(ip->ip_off); ip->ip_off |= IP_DF; ip->ip_off = htons(ip->ip_off); } } } /* * Dispatch to the appropriate IPsec transform logic. The * packet will be returned for transmission after crypto * processing, etc. are completed. For encapsulation we * bypass this call because of the explicit call done above * (necessary to deal with IP_DF handling for IPv4). * * NB: m & sav are ``passed to caller'' who's reponsible for * for reclaiming their resources. */ if (sav->tdb_xform->xf_type != XF_IP4) { ip = mtod(m, struct ip *); i = ip->ip_hl << 2; off = offsetof(struct ip, ip_p); error = (*sav->tdb_xform->xf_output)(m, isr, NULL, i, off); } else { error = ipsec_process_done(m, isr); } splx(s); return error; bad: splx(s); if (m) m_freem(m); return error; } #endif #ifdef INET6 /* * Chop IP6 header from the payload. */ static struct mbuf * ipsec6_splithdr(struct mbuf *m) { struct mbuf *mh; struct ip6_hdr *ip6; int hlen; KASSERT(m->m_len >= sizeof (struct ip6_hdr), ("ipsec6_splithdr: first mbuf too short, len %u", m->m_len)); ip6 = mtod(m, struct ip6_hdr *); hlen = sizeof(struct ip6_hdr); if (m->m_len > hlen) { MGETHDR(mh, M_DONTWAIT, MT_HEADER); if (!mh) { m_freem(m); return NULL; } - M_COPY_PKTHDR(mh, m); + M_MOVE_PKTHDR(mh, m); MH_ALIGN(mh, hlen); m->m_len -= hlen; m->m_data += hlen; mh->m_next = m; m = mh; m->m_len = hlen; bcopy((caddr_t)ip6, mtod(m, caddr_t), hlen); } else if (m->m_len < hlen) { m = m_pullup(m, hlen); if (!m) return NULL; } return m; } /* * IPsec output logic for IPv6, transport mode. 
*/ int ipsec6_output_trans( struct ipsec_output_state *state, u_char *nexthdrp, struct mbuf *mprev, struct secpolicy *sp, int flags, int *tun) { struct ipsecrequest *isr; struct secasindex saidx; int error = 0; struct mbuf *m; KASSERT(state != NULL, ("ipsec6_output: null state")); KASSERT(state->m != NULL, ("ipsec6_output: null m")); KASSERT(nexthdrp != NULL, ("ipsec6_output: null nexthdrp")); KASSERT(mprev != NULL, ("ipsec6_output: null mprev")); KASSERT(sp != NULL, ("ipsec6_output: null sp")); KASSERT(tun != NULL, ("ipsec6_output: null tun")); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_output_trans: applyed SP\n"); kdebug_secpolicy(sp)); isr = sp->req; if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* the rest will be handled by ipsec6_output_tunnel() */ *tun = 1; /* need tunnel-mode processing */ return 0; } *tun = 0; m = state->m; isr = ipsec_nextisr(m, isr, AF_INET6, &saidx, &error); if (isr == NULL) { #ifdef notdef /* XXX should notification be done for all errors ? */ /* * Notify the fact that the packet is discarded * to ourselves. I believe this is better than * just silently discarding. (jinmei@kame.net) * XXX: should we restrict the error to TCP packets? * XXX: should we directly notify sockets via * pfctlinputs? */ icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN, 0); m = NULL; /* NB: icmp6_error frees mbuf */ #endif goto bad; } return (*isr->sav->tdb_xform->xf_output)(m, isr, NULL, sizeof (struct ip6_hdr), offsetof(struct ip6_hdr, ip6_nxt)); bad: if (m) m_freem(m); state->m = NULL; return error; } static int ipsec6_encapsulate(struct mbuf *m, struct secasvar *sav) { struct ip6_hdr *oip6; struct ip6_hdr *ip6; size_t plen; /* can't tunnel between different AFs */ if (sav->sah->saidx.src.sa.sa_family != AF_INET6 || sav->sah->saidx.dst.sa.sa_family != AF_INET6) { m_freem(m); return EINVAL; } KASSERT(m->m_len != sizeof (struct ip6_hdr), ("ipsec6_encapsulate: mbuf wrong size; len %u", m->m_len)); /* * grow the mbuf to accomodate the new IPv6 header. 
*/ plen = m->m_pkthdr.len; if (M_LEADINGSPACE(m->m_next) < sizeof(struct ip6_hdr)) { struct mbuf *n; MGET(n, M_DONTWAIT, MT_DATA); if (!n) { m_freem(m); return ENOBUFS; } n->m_len = sizeof(struct ip6_hdr); n->m_next = m->m_next; m->m_next = n; m->m_pkthdr.len += sizeof(struct ip6_hdr); oip6 = mtod(n, struct ip6_hdr *); } else { m->m_next->m_len += sizeof(struct ip6_hdr); m->m_next->m_data -= sizeof(struct ip6_hdr); m->m_pkthdr.len += sizeof(struct ip6_hdr); oip6 = mtod(m->m_next, struct ip6_hdr *); } ip6 = mtod(m, struct ip6_hdr *); ovbcopy((caddr_t)ip6, (caddr_t)oip6, sizeof(struct ip6_hdr)); /* Fake link-local scope-class addresses */ if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src)) oip6->ip6_src.s6_addr16[1] = 0; if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst)) oip6->ip6_dst.s6_addr16[1] = 0; /* construct new IPv6 header. see RFC 2401 5.1.2.2 */ /* ECN consideration. */ ip6_ecn_ingress(ip6_ipsec_ecn, &ip6->ip6_flow, &oip6->ip6_flow); if (plen < IPV6_MAXPACKET - sizeof(struct ip6_hdr)) ip6->ip6_plen = htons(plen); else { /* ip6->ip6_plen will be updated in ip6_output() */ } ip6->ip6_nxt = IPPROTO_IPV6; sav->sah->saidx.src.sin6.sin6_addr = ip6->ip6_src; sav->sah->saidx.dst.sin6.sin6_addr = ip6->ip6_dst; ip6->ip6_hlim = IPV6_DEFHLIM; /* XXX Should ip6_src be updated later ? */ return 0; } /* * IPsec output logic for IPv6, tunnel mode. */ int ipsec6_output_tunnel(struct ipsec_output_state *state, struct secpolicy *sp, int flags) { struct ip6_hdr *ip6; struct ipsecrequest *isr; struct secasindex saidx; int error; struct sockaddr_in6* dst6; struct mbuf *m; KASSERT(state != NULL, ("ipsec6_output: null state")); KASSERT(state->m != NULL, ("ipsec6_output: null m")); KASSERT(sp != NULL, ("ipsec6_output: null sp")); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_output_tunnel: applyed SP\n"); kdebug_secpolicy(sp)); m = state->m; /* * transport mode ipsec (before the 1st tunnel mode) is already * processed by ipsec6_output_trans(). 
*/ for (isr = sp->req; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_TUNNEL) break; } isr = ipsec_nextisr(m, isr, AF_INET6, &saidx, &error); if (isr == NULL) goto bad; /* * There may be the case that SA status will be changed when * we are refering to one. So calling splsoftnet(). */ if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* * build IPsec tunnel. */ /* XXX should be processed with other familiy */ if (isr->sav->sah->saidx.src.sa.sa_family != AF_INET6) { ipseclog((LOG_ERR, "ipsec6_output_tunnel: " "family mismatched between inner and outer, spi=%u\n", ntohl(isr->sav->spi))); newipsecstat.ips_out_inval++; error = EAFNOSUPPORT; goto bad; } m = ipsec6_splithdr(m); if (!m) { newipsecstat.ips_out_nomem++; error = ENOMEM; goto bad; } error = ipsec6_encapsulate(m, isr->sav); if (error) { m = NULL; goto bad; } ip6 = mtod(m, struct ip6_hdr *); state->ro = &isr->sav->sah->sa_route; state->dst = (struct sockaddr *)&state->ro->ro_dst; dst6 = (struct sockaddr_in6 *)state->dst; if (state->ro->ro_rt && ((state->ro->ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) { RTFREE(state->ro->ro_rt); state->ro->ro_rt = NULL; } if (state->ro->ro_rt == 0) { bzero(dst6, sizeof(*dst6)); dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = ip6->ip6_dst; rtalloc(state->ro); } if (state->ro->ro_rt == 0) { ip6stat.ip6s_noroute++; newipsecstat.ips_out_noroute++; error = EHOSTUNREACH; goto bad; } /* adjust state->dst if tunnel endpoint is offlink */ if (state->ro->ro_rt->rt_flags & RTF_GATEWAY) { state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway; dst6 = (struct sockaddr_in6 *)state->dst; } } m = ipsec6_splithdr(m); if (!m) { newipsecstat.ips_out_nomem++; error = ENOMEM; goto bad; } ip6 = mtod(m, struct ip6_hdr *); return (*isr->sav->tdb_xform->xf_output)(m, isr, NULL, sizeof (struct ip6_hdr), offsetof(struct ip6_hdr, ip6_nxt)); bad: if (m) m_freem(m); state->m = NULL; return error; } #endif /*INET6*/ Index: 
head/sys/sys/mbuf.h =================================================================== --- head/sys/sys/mbuf.h (revision 108465) +++ head/sys/sys/mbuf.h (revision 108466) @@ -1,553 +1,557 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 * $FreeBSD$ */ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ #include #include /* * Mbufs are of a single size, MSIZE (machine/param.h), which * includes overhead. An mbuf may add a single "mbuf cluster" of size * MCLBYTES (also in machine/param.h), which has no additional overhead * and is used instead of the internal data area; this is done when * at least MINCLSIZE of data must be stored. Additionally, it is possible * to allocate a separate buffer externally and attach it to the mbuf in * a way similar to that of mbuf clusters. */ #define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ #define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ #define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */ #define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */ #ifdef _KERNEL /*- * Macros for type conversion: * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. * dtom(x) -- Convert data pointer within mbuf to mbuf pointer (XXX). */ #define mtod(m, t) ((t)((m)->m_data)) #define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1))) #endif /* _KERNEL */ /* * Header present at the beginning of every mbuf. */ struct m_hdr { struct mbuf *mh_next; /* next buffer in chain */ struct mbuf *mh_nextpkt; /* next chain in queue/record */ caddr_t mh_data; /* location of data */ int mh_len; /* amount of data in this mbuf */ int mh_flags; /* flags; see below */ short mh_type; /* type of data in this mbuf */ }; /* * Packet tag structure (see below for details). */ struct m_tag { SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ u_int16_t m_tag_id; /* Tag ID */ u_int16_t m_tag_len; /* Length of data */ u_int32_t m_tag_cookie; /* ABI/Module ID */ }; /* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. 
*/ struct pkthdr { struct ifnet *rcvif; /* rcv interface */ int len; /* total packet length */ /* variables for ip and tcp reassembly */ void *header; /* pointer to packet header */ /* variables for hardware checksum */ int csum_flags; /* flags regarding checksum */ int csum_data; /* data field used by csum routines */ SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ struct label label; /* MAC label of data in packet */ }; /* * Description of external storage mapped into mbuf; valid only if M_EXT is set. */ struct m_ext { caddr_t ext_buf; /* start of buffer */ void (*ext_free) /* free routine if not the usual */ (void *, void *); void *ext_args; /* optional argument pointer */ u_int ext_size; /* size of buffer, for ext_free */ u_int *ref_cnt; /* pointer to ref count info */ int ext_type; /* type of external storage */ }; /* * The core of the mbuf object along with some shortcut defines for * practical purposes. */ struct mbuf { struct m_hdr m_hdr; union { struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { struct m_ext MH_ext; /* M_EXT set */ char MH_databuf[MHLEN]; } MH_dat; } MH; char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ } M_dat; }; #define m_next m_hdr.mh_next #define m_len m_hdr.mh_len #define m_data m_hdr.mh_data #define m_type m_hdr.mh_type #define m_flags m_hdr.mh_flags #define m_nextpkt m_hdr.mh_nextpkt #define m_act m_nextpkt #define m_pkthdr M_dat.MH.MH_pkthdr #define m_ext M_dat.MH.MH_dat.MH_ext #define m_pktdat M_dat.MH.MH_dat.MH_databuf #define m_dat M_dat.M_databuf /* * mbuf flags. 
*/ #define M_EXT 0x0001 /* has associated external storage */ #define M_PKTHDR 0x0002 /* start of record */ #define M_EOR 0x0004 /* end of record */ #define M_RDONLY 0x0008 /* associated data is marked read-only */ #define M_PROTO1 0x0010 /* protocol-specific */ #define M_PROTO2 0x0020 /* protocol-specific */ #define M_PROTO3 0x0040 /* protocol-specific */ #define M_PROTO4 0x0080 /* protocol-specific */ #define M_PROTO5 0x0100 /* protocol-specific */ /* * mbuf pkthdr flags (also stored in m_flags). */ #define M_BCAST 0x0200 /* send/received as link-level broadcast */ #define M_MCAST 0x0400 /* send/received as link-level multicast */ #define M_FRAG 0x0800 /* packet is a fragment of a larger packet */ #define M_FIRSTFRAG 0x1000 /* packet is first fragment */ #define M_LASTFRAG 0x2000 /* packet is last fragment */ /* * External buffer types: identify ext_buf type. */ #define EXT_CLUSTER 1 /* mbuf cluster */ #define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ #define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 200 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */ /* * Flags copied when copying m_pkthdr. */ -#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_PROTO1|M_PROTO1|M_PROTO2|M_PROTO3 | \ - M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG|M_RDONLY) +#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_RDONLY|M_PROTO1|M_PROTO1|M_PROTO2|\ + M_PROTO3|M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|\ + M_FRAG|M_FIRSTFRAG|M_LASTFRAG) /* * Flags indicating hw checksum support and sw checksum requirements. */ #define CSUM_IP 0x0001 /* will csum IP */ #define CSUM_TCP 0x0002 /* will csum TCP */ #define CSUM_UDP 0x0004 /* will csum UDP */ #define CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */ #define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */ #define CSUM_IP_CHECKED 0x0100 /* did csum IP */ #define CSUM_IP_VALID 0x0200 /* ... 
the csum is valid */ #define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */ #define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */ #define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP) #define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? */ /* * mbuf types. */ #define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER 2 /* packet header */ #if 0 #define MT_SOCKET 3 /* socket structure */ #define MT_PCB 4 /* protocol control block */ #define MT_RTABLE 5 /* routing tables */ #define MT_HTABLE 6 /* IMP host tables */ #define MT_ATABLE 7 /* address resolution tables */ #endif #define MT_SONAME 8 /* socket name */ #if 0 #define MT_SOOPTS 10 /* socket options */ #endif #define MT_FTABLE 11 /* fragment reassembly header */ #if 0 #define MT_RIGHTS 12 /* access rights */ #define MT_IFADDR 13 /* interface address */ #endif #define MT_TAG 13 /* volatile metadata associated to pkts */ #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ /* * Mbuf and cluster allocation statistics PCPU structure. */ struct mbpstat { u_long mb_mbfree; u_long mb_mbpgs; u_long mb_clfree; u_long mb_clpgs; long mb_mbtypes[MT_NTYPES]; short mb_active; }; /* * General mbuf allocator statistics structure. * XXX: Modifications of these are not protected by any mutex locks nor by * any atomic() manipulations. As a result, we may occasionally lose * a count or two. Luckily, not all of these fields are modified at all * and remain static, and those that are manipulated are only manipulated * in failure situations, which do not occur (hopefully) very often. 
*/ struct mbstat { u_long m_drops; /* times failed to allocate */ u_long m_wait; /* times succesfully returned from wait */ u_long m_drain; /* times drained protocols for space */ u_long m_mcfail; /* XXX: times m_copym failed */ u_long m_mpfail; /* XXX: times m_pullup failed */ u_long m_msize; /* length of an mbuf */ u_long m_mclbytes; /* length of an mbuf cluster */ u_long m_minclsize; /* min length of data to allocate a cluster */ u_long m_mlen; /* length of data in an mbuf */ u_long m_mhlen; /* length of data in a header mbuf */ /* Number of mbtypes (gives # elems in mbpstat's mb_mbtypes[] array: */ short m_numtypes; }; /* * Flags specifying how an allocation should be made. * M_DONTWAIT means "don't block if nothing is available" whereas * M_TRYWAIT means "block for mbuf_wait ticks at most if nothing is * available." */ #define M_DONTWAIT 1 #define M_TRYWAIT 0 #define M_WAIT M_TRYWAIT /* XXX: Deprecated. */ #ifdef _KERNEL /*- * mbuf external reference count management macros. * * MEXT_IS_REF(m): true if (m) is not the only mbuf referencing * the external buffer ext_buf. * * MEXT_REM_REF(m): remove reference to m_ext object. * * MEXT_ADD_REF(m): add reference to m_ext object already * referred to by (m). */ #define MEXT_IS_REF(m) (*((m)->m_ext.ref_cnt) > 1) #define MEXT_REM_REF(m) do { \ KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0")); \ atomic_subtract_int((m)->m_ext.ref_cnt, 1); \ } while(0) #define MEXT_ADD_REF(m) atomic_add_int((m)->m_ext.ref_cnt, 1) /* * mbuf, cluster, and external object allocation macros * (for compatibility purposes). 
*/ -#define M_COPY_PKTHDR(to, from) m_copy_pkthdr((to), (from)) +/* NB: M_COPY_PKTHDR is deprecated, use M_MOVE_PKTHDR or m_dup_pkthdr */ +#define M_MOVE_PKTHDR(to, from) m_move_pkthdr((to), (from)) #define m_getclr(how, type) m_get_clrd((how), (type)) #define MGET(m, how, type) ((m) = m_get((how), (type))) #define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type))) #define MCLGET(m, how) m_clget((m), (how)) #define MEXTADD(m, buf, size, free, args, flags, type) \ m_extadd((m), (caddr_t)(buf), (size), (free), (args), (flags), (type)) /* * MEXTFREE(m): disassociate (and possibly free) an external object from (m). * * If the atomic_cmpset_int() returns 0, then we effectively do nothing * in terms of "cleaning up" (freeing the ext buf and ref. counter) as * this means that either there are still references, or another thread * is taking care of the clean-up. */ #define MEXTFREE(m) do { \ struct mbuf *_mb = (m); \ \ MEXT_REM_REF(_mb); \ if (atomic_cmpset_int(_mb->m_ext.ref_cnt, 0, 1)) \ _mext_free(_mb); \ _mb->m_flags &= ~M_EXT; \ } while (0) /* * Evaluate TRUE if it's safe to write to the mbuf m's data region (this * can be both the local data payload, or an external buffer area, * depending on whether M_EXT is set). */ #define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && (!((m)->m_flags \ & M_EXT) || !MEXT_IS_REF(m))) /* * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place * an object of the specified size at the end of the mbuf, longword aligned. */ #define M_ALIGN(m, len) do { \ (m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1); \ } while (0) /* * As above, for mbufs allocated with m_gethdr/MGETHDR * or initialized by M_COPY_PKTHDR. */ #define MH_ALIGN(m, len) do { \ (m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1); \ } while (0) /* * Compute the amount of space available * before the current start of data in an mbuf.
* * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. */ #define M_LEADINGSPACE(m) \ ((m)->m_flags & M_EXT ? \ (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0): \ (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \ (m)->m_data - (m)->m_dat) /* * Compute the amount of space available * after the end of data in an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. */ #define M_TRAILINGSPACE(m) \ ((m)->m_flags & M_EXT ? \ (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size \ - ((m)->m_data + (m)->m_len) : 0) : \ &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len)) /* * Arrange to prepend space of size plen to mbuf m. * If a new mbuf must be allocated, how specifies whether to wait. * If the allocation fails, the original mbuf chain is freed and m is * set to NULL. */ #define M_PREPEND(m, plen, how) do { \ struct mbuf **_mmp = &(m); \ struct mbuf *_mm = *_mmp; \ int _mplen = (plen); \ int __mhow = (how); \ \ if (M_LEADINGSPACE(_mm) >= _mplen) { \ _mm->m_data -= _mplen; \ _mm->m_len += _mplen; \ } else \ _mm = m_prepend(_mm, _mplen, __mhow); \ if (_mm != NULL && _mm->m_flags & M_PKTHDR) \ _mm->m_pkthdr.len += _mplen; \ *_mmp = _mm; \ } while (0) /* * Change mbuf to new type. * This is a relatively expensive operation and should be avoided. */ #define MCHTYPE(m, t) m_chtype((m), (t)) /* Length to m_copy to copy all. */ #define M_COPYALL 1000000000 /* Compatibility with 4.3. 
*/ #define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT) extern int max_datalen; /* MHLEN - max_hdr */ extern int max_hdr; /* Largest link + protocol header */ extern int max_linkhdr; /* Largest link-level header */ extern int max_protohdr; /* Largest protocol header */ extern struct mbstat mbstat; /* General mbuf stats/infos */ extern int nmbclusters; /* Maximum number of clusters */ extern int nmbcnt; /* Scale kmem_map for counter space */ extern int nmbufs; /* Maximum number of mbufs */ extern int nsfbufs; /* Number of sendfile(2) bufs */ void _mext_free(struct mbuf *); void m_adj(struct mbuf *, int); void m_cat(struct mbuf *, struct mbuf *); void m_chtype(struct mbuf *, short); void m_clget(struct mbuf *, int); void m_extadd(struct mbuf *, caddr_t, u_int, void (*)(void *, void *), void *, int, int); void m_copyback(struct mbuf *, int, int, caddr_t); void m_copydata(const struct mbuf *, int, int, caddr_t); struct mbuf *m_copym(struct mbuf *, int, int, int); struct mbuf *m_copypacket(struct mbuf *, int); void m_copy_pkthdr(struct mbuf *, struct mbuf *); struct mbuf *m_devget(char *, int, int, struct ifnet *, void (*)(char *, caddr_t, u_int)); struct mbuf *m_dup(struct mbuf *, int); +int m_dup_pkthdr(struct mbuf *, struct mbuf *, int); u_int m_fixhdr(struct mbuf *); struct mbuf *m_free(struct mbuf *); void m_freem(struct mbuf *); struct mbuf *m_get(int, short); struct mbuf *m_get_clrd(int, short); struct mbuf *m_getcl(int, short, int); struct mbuf *m_gethdr(int, short); struct mbuf *m_gethdr_clrd(int, short); struct mbuf *m_getm(struct mbuf *, int, int, short); u_int m_length(struct mbuf *, struct mbuf **); +void m_move_pkthdr(struct mbuf *, struct mbuf *); struct mbuf *m_prepend(struct mbuf *, int, int); void m_print(const struct mbuf *); struct mbuf *m_pulldown(struct mbuf *, int, int, int *); struct mbuf *m_pullup(struct mbuf *, int); struct mbuf *m_split(struct mbuf *, int, int); /* * Packets may have annotations attached by affixing a list * of "packet 
tags" to the pkthdr structure. Packet tags are * dynamically allocated semi-opaque data structures that have * a fixed header (struct m_tag) that specifies the size of the * memory block and a pair that identifies it. * The cookie is a 32-bit unique unsigned value used to identify * a module or ABI. By convention this value is chose as the * date+time that the module is created, expressed as the number of * seconds since the epoch (e.g. using date -u +'%s'). The type value * is an ABI/module-specific value that identifies a particular annotation * and is private to the module. For compatibility with systems * like openbsd that define packet tags w/o an ABI/module cookie, * the value PACKET_ABI_COMPAT is used to implement m_tag_get and * m_tag_find compatibility shim functions and several tag types are * defined below. Users that do not require compatibility should use * a private cookie value so that packet tag-related definitions * can be maintained privately. * * Note that the packet tag returned by m_tag_allocate has the default * memory alignment implemented by malloc. To reference private data * one can use a construct like: * * struct m_tag *mtag = m_tag_allocate(...); * struct foo *p = (struct foo *)(mtag+1); * * if the alignment of struct m_tag is sufficient for referencing members * of struct foo. Otherwise it is necessary to embed struct m_tag within * the private data structure to insure proper alignment; e.g. * * struct foo { * struct m_tag tag; * ... 
* }; * struct foo *p = (struct foo *) m_tag_allocate(...); * struct m_tag *mtag = &p->tag; */ #define PACKET_TAG_NONE 0 /* Nadda */ /* Packet tag for use with PACKET_ABI_COMPAT */ #define PACKET_TAG_IPSEC_IN_DONE 1 /* IPsec applied, in */ #define PACKET_TAG_IPSEC_OUT_DONE 2 /* IPsec applied, out */ #define PACKET_TAG_IPSEC_IN_CRYPTO_DONE 3 /* NIC IPsec crypto done */ #define PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED 4 /* NIC IPsec crypto req'ed */ #define PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO 5 /* NIC notifies IPsec */ #define PACKET_TAG_IPSEC_PENDING_TDB 6 /* Reminder to do IPsec */ #define PACKET_TAG_BRIDGE 7 /* Bridge processing done */ #define PACKET_TAG_GIF 8 /* GIF processing done */ #define PACKET_TAG_GRE 9 /* GRE processing done */ #define PACKET_TAG_IN_PACKET_CHECKSUM 10 /* NIC checksumming done */ #define PACKET_TAG_ENCAP 11 /* Encap. processing */ #define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */ #define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */ #define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */ /* * As a temporary and low impact solution to replace the even uglier * approach used so far in some parts of the network stack (which relies * on global variables), packet tag-like annotations are stored in MT_TAG * mbufs (or lookalikes) prepended to the actual mbuf chain. * * m_type = MT_TAG * m_flags = m_tag_id * m_next = next buffer in chain. * * BE VERY CAREFUL not to pass these blocks to the mbuf handling routines. 
*/ #define _m_tag_id m_hdr.mh_flags /* Packet tags used in the FreeBSD network stack */ #define PACKET_TAG_DUMMYNET 15 /* dummynet info */ #define PACKET_TAG_IPFW 16 /* ipfw classification */ #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ /* Packet tag routines */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); void m_tag_free(struct m_tag *); void m_tag_prepend(struct mbuf *, struct m_tag *); void m_tag_unlink(struct mbuf *, struct m_tag *); void m_tag_delete(struct mbuf *, struct m_tag *); void m_tag_delete_chain(struct mbuf *, struct m_tag *); struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); -struct m_tag *m_tag_copy(struct m_tag *); -int m_tag_copy_chain(struct mbuf *, struct mbuf *); +struct m_tag *m_tag_copy(struct m_tag *, int); +int m_tag_copy_chain(struct mbuf *, struct mbuf *, int); void m_tag_init(struct mbuf *); struct m_tag *m_tag_first(struct mbuf *); struct m_tag *m_tag_next(struct mbuf *, struct m_tag *); /* these are for openbsd compatibility */ #define MTAG_ABI_COMPAT 0 /* compatibility ABI */ static __inline struct m_tag * m_tag_get(int type, int length, int wait) { return m_tag_alloc(MTAG_ABI_COMPAT, type, length, wait); } static __inline struct m_tag * m_tag_find(struct mbuf *m, int type, struct m_tag *start) { return m_tag_locate(m, MTAG_ABI_COMPAT, type, start); } #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */