Index: head/sys/dev/awi/awi.c =================================================================== --- head/sys/dev/awi/awi.c (revision 108465) +++ head/sys/dev/awi/awi.c (revision 108466) @@ -1,2993 +1,2993 @@ /* $NetBSD: awi.c,v 1.26 2000/07/21 04:48:55 onoe Exp $ */ /* $FreeBSD$ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Bill Sommerfeld * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Driver for AMD 802.11 firmware. * Uses am79c930 chip driver to talk to firmware running on the am79c930. * * More-or-less a generic ethernet-like if driver, with 802.11 gorp added. */ /* * todo: * - flush tx queue on resynch. * - clear oactive on "down". * - rewrite copy-into-mbuf code * - mgmt state machine gets stuck retransmitting assoc requests. * - multicast filter. * - fix device reset so it's more likely to work * - show status goo through ifmedia. * * more todo: * - deal with more 802.11 frames. * - send reassoc request * - deal with reassoc response * - send/deal with disassociation * - deal with "full" access points (no room for me). * - power save mode * * later: * - SSID preferences * - need ioctls for poking at the MIBs * - implement ad-hoc mode (including bss creation). * - decide when to do "ad hoc" vs. infrastructure mode (IFF_LINK flags?) * (focus on inf. mode since that will be needed for ietf) * - deal with DH vs. FH versions of the card * - deal with faster cards (2mb/s) * - ?WEP goo (mmm, rc4) (it looks not particularly useful). * - ifmedia revision. * - common 802.11 mibish things. * - common 802.11 media layer. */ /* * Driver for AMD 802.11 PCnetMobile firmware. * Uses am79c930 chip driver to talk to firmware running on the am79c930. * * The initial version of the driver was written by * Bill Sommerfeld . 
* Then the driver module completely rewritten to support cards with DS phy * and to support adhoc mode by Atsushi Onoe */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) && __FreeBSD_version >= 400000 #include #else #include #endif #include #include #ifdef __FreeBSD__ #include #else #include #endif #include #include #include #ifdef INET #include #include #include #include #ifdef __NetBSD__ #include #else #include #endif #endif #if defined(__FreeBSD__) && __FreeBSD_version >= 400000 #define NBPFILTER 1 #elif defined(__FreeBSD__) && __FreeBSD_version >= 300000 #include "bpf.h" #define NBPFILTER NBPF #else #include "bpfilter.h" #endif #if NBPFILTER > 0 #include #include #endif #include #include #ifdef __NetBSD__ #include #endif #ifdef __NetBSD__ #include #include #include #include #endif #ifdef __FreeBSD__ #include #include #include #include #endif static int awi_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); #ifdef IFM_IEEE80211 static int awi_media_rate2opt(struct awi_softc *sc, int rate); static int awi_media_opt2rate(struct awi_softc *sc, int opt); static int awi_media_change(struct ifnet *ifp); static void awi_media_status(struct ifnet *ifp, struct ifmediareq *imr); #endif static void awi_watchdog(struct ifnet *ifp); static void awi_start(struct ifnet *ifp); static void awi_txint(struct awi_softc *sc); static struct mbuf * awi_fix_txhdr(struct awi_softc *sc, struct mbuf *m0); static struct mbuf * awi_fix_rxhdr(struct awi_softc *sc, struct mbuf *m0); static void awi_input(struct awi_softc *sc, struct mbuf *m, u_int32_t rxts, u_int8_t rssi); static void awi_rxint(struct awi_softc *sc); static struct mbuf * awi_devget(struct awi_softc *sc, u_int32_t off, u_int16_t len); static int awi_init_hw(struct awi_softc *sc); static int awi_init_mibs(struct awi_softc *sc); static int awi_init_txrx(struct awi_softc *sc); static void awi_stop_txrx(struct awi_softc *sc); static int 
awi_start_scan(struct awi_softc *sc); static int awi_next_scan(struct awi_softc *sc); static void awi_stop_scan(struct awi_softc *sc); static void awi_recv_beacon(struct awi_softc *sc, struct mbuf *m0, u_int32_t rxts, u_int8_t rssi); static int awi_set_ss(struct awi_softc *sc); static void awi_try_sync(struct awi_softc *sc); static void awi_sync_done(struct awi_softc *sc); static void awi_send_deauth(struct awi_softc *sc); static void awi_send_auth(struct awi_softc *sc, int seq); static void awi_recv_auth(struct awi_softc *sc, struct mbuf *m0); static void awi_send_asreq(struct awi_softc *sc, int reassoc); static void awi_recv_asresp(struct awi_softc *sc, struct mbuf *m0); static int awi_mib(struct awi_softc *sc, u_int8_t cmd, u_int8_t mib); static int awi_cmd_scan(struct awi_softc *sc); static int awi_cmd(struct awi_softc *sc, u_int8_t cmd); static void awi_cmd_done(struct awi_softc *sc); static int awi_next_txd(struct awi_softc *sc, int len, u_int32_t *framep, u_int32_t*ntxdp); static int awi_lock(struct awi_softc *sc); static void awi_unlock(struct awi_softc *sc); static int awi_intr_lock(struct awi_softc *sc); static void awi_intr_unlock(struct awi_softc *sc); static int awi_cmd_wait(struct awi_softc *sc); static void awi_print_essid(u_int8_t *essid); #ifdef AWI_DEBUG static void awi_dump_pkt(struct awi_softc *sc, struct mbuf *m, int rssi); int awi_verbose = 0; int awi_dump = 0; #define AWI_DUMP_MASK(fc0) (1 << (((fc0) & IEEE80211_FC0_SUBTYPE_MASK) >> 4)) int awi_dump_mask = AWI_DUMP_MASK(IEEE80211_FC0_SUBTYPE_BEACON); int awi_dump_hdr = 0; int awi_dump_len = 28; #endif #if NBPFILTER > 0 #define AWI_BPF_NORM 0 #define AWI_BPF_RAW 1 #ifdef __FreeBSD__ #define AWI_BPF_MTAP(sc, m, raw) do { \ if ((sc)->sc_rawbpf == (raw)) \ BPF_MTAP((sc)->sc_ifp, (m)); \ } while (0); #else #define AWI_BPF_MTAP(sc, m, raw) do { \ if ((sc)->sc_ifp->if_bpf && (sc)->sc_rawbpf == (raw)) \ bpf_mtap((sc)->sc_ifp->if_bpf, (m)); \ } while (0); #endif #else #define AWI_BPF_MTAP(sc, m, raw) 
#endif #ifndef llc_snap #define llc_snap llc_un.type_snap #endif #ifdef __FreeBSD__ #if __FreeBSD_version >= 400000 devclass_t awi_devclass; #endif #if __FreeBSD_version < 500043 /* NetBSD compatible functions */ static char * ether_sprintf(u_int8_t *); static char * ether_sprintf(enaddr) u_int8_t *enaddr; { static char strbuf[18]; sprintf(strbuf, "%6D", enaddr, ":"); return strbuf; } #endif #endif int awi_attach(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; int s; int error; #ifdef IFM_IEEE80211 int i; u_int8_t *phy_rates; int mword; struct ifmediareq imr; #endif s = splnet(); /* * Even if we can sleep in initialization state, * all other processes (e.g. ifconfig) have to wait for * completion of attaching interface. */ sc->sc_busy = 1; sc->sc_status = AWI_ST_INIT; TAILQ_INIT(&sc->sc_scan); error = awi_init_hw(sc); if (error) { sc->sc_invalid = 1; splx(s); return error; } error = awi_init_mibs(sc); splx(s); if (error) { sc->sc_invalid = 1; return error; } ifp->if_softc = sc; ifp->if_start = awi_start; ifp->if_ioctl = awi_ioctl; ifp->if_watchdog = awi_watchdog; ifp->if_mtu = ETHERMTU; ifp->if_hdrlen = sizeof(struct ieee80211_frame) + sizeof(struct ether_header); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; #ifdef IFF_NOTRAILERS ifp->if_flags |= IFF_NOTRAILERS; #endif #ifdef __NetBSD__ memcpy(ifp->if_xname, sc->sc_dev.dv_xname, IFNAMSIZ); #endif #ifdef __FreeBSD__ ifp->if_output = ether_output; ifp->if_snd.ifq_maxlen = ifqmaxlen; memcpy(sc->sc_ec.ac_enaddr, sc->sc_mib_addr.aMAC_Address, ETHER_ADDR_LEN); #endif printf("%s: IEEE802.11 %s %dMbps (firmware %s)\n", sc->sc_dev.dv_xname, sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH ? 
"FH" : "DS", sc->sc_tx_rate / 10, sc->sc_banner); printf("%s: address %s\n", sc->sc_dev.dv_xname, ether_sprintf(sc->sc_mib_addr.aMAC_Address)); #ifdef __FreeBSD__ ether_ifattach(ifp, sc->sc_mib_addr.aMAC_Address); #else if_attach(ifp); ether_ifattach(ifp, sc->sc_mib_addr.aMAC_Address); #if NBPFILTER > 0 bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, sizeof(struct ether_header)); #endif #endif #ifdef IFM_IEEE80211 ifmedia_init(&sc->sc_media, 0, awi_media_change, awi_media_status); phy_rates = sc->sc_mib_phy.aSuprt_Data_Rates; for (i = 0; i < phy_rates[1]; i++) { mword = awi_media_rate2opt(sc, AWI_80211_RATE(phy_rates[2 + i])); if (mword == 0) continue; mword |= IFM_IEEE80211; ifmedia_add(&sc->sc_media, mword, 0, NULL); ifmedia_add(&sc->sc_media, mword | IFM_IEEE80211_ADHOC, 0, NULL); if (sc->sc_mib_phy.IEEE_PHY_Type != AWI_PHY_TYPE_FH) ifmedia_add(&sc->sc_media, mword | IFM_IEEE80211_ADHOC | IFM_FLAG0, 0, NULL); } awi_media_status(ifp, &imr); ifmedia_set(&sc->sc_media, imr.ifm_active); #endif /* ready to accept ioctl */ awi_unlock(sc); /* Attach is successful. */ sc->sc_attached = 1; return 0; } #ifdef __NetBSD__ int awi_detach(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; int s; /* Succeed if there is no work to do. 
*/ if (!sc->sc_attached) return (0); s = splnet(); sc->sc_invalid = 1; awi_stop(sc); while (sc->sc_sleep_cnt > 0) { wakeup(sc); (void)tsleep(sc, PWAIT, "awidet", 1); } if (sc->sc_wep_ctx != NULL) free(sc->sc_wep_ctx, M_DEVBUF); #if NBPFILTER > 0 bpfdetach(ifp); #endif #ifdef IFM_IEEE80211 ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY); #endif ether_ifdetach(ifp); if_detach(ifp); if (sc->sc_enabled) { if (sc->sc_disable) (*sc->sc_disable)(sc); sc->sc_enabled = 0; } splx(s); return 0; } int awi_activate(self, act) struct device *self; enum devact act; { struct awi_softc *sc = (struct awi_softc *)self; int s, error = 0; s = splnet(); switch (act) { case DVACT_ACTIVATE: error = EOPNOTSUPP; break; case DVACT_DEACTIVATE: sc->sc_invalid = 1; if (sc->sc_ifp) if_deactivate(sc->sc_ifp); break; } splx(s); return error; } void awi_power(sc, why) struct awi_softc *sc; int why; { int s; int ocansleep; if (!sc->sc_enabled) return; s = splnet(); ocansleep = sc->sc_cansleep; sc->sc_cansleep = 0; #ifdef needtobefixed /*ONOE*/ if (why == PWR_RESUME) { sc->sc_enabled = 0; awi_init(sc); (void)awi_intr(sc); } else { awi_stop(sc); if (sc->sc_disable) (*sc->sc_disable)(sc); } #endif sc->sc_cansleep = ocansleep; splx(s); } #endif /* __NetBSD__ */ static int awi_ioctl(ifp, cmd, data) struct ifnet *ifp; u_long cmd; caddr_t data; { struct awi_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; struct ieee80211req *ireq = (struct ieee80211req *)data; int s, error; struct ieee80211_nwid nwid; u_int8_t *p; int len; u_int8_t tmpstr[IEEE80211_NWID_LEN*2]; #ifdef __FreeBSD_version #if __FreeBSD_version < 500028 struct proc *mythread = curproc; /* name a white lie */ #else struct thread *mythread = curthread; #endif #endif s = splnet(); /* serialize ioctl */ error = awi_lock(sc); if (error) goto cantlock; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: 
arp_ifinit((void *)ifp, ifa); break; #endif } /* FALLTHROUGH */ case SIOCSIFFLAGS: sc->sc_format_llc = !(ifp->if_flags & IFF_LINK0); if (!(ifp->if_flags & IFF_UP)) { if (sc->sc_enabled) { awi_stop(sc); if (sc->sc_disable) (*sc->sc_disable)(sc); sc->sc_enabled = 0; } break; } error = awi_init(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef __FreeBSD__ error = ENETRESET; /*XXX*/ #else error = (cmd == SIOCADDMULTI) ? ether_addmulti(ifr, &sc->sc_ec) : ether_delmulti(ifr, &sc->sc_ec); #endif /* * Do not rescan BSS. Rather, just reset multicast filter. */ if (error == ENETRESET) { if (sc->sc_enabled) error = awi_init(sc); else error = 0; } break; case SIOCSIFMTU: if (ifr->ifr_mtu > ETHERMTU) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; case SIOCS80211NWID: #ifdef __FreeBSD__ error = suser(mythread); if (error) break; #endif error = copyin(ifr->ifr_data, &nwid, sizeof(nwid)); if (error) break; if (nwid.i_len > IEEE80211_NWID_LEN) { error = EINVAL; break; } if (sc->sc_mib_mac.aDesired_ESS_ID[1] == nwid.i_len && memcmp(&sc->sc_mib_mac.aDesired_ESS_ID[2], nwid.i_nwid, nwid.i_len) == 0) break; memset(sc->sc_mib_mac.aDesired_ESS_ID, 0, AWI_ESS_ID_SIZE); sc->sc_mib_mac.aDesired_ESS_ID[0] = IEEE80211_ELEMID_SSID; sc->sc_mib_mac.aDesired_ESS_ID[1] = nwid.i_len; memcpy(&sc->sc_mib_mac.aDesired_ESS_ID[2], nwid.i_nwid, nwid.i_len); if (sc->sc_enabled) { awi_stop(sc); error = awi_init(sc); } break; case SIOCG80211NWID: if (ifp->if_flags & IFF_RUNNING) p = sc->sc_bss.essid; else p = sc->sc_mib_mac.aDesired_ESS_ID; error = copyout(p + 1, ifr->ifr_data, 1 + IEEE80211_NWID_LEN); break; case SIOCS80211NWKEY: #ifdef __FreeBSD__ error = suser(mythread); if (error) break; #endif error = awi_wep_setnwkey(sc, (struct ieee80211_nwkey *)data); break; case SIOCG80211NWKEY: error = awi_wep_getnwkey(sc, (struct ieee80211_nwkey *)data); break; #ifdef IFM_IEEE80211 case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd); break; #endif #ifdef 
__FreeBSD__ case SIOCG80211: switch(ireq->i_type) { case IEEE80211_IOC_SSID: if (ireq->i_val != -1 && ireq->i_val != 0) { error = EINVAL; break; } if (!sc->sc_mib_local.Network_Mode) p = sc->sc_ownssid; else if (ireq->i_val == -1 && (ifp->if_flags & IFF_RUNNING)) p = sc->sc_bss.essid; else p = sc->sc_mib_mac.aDesired_ESS_ID; len = p[1]; p += 2; if (len > IEEE80211_NWID_LEN) { error = EINVAL; break; } if (len > 0) error = copyout(p, ireq->i_data, len); ireq->i_len = len; break; case IEEE80211_IOC_NUMSSIDS: ireq->i_val = 1; break; case IEEE80211_IOC_WEP: if (sc->sc_wep_algo != NULL) ireq->i_val = IEEE80211_WEP_MIXED; else ireq->i_val = IEEE80211_WEP_OFF; break; case IEEE80211_IOC_WEPKEY: if(ireq->i_val < 0 || ireq->i_val > 3) { error = EINVAL; break; } len = sizeof(tmpstr); error = awi_wep_getkey(sc, ireq->i_val, tmpstr, &len); if(error) break; #ifdef __FreeBSD__ if (!suser(mythread)) bzero(tmpstr, len); #endif ireq->i_len = len; error = copyout(tmpstr, ireq->i_data, len); break; case IEEE80211_IOC_NUMWEPKEYS: ireq->i_val = 4; break; case IEEE80211_IOC_WEPTXKEY: ireq->i_val = sc->sc_wep_defkid; break; case IEEE80211_IOC_AUTHMODE: ireq->i_val = IEEE80211_AUTH_OPEN; break; case IEEE80211_IOC_STATIONNAME: /* not used anywhere */ error = EINVAL; break; case IEEE80211_IOC_CHANNEL: /* XXX: Handle FH cards */ ireq->i_val = sc->sc_bss.chanset; break; case IEEE80211_IOC_POWERSAVE: /* * The powersave mode is not supported by the driver. 
*/ ireq->i_val = IEEE80211_POWERSAVE_NOSUP; break; case IEEE80211_IOC_POWERSAVESLEEP: error = EINVAL; break; default: error = EINVAL; break; } break; case SIOCS80211: error = suser(mythread); if(error) break; switch(ireq->i_type) { case IEEE80211_IOC_SSID: if (ireq->i_val != 0 || ireq->i_len > IEEE80211_NWID_LEN) { error = EINVAL; break; } bzero(tmpstr, AWI_ESS_ID_SIZE); tmpstr[0] = IEEE80211_ELEMID_SSID; tmpstr[1] = ireq->i_len; error = copyin(ireq->i_data, tmpstr+2, ireq->i_len); if(error) break; bcopy(tmpstr, sc->sc_mib_mac.aDesired_ESS_ID, AWI_ESS_ID_SIZE); bcopy(tmpstr, sc->sc_ownssid, AWI_ESS_ID_SIZE); break; case IEEE80211_IOC_WEP: if(ireq->i_val == IEEE80211_WEP_OFF) error = awi_wep_setalgo(sc, 0); else error = awi_wep_setalgo(sc, 1); break; case IEEE80211_IOC_WEPKEY: if(ireq->i_val < 0 || ireq->i_val > 3 || ireq->i_len > 13) { error = EINVAL; break; } error = copyin(ireq->i_data, tmpstr, ireq->i_len); if(error) break; error = awi_wep_setkey(sc, ireq->i_val, tmpstr, ireq->i_len); break; case IEEE80211_IOC_WEPTXKEY: if(ireq->i_val < 0 || ireq->i_val > 3) { error = EINVAL; break; } sc->sc_wep_defkid = ireq->i_val; break; case IEEE80211_IOC_AUTHMODE: if(ireq->i_val != IEEE80211_AUTH_OPEN) error = EINVAL; break; case IEEE80211_IOC_STATIONNAME: error = EPERM; break; case IEEE80211_IOC_CHANNEL: if(ireq->i_val < sc->sc_scan_min || ireq->i_val > sc->sc_scan_max) { error = EINVAL; break; } sc->sc_ownch = ireq->i_val; break; case IEEE80211_IOC_POWERSAVE: if(ireq->i_val != IEEE80211_POWERSAVE_OFF) error = EINVAL; break; case IEEE80211_IOC_POWERSAVESLEEP: error = EINVAL; break; default: error = EINVAL; break; } /* Restart the card so the change takes effect */ if(!error) { if(sc->sc_enabled) { awi_stop(sc); error = awi_init(sc); } } break; #endif /* __FreeBSD__ */ default: error = awi_wicfg(ifp, cmd, data); break; } awi_unlock(sc); cantlock: splx(s); return error; } #ifdef IFM_IEEE80211 static int awi_media_rate2opt(sc, rate) struct awi_softc *sc; int rate; { int 
mword;	/* NOTE(review): declarator continues the "int" at the end of the previous (collapsed) source line */

	mword = 0;
	/*
	 * Translate a firmware data rate (units of 100kbit/s, e.g. 10 = 1Mbps,
	 * cf. the "%dMbps ... sc_tx_rate / 10" attach printf) into the matching
	 * IFM_IEEE80211_* media subtype for this card's PHY.  5.5 and 11 Mbps
	 * exist only on DS PHYs; an unsupported rate yields 0.
	 */
	switch (rate) {
	case 10:
		if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH)
			mword = IFM_IEEE80211_FH1;
		else
			mword = IFM_IEEE80211_DS1;
		break;
	case 20:
		if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH)
			mword = IFM_IEEE80211_FH2;
		else
			mword = IFM_IEEE80211_DS2;
		break;
	case 55:
		if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_DS)
			mword = IFM_IEEE80211_DS5;
		break;
	case 110:
		if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_DS)
			mword = IFM_IEEE80211_DS11;
		break;
	}
	return mword;
}

/*
 * Inverse of awi_media_rate2opt(): translate an ifmedia word into the
 * firmware rate (100kbit/s units).  Returns 0 when the subtype belongs
 * to the other PHY family (FH vs. DS) or is unknown.
 */
static int
awi_media_opt2rate(sc, opt)
	struct awi_softc *sc;
	int opt;
{
	int rate;

	rate = 0;
	/* First reject subtypes that do not match this card's PHY type. */
	switch (IFM_SUBTYPE(opt)) {
	case IFM_IEEE80211_FH1:
	case IFM_IEEE80211_FH2:
		if (sc->sc_mib_phy.IEEE_PHY_Type != AWI_PHY_TYPE_FH)
			return 0;
		break;
	case IFM_IEEE80211_DS1:
	case IFM_IEEE80211_DS2:
	case IFM_IEEE80211_DS5:
	case IFM_IEEE80211_DS11:
		if (sc->sc_mib_phy.IEEE_PHY_Type != AWI_PHY_TYPE_DS)
			return 0;
		break;
	}
	/* Then map the subtype to its rate. */
	switch (IFM_SUBTYPE(opt)) {
	case IFM_IEEE80211_FH1:
	case IFM_IEEE80211_DS1:
		rate = 10;
		break;
	case IFM_IEEE80211_FH2:
	case IFM_IEEE80211_DS2:
		rate = 20;
		break;
	case IFM_IEEE80211_DS5:
		rate = 55;
		break;
	case IFM_IEEE80211_DS11:
		rate = 110;
		break;
	}
	return rate;
}

/*
 * Called from ifmedia_ioctl via awi_ioctl with lock obtained.
 * Validate the requested media word against the PHY's supported rate
 * set, switch between infrastructure and adhoc (optionally no-bssid)
 * mode, and restart the interface if it is up.
 */
static int
awi_media_change(ifp)
	struct ifnet *ifp;
{
	struct awi_softc *sc = ifp->if_softc;
	struct ifmedia_entry *ime;
	u_int8_t *phy_rates;
	int i, rate, error;

	error = 0;
	ime = sc->sc_media.ifm_cur;
	rate = awi_media_opt2rate(sc, ime->ifm_media);
	if (rate == 0)
		return EINVAL;
	if (rate != sc->sc_tx_rate) {
		/* The requested rate must appear in aSuprt_Data_Rates. */
		phy_rates = sc->sc_mib_phy.aSuprt_Data_Rates;
		for (i = 0; i < phy_rates[1]; i++) {
			if (rate == AWI_80211_RATE(phy_rates[2 + i]))
				break;
		}
		if (i == phy_rates[1])
			return EINVAL;
	}
	if (ime->ifm_media & IFM_IEEE80211_ADHOC) {
		sc->sc_mib_local.Network_Mode = 0;
		/* FH adhoc always uses a BSSID; on DS, IFM_FLAG0 selects no-bssid mode. */
		if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH)
			sc->sc_no_bssid = 0;
		else
			sc->sc_no_bssid = (ime->ifm_media & IFM_FLAG0) ?
1 : 0;	/* completes the conditional expression begun on the previous (collapsed) source line */
	} else {
		sc->sc_mib_local.Network_Mode = 1;	/* infrastructure mode */
	}
	/* Restart the interface so the new media selection takes effect. */
	if (sc->sc_enabled) {
		awi_stop(sc);
		error = awi_init(sc);
	}
	return error;
}

/*
 * ifmedia status callback: report link validity/activity, the current
 * TX rate, and the adhoc / no-bssid modifiers.
 */
static void
awi_media_status(ifp, imr)
	struct ifnet *ifp;
	struct ifmediareq *imr;
{
	struct awi_softc *sc = ifp->if_softc;

	imr->ifm_status = IFM_AVALID;
	if (ifp->if_flags & IFF_RUNNING)
		imr->ifm_status |= IFM_ACTIVE;
	imr->ifm_active = IFM_IEEE80211;
	imr->ifm_active |= awi_media_rate2opt(sc, sc->sc_tx_rate);
	if (sc->sc_mib_local.Network_Mode == 0) {
		imr->ifm_active |= IFM_IEEE80211_ADHOC;
		if (sc->sc_no_bssid)
			imr->ifm_active |= IFM_FLAG0;
	}
}
#endif /* IFM_IEEE80211 */

/*
 * Interrupt handler.  Repeatedly reads and acknowledges the two interrupt
 * status registers, dispatching RX, TX, command-done and scan-complete
 * work until no causes remain.  Returns nonzero iff anything was handled.
 */
int
awi_intr(arg)
	void *arg;
{
	struct awi_softc *sc = arg;
	u_int16_t status;
	int error, handled = 0, ocansleep;

	if (!sc->sc_enabled || !sc->sc_enab_intr || sc->sc_invalid)
		return 0;

	/* Keep the chip out of power-down while interrupts are serviced. */
	am79c930_gcr_setbits(&sc->sc_chip,
	    AM79C930_GCR_DISPWDN | AM79C930_GCR_ECINT);
	awi_write_1(sc, AWI_DIS_PWRDN, 1);
	ocansleep = sc->sc_cansleep;
	sc->sc_cansleep = 0;	/* never sleep in interrupt context */
	for (;;) {
		error = awi_intr_lock(sc);
		if (error)
			break;
		status = awi_read_1(sc, AWI_INTSTAT);
		awi_write_1(sc, AWI_INTSTAT, 0);
		awi_write_1(sc, AWI_INTSTAT, 0);
		status |= awi_read_1(sc, AWI_INTSTAT2) << 8;
		awi_write_1(sc, AWI_INTSTAT2, 0);
		DELAY(10);
		awi_intr_unlock(sc);
		/* Ignore spurious command-done when no command is outstanding. */
		if (!sc->sc_cmd_inprog)
			status &= ~AWI_INT_CMD;	/* make sure */
		if (status == 0)
			break;
		handled = 1;
		if (status & AWI_INT_RX)
			awi_rxint(sc);
		if (status & AWI_INT_TX)
			awi_txint(sc);
		if (status & AWI_INT_CMD)
			awi_cmd_done(sc);
		if (status & AWI_INT_SCAN_CMPLT) {
			if (sc->sc_status == AWI_ST_SCAN &&
			    sc->sc_mgt_timer > 0)
				(void)awi_next_scan(sc);
		}
	}
	sc->sc_cansleep = ocansleep;
	am79c930_gcr_clearbits(&sc->sc_chip, AM79C930_GCR_DISPWDN);
	awi_write_1(sc, AWI_DIS_PWRDN, 0);
	return handled;
}

/*
 * Bring the interface up: rebuild the multicast/promiscuous filter from
 * the interface address lists, push the MIBs to the firmware, and start
 * (or resume) BSS scanning unless the interface is already running.
 */
int
awi_init(sc)
	struct awi_softc *sc;
{
	int error, ostatus;
	int n;
	struct ifnet *ifp = sc->sc_ifp;
#ifdef __FreeBSD__
	struct ifmultiaddr *ifma;
#else
	struct ether_multi *enm;
	struct ether_multistep step;
#endif

	/* reinitialize multicast filter */
	n = 0;
	ifp->if_flags |= IFF_ALLMULTI;
sc->sc_mib_local.Accept_All_Multicast_Dis = 0; if (ifp->if_flags & IFF_PROMISC) { sc->sc_mib_mac.aPromiscuous_Enable = 1; goto set_mib; } sc->sc_mib_mac.aPromiscuous_Enable = 0; #ifdef __FreeBSD__ if (ifp->if_amcount != 0) goto set_mib; TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (n == AWI_GROUP_ADDR_SIZE) goto set_mib; memcpy(sc->sc_mib_addr.aGroup_Addresses[n], LLADDR((struct sockaddr_dl *)ifma->ifma_addr), ETHER_ADDR_LEN); n++; } #else ETHER_FIRST_MULTI(step, &sc->sc_ec, enm); while (enm != NULL) { if (n == AWI_GROUP_ADDR_SIZE || memcmp(enm->enm_addrlo, enm->enm_addrhi, ETHER_ADDR_LEN) != 0) goto set_mib; memcpy(sc->sc_mib_addr.aGroup_Addresses[n], enm->enm_addrlo, ETHER_ADDR_LEN); n++; ETHER_NEXT_MULTI(step, enm); } #endif for (; n < AWI_GROUP_ADDR_SIZE; n++) memset(sc->sc_mib_addr.aGroup_Addresses[n], 0, ETHER_ADDR_LEN); ifp->if_flags &= ~IFF_ALLMULTI; sc->sc_mib_local.Accept_All_Multicast_Dis = 1; set_mib: #ifdef notdef /* allow non-encrypted frame for receiving. */ sc->sc_mib_mgt.Wep_Required = sc->sc_wep_algo != NULL ? 
1 : 0; #endif if (!sc->sc_enabled) { sc->sc_enabled = 1; if (sc->sc_enable) (*sc->sc_enable)(sc); sc->sc_status = AWI_ST_INIT; error = awi_init_hw(sc); if (error) return error; } ostatus = sc->sc_status; sc->sc_status = AWI_ST_INIT; if ((error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_LOCAL)) != 0 || (error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_ADDR)) != 0 || (error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_MAC)) != 0 || (error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_MGT)) != 0 || (error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_PHY)) != 0) { awi_stop(sc); return error; } if (ifp->if_flags & IFF_RUNNING) sc->sc_status = AWI_ST_RUNNING; else { if (ostatus == AWI_ST_INIT) { error = awi_init_txrx(sc); if (error) return error; } error = awi_start_scan(sc); } return error; } void awi_stop(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; struct awi_bss *bp; struct mbuf *m; sc->sc_status = AWI_ST_INIT; if (!sc->sc_invalid) { (void)awi_cmd_wait(sc); if (sc->sc_mib_local.Network_Mode && sc->sc_status > AWI_ST_AUTH) awi_send_deauth(sc); awi_stop_txrx(sc); } ifp->if_flags &= ~(IFF_RUNNING|IFF_OACTIVE); ifp->if_timer = 0; sc->sc_tx_timer = sc->sc_rx_timer = sc->sc_mgt_timer = 0; for (;;) { _IF_DEQUEUE(&sc->sc_mgtq, m); if (m == NULL) break; m_freem(m); } IF_DRAIN(&ifp->if_snd); while ((bp = TAILQ_FIRST(&sc->sc_scan)) != NULL) { TAILQ_REMOVE(&sc->sc_scan, bp, list); free(bp, M_DEVBUF); } } static void awi_watchdog(ifp) struct ifnet *ifp; { struct awi_softc *sc = ifp->if_softc; int ocansleep; if (sc->sc_invalid) { ifp->if_timer = 0; return; } ocansleep = sc->sc_cansleep; sc->sc_cansleep = 0; if (sc->sc_tx_timer && --sc->sc_tx_timer == 0) { printf("%s: transmit timeout\n", sc->sc_dev.dv_xname); awi_txint(sc); } if (sc->sc_rx_timer && --sc->sc_rx_timer == 0) { if (ifp->if_flags & IFF_DEBUG) { printf("%s: no recent beacons from %s; rescanning\n", sc->sc_dev.dv_xname, ether_sprintf(sc->sc_bss.bssid)); } ifp->if_flags &= ~IFF_RUNNING; awi_start_scan(sc); } if (sc->sc_mgt_timer && 
--sc->sc_mgt_timer == 0) { switch (sc->sc_status) { case AWI_ST_SCAN: awi_stop_scan(sc); break; case AWI_ST_AUTH: case AWI_ST_ASSOC: /* restart scan */ awi_start_scan(sc); break; default: break; } } if (sc->sc_tx_timer == 0 && sc->sc_rx_timer == 0 && sc->sc_mgt_timer == 0) ifp->if_timer = 0; else ifp->if_timer = 1; sc->sc_cansleep = ocansleep; } static void awi_start(ifp) struct ifnet *ifp; { struct awi_softc *sc = ifp->if_softc; struct mbuf *m0, *m; u_int32_t txd, frame, ntxd; u_int8_t rate; int len, sent = 0; for (;;) { txd = sc->sc_txnext; _IF_DEQUEUE(&sc->sc_mgtq, m0); if (m0 != NULL) { if (awi_next_txd(sc, m0->m_pkthdr.len, &frame, &ntxd)) { _IF_PREPEND(&sc->sc_mgtq, m0); ifp->if_flags |= IFF_OACTIVE; break; } } else { if (!(ifp->if_flags & IFF_RUNNING)) break; IF_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; len = m0->m_pkthdr.len + sizeof(struct ieee80211_frame); if (sc->sc_format_llc) len += sizeof(struct llc) - sizeof(struct ether_header); if (sc->sc_wep_algo != NULL) len += IEEE80211_WEP_IVLEN + IEEE80211_WEP_KIDLEN + IEEE80211_WEP_CRCLEN; if (awi_next_txd(sc, len, &frame, &ntxd)) { IF_PREPEND(&ifp->if_snd, m0); ifp->if_flags |= IFF_OACTIVE; break; } AWI_BPF_MTAP(sc, m0, AWI_BPF_NORM); m0 = awi_fix_txhdr(sc, m0); if (sc->sc_wep_algo != NULL && m0 != NULL) m0 = awi_wep_encrypt(sc, m0, 1); if (m0 == NULL) { ifp->if_oerrors++; continue; } ifp->if_opackets++; } #ifdef AWI_DEBUG if (awi_dump) awi_dump_pkt(sc, m0, -1); #endif AWI_BPF_MTAP(sc, m0, AWI_BPF_RAW); len = 0; for (m = m0; m != NULL; m = m->m_next) { awi_write_bytes(sc, frame + len, mtod(m, u_int8_t *), m->m_len); len += m->m_len; } m_freem(m0); rate = sc->sc_tx_rate; /*XXX*/ awi_write_1(sc, ntxd + AWI_TXD_STATE, 0); awi_write_4(sc, txd + AWI_TXD_START, frame); awi_write_4(sc, txd + AWI_TXD_NEXT, ntxd); awi_write_4(sc, txd + AWI_TXD_LENGTH, len); awi_write_1(sc, txd + AWI_TXD_RATE, rate); awi_write_4(sc, txd + AWI_TXD_NDA, 0); awi_write_4(sc, txd + AWI_TXD_NRA, 0); awi_write_1(sc, txd + 
AWI_TXD_STATE, AWI_TXD_ST_OWN);	/* completes the awi_write_1() begun on the previous (collapsed) line: hands the descriptor to the firmware */
		sc->sc_txnext = ntxd;
		sent++;
	}
	if (sent) {
		/* Arm the transmit watchdog (5 ticks) if not already armed. */
		if (sc->sc_tx_timer == 0)
			sc->sc_tx_timer = 5;
		ifp->if_timer = 1;
#ifdef AWI_DEBUG
		if (awi_verbose)
			printf("awi_start: sent %d txdone %d txnext %d txbase %d txend %d\n",
			    sent, sc->sc_txdone, sc->sc_txnext,
			    sc->sc_txbase, sc->sc_txend);
#endif
	}
}

/*
 * Transmit-complete interrupt: reap finished TX descriptors up to
 * sc_txnext, counting errors, then clear the watchdog and OACTIVE and
 * kick awi_start() to queue more frames.
 */
static void
awi_txint(sc)
	struct awi_softc *sc;
{
	struct ifnet *ifp = sc->sc_ifp;
	u_int8_t flags;

	while (sc->sc_txdone != sc->sc_txnext) {
		flags = awi_read_1(sc, sc->sc_txdone + AWI_TXD_STATE);
		/* Stop at the first descriptor still owned by the firmware
		 * or not yet marked done. */
		if ((flags & AWI_TXD_ST_OWN) || !(flags & AWI_TXD_ST_DONE))
			break;
		if (flags & AWI_TXD_ST_ERROR)
			ifp->if_oerrors++;
		sc->sc_txdone =
		    awi_read_4(sc, sc->sc_txdone + AWI_TXD_NEXT) & 0x7fff;
	}
	sc->sc_tx_timer = 0;
	ifp->if_flags &= ~IFF_OACTIVE;
#ifdef AWI_DEBUG
	if (awi_verbose)
		printf("awi_txint: txdone %d txnext %d txbase %d txend %d\n",
		    sc->sc_txdone, sc->sc_txnext, sc->sc_txbase, sc->sc_txend);
#endif
	awi_start(ifp);
}

/*
 * Convert an outbound Ethernet frame into an 802.11 data frame:
 * optionally rewrite the Ethernet header as an LLC/SNAP header
 * (sc_format_llc), then prepend an ieee80211_frame header — ToDS
 * addressing in infrastructure mode, NoDS in adhoc.  Consumes m0;
 * returns NULL on allocation failure.
 */
static struct mbuf *
awi_fix_txhdr(sc, m0)
	struct awi_softc *sc;
	struct mbuf *m0;
{
	struct ether_header eh;
	struct ieee80211_frame *wh;
	struct llc *llc;

	if (m0->m_len < sizeof(eh)) {
		m0 = m_pullup(m0, sizeof(eh));
		if (m0 == NULL)
			return NULL;
	}
	memcpy(&eh, mtod(m0, caddr_t), sizeof(eh));
	if (sc->sc_format_llc) {
		/* Shrink the Ethernet header in place to LLC size and
		 * build a SNAP header carrying the original ether_type. */
		m_adj(m0, sizeof(struct ether_header) - sizeof(struct llc));
		llc = mtod(m0, struct llc *);
		llc->llc_dsap = llc->llc_ssap = LLC_SNAP_LSAP;
		llc->llc_control = LLC_UI;
		llc->llc_snap.org_code[0] = llc->llc_snap.org_code[1] =
		    llc->llc_snap.org_code[2] = 0;
		llc->llc_snap.ether_type = eh.ether_type;
	}
	M_PREPEND(m0, sizeof(struct ieee80211_frame), M_DONTWAIT);
	if (m0 == NULL)
		return NULL;
	wh = mtod(m0, struct ieee80211_frame *);
	wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_DATA;
	LE_WRITE_2(wh->i_dur, 0);
	LE_WRITE_2(wh->i_seq, 0);
	if (sc->sc_mib_local.Network_Mode) {
		/* Infrastructure: addr1 = BSSID, addr2 = SA, addr3 = DA. */
		wh->i_fc[1] = IEEE80211_FC1_DIR_TODS;
		memcpy(wh->i_addr1, sc->sc_bss.bssid, ETHER_ADDR_LEN);
		memcpy(wh->i_addr2, eh.ether_shost, ETHER_ADDR_LEN);
		memcpy(wh->i_addr3,
eh.ether_dhost, ETHER_ADDR_LEN); } else { wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; memcpy(wh->i_addr1, eh.ether_dhost, ETHER_ADDR_LEN); memcpy(wh->i_addr2, eh.ether_shost, ETHER_ADDR_LEN); memcpy(wh->i_addr3, sc->sc_bss.bssid, ETHER_ADDR_LEN); } return m0; } static struct mbuf * awi_fix_rxhdr(sc, m0) struct awi_softc *sc; struct mbuf *m0; { struct ieee80211_frame wh; struct ether_header *eh; struct llc *llc; if (m0->m_len < sizeof(wh)) { m_freem(m0); return NULL; } llc = (struct llc *)(mtod(m0, caddr_t) + sizeof(wh)); if (llc->llc_dsap == LLC_SNAP_LSAP && llc->llc_ssap == LLC_SNAP_LSAP && llc->llc_control == LLC_UI && llc->llc_snap.org_code[0] == 0 && llc->llc_snap.org_code[1] == 0 && llc->llc_snap.org_code[2] == 0) { memcpy(&wh, mtod(m0, caddr_t), sizeof(wh)); m_adj(m0, sizeof(wh) + sizeof(*llc) - sizeof(*eh)); eh = mtod(m0, struct ether_header *); switch (wh.i_fc[1] & IEEE80211_FC1_DIR_MASK) { case IEEE80211_FC1_DIR_NODS: memcpy(eh->ether_dhost, wh.i_addr1, ETHER_ADDR_LEN); memcpy(eh->ether_shost, wh.i_addr2, ETHER_ADDR_LEN); break; case IEEE80211_FC1_DIR_TODS: memcpy(eh->ether_dhost, wh.i_addr3, ETHER_ADDR_LEN); memcpy(eh->ether_shost, wh.i_addr2, ETHER_ADDR_LEN); break; case IEEE80211_FC1_DIR_FROMDS: memcpy(eh->ether_dhost, wh.i_addr1, ETHER_ADDR_LEN); memcpy(eh->ether_shost, wh.i_addr3, ETHER_ADDR_LEN); break; case IEEE80211_FC1_DIR_DSTODS: m_freem(m0); return NULL; } } else { /* assuming ethernet encapsulation, just strip 802.11 header */ m_adj(m0, sizeof(wh)); } if (ALIGN(mtod(m0, caddr_t) + sizeof(struct ether_header)) != (uintptr_t)(mtod(m0, caddr_t) + sizeof(struct ether_header))) { /* XXX: we loose to estimate the type of encapsulation */ struct mbuf *n, *n0, **np; caddr_t newdata; int off; n0 = NULL; np = &n0; off = 0; while (m0->m_pkthdr.len > off) { if (n0 == NULL) { MGETHDR(n, M_DONTWAIT, MT_DATA); if (n == NULL) { m_freem(m0); return NULL; } - M_COPY_PKTHDR(n, m0); + M_MOVE_PKTHDR(n, m0); n->m_len = MHLEN; } else { MGET(n, M_DONTWAIT, MT_DATA); if (n 
== NULL) { m_freem(m0); m_freem(n0); return NULL; } n->m_len = MLEN; } if (m0->m_pkthdr.len - off >= MINCLSIZE) { MCLGET(n, M_DONTWAIT); if (n->m_flags & M_EXT) n->m_len = n->m_ext.ext_size; } if (n0 == NULL) { newdata = (caddr_t) ALIGN(n->m_data + sizeof(struct ether_header)) - sizeof(struct ether_header); n->m_len -= newdata - n->m_data; n->m_data = newdata; } if (n->m_len > m0->m_pkthdr.len - off) n->m_len = m0->m_pkthdr.len - off; m_copydata(m0, off, n->m_len, mtod(n, caddr_t)); off += n->m_len; *np = n; np = &n->m_next; } m_freem(m0); m0 = n0; } return m0; } static void awi_input(sc, m, rxts, rssi) struct awi_softc *sc; struct mbuf *m; u_int32_t rxts; u_int8_t rssi; { struct ifnet *ifp = sc->sc_ifp; struct ieee80211_frame *wh; /* trim CRC here for WEP can find its own CRC at the end of packet. */ m_adj(m, -ETHER_CRC_LEN); AWI_BPF_MTAP(sc, m, AWI_BPF_RAW); wh = mtod(m, struct ieee80211_frame *); if ((wh->i_fc[0] & IEEE80211_FC0_VERSION_MASK) != IEEE80211_FC0_VERSION_0) { printf("%s; receive packet with wrong version: %x\n", sc->sc_dev.dv_xname, wh->i_fc[0]); m_freem(m); ifp->if_ierrors++; return; } if (wh->i_fc[1] & IEEE80211_FC1_WEP) { m = awi_wep_encrypt(sc, m, 0); if (m == NULL) { ifp->if_ierrors++; return; } wh = mtod(m, struct ieee80211_frame *); } #ifdef AWI_DEBUG if (awi_dump) awi_dump_pkt(sc, m, rssi); #endif if ((sc->sc_mib_local.Network_Mode || !sc->sc_no_bssid) && sc->sc_status == AWI_ST_RUNNING) { if (memcmp(wh->i_addr2, sc->sc_bss.bssid, ETHER_ADDR_LEN) == 0) { sc->sc_rx_timer = 10; sc->sc_bss.rssi = rssi; } } switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) { case IEEE80211_FC0_TYPE_DATA: if (sc->sc_mib_local.Network_Mode) { if ((wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) != IEEE80211_FC1_DIR_FROMDS) { m_freem(m); return; } } else { if ((wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) != IEEE80211_FC1_DIR_NODS) { m_freem(m); return; } } m = awi_fix_rxhdr(sc, m); if (m == NULL) { ifp->if_ierrors++; break; } ifp->if_ipackets++; #if !(defined(__FreeBSD__) && 
__FreeBSD_version >= 400000) AWI_BPF_MTAP(sc, m, AWI_BPF_NORM); #endif (*ifp->if_input)(ifp, m); break; case IEEE80211_FC0_TYPE_MGT: if ((wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) != IEEE80211_FC1_DIR_NODS) { m_freem(m); return; } switch (wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_PROBE_RESP: case IEEE80211_FC0_SUBTYPE_BEACON: awi_recv_beacon(sc, m, rxts, rssi); break; case IEEE80211_FC0_SUBTYPE_AUTH: awi_recv_auth(sc, m); break; case IEEE80211_FC0_SUBTYPE_ASSOC_RESP: case IEEE80211_FC0_SUBTYPE_REASSOC_RESP: awi_recv_asresp(sc, m); break; case IEEE80211_FC0_SUBTYPE_DEAUTH: if (sc->sc_mib_local.Network_Mode) awi_send_auth(sc, 1); break; case IEEE80211_FC0_SUBTYPE_DISASSOC: if (sc->sc_mib_local.Network_Mode) awi_send_asreq(sc, 1); break; } m_freem(m); break; case IEEE80211_FC0_TYPE_CTL: default: /* should not come here */ m_freem(m); break; } } static void awi_rxint(sc) struct awi_softc *sc; { u_int8_t state, rate, rssi; u_int16_t len; u_int32_t frame, next, rxts, rxoff; struct mbuf *m; rxoff = sc->sc_rxdoff; for (;;) { state = awi_read_1(sc, rxoff + AWI_RXD_HOST_DESC_STATE); if (state & AWI_RXD_ST_OWN) break; if (!(state & AWI_RXD_ST_CONSUMED)) { if (state & AWI_RXD_ST_RXERROR) sc->sc_ifp->if_ierrors++; else { len = awi_read_2(sc, rxoff + AWI_RXD_LEN); rate = awi_read_1(sc, rxoff + AWI_RXD_RATE); rssi = awi_read_1(sc, rxoff + AWI_RXD_RSSI); frame = awi_read_4(sc, rxoff + AWI_RXD_START_FRAME) & 0x7fff; rxts = awi_read_4(sc, rxoff + AWI_RXD_LOCALTIME); m = awi_devget(sc, frame, len); if (state & AWI_RXD_ST_LF) awi_input(sc, m, rxts, rssi); else sc->sc_rxpend = m; } state |= AWI_RXD_ST_CONSUMED; awi_write_1(sc, rxoff + AWI_RXD_HOST_DESC_STATE, state); } next = awi_read_4(sc, rxoff + AWI_RXD_NEXT); if (next & AWI_RXD_NEXT_LAST) break; /* make sure the next pointer is correct */ if (next != awi_read_4(sc, rxoff + AWI_RXD_NEXT)) break; state |= AWI_RXD_ST_OWN; awi_write_1(sc, rxoff + AWI_RXD_HOST_DESC_STATE, state); rxoff = next & 0x7fff; } 
sc->sc_rxdoff = rxoff; } static struct mbuf * awi_devget(sc, off, len) struct awi_softc *sc; u_int32_t off; u_int16_t len; { struct mbuf *m; struct mbuf *top, **mp; u_int tlen; top = sc->sc_rxpend; mp = ⊤ if (top != NULL) { sc->sc_rxpend = NULL; top->m_pkthdr.len += len; m = top; while (*mp != NULL) { m = *mp; mp = &m->m_next; } if (m->m_flags & M_EXT) tlen = m->m_ext.ext_size; else if (m->m_flags & M_PKTHDR) tlen = MHLEN; else tlen = MLEN; tlen -= m->m_len; if (tlen > len) tlen = len; awi_read_bytes(sc, off, mtod(m, u_int8_t *) + m->m_len, tlen); off += tlen; len -= tlen; } while (len > 0) { if (top == NULL) { MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return NULL; m->m_pkthdr.rcvif = sc->sc_ifp; m->m_pkthdr.len = len; m->m_len = MHLEN; } else { MGET(m, M_DONTWAIT, MT_DATA); if (m == NULL) { m_freem(top); return NULL; } m->m_len = MLEN; } if (len >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if (m->m_flags & M_EXT) m->m_len = m->m_ext.ext_size; } if (top == NULL) { int hdrlen = sizeof(struct ieee80211_frame) + (sc->sc_format_llc ? sizeof(struct llc) : sizeof(struct ether_header)); caddr_t newdata = (caddr_t) ALIGN(m->m_data + hdrlen) - hdrlen; m->m_len -= newdata - m->m_data; m->m_data = newdata; } if (m->m_len > len) m->m_len = len; awi_read_bytes(sc, off, mtod(m, u_int8_t *), m->m_len); off += m->m_len; len -= m->m_len; *mp = m; mp = &m->m_next; } return top; } /* * Initialize hardware and start firmware to accept commands. * Called everytime after power on firmware. */ static int awi_init_hw(sc) struct awi_softc *sc; { u_int8_t status; u_int16_t intmask; int i, error; sc->sc_enab_intr = 0; sc->sc_invalid = 0; /* XXX: really? 
*/ awi_drvstate(sc, AWI_DRV_RESET); /* reset firmware */ am79c930_gcr_setbits(&sc->sc_chip, AM79C930_GCR_CORESET); DELAY(100); awi_write_1(sc, AWI_SELFTEST, 0); awi_write_1(sc, AWI_CMD, 0); awi_write_1(sc, AWI_BANNER, 0); am79c930_gcr_clearbits(&sc->sc_chip, AM79C930_GCR_CORESET); DELAY(100); /* wait for selftest completion */ for (i = 0; ; i++) { if (i >= AWI_SELFTEST_TIMEOUT*hz/1000) { printf("%s: failed to complete selftest (timeout)\n", sc->sc_dev.dv_xname); return ENXIO; } status = awi_read_1(sc, AWI_SELFTEST); if ((status & 0xf0) == 0xf0) break; if (sc->sc_cansleep) { sc->sc_sleep_cnt++; (void)tsleep(sc, PWAIT, "awitst", 1); sc->sc_sleep_cnt--; } else { DELAY(1000*1000/hz); } } if (status != AWI_SELFTEST_PASSED) { printf("%s: failed to complete selftest (code %x)\n", sc->sc_dev.dv_xname, status); return ENXIO; } /* check banner to confirm firmware write it */ awi_read_bytes(sc, AWI_BANNER, sc->sc_banner, AWI_BANNER_LEN); if (memcmp(sc->sc_banner, "PCnetMobile:", 12) != 0) { printf("%s: failed to complete selftest (bad banner)\n", sc->sc_dev.dv_xname); for (i = 0; i < AWI_BANNER_LEN; i++) printf("%s%02x", i ? 
":" : "\t", sc->sc_banner[i]); printf("\n"); return ENXIO; } /* initializing interrupt */ sc->sc_enab_intr = 1; error = awi_intr_lock(sc); if (error) return error; intmask = AWI_INT_GROGGY | AWI_INT_SCAN_CMPLT | AWI_INT_TX | AWI_INT_RX | AWI_INT_CMD; awi_write_1(sc, AWI_INTMASK, ~intmask & 0xff); awi_write_1(sc, AWI_INTMASK2, 0); awi_write_1(sc, AWI_INTSTAT, 0); awi_write_1(sc, AWI_INTSTAT2, 0); awi_intr_unlock(sc); am79c930_gcr_setbits(&sc->sc_chip, AM79C930_GCR_ENECINT); /* issueing interface test command */ error = awi_cmd(sc, AWI_CMD_NOP); if (error) { printf("%s: failed to complete selftest", sc->sc_dev.dv_xname); if (error == ENXIO) printf(" (no hardware)\n"); else if (error != EWOULDBLOCK) printf(" (error %d)\n", error); else if (sc->sc_cansleep) printf(" (lost interrupt)\n"); else printf(" (command timeout)\n"); } return error; } /* * Extract the factory default MIB value from firmware and assign the driver * default value. * Called once at attaching the interface. */ static int awi_init_mibs(sc) struct awi_softc *sc; { int i, error; u_int8_t *rate; if ((error = awi_mib(sc, AWI_CMD_GET_MIB, AWI_MIB_LOCAL)) != 0 || (error = awi_mib(sc, AWI_CMD_GET_MIB, AWI_MIB_ADDR)) != 0 || (error = awi_mib(sc, AWI_CMD_GET_MIB, AWI_MIB_MAC)) != 0 || (error = awi_mib(sc, AWI_CMD_GET_MIB, AWI_MIB_MGT)) != 0 || (error = awi_mib(sc, AWI_CMD_GET_MIB, AWI_MIB_PHY)) != 0) { printf("%s: failed to get default mib value (error %d)\n", sc->sc_dev.dv_xname, error); return error; } rate = sc->sc_mib_phy.aSuprt_Data_Rates; sc->sc_tx_rate = AWI_RATE_1MBIT; for (i = 0; i < rate[1]; i++) { if (AWI_80211_RATE(rate[2 + i]) > sc->sc_tx_rate) sc->sc_tx_rate = AWI_80211_RATE(rate[2 + i]); } awi_init_region(sc); memset(&sc->sc_mib_mac.aDesired_ESS_ID, 0, AWI_ESS_ID_SIZE); sc->sc_mib_mac.aDesired_ESS_ID[0] = IEEE80211_ELEMID_SSID; sc->sc_mib_local.Fragmentation_Dis = 1; sc->sc_mib_local.Accept_All_Multicast_Dis = 1; sc->sc_mib_local.Power_Saving_Mode_Dis = 1; /* allocate buffers */ sc->sc_txbase = 
AWI_BUFFERS; sc->sc_txend = sc->sc_txbase + (AWI_TXD_SIZE + sizeof(struct ieee80211_frame) + sizeof(struct ether_header) + ETHERMTU) * AWI_NTXBUFS; LE_WRITE_4(&sc->sc_mib_local.Tx_Buffer_Offset, sc->sc_txbase); LE_WRITE_4(&sc->sc_mib_local.Tx_Buffer_Size, sc->sc_txend - sc->sc_txbase); LE_WRITE_4(&sc->sc_mib_local.Rx_Buffer_Offset, sc->sc_txend); LE_WRITE_4(&sc->sc_mib_local.Rx_Buffer_Size, AWI_BUFFERS_END - sc->sc_txend); sc->sc_mib_local.Network_Mode = 1; sc->sc_mib_local.Acting_as_AP = 0; return 0; } /* * Start transmitter and receiver of firmware * Called after awi_init_hw() to start operation. */ static int awi_init_txrx(sc) struct awi_softc *sc; { int error; /* start transmitter */ sc->sc_txdone = sc->sc_txnext = sc->sc_txbase; awi_write_4(sc, sc->sc_txbase + AWI_TXD_START, 0); awi_write_4(sc, sc->sc_txbase + AWI_TXD_NEXT, 0); awi_write_4(sc, sc->sc_txbase + AWI_TXD_LENGTH, 0); awi_write_1(sc, sc->sc_txbase + AWI_TXD_RATE, 0); awi_write_4(sc, sc->sc_txbase + AWI_TXD_NDA, 0); awi_write_4(sc, sc->sc_txbase + AWI_TXD_NRA, 0); awi_write_1(sc, sc->sc_txbase + AWI_TXD_STATE, 0); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_TX_DATA, sc->sc_txbase); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_TX_MGT, 0); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_TX_BCAST, 0); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_TX_PS, 0); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_TX_CF, 0); error = awi_cmd(sc, AWI_CMD_INIT_TX); if (error) return error; /* start receiver */ if (sc->sc_rxpend) { m_freem(sc->sc_rxpend); sc->sc_rxpend = NULL; } error = awi_cmd(sc, AWI_CMD_INIT_RX); if (error) return error; sc->sc_rxdoff = awi_read_4(sc, AWI_CMD_PARAMS+AWI_CA_IRX_DATA_DESC); sc->sc_rxmoff = awi_read_4(sc, AWI_CMD_PARAMS+AWI_CA_IRX_PS_DESC); return 0; } static void awi_stop_txrx(sc) struct awi_softc *sc; { if (sc->sc_cmd_inprog) (void)awi_cmd_wait(sc); (void)awi_cmd(sc, AWI_CMD_KILL_RX); (void)awi_cmd_wait(sc); sc->sc_cmd_inprog = AWI_CMD_FLUSH_TX; awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_FTX_DATA, 1); awi_write_1(sc, 
AWI_CMD_PARAMS+AWI_CA_FTX_MGT, 0); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_FTX_BCAST, 0); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_FTX_PS, 0); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_FTX_CF, 0); (void)awi_cmd(sc, AWI_CMD_FLUSH_TX); (void)awi_cmd_wait(sc); } int awi_init_region(sc) struct awi_softc *sc; { if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) { switch (sc->sc_mib_phy.aCurrent_Reg_Domain) { case AWI_REG_DOMAIN_US: case AWI_REG_DOMAIN_CA: case AWI_REG_DOMAIN_EU: sc->sc_scan_min = 0; sc->sc_scan_max = 77; break; case AWI_REG_DOMAIN_ES: sc->sc_scan_min = 0; sc->sc_scan_max = 26; break; case AWI_REG_DOMAIN_FR: sc->sc_scan_min = 0; sc->sc_scan_max = 32; break; case AWI_REG_DOMAIN_JP: sc->sc_scan_min = 6; sc->sc_scan_max = 17; break; default: return EINVAL; } sc->sc_scan_set = sc->sc_scan_cur % 3 + 1; } else { switch (sc->sc_mib_phy.aCurrent_Reg_Domain) { case AWI_REG_DOMAIN_US: case AWI_REG_DOMAIN_CA: sc->sc_scan_min = 1; sc->sc_scan_max = 11; sc->sc_scan_cur = 3; break; case AWI_REG_DOMAIN_EU: sc->sc_scan_min = 1; sc->sc_scan_max = 13; sc->sc_scan_cur = 3; break; case AWI_REG_DOMAIN_ES: sc->sc_scan_min = 10; sc->sc_scan_max = 11; sc->sc_scan_cur = 10; break; case AWI_REG_DOMAIN_FR: sc->sc_scan_min = 10; sc->sc_scan_max = 13; sc->sc_scan_cur = 10; break; case AWI_REG_DOMAIN_JP: sc->sc_scan_min = 14; sc->sc_scan_max = 14; sc->sc_scan_cur = 14; break; default: return EINVAL; } } sc->sc_ownch = sc->sc_scan_cur; return 0; } static int awi_start_scan(sc) struct awi_softc *sc; { int error = 0; struct awi_bss *bp; while ((bp = TAILQ_FIRST(&sc->sc_scan)) != NULL) { TAILQ_REMOVE(&sc->sc_scan, bp, list); free(bp, M_DEVBUF); } if (!sc->sc_mib_local.Network_Mode && sc->sc_no_bssid) { memset(&sc->sc_bss, 0, sizeof(sc->sc_bss)); sc->sc_bss.essid[0] = IEEE80211_ELEMID_SSID; if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) { sc->sc_bss.chanset = sc->sc_ownch % 3 + 1; sc->sc_bss.pattern = sc->sc_ownch; sc->sc_bss.index = 1; sc->sc_bss.dwell_time = 200; /*XXX*/ } else 
sc->sc_bss.chanset = sc->sc_ownch; sc->sc_status = AWI_ST_SETSS; error = awi_set_ss(sc); } else { if (sc->sc_mib_local.Network_Mode) awi_drvstate(sc, AWI_DRV_INFSC); else awi_drvstate(sc, AWI_DRV_ADHSC); sc->sc_start_bss = 0; sc->sc_active_scan = 1; sc->sc_mgt_timer = AWI_ASCAN_WAIT / 1000; sc->sc_ifp->if_timer = 1; sc->sc_status = AWI_ST_SCAN; error = awi_cmd_scan(sc); } return error; } static int awi_next_scan(sc) struct awi_softc *sc; { int error; for (;;) { /* * The pattern parameter for FH phy should be incremented * by 3. But BayStack 650 Access Points apparently always * assign hop pattern set parameter to 1 for any pattern. * So we try all combinations of pattern/set parameters. * Since this causes no error, it may be a bug of * PCnetMobile firmware. */ sc->sc_scan_cur++; if (sc->sc_scan_cur > sc->sc_scan_max) { sc->sc_scan_cur = sc->sc_scan_min; if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) sc->sc_scan_set = sc->sc_scan_set % 3 + 1; } error = awi_cmd_scan(sc); if (error != EINVAL) break; } return error; } static void awi_stop_scan(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; struct awi_bss *bp, *sbp; int fail; bp = TAILQ_FIRST(&sc->sc_scan); if (bp == NULL) { notfound: if (sc->sc_active_scan) { if (ifp->if_flags & IFF_DEBUG) printf("%s: entering passive scan mode\n", sc->sc_dev.dv_xname); sc->sc_active_scan = 0; } sc->sc_mgt_timer = AWI_PSCAN_WAIT / 1000; ifp->if_timer = 1; (void)awi_next_scan(sc); return; } sbp = NULL; if (ifp->if_flags & IFF_DEBUG) printf("%s:\tmacaddr ch/pat sig flag wep essid\n", sc->sc_dev.dv_xname); for (; bp != NULL; bp = TAILQ_NEXT(bp, list)) { if (bp->fails) { /* * The configuration of the access points may change * during my scan. So we retries to associate with * it unless there are any suitable AP. 
*/ if (bp->fails++ < 3) continue; bp->fails = 0; } fail = 0; /* * Since the firmware apparently scans not only the specified * channel of SCAN command but all available channel within * the region, we should filter out unnecessary responses here. */ if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) { if (bp->pattern < sc->sc_scan_min || bp->pattern > sc->sc_scan_max) fail |= 0x01; } else { if (bp->chanset < sc->sc_scan_min || bp->chanset > sc->sc_scan_max) fail |= 0x01; } if (sc->sc_mib_local.Network_Mode) { if (!(bp->capinfo & IEEE80211_CAPINFO_ESS) || (bp->capinfo & IEEE80211_CAPINFO_IBSS)) fail |= 0x02; } else { if ((bp->capinfo & IEEE80211_CAPINFO_ESS) || !(bp->capinfo & IEEE80211_CAPINFO_IBSS)) fail |= 0x02; } if (sc->sc_wep_algo == NULL) { if (bp->capinfo & IEEE80211_CAPINFO_PRIVACY) fail |= 0x04; } else { if (!(bp->capinfo & IEEE80211_CAPINFO_PRIVACY)) fail |= 0x04; } if (sc->sc_mib_mac.aDesired_ESS_ID[1] != 0 && memcmp(&sc->sc_mib_mac.aDesired_ESS_ID, bp->essid, sizeof(bp->essid)) != 0) fail |= 0x08; if (ifp->if_flags & IFF_DEBUG) { printf(" %c %s", fail ? '-' : '+', ether_sprintf(bp->esrc)); if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) printf(" %2d/%d%c", bp->pattern, bp->chanset, fail & 0x01 ? '!' : ' '); else printf(" %4d%c", bp->chanset, fail & 0x01 ? '!' : ' '); printf(" %+4d", bp->rssi); printf(" %4s%c", (bp->capinfo & IEEE80211_CAPINFO_ESS) ? "ess" : (bp->capinfo & IEEE80211_CAPINFO_IBSS) ? "ibss" : "????", fail & 0x02 ? '!' : ' '); printf(" %3s%c ", (bp->capinfo & IEEE80211_CAPINFO_PRIVACY) ? "wep" : "no", fail & 0x04 ? '!' : ' '); awi_print_essid(bp->essid); printf("%s\n", fail & 0x08 ? "!" 
: ""); } if (!fail) { if (sbp == NULL || bp->rssi > sbp->rssi) sbp = bp; } } if (sbp == NULL) goto notfound; sc->sc_bss = *sbp; (void)awi_set_ss(sc); } static void awi_recv_beacon(sc, m0, rxts, rssi) struct awi_softc *sc; struct mbuf *m0; u_int32_t rxts; u_int8_t rssi; { struct ieee80211_frame *wh; struct awi_bss *bp; u_int8_t *frame, *eframe; u_int8_t *tstamp, *bintval, *capinfo, *ssid, *rates, *parms; if (sc->sc_status != AWI_ST_SCAN) return; wh = mtod(m0, struct ieee80211_frame *); frame = (u_int8_t *)&wh[1]; eframe = mtod(m0, u_int8_t *) + m0->m_len; /* * XXX: * timestamp [8] * beacon interval [2] * capability information [2] * ssid [tlv] * supported rates [tlv] * parameter set [tlv] * ... */ if (frame + 12 > eframe) { #ifdef AWI_DEBUG if (awi_verbose) printf("awi_recv_beacon: frame too short \n"); #endif return; } tstamp = frame; frame += 8; bintval = frame; frame += 2; capinfo = frame; frame += 2; ssid = rates = parms = NULL; while (frame < eframe) { switch (*frame) { case IEEE80211_ELEMID_SSID: ssid = frame; break; case IEEE80211_ELEMID_RATES: rates = frame; break; case IEEE80211_ELEMID_FHPARMS: case IEEE80211_ELEMID_DSPARMS: parms = frame; break; } frame += frame[1] + 2; } if (ssid == NULL || rates == NULL || parms == NULL) { #ifdef AWI_DEBUG if (awi_verbose) printf("awi_recv_beacon: ssid=%p, rates=%p, parms=%p\n", ssid, rates, parms); #endif return; } if (ssid[1] > IEEE80211_NWID_LEN) { #ifdef AWI_DEBUG if (awi_verbose) printf("awi_recv_beacon: bad ssid len: %d from %s\n", ssid[1], ether_sprintf(wh->i_addr2)); #endif return; } TAILQ_FOREACH(bp, &sc->sc_scan, list) { if (memcmp(bp->esrc, wh->i_addr2, ETHER_ADDR_LEN) == 0 && memcmp(bp->bssid, wh->i_addr3, ETHER_ADDR_LEN) == 0) break; } if (bp == NULL) { bp = malloc(sizeof(struct awi_bss), M_DEVBUF, M_NOWAIT); if (bp == NULL) return; TAILQ_INSERT_TAIL(&sc->sc_scan, bp, list); memcpy(bp->esrc, wh->i_addr2, ETHER_ADDR_LEN); memcpy(bp->bssid, wh->i_addr3, ETHER_ADDR_LEN); memset(bp->essid, 0, sizeof(bp->essid)); 
memcpy(bp->essid, ssid, 2 + ssid[1]); } bp->rssi = rssi; bp->rxtime = rxts; memcpy(bp->timestamp, tstamp, sizeof(bp->timestamp)); bp->interval = LE_READ_2(bintval); bp->capinfo = LE_READ_2(capinfo); if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) { bp->chanset = parms[4]; bp->pattern = parms[5]; bp->index = parms[6]; bp->dwell_time = LE_READ_2(parms + 2); } else { bp->chanset = parms[2]; bp->pattern = 0; bp->index = 0; bp->dwell_time = 0; } if (sc->sc_mgt_timer == 0) awi_stop_scan(sc); } static int awi_set_ss(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; struct awi_bss *bp; int error; sc->sc_status = AWI_ST_SETSS; bp = &sc->sc_bss; if (ifp->if_flags & IFF_DEBUG) { printf("%s: ch %d pat %d id %d dw %d iv %d bss %s ssid ", sc->sc_dev.dv_xname, bp->chanset, bp->pattern, bp->index, bp->dwell_time, bp->interval, ether_sprintf(bp->bssid)); awi_print_essid(bp->essid); printf("\n"); } memcpy(&sc->sc_mib_mgt.aCurrent_BSS_ID, bp->bssid, ETHER_ADDR_LEN); memcpy(&sc->sc_mib_mgt.aCurrent_ESS_ID, bp->essid, AWI_ESS_ID_SIZE); LE_WRITE_2(&sc->sc_mib_mgt.aBeacon_Period, bp->interval); error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_MGT); return error; } static void awi_try_sync(sc) struct awi_softc *sc; { struct awi_bss *bp; sc->sc_status = AWI_ST_SYNC; bp = &sc->sc_bss; if (sc->sc_cmd_inprog) { if (awi_cmd_wait(sc)) return; } sc->sc_cmd_inprog = AWI_CMD_SYNC; awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_SET, bp->chanset); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_PATTERN, bp->pattern); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_IDX, bp->index); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_STARTBSS, sc->sc_start_bss ? 
1 : 0); awi_write_2(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_DWELL, bp->dwell_time); awi_write_2(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_MBZ, 0); awi_write_bytes(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_TIMESTAMP, bp->timestamp, 8); awi_write_4(sc, AWI_CMD_PARAMS+AWI_CA_SYNC_REFTIME, bp->rxtime); (void)awi_cmd(sc, AWI_CMD_SYNC); } static void awi_sync_done(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; if (sc->sc_mib_local.Network_Mode) { awi_drvstate(sc, AWI_DRV_INFSY); awi_send_auth(sc, 1); } else { if (ifp->if_flags & IFF_DEBUG) { printf("%s: synced with", sc->sc_dev.dv_xname); if (sc->sc_no_bssid) printf(" no-bssid"); else { printf(" %s ssid ", ether_sprintf(sc->sc_bss.bssid)); awi_print_essid(sc->sc_bss.essid); } if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) printf(" at chanset %d pattern %d\n", sc->sc_bss.chanset, sc->sc_bss.pattern); else printf(" at channel %d\n", sc->sc_bss.chanset); } awi_drvstate(sc, AWI_DRV_ADHSY); sc->sc_status = AWI_ST_RUNNING; ifp->if_flags |= IFF_RUNNING; awi_start(ifp); } } static void awi_send_deauth(sc) struct awi_softc *sc; { struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; struct ieee80211_frame *wh; u_int8_t *deauth; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return; if (ifp->if_flags & IFF_DEBUG) printf("%s: sending deauth to %s\n", sc->sc_dev.dv_xname, ether_sprintf(sc->sc_bss.bssid)); wh = mtod(m, struct ieee80211_frame *); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_AUTH; wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; LE_WRITE_2(wh->i_dur, 0); LE_WRITE_2(wh->i_seq, 0); memcpy(wh->i_addr1, sc->sc_bss.bssid, ETHER_ADDR_LEN); memcpy(wh->i_addr2, sc->sc_mib_addr.aMAC_Address, ETHER_ADDR_LEN); memcpy(wh->i_addr3, sc->sc_bss.bssid, ETHER_ADDR_LEN); deauth = (u_int8_t *)&wh[1]; LE_WRITE_2(deauth, IEEE80211_REASON_AUTH_LEAVE); deauth += 2; m->m_pkthdr.len = m->m_len = deauth - mtod(m, u_int8_t *); _IF_ENQUEUE(&sc->sc_mgtq, m); awi_start(ifp); awi_drvstate(sc, AWI_DRV_INFTOSS); } static void 
awi_send_auth(sc, seq) struct awi_softc *sc; int seq; { struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; struct ieee80211_frame *wh; u_int8_t *auth; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return; sc->sc_status = AWI_ST_AUTH; if (ifp->if_flags & IFF_DEBUG) printf("%s: sending auth to %s\n", sc->sc_dev.dv_xname, ether_sprintf(sc->sc_bss.bssid)); wh = mtod(m, struct ieee80211_frame *); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_MGT | IEEE80211_FC0_SUBTYPE_AUTH; wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; LE_WRITE_2(wh->i_dur, 0); LE_WRITE_2(wh->i_seq, 0); memcpy(wh->i_addr1, sc->sc_bss.esrc, ETHER_ADDR_LEN); memcpy(wh->i_addr2, sc->sc_mib_addr.aMAC_Address, ETHER_ADDR_LEN); memcpy(wh->i_addr3, sc->sc_bss.bssid, ETHER_ADDR_LEN); auth = (u_int8_t *)&wh[1]; /* algorithm number */ LE_WRITE_2(auth, IEEE80211_AUTH_ALG_OPEN); auth += 2; /* sequence number */ LE_WRITE_2(auth, seq); auth += 2; /* status */ LE_WRITE_2(auth, 0); auth += 2; m->m_pkthdr.len = m->m_len = auth - mtod(m, u_int8_t *); _IF_ENQUEUE(&sc->sc_mgtq, m); awi_start(ifp); sc->sc_mgt_timer = AWI_TRANS_TIMEOUT / 1000; ifp->if_timer = 1; } static void awi_recv_auth(sc, m0) struct awi_softc *sc; struct mbuf *m0; { struct ieee80211_frame *wh; u_int8_t *auth, *eframe; struct awi_bss *bp; u_int16_t status; wh = mtod(m0, struct ieee80211_frame *); auth = (u_int8_t *)&wh[1]; eframe = mtod(m0, u_int8_t *) + m0->m_len; if (sc->sc_ifp->if_flags & IFF_DEBUG) printf("%s: receive auth from %s\n", sc->sc_dev.dv_xname, ether_sprintf(wh->i_addr2)); /* algorithm number */ if (LE_READ_2(auth) != IEEE80211_AUTH_ALG_OPEN) return; auth += 2; if (!sc->sc_mib_local.Network_Mode) { if (sc->sc_status != AWI_ST_RUNNING) return; if (LE_READ_2(auth) == 1) awi_send_auth(sc, 2); return; } if (sc->sc_status != AWI_ST_AUTH) return; /* sequence number */ if (LE_READ_2(auth) != 2) return; auth += 2; /* status */ status = LE_READ_2(auth); if (status != 0) { printf("%s: authentication failed (reason %d)\n", sc->sc_dev.dv_xname, 
status); TAILQ_FOREACH(bp, &sc->sc_scan, list) { if (memcmp(bp->esrc, sc->sc_bss.esrc, ETHER_ADDR_LEN) == 0) { bp->fails++; break; } } return; } sc->sc_mgt_timer = 0; awi_drvstate(sc, AWI_DRV_INFAUTH); awi_send_asreq(sc, 0); } static void awi_send_asreq(sc, reassoc) struct awi_softc *sc; int reassoc; { struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; struct ieee80211_frame *wh; u_int16_t capinfo, lintval; u_int8_t *asreq; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return; sc->sc_status = AWI_ST_ASSOC; if (ifp->if_flags & IFF_DEBUG) printf("%s: sending %sassoc req to %s\n", sc->sc_dev.dv_xname, reassoc ? "re" : "", ether_sprintf(sc->sc_bss.bssid)); wh = mtod(m, struct ieee80211_frame *); wh->i_fc[0] = IEEE80211_FC0_VERSION_0 | IEEE80211_FC0_TYPE_MGT; if (reassoc) wh->i_fc[0] |= IEEE80211_FC0_SUBTYPE_REASSOC_REQ; else wh->i_fc[0] |= IEEE80211_FC0_SUBTYPE_ASSOC_REQ; wh->i_fc[1] = IEEE80211_FC1_DIR_NODS; LE_WRITE_2(wh->i_dur, 0); LE_WRITE_2(wh->i_seq, 0); memcpy(wh->i_addr1, sc->sc_bss.esrc, ETHER_ADDR_LEN); memcpy(wh->i_addr2, sc->sc_mib_addr.aMAC_Address, ETHER_ADDR_LEN); memcpy(wh->i_addr3, sc->sc_bss.bssid, ETHER_ADDR_LEN); asreq = (u_int8_t *)&wh[1]; /* capability info */ capinfo = IEEE80211_CAPINFO_CF_POLLABLE; if (sc->sc_mib_local.Network_Mode) capinfo |= IEEE80211_CAPINFO_ESS; else capinfo |= IEEE80211_CAPINFO_IBSS; if (sc->sc_wep_algo != NULL) capinfo |= IEEE80211_CAPINFO_PRIVACY; LE_WRITE_2(asreq, capinfo); asreq += 2; /* listen interval */ lintval = LE_READ_2(&sc->sc_mib_mgt.aListen_Interval); LE_WRITE_2(asreq, lintval); asreq += 2; if (reassoc) { /* current AP address */ memcpy(asreq, sc->sc_bss.bssid, ETHER_ADDR_LEN); asreq += ETHER_ADDR_LEN; } /* ssid */ memcpy(asreq, sc->sc_bss.essid, 2 + sc->sc_bss.essid[1]); asreq += 2 + asreq[1]; /* supported rates */ memcpy(asreq, &sc->sc_mib_phy.aSuprt_Data_Rates, 4); asreq += 2 + asreq[1]; m->m_pkthdr.len = m->m_len = asreq - mtod(m, u_int8_t *); _IF_ENQUEUE(&sc->sc_mgtq, m); awi_start(ifp); sc->sc_mgt_timer = 
AWI_TRANS_TIMEOUT / 1000; ifp->if_timer = 1; } static void awi_recv_asresp(sc, m0) struct awi_softc *sc; struct mbuf *m0; { struct ieee80211_frame *wh; u_int8_t *asresp, *eframe; u_int16_t status; u_int8_t rate, *phy_rates; struct awi_bss *bp; int i, j; wh = mtod(m0, struct ieee80211_frame *); asresp = (u_int8_t *)&wh[1]; eframe = mtod(m0, u_int8_t *) + m0->m_len; if (sc->sc_ifp->if_flags & IFF_DEBUG) printf("%s: receive assoc resp from %s\n", sc->sc_dev.dv_xname, ether_sprintf(wh->i_addr2)); if (!sc->sc_mib_local.Network_Mode) return; if (sc->sc_status != AWI_ST_ASSOC) return; /* capability info */ asresp += 2; /* status */ status = LE_READ_2(asresp); if (status != 0) { printf("%s: association failed (reason %d)\n", sc->sc_dev.dv_xname, status); TAILQ_FOREACH(bp, &sc->sc_scan, list) { if (memcmp(bp->esrc, sc->sc_bss.esrc, ETHER_ADDR_LEN) == 0) { bp->fails++; break; } } return; } asresp += 2; /* association id */ asresp += 2; /* supported rates */ rate = AWI_RATE_1MBIT; for (i = 0; i < asresp[1]; i++) { if (AWI_80211_RATE(asresp[2 + i]) <= rate) continue; phy_rates = sc->sc_mib_phy.aSuprt_Data_Rates; for (j = 0; j < phy_rates[1]; j++) { if (AWI_80211_RATE(asresp[2 + i]) == AWI_80211_RATE(phy_rates[2 + j])) rate = AWI_80211_RATE(asresp[2 + i]); } } if (sc->sc_ifp->if_flags & IFF_DEBUG) { printf("%s: associated with %s ssid ", sc->sc_dev.dv_xname, ether_sprintf(sc->sc_bss.bssid)); awi_print_essid(sc->sc_bss.essid); if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) printf(" chanset %d pattern %d\n", sc->sc_bss.chanset, sc->sc_bss.pattern); else printf(" channel %d\n", sc->sc_bss.chanset); } sc->sc_tx_rate = rate; sc->sc_mgt_timer = 0; sc->sc_rx_timer = 10; sc->sc_ifp->if_timer = 1; sc->sc_status = AWI_ST_RUNNING; sc->sc_ifp->if_flags |= IFF_RUNNING; awi_drvstate(sc, AWI_DRV_INFASSOC); awi_start(sc->sc_ifp); } static int awi_mib(sc, cmd, mib) struct awi_softc *sc; u_int8_t cmd; u_int8_t mib; { int error; u_int8_t size, *ptr; switch (mib) { case AWI_MIB_LOCAL: ptr = 
(u_int8_t *)&sc->sc_mib_local; size = sizeof(sc->sc_mib_local); break; case AWI_MIB_ADDR: ptr = (u_int8_t *)&sc->sc_mib_addr; size = sizeof(sc->sc_mib_addr); break; case AWI_MIB_MAC: ptr = (u_int8_t *)&sc->sc_mib_mac; size = sizeof(sc->sc_mib_mac); break; case AWI_MIB_STAT: ptr = (u_int8_t *)&sc->sc_mib_stat; size = sizeof(sc->sc_mib_stat); break; case AWI_MIB_MGT: ptr = (u_int8_t *)&sc->sc_mib_mgt; size = sizeof(sc->sc_mib_mgt); break; case AWI_MIB_PHY: ptr = (u_int8_t *)&sc->sc_mib_phy; size = sizeof(sc->sc_mib_phy); break; default: return EINVAL; } if (sc->sc_cmd_inprog) { error = awi_cmd_wait(sc); if (error) { if (error == EWOULDBLOCK) printf("awi_mib: cmd %d inprog", sc->sc_cmd_inprog); return error; } } sc->sc_cmd_inprog = cmd; if (cmd == AWI_CMD_SET_MIB) awi_write_bytes(sc, AWI_CMD_PARAMS+AWI_CA_MIB_DATA, ptr, size); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_MIB_TYPE, mib); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_MIB_SIZE, size); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_MIB_INDEX, 0); error = awi_cmd(sc, cmd); if (error) return error; if (cmd == AWI_CMD_GET_MIB) { awi_read_bytes(sc, AWI_CMD_PARAMS+AWI_CA_MIB_DATA, ptr, size); #ifdef AWI_DEBUG if (awi_verbose) { int i; printf("awi_mib: #%d:", mib); for (i = 0; i < size; i++) printf(" %02x", ptr[i]); printf("\n"); } #endif } return 0; } static int awi_cmd_scan(sc) struct awi_softc *sc; { int error; u_int8_t scan_mode; if (sc->sc_active_scan) scan_mode = AWI_SCAN_ACTIVE; else scan_mode = AWI_SCAN_PASSIVE; if (sc->sc_mib_mgt.aScan_Mode != scan_mode) { sc->sc_mib_mgt.aScan_Mode = scan_mode; error = awi_mib(sc, AWI_CMD_SET_MIB, AWI_MIB_MGT); return error; } if (sc->sc_cmd_inprog) { error = awi_cmd_wait(sc); if (error) return error; } sc->sc_cmd_inprog = AWI_CMD_SCAN; awi_write_2(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_DURATION, sc->sc_active_scan ? 
AWI_ASCAN_DURATION : AWI_PSCAN_DURATION); if (sc->sc_mib_phy.IEEE_PHY_Type == AWI_PHY_TYPE_FH) { awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_SET, sc->sc_scan_set); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_PATTERN, sc->sc_scan_cur); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_IDX, 1); } else { awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_SET, sc->sc_scan_cur); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_PATTERN, 0); awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_IDX, 0); } awi_write_1(sc, AWI_CMD_PARAMS+AWI_CA_SCAN_SUSP, 0); return awi_cmd(sc, AWI_CMD_SCAN); } static int awi_cmd(sc, cmd) struct awi_softc *sc; u_int8_t cmd; { u_int8_t status; int error = 0; sc->sc_cmd_inprog = cmd; awi_write_1(sc, AWI_CMD_STATUS, AWI_STAT_IDLE); awi_write_1(sc, AWI_CMD, cmd); if (sc->sc_status != AWI_ST_INIT) return 0; error = awi_cmd_wait(sc); if (error) return error; status = awi_read_1(sc, AWI_CMD_STATUS); awi_write_1(sc, AWI_CMD, 0); switch (status) { case AWI_STAT_OK: break; case AWI_STAT_BADPARM: return EINVAL; default: printf("%s: command %d failed %x\n", sc->sc_dev.dv_xname, cmd, status); return ENXIO; } return 0; } static void awi_cmd_done(sc) struct awi_softc *sc; { u_int8_t cmd, status; status = awi_read_1(sc, AWI_CMD_STATUS); if (status == AWI_STAT_IDLE) return; /* stray interrupt */ cmd = sc->sc_cmd_inprog; sc->sc_cmd_inprog = 0; if (sc->sc_status == AWI_ST_INIT) { wakeup(sc); return; } awi_write_1(sc, AWI_CMD, 0); if (status != AWI_STAT_OK) { printf("%s: command %d failed %x\n", sc->sc_dev.dv_xname, cmd, status); return; } switch (sc->sc_status) { case AWI_ST_SCAN: if (cmd == AWI_CMD_SET_MIB) awi_cmd_scan(sc); /* retry */ break; case AWI_ST_SETSS: awi_try_sync(sc); break; case AWI_ST_SYNC: awi_sync_done(sc); break; default: break; } } static int awi_next_txd(sc, len, framep, ntxdp) struct awi_softc *sc; int len; u_int32_t *framep, *ntxdp; { u_int32_t txd, ntxd, frame; txd = sc->sc_txnext; frame = txd + AWI_TXD_SIZE; if (frame + len > sc->sc_txend) frame = sc->sc_txbase; ntxd = 
frame + len; if (ntxd + AWI_TXD_SIZE > sc->sc_txend) ntxd = sc->sc_txbase; *framep = frame; *ntxdp = ntxd; /* * Determine if there are any room in ring buffer. * --- send wait, === new data, +++ conflict (ENOBUFS) * base........................end * done----txd=====ntxd OK * --txd=====done++++ntxd-- full * --txd=====ntxd done-- OK * ==ntxd done----txd=== OK * ==done++++ntxd----txd=== full * ++ntxd txd=====done++ full */ if (txd < ntxd) { if (txd < sc->sc_txdone && ntxd + AWI_TXD_SIZE > sc->sc_txdone) return ENOBUFS; } else { if (txd < sc->sc_txdone || ntxd + AWI_TXD_SIZE > sc->sc_txdone) return ENOBUFS; } return 0; } static int awi_lock(sc) struct awi_softc *sc; { int error = 0; if (curproc == NULL) { /* * XXX * Though driver ioctl should be called with context, * KAME ipv6 stack calls ioctl in interrupt for now. * We simply abort the request if there are other * ioctl requests in progress. */ if (sc->sc_busy) { return EWOULDBLOCK; if (sc->sc_invalid) return ENXIO; } sc->sc_busy = 1; sc->sc_cansleep = 0; return 0; } while (sc->sc_busy) { if (sc->sc_invalid) return ENXIO; sc->sc_sleep_cnt++; error = tsleep(sc, PWAIT | PCATCH, "awilck", 0); sc->sc_sleep_cnt--; if (error) return error; } sc->sc_busy = 1; sc->sc_cansleep = 1; return 0; } static void awi_unlock(sc) struct awi_softc *sc; { sc->sc_busy = 0; sc->sc_cansleep = 0; if (sc->sc_sleep_cnt) wakeup(sc); } static int awi_intr_lock(sc) struct awi_softc *sc; { u_int8_t status; int i, retry; status = 1; for (retry = 0; retry < 10; retry++) { for (i = 0; i < AWI_LOCKOUT_TIMEOUT*1000/5; i++) { status = awi_read_1(sc, AWI_LOCKOUT_HOST); if (status == 0) break; DELAY(5); } if (status != 0) break; awi_write_1(sc, AWI_LOCKOUT_MAC, 1); status = awi_read_1(sc, AWI_LOCKOUT_HOST); if (status == 0) break; awi_write_1(sc, AWI_LOCKOUT_MAC, 0); } if (status != 0) { printf("%s: failed to lock interrupt\n", sc->sc_dev.dv_xname); return ENXIO; } return 0; } static void awi_intr_unlock(sc) struct awi_softc *sc; { awi_write_1(sc, 
AWI_LOCKOUT_MAC, 0); } static int awi_cmd_wait(sc) struct awi_softc *sc; { int i, error = 0; i = 0; while (sc->sc_cmd_inprog) { if (sc->sc_invalid) return ENXIO; if (awi_read_1(sc, AWI_CMD) != sc->sc_cmd_inprog) { printf("%s: failed to access hardware\n", sc->sc_dev.dv_xname); sc->sc_invalid = 1; return ENXIO; } if (sc->sc_cansleep) { sc->sc_sleep_cnt++; error = tsleep(sc, PWAIT, "awicmd", AWI_CMD_TIMEOUT*hz/1000); sc->sc_sleep_cnt--; } else { if (awi_read_1(sc, AWI_CMD_STATUS) != AWI_STAT_IDLE) { awi_cmd_done(sc); break; } if (i++ >= AWI_CMD_TIMEOUT*1000/10) error = EWOULDBLOCK; else DELAY(10); } if (error) break; } return error; } static void awi_print_essid(essid) u_int8_t *essid; { int i, len; u_int8_t *p; len = essid[1]; if (len > IEEE80211_NWID_LEN) len = IEEE80211_NWID_LEN; /*XXX*/ /* determine printable or not */ for (i = 0, p = essid + 2; i < len; i++, p++) { if (*p < ' ' || *p > 0x7e) break; } if (i == len) { printf("\""); for (i = 0, p = essid + 2; i < len; i++, p++) printf("%c", *p); printf("\""); } else { printf("0x"); for (i = 0, p = essid + 2; i < len; i++, p++) printf("%02x", *p); } } #ifdef AWI_DEBUG static void awi_dump_pkt(sc, m, rssi) struct awi_softc *sc; struct mbuf *m; int rssi; { struct ieee80211_frame *wh; int i, l; wh = mtod(m, struct ieee80211_frame *); if (awi_dump_mask != 0 && ((wh->i_fc[1] & IEEE80211_FC1_DIR_MASK)==IEEE80211_FC1_DIR_NODS) && ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK)==IEEE80211_FC0_TYPE_MGT)) { if ((AWI_DUMP_MASK(wh->i_fc[0]) & awi_dump_mask) != 0) return; } if (awi_dump_mask < 0 && (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK)==IEEE80211_FC0_TYPE_DATA) return; if (rssi < 0) printf("tx: "); else printf("rx: "); switch (wh->i_fc[1] & IEEE80211_FC1_DIR_MASK) { case IEEE80211_FC1_DIR_NODS: printf("NODS %s", ether_sprintf(wh->i_addr2)); printf("->%s", ether_sprintf(wh->i_addr1)); printf("(%s)", ether_sprintf(wh->i_addr3)); break; case IEEE80211_FC1_DIR_TODS: printf("TODS %s", ether_sprintf(wh->i_addr2)); printf("->%s", 
ether_sprintf(wh->i_addr3)); printf("(%s)", ether_sprintf(wh->i_addr1)); break; case IEEE80211_FC1_DIR_FROMDS: printf("FRDS %s", ether_sprintf(wh->i_addr3)); printf("->%s", ether_sprintf(wh->i_addr1)); printf("(%s)", ether_sprintf(wh->i_addr2)); break; case IEEE80211_FC1_DIR_DSTODS: printf("DSDS %s", ether_sprintf((u_int8_t *)&wh[1])); printf("->%s", ether_sprintf(wh->i_addr3)); printf("(%s", ether_sprintf(wh->i_addr2)); printf("->%s)", ether_sprintf(wh->i_addr1)); break; } switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) { case IEEE80211_FC0_TYPE_DATA: printf(" data"); break; case IEEE80211_FC0_TYPE_MGT: switch (wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_PROBE_REQ: printf(" probe_req"); break; case IEEE80211_FC0_SUBTYPE_PROBE_RESP: printf(" probe_resp"); break; case IEEE80211_FC0_SUBTYPE_BEACON: printf(" beacon"); break; case IEEE80211_FC0_SUBTYPE_AUTH: printf(" auth"); break; case IEEE80211_FC0_SUBTYPE_ASSOC_REQ: printf(" assoc_req"); break; case IEEE80211_FC0_SUBTYPE_ASSOC_RESP: printf(" assoc_resp"); break; case IEEE80211_FC0_SUBTYPE_REASSOC_REQ: printf(" reassoc_req"); break; case IEEE80211_FC0_SUBTYPE_REASSOC_RESP: printf(" reassoc_resp"); break; case IEEE80211_FC0_SUBTYPE_DEAUTH: printf(" deauth"); break; case IEEE80211_FC0_SUBTYPE_DISASSOC: printf(" disassoc"); break; default: printf(" mgt#%d", wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK); break; } break; default: printf(" type#%d", wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK); break; } if (wh->i_fc[1] & IEEE80211_FC1_WEP) printf(" WEP"); if (rssi >= 0) printf(" +%d", rssi); printf("\n"); if (awi_dump_len > 0) { l = m->m_len; if (l > awi_dump_len + sizeof(*wh)) l = awi_dump_len + sizeof(*wh); i = sizeof(*wh); if (awi_dump_hdr) i = 0; for (; i < l; i++) { if ((i & 1) == 0) printf(" "); printf("%02x", mtod(m, u_int8_t *)[i]); } printf("\n"); } } #endif Index: head/sys/dev/awi/awi_wep.c =================================================================== --- head/sys/dev/awi/awi_wep.c (revision 
108465) +++ head/sys/dev/awi/awi_wep.c (revision 108466) @@ -1,531 +1,531 @@ /* $NetBSD: awi_wep.c,v 1.4 2000/08/14 11:28:03 onoe Exp $ */ /* $FreeBSD$ */ /* * Copyright (c) 2000 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Atsushi Onoe. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * WEP support framework for the awi driver. * * No actual encryption capability is provided here, but any can be added * to awi_wep_algo table below. * * Note that IEEE802.11 specification states WEP uses RC4 with 40bit key, * which is a proprietary encryption algorithm available under license * from RSA Data Security Inc. Using another algorithm, includes null * encryption provided here, the awi driver cannot be able to communicate * with other stations. */ #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) && __FreeBSD_version >= 400000 #include #else #include #endif #include #include #ifdef __FreeBSD__ #include #include #else #include #endif #include #include #include #include #ifdef __NetBSD__ #include #include #include #include #include #endif #ifdef __FreeBSD__ #include #include #include #include #include static __inline int arc4_ctxlen(void) { return sizeof(struct rc4_state); } static __inline void arc4_setkey(void *ctx, u_int8_t *key, int keylen) { rc4_init(ctx, key, keylen); } static __inline void arc4_encrypt(void *ctx, u_int8_t *dst, u_int8_t *src, int len) { rc4_crypt(ctx, src, dst, len); } #endif static void awi_crc_init(void); static u_int32_t awi_crc_update(u_int32_t crc, u_int8_t *buf, int len); static int awi_null_ctxlen(void); static void awi_null_setkey(void *ctx, u_int8_t *key, int keylen); static void awi_null_copy(void *ctx, u_int8_t *dst, u_int8_t *src, int len); /* 
XXX: the order should be known to wiconfig/user */ static struct awi_wep_algo awi_wep_algo[] = { /* 0: no wep */ { "no" }, /* dummy for no wep */ /* 1: normal wep (arc4) */ { "arc4", arc4_ctxlen, arc4_setkey, arc4_encrypt, arc4_encrypt }, /* 2: debug wep (null) */ { "null", awi_null_ctxlen, awi_null_setkey, awi_null_copy, awi_null_copy }, /* dummy for wep without encryption */ }; int awi_wep_setnwkey(sc, nwkey) struct awi_softc *sc; struct ieee80211_nwkey *nwkey; { int i, len, error; u_int8_t keybuf[AWI_MAX_KEYLEN]; if (nwkey->i_defkid <= 0 || nwkey->i_defkid > IEEE80211_WEP_NKID) return EINVAL; error = 0; for (i = 0; i < IEEE80211_WEP_NKID; i++) { if (nwkey->i_key[i].i_keydat == NULL) continue; len = nwkey->i_key[i].i_keylen; if (len > sizeof(keybuf)) { error = EINVAL; break; } error = copyin(nwkey->i_key[i].i_keydat, keybuf, len); if (error) break; error = awi_wep_setkey(sc, i, keybuf, len); if (error) break; } if (error == 0) { sc->sc_wep_defkid = nwkey->i_defkid - 1; error = awi_wep_setalgo(sc, nwkey->i_wepon); if (error == 0 && sc->sc_enabled) { awi_stop(sc); error = awi_init(sc); } } return error; } int awi_wep_getnwkey(sc, nwkey) struct awi_softc *sc; struct ieee80211_nwkey *nwkey; { int i, len, error, suerr; u_int8_t keybuf[AWI_MAX_KEYLEN]; nwkey->i_wepon = awi_wep_getalgo(sc); nwkey->i_defkid = sc->sc_wep_defkid + 1; /* do not show any keys to non-root user */ #ifdef __FreeBSD__ #if __FreeBSD_version < 500028 suerr = suser(curproc); #else suerr = suser(curthread); #endif #else suerr = suser(curproc->p_ucred, &curproc->p_acflag); #endif error = 0; for (i = 0; i < IEEE80211_WEP_NKID; i++) { if (nwkey->i_key[i].i_keydat == NULL) continue; if (suerr) { error = suerr; break; } len = sizeof(keybuf); error = awi_wep_getkey(sc, i, keybuf, &len); if (error) break; if (nwkey->i_key[i].i_keylen < len) { error = ENOSPC; break; } nwkey->i_key[i].i_keylen = len; error = copyout(keybuf, nwkey->i_key[i].i_keydat, len); if (error) break; } return error; } int 
awi_wep_getalgo(sc) struct awi_softc *sc; { if (sc->sc_wep_algo == NULL) return 0; return sc->sc_wep_algo - awi_wep_algo; } int awi_wep_setalgo(sc, algo) struct awi_softc *sc; int algo; { struct awi_wep_algo *awa; int ctxlen; awi_crc_init(); /* XXX: not belongs here */ if (algo < 0 || algo > sizeof(awi_wep_algo)/sizeof(awi_wep_algo[0])) return EINVAL; awa = &awi_wep_algo[algo]; if (awa->awa_name == NULL) return EINVAL; if (awa->awa_ctxlen == NULL) { awa = NULL; ctxlen = 0; } else ctxlen = awa->awa_ctxlen(); if (sc->sc_wep_ctx != NULL) { free(sc->sc_wep_ctx, M_DEVBUF); sc->sc_wep_ctx = NULL; } if (ctxlen) { sc->sc_wep_ctx = malloc(ctxlen, M_DEVBUF, M_NOWAIT); if (sc->sc_wep_ctx == NULL) return ENOMEM; } sc->sc_wep_algo = awa; return 0; } int awi_wep_setkey(sc, kid, key, keylen) struct awi_softc *sc; int kid; unsigned char *key; int keylen; { if (kid < 0 || kid >= IEEE80211_WEP_NKID) return EINVAL; if (keylen < 0 || keylen + IEEE80211_WEP_IVLEN > AWI_MAX_KEYLEN) return EINVAL; sc->sc_wep_keylen[kid] = keylen; if (keylen > 0) memcpy(sc->sc_wep_key[kid] + IEEE80211_WEP_IVLEN, key, keylen); return 0; } int awi_wep_getkey(sc, kid, key, keylen) struct awi_softc *sc; int kid; unsigned char *key; int *keylen; { if (kid < 0 || kid >= IEEE80211_WEP_NKID) return EINVAL; if (*keylen < sc->sc_wep_keylen[kid]) return ENOSPC; *keylen = sc->sc_wep_keylen[kid]; if (*keylen > 0) memcpy(key, sc->sc_wep_key[kid] + IEEE80211_WEP_IVLEN, *keylen); return 0; } struct mbuf * awi_wep_encrypt(sc, m0, txflag) struct awi_softc *sc; struct mbuf *m0; int txflag; { struct mbuf *m, *n, *n0; struct ieee80211_frame *wh; struct awi_wep_algo *awa; int left, len, moff, noff, keylen, kid; u_int32_t iv, crc; u_int8_t *key, *ivp; void *ctx; u_int8_t crcbuf[IEEE80211_WEP_CRCLEN]; n0 = NULL; awa = sc->sc_wep_algo; if (awa == NULL) goto fail; ctx = sc->sc_wep_ctx; m = m0; left = m->m_pkthdr.len; MGET(n, M_DONTWAIT, m->m_type); n0 = n; if (n == NULL) goto fail; - M_COPY_PKTHDR(n, m); + M_MOVE_PKTHDR(n, m); len 
= IEEE80211_WEP_IVLEN + IEEE80211_WEP_KIDLEN + IEEE80211_WEP_CRCLEN; if (txflag) { n->m_pkthdr.len += len; } else { n->m_pkthdr.len -= len; left -= len; } n->m_len = MHLEN; if (n->m_pkthdr.len >= MINCLSIZE) { MCLGET(n, M_DONTWAIT); if (n->m_flags & M_EXT) n->m_len = n->m_ext.ext_size; } len = sizeof(struct ieee80211_frame); memcpy(mtod(n, caddr_t), mtod(m, caddr_t), len); left -= len; moff = len; noff = len; if (txflag) { kid = sc->sc_wep_defkid; wh = mtod(n, struct ieee80211_frame *); wh->i_fc[1] |= IEEE80211_FC1_WEP; iv = random(); /* * store IV, byte order is not the matter since it's random. * assuming IEEE80211_WEP_IVLEN is 3 */ ivp = mtod(n, u_int8_t *) + noff; ivp[0] = (iv >> 16) & 0xff; ivp[1] = (iv >> 8) & 0xff; ivp[2] = iv & 0xff; ivp[IEEE80211_WEP_IVLEN] = kid << 6; /* pad and keyid */ noff += IEEE80211_WEP_IVLEN + IEEE80211_WEP_KIDLEN; } else { ivp = mtod(m, u_int8_t *) + moff; kid = ivp[IEEE80211_WEP_IVLEN] >> 6; moff += IEEE80211_WEP_IVLEN + IEEE80211_WEP_KIDLEN; } key = sc->sc_wep_key[kid]; keylen = sc->sc_wep_keylen[kid]; /* assuming IEEE80211_WEP_IVLEN is 3 */ key[0] = ivp[0]; key[1] = ivp[1]; key[2] = ivp[2]; awa->awa_setkey(ctx, key, IEEE80211_WEP_IVLEN + keylen); /* encrypt with calculating CRC */ crc = ~0; while (left > 0) { len = m->m_len - moff; if (len == 0) { m = m->m_next; moff = 0; continue; } if (len > n->m_len - noff) { len = n->m_len - noff; if (len == 0) { MGET(n->m_next, M_DONTWAIT, n->m_type); if (n->m_next == NULL) goto fail; n = n->m_next; n->m_len = MLEN; if (left >= MINCLSIZE) { MCLGET(n, M_DONTWAIT); if (n->m_flags & M_EXT) n->m_len = n->m_ext.ext_size; } noff = 0; continue; } } if (len > left) len = left; if (txflag) { awa->awa_encrypt(ctx, mtod(n, caddr_t) + noff, mtod(m, caddr_t) + moff, len); crc = awi_crc_update(crc, mtod(m, caddr_t) + moff, len); } else { awa->awa_decrypt(ctx, mtod(n, caddr_t) + noff, mtod(m, caddr_t) + moff, len); crc = awi_crc_update(crc, mtod(n, caddr_t) + noff, len); } left -= len; moff += len; noff 
+= len;
	}
	crc = ~crc;
	if (txflag) {
		/* tx: append the little-endian CRC (the WEP ICV), encrypted */
		LE_WRITE_4(crcbuf, crc);
		if (n->m_len >= noff + sizeof(crcbuf))
			n->m_len = noff + sizeof(crcbuf);
		else {
			/* no room in the current mbuf; chain one more for the ICV */
			n->m_len = noff;
			MGET(n->m_next, M_DONTWAIT, n->m_type);
			if (n->m_next == NULL)
				goto fail;
			n = n->m_next;
			n->m_len = sizeof(crcbuf);
			noff = 0;
		}
		awa->awa_encrypt(ctx, mtod(n, caddr_t) + noff, crcbuf,
		    sizeof(crcbuf));
	} else {
		/* rx: decrypt the trailing ICV and compare with computed CRC */
		n->m_len = noff;
		for (noff = 0; noff < sizeof(crcbuf); noff += len) {
			len = sizeof(crcbuf) - noff;
			if (len > m->m_len - moff)
				len = m->m_len - moff;
			if (len > 0)
				awa->awa_decrypt(ctx, crcbuf + noff,
				    mtod(m, caddr_t) + moff, len);
			m = m->m_next;
			moff = 0;
		}
		if (crc != LE_READ_4(crcbuf))
			goto fail;	/* ICV mismatch: drop the frame */
	}
	m_freem(m0);
	return n0;

fail:
	/* error path: both the original and any partial copy are released */
	m_freem(m0);
	m_freem(n0);
	return NULL;
}

/*
 * CRC 32 -- routine from RFC 2083
 */

/* Table of CRCs of all 8-bit messages */
static u_int32_t awi_crc_table[256];
static int awi_crc_table_computed = 0;	/* lazily built on first use */

/* Make the table for a fast CRC. */
static void
awi_crc_init()
{
	u_int32_t c;
	int n, k;

	if (awi_crc_table_computed)
		return;
	for (n = 0; n < 256; n++) {
		c = (u_int32_t)n;
		for (k = 0; k < 8; k++) {
			/* 0xedb88320 is the reflected CRC-32 polynomial */
			if (c & 1)
				c = 0xedb88320UL ^ (c >> 1);
			else
				c = c >> 1;
		}
		awi_crc_table[n] = c;
	}
	awi_crc_table_computed = 1;
}

/*
 * Update a running CRC with the bytes buf[0..len-1]--the CRC
 * should be initialized to all 1's, and the transmitted value
 * is the 1's complement of the final running CRC
 */
static u_int32_t
awi_crc_update(crc, buf, len)
	u_int32_t crc;
	u_int8_t *buf;
	int len;
{
	u_int8_t *endbuf;

	for (endbuf = buf + len; buf < endbuf; buf++)
		crc = awi_crc_table[(crc ^ *buf) & 0xff] ^ (crc >> 8);
	return crc;
}

/*
 * Null -- do nothing but copy.
*/ static int awi_null_ctxlen() { return 0; } static void awi_null_setkey(ctx, key, keylen) void *ctx; u_char *key; int keylen; { } static void awi_null_copy(ctx, dst, src, len) void *ctx; u_char *dst; u_char *src; int len; { memcpy(dst, src, len); } Index: head/sys/dev/en/midway.c =================================================================== --- head/sys/dev/en/midway.c (revision 108465) +++ head/sys/dev/en/midway.c (revision 108466) @@ -1,3467 +1,3467 @@ /* $NetBSD: midway.c,v 1.30 1997/09/29 17:40:38 chuck Exp $ */ /* (sync'd to midway.c 1.68) */ /* * * Copyright (c) 1996 Charles D. Cranor and Washington University. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor and * Washington University. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * * m i d w a y . c e n i 1 5 5 d r i v e r * * author: Chuck Cranor * started: spring, 1996 (written from scratch). * * notes from the author: * Extra special thanks go to Werner Almesberger, EPFL LRC. Werner's * ENI driver was especially useful in figuring out how this card works. * I would also like to thank Werner for promptly answering email and being * generally helpful. */ #undef EN_DEBUG #undef EN_DEBUG_RANGE /* check ranges on en_read/en_write's? */ #define EN_MBUF_OPT /* try and put more stuff in mbuf? */ #define EN_DIAG #define EN_STAT #ifndef EN_DMA #define EN_DMA 1 /* use dma? */ #endif #define EN_NOTXDMA 0 /* hook to disable tx dma only */ #define EN_NORXDMA 0 /* hook to disable rx dma only */ #define EN_DDBHOOK 1 /* compile in ddb functions */ #if defined(MIDWAY_ADPONLY) #define EN_ENIDMAFIX 0 /* no ENI cards to worry about */ #else #define EN_ENIDMAFIX 1 /* avoid byte DMA on the ENI card (see below) */ #endif /* * note on EN_ENIDMAFIX: the byte aligner on the ENI version of the card * appears to be broken. it works just fine if there is no load... however * when the card is loaded the data get corrupted. to see this, one only * has to use "telnet" over ATM. do the following command in "telnet": * cat /usr/share/misc/termcap * "telnet" seems to generate lots of 1023 byte mbufs (which make great * use of the byte aligner). watch "netstat -s" for checksum errors. 
* * I further tested this by adding a function that compared the transmit * data on the card's SRAM with the data in the mbuf chain _after_ the * "transmit DMA complete" interrupt. using the "telnet" test I got data * mismatches where the byte-aligned data should have been. using ddb * and en_dumpmem() I verified that the DTQs fed into the card were * absolutely correct. thus, we are forced to concluded that the ENI * hardware is buggy. note that the Adaptec version of the card works * just fine with byte DMA. * * bottom line: we set EN_ENIDMAFIX to 1 to avoid byte DMAs on the ENI * card. */ #if defined(DIAGNOSTIC) && !defined(EN_DIAG) #define EN_DIAG /* link in with master DIAG option */ #endif #ifdef EN_STAT #define EN_COUNT(X) (X)++ #else #define EN_COUNT(X) /* nothing */ #endif #ifdef EN_DEBUG #undef EN_DDBHOOK #define EN_DDBHOOK 1 #define STATIC /* nothing */ #define INLINE /* nothing */ #else /* EN_DEBUG */ #define STATIC static #define INLINE __inline #endif /* EN_DEBUG */ #ifdef __FreeBSD__ #include "opt_inet.h" #include "opt_natm.h" #include "opt_ddb.h" /* enable DDBHOOK when DDB is available */ #undef EN_DDBHOOK #ifdef DDB #define EN_DDBHOOK 1 #endif #endif #include #include #include #if defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) #include #endif #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #endif #ifdef NATM #include #endif #if defined(__NetBSD__) || defined(__OpenBSD__) #include #include #include #elif defined(__FreeBSD__) #include #include #include #include #include #include #include /* for vtophys proto */ #ifndef IFF_NOTRAILERS #define IFF_NOTRAILERS 0 #endif #endif /* __FreeBSD__ */ #if defined(__alpha__) /* XXX XXX NEED REAL DMA MAPPING SUPPORT XXX XXX */ #undef vtophys #define vtophys(va) alpha_XXX_dmamap((vm_offset_t)(va)) #endif #ifdef __FreeBSD__ #define NBPF 1 #else #include "bpf.h" #endif #if NBPF > 0 #include #ifdef __FreeBSD__ #define BPFATTACH(ifp, dlt, 
hlen) bpfattach((ifp), (dlt), (hlen)) #else #define BPFATTACH(ifp, dlt, hlen) bpfattach(&(ifp)->if_bpf, (ifp), (dlt), (hlen)) #define BPF_MTAP(ifp, m) bpf_mtap((ifp)->if_bpf, (m)) #endif #endif /* NBPF > 0 */ /* * params */ #ifndef EN_TXHIWAT #define EN_TXHIWAT (64*1024) /* max 64 KB waiting to be DMAd out */ #endif #ifndef EN_MINDMA #define EN_MINDMA 32 /* don't DMA anything less than this (bytes) */ #endif #define RX_NONE 0xffff /* recv VC not in use */ #define EN_OBHDR ATM_PH_DRIVER7 /* TBD in first mbuf ! */ #define EN_OBTRL ATM_PH_DRIVER8 /* PDU trailier in last mbuf ! */ #define ENOTHER_FREE 0x01 /* free rxslot */ #define ENOTHER_DRAIN 0x02 /* almost free (drain DRQ dma) */ #define ENOTHER_RAW 0x04 /* 'raw' access (aka boodi mode) */ #define ENOTHER_SWSL 0x08 /* in software service list */ static int en_dma = EN_DMA; /* use DMA (switch off for dbg) */ #ifndef __FreeBSD__ /* * autoconfig attachments */ struct cfdriver en_cd = { 0, "en", DV_IFNET, }; #endif /* * local structures */ /* * params to en_txlaunch() function */ struct en_launch { u_int32_t tbd1; /* TBD 1 */ u_int32_t tbd2; /* TBD 2 */ u_int32_t pdu1; /* PDU 1 (aal5) */ int nodma; /* don't use DMA */ int need; /* total space we need (pad out if less data) */ int mlen; /* length of mbuf (for dtq) */ struct mbuf *t; /* data */ u_int32_t aal; /* aal code */ u_int32_t atm_vci; /* vci */ u_int8_t atm_flags; /* flags */ }; /* * dma table (index by # of words) * * plan A: use WMAYBE (obsolete) * plan B: avoid WMAYBE */ struct en_dmatab { u_int8_t bcode; /* code */ u_int8_t divshift; /* byte divisor */ }; static struct en_dmatab en_dma_planB[] = { { 0, 0 }, /* 0 */ { MIDDMA_WORD, 2}, /* 1 */ { MIDDMA_2WORD, 3}, /* 2 */ { MIDDMA_WORD, 2}, /* 3 */ { MIDDMA_4WORD, 4}, /* 4 */ { MIDDMA_WORD, 2}, /* 5 */ { MIDDMA_2WORD, 3}, /* 6 */ { MIDDMA_WORD, 2}, /* 7 */ { MIDDMA_8WORD, 5}, /* 8 */ { MIDDMA_WORD, 2}, /* 9 */ { MIDDMA_2WORD, 3}, /* 10 */ { MIDDMA_WORD, 2}, /* 11 */ { MIDDMA_4WORD, 4}, /* 12 */ { MIDDMA_WORD, 
2}, /* 13 */ { MIDDMA_2WORD, 3}, /* 14 */ { MIDDMA_WORD, 2}, /* 15 */ { MIDDMA_16WORD, 6}, /* 16 */ }; static struct en_dmatab *en_dmaplan = en_dma_planB; /* * prototypes */ STATIC INLINE int en_b2sz(int) __attribute__ ((unused)); #ifdef EN_DDBHOOK int en_dump(int,int); int en_dumpmem(int,int,int); #endif STATIC void en_dmaprobe(struct en_softc *); STATIC int en_dmaprobe_doit(struct en_softc *, u_int8_t *, u_int8_t *, int); STATIC INLINE int en_dqneed(struct en_softc *, caddr_t, u_int, u_int) __attribute__ ((unused)); STATIC void en_init(struct en_softc *); STATIC int en_ioctl(struct ifnet *, EN_IOCTL_CMDT, caddr_t); STATIC INLINE int en_k2sz(int) __attribute__ ((unused)); STATIC void en_loadvc(struct en_softc *, int); STATIC int en_mfix(struct en_softc *, struct mbuf **, struct mbuf *); STATIC INLINE struct mbuf *en_mget(struct en_softc *, u_int, u_int *) __attribute__ ((unused)); STATIC INLINE u_int32_t en_read(struct en_softc *, u_int32_t) __attribute__ ((unused)); STATIC int en_rxctl(struct en_softc *, struct atm_pseudoioctl *, int); STATIC void en_txdma(struct en_softc *, int); STATIC void en_txlaunch(struct en_softc *, int, struct en_launch *); STATIC void en_service(struct en_softc *); STATIC void en_start(struct ifnet *); STATIC INLINE int en_sz2b(int) __attribute__ ((unused)); STATIC INLINE void en_write(struct en_softc *, u_int32_t, u_int32_t) __attribute__ ((unused)); /* * macros/inline */ /* * raw read/write macros */ #define EN_READDAT(SC,R) en_read(SC,R) #define EN_WRITEDAT(SC,R,V) en_write(SC,R,V) /* * cooked read/write macros */ #define EN_READ(SC,R) (u_int32_t)ntohl(en_read(SC,R)) #define EN_WRITE(SC,R,V) en_write(SC,R, htonl(V)) #define EN_WRAPADD(START,STOP,CUR,VAL) { \ (CUR) = (CUR) + (VAL); \ if ((CUR) >= (STOP)) \ (CUR) = (START) + ((CUR) - (STOP)); \ } #define WORD_IDX(START, X) (((X) - (START)) / sizeof(u_int32_t)) /* we store sc->dtq and sc->drq data in the following format... 
*/ #define EN_DQ_MK(SLOT,LEN) (((SLOT) << 20)|(LEN)|(0x80000)) /* the 0x80000 ensures we != 0 */ #define EN_DQ_SLOT(X) ((X) >> 20) #define EN_DQ_LEN(X) ((X) & 0x3ffff) /* format of DTQ/DRQ word 1 differs between ENI and ADP */ #if defined(MIDWAY_ENIONLY) #define MID_MK_TXQ(SC,CNT,CHAN,END,BCODE) \ EN_WRITE((SC), (SC)->dtq_us, \ MID_MK_TXQ_ENI((CNT), (CHAN), (END), (BCODE))); #define MID_MK_RXQ(SC,CNT,VCI,END,BCODE) \ EN_WRITE((SC), (SC)->drq_us, \ MID_MK_RXQ_ENI((CNT), (VCI), (END), (BCODE))); #elif defined(MIDWAY_ADPONLY) #define MID_MK_TXQ(SC,CNT,CHAN,END,JK) \ EN_WRITE((SC), (SC)->dtq_us, \ MID_MK_TXQ_ADP((CNT), (CHAN), (END), (JK))); #define MID_MK_RXQ(SC,CNT,VCI,END,JK) \ EN_WRITE((SC), (SC)->drq_us, \ MID_MK_RXQ_ADP((CNT), (VCI), (END), (JK))); #else #define MID_MK_TXQ(SC,CNT,CHAN,END,JK_OR_BCODE) { \ if ((SC)->is_adaptec) \ EN_WRITE((SC), (SC)->dtq_us, \ MID_MK_TXQ_ADP((CNT), (CHAN), (END), (JK_OR_BCODE))); \ else \ EN_WRITE((SC), (SC)->dtq_us, \ MID_MK_TXQ_ENI((CNT), (CHAN), (END), (JK_OR_BCODE))); \ } #define MID_MK_RXQ(SC,CNT,VCI,END,JK_OR_BCODE) { \ if ((SC)->is_adaptec) \ EN_WRITE((SC), (SC)->drq_us, \ MID_MK_RXQ_ADP((CNT), (VCI), (END), (JK_OR_BCODE))); \ else \ EN_WRITE((SC), (SC)->drq_us, \ MID_MK_RXQ_ENI((CNT), (VCI), (END), (JK_OR_BCODE))); \ } #endif /* add an item to the DTQ */ #define EN_DTQADD(SC,CNT,CHAN,JK_OR_BCODE,ADDR,LEN,END) { \ if (END) \ (SC)->dtq[MID_DTQ_A2REG((SC)->dtq_us)] = EN_DQ_MK(CHAN,LEN); \ MID_MK_TXQ(SC,CNT,CHAN,END,JK_OR_BCODE); \ (SC)->dtq_us += 4; \ EN_WRITE((SC), (SC)->dtq_us, (ADDR)); \ EN_WRAPADD(MID_DTQOFF, MID_DTQEND, (SC)->dtq_us, 4); \ (SC)->dtq_free--; \ if (END) \ EN_WRITE((SC), MID_DMA_WRTX, MID_DTQ_A2REG((SC)->dtq_us)); \ } /* DRQ add macro */ #define EN_DRQADD(SC,CNT,VCI,JK_OR_BCODE,ADDR,LEN,SLOT,END) { \ if (END) \ (SC)->drq[MID_DRQ_A2REG((SC)->drq_us)] = EN_DQ_MK(SLOT,LEN); \ MID_MK_RXQ(SC,CNT,VCI,END,JK_OR_BCODE); \ (SC)->drq_us += 4; \ EN_WRITE((SC), (SC)->drq_us, (ADDR)); \ EN_WRAPADD(MID_DRQOFF, 
MID_DRQEND, (SC)->drq_us, 4); \ (SC)->drq_free--; \ if (END) \ EN_WRITE((SC), MID_DMA_WRRX, MID_DRQ_A2REG((SC)->drq_us)); \ } /* * the driver code * * the code is arranged in a specific way: * [1] short/inline functions * [2] autoconfig stuff * [3] ioctl stuff * [4] reset -> init -> trasmit -> intr -> receive functions * */ /***********************************************************************/ /* * en_read: read a word from the card. this is the only function * that reads from the card. */ STATIC INLINE u_int32_t en_read(sc, r) struct en_softc *sc; u_int32_t r; { #ifdef EN_DEBUG_RANGE if (r > MID_MAXOFF || (r % 4)) panic("en_read out of range, r=0x%x", r); #endif return(bus_space_read_4(sc->en_memt, sc->en_base, r)); } /* * en_write: write a word to the card. this is the only function that * writes to the card. */ STATIC INLINE void en_write(sc, r, v) struct en_softc *sc; u_int32_t r, v; { #ifdef EN_DEBUG_RANGE if (r > MID_MAXOFF || (r % 4)) panic("en_write out of range, r=0x%x", r); #endif bus_space_write_4(sc->en_memt, sc->en_base, r, v); } /* * en_k2sz: convert KBytes to a size parameter (a log2) */ STATIC INLINE int en_k2sz(k) int k; { switch(k) { case 1: return(0); case 2: return(1); case 4: return(2); case 8: return(3); case 16: return(4); case 32: return(5); case 64: return(6); case 128: return(7); default: panic("en_k2sz"); } return(0); } #define en_log2(X) en_k2sz(X) /* * en_b2sz: convert a DMA burst code to its byte size */ STATIC INLINE int en_b2sz(b) int b; { switch (b) { case MIDDMA_WORD: return(1*4); case MIDDMA_2WMAYBE: case MIDDMA_2WORD: return(2*4); case MIDDMA_4WMAYBE: case MIDDMA_4WORD: return(4*4); case MIDDMA_8WMAYBE: case MIDDMA_8WORD: return(8*4); case MIDDMA_16WMAYBE: case MIDDMA_16WORD: return(16*4); default: panic("en_b2sz"); } return(0); } /* * en_sz2b: convert a burst size (bytes) to DMA burst code */ STATIC INLINE int en_sz2b(sz) int sz; { switch (sz) { case 1*4: return(MIDDMA_WORD); case 2*4: return(MIDDMA_2WORD); case 4*4: 
return(MIDDMA_4WORD); case 8*4: return(MIDDMA_8WORD); case 16*4: return(MIDDMA_16WORD); default: panic("en_sz2b"); } return(0); } /* * en_dqneed: calculate number of DTQ/DRQ's needed for a buffer */ STATIC INLINE int en_dqneed(sc, data, len, tx) struct en_softc *sc; caddr_t data; u_int len, tx; { int result, needalign, sz; #if !defined(MIDWAY_ENIONLY) #if !defined(MIDWAY_ADPONLY) if (sc->is_adaptec) #endif /* !MIDWAY_ADPONLY */ return(1); /* adaptec can DMA anything in one go */ #endif #if !defined(MIDWAY_ADPONLY) result = 0; if (len < EN_MINDMA) { if (!tx) /* XXX: conservative */ return(1); /* will copy/DMA_JK */ } if (tx) { /* byte burst? */ needalign = (((uintptr_t) (void *) data) % sizeof(u_int32_t)); if (needalign) { result++; sz = min(len, sizeof(u_int32_t) - needalign); len -= sz; data += sz; } } if (sc->alburst && len) { needalign = (((uintptr_t) (void *) data) & sc->bestburstmask); if (needalign) { result++; /* alburst */ sz = min(len, sc->bestburstlen - needalign); len -= sz; } } if (len >= sc->bestburstlen) { sz = len / sc->bestburstlen; sz = sz * sc->bestburstlen; len -= sz; result++; /* best shot */ } if (len) { result++; /* clean up */ if (tx && (len % sizeof(u_int32_t)) != 0) result++; /* byte cleanup */ } return(result); #endif /* !MIDWAY_ADPONLY */ } /* * en_mget: get an mbuf chain that can hold totlen bytes and return it * (for recv) [based on am7990_get from if_le and ieget from if_ie] * after this call the sum of all the m_len's in the chain will be totlen. 
*/ STATIC INLINE struct mbuf *en_mget(sc, totlen, drqneed) struct en_softc *sc; u_int totlen, *drqneed; { struct mbuf *m; struct mbuf *top, **mp; *drqneed = 0; MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return(NULL); m->m_pkthdr.rcvif = &sc->enif; m->m_pkthdr.len = totlen; m->m_len = MHLEN; top = NULL; mp = ⊤ /* if (top != NULL) then we've already got 1 mbuf on the chain */ while (totlen > 0) { if (top) { MGET(m, M_DONTWAIT, MT_DATA); if (!m) { m_freem(top); return(NULL); /* out of mbufs */ } m->m_len = MLEN; } if (totlen >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); m_freem(top); return(NULL); /* out of mbuf clusters */ } m->m_len = MCLBYTES; } m->m_len = min(totlen, m->m_len); totlen -= m->m_len; *mp = m; mp = &m->m_next; *drqneed += en_dqneed(sc, m->m_data, m->m_len, 0); } return(top); } /***********************************************************************/ /* * autoconfig stuff */ void en_attach(sc) struct en_softc *sc; { struct ifnet *ifp = &sc->enif; int sz; u_int32_t reg, lcv, check, ptr, sav, midvloc; /* * probe card to determine memory size. the stupid ENI card always * reports to PCI that it needs 4MB of space (2MB regs and 2MB RAM). * if it has less than 2MB RAM the addresses wrap in the RAM address space. * (i.e. on a 512KB card addresses 0x3ffffc, 0x37fffc, and 0x2ffffc * are aliases for 0x27fffc [note that RAM starts at offset 0x200000]). */ if (sc->en_busreset) sc->en_busreset(sc); EN_WRITE(sc, MID_RESID, 0x0); /* reset card before touching RAM */ for (lcv = MID_PROBEOFF; lcv <= MID_MAXOFF ; lcv += MID_PROBSIZE) { EN_WRITE(sc, lcv, lcv); /* data[address] = address */ for (check = MID_PROBEOFF ; check < lcv ; check += MID_PROBSIZE) { reg = EN_READ(sc, check); if (reg != check) { /* found an alias! 
*/ goto done_probe; /* and quit */ } } } done_probe: lcv -= MID_PROBSIZE; /* take one step back */ sc->en_obmemsz = (lcv + 4) - MID_RAMOFF; /* * determine the largest DMA burst supported */ en_dmaprobe(sc); /* * "hello world" */ if (sc->en_busreset) sc->en_busreset(sc); EN_WRITE(sc, MID_RESID, 0x0); /* reset */ for (lcv = MID_RAMOFF ; lcv < MID_RAMOFF + sc->en_obmemsz ; lcv += 4) EN_WRITE(sc, lcv, 0); /* zero memory */ reg = EN_READ(sc, MID_RESID); printf("%s: ATM midway v%d, board IDs %d.%d, %s%s%s, %ldKB on-board RAM\n", sc->sc_dev.dv_xname, MID_VER(reg), MID_MID(reg), MID_DID(reg), (MID_IS_SABRE(reg)) ? "sabre controller, " : "", (MID_IS_SUNI(reg)) ? "SUNI" : "Utopia", (!MID_IS_SUNI(reg) && MID_IS_UPIPE(reg)) ? " (pipelined)" : "", (long)sc->en_obmemsz / 1024); if (sc->is_adaptec) { if (sc->bestburstlen == 64 && sc->alburst == 0) printf("%s: passed 64 byte DMA test\n", sc->sc_dev.dv_xname); else printf("%s: FAILED DMA TEST: burst=%d, alburst=%d\n", sc->sc_dev.dv_xname, sc->bestburstlen, sc->alburst); } else { printf("%s: maximum DMA burst length = %d bytes%s\n", sc->sc_dev.dv_xname, sc->bestburstlen, (sc->alburst) ? 
" (must align)" : ""); } /* * link into network subsystem and prepare card */ #if defined(__NetBSD__) || defined(__OpenBSD__) bcopy(sc->sc_dev.dv_xname, sc->enif.if_xname, IFNAMSIZ); #endif sc->enif.if_softc = sc; ifp->if_flags = IFF_SIMPLEX|IFF_NOTRAILERS; ifp->if_ioctl = en_ioctl; ifp->if_output = atm_output; ifp->if_start = en_start; /* * init softc */ for (lcv = 0 ; lcv < MID_N_VC ; lcv++) { sc->rxvc2slot[lcv] = RX_NONE; sc->txspeed[lcv] = 0; /* full */ sc->txvc2slot[lcv] = 0; /* full speed == slot 0 */ } sz = sc->en_obmemsz - (MID_BUFOFF - MID_RAMOFF); ptr = sav = MID_BUFOFF; ptr = roundup(ptr, EN_TXSZ * 1024); /* align */ sz = sz - (ptr - sav); if (EN_TXSZ*1024 * EN_NTX > sz) { printf("%s: EN_NTX/EN_TXSZ too big\n", sc->sc_dev.dv_xname); return; } for (lcv = 0 ; lcv < EN_NTX ; lcv++) { sc->txslot[lcv].mbsize = 0; sc->txslot[lcv].start = ptr; ptr += (EN_TXSZ * 1024); sz -= (EN_TXSZ * 1024); sc->txslot[lcv].stop = ptr; sc->txslot[lcv].nref = 0; bzero(&sc->txslot[lcv].indma, sizeof(sc->txslot[lcv].indma)); bzero(&sc->txslot[lcv].q, sizeof(sc->txslot[lcv].q)); #ifdef EN_DEBUG printf("%s: tx%d: start 0x%x, stop 0x%x\n", sc->sc_dev.dv_xname, lcv, sc->txslot[lcv].start, sc->txslot[lcv].stop); #endif } sav = ptr; ptr = roundup(ptr, EN_RXSZ * 1024); /* align */ sz = sz - (ptr - sav); sc->en_nrx = sz / (EN_RXSZ * 1024); if (sc->en_nrx <= 0) { printf("%s: EN_NTX/EN_TXSZ/EN_RXSZ too big\n", sc->sc_dev.dv_xname); return; } /* * ensure that there is always one VC slot on the service list free * so that we can tell the difference between a full and empty list. 
*/ if (sc->en_nrx >= MID_N_VC) sc->en_nrx = MID_N_VC - 1; for (lcv = 0 ; lcv < sc->en_nrx ; lcv++) { sc->rxslot[lcv].rxhand = NULL; sc->rxslot[lcv].oth_flags = ENOTHER_FREE; bzero(&sc->rxslot[lcv].indma, sizeof(sc->rxslot[lcv].indma)); bzero(&sc->rxslot[lcv].q, sizeof(sc->rxslot[lcv].q)); midvloc = sc->rxslot[lcv].start = ptr; ptr += (EN_RXSZ * 1024); sz -= (EN_RXSZ * 1024); sc->rxslot[lcv].stop = ptr; midvloc = midvloc - MID_RAMOFF; midvloc = (midvloc & ~((EN_RXSZ*1024) - 1)) >> 2; /* mask, cvt to words */ midvloc = midvloc >> MIDV_LOCTOPSHFT; /* we only want the top 11 bits */ midvloc = (midvloc & MIDV_LOCMASK) << MIDV_LOCSHIFT; sc->rxslot[lcv].mode = midvloc | (en_k2sz(EN_RXSZ) << MIDV_SZSHIFT) | MIDV_TRASH; #ifdef EN_DEBUG printf("%s: rx%d: start 0x%x, stop 0x%x, mode 0x%x\n", sc->sc_dev.dv_xname, lcv, sc->rxslot[lcv].start, sc->rxslot[lcv].stop, sc->rxslot[lcv].mode); #endif } #ifdef EN_STAT sc->vtrash = sc->otrash = sc->mfix = sc->txmbovr = sc->dmaovr = 0; sc->txoutspace = sc->txdtqout = sc->launch = sc->lheader = sc->ltail = 0; sc->hwpull = sc->swadd = sc->rxqnotus = sc->rxqus = sc->rxoutboth = 0; sc->rxdrqout = sc->ttrash = sc->rxmbufout = sc->mfixfail = 0; sc->headbyte = sc->tailbyte = sc->tailflush = 0; #endif sc->need_drqs = sc->need_dtqs = 0; printf("%s: %d %dKB receive buffers, %d %dKB transmit buffers allocated\n", sc->sc_dev.dv_xname, sc->en_nrx, EN_RXSZ, EN_NTX, EN_TXSZ); printf("%s: End Station Identifier (mac address) %6D\n", sc->sc_dev.dv_xname, sc->macaddr, ":"); /* * final commit */ if_attach(ifp); atm_ifattach(ifp); #if NBPF > 0 BPFATTACH(ifp, DLT_ATM_RFC1483, sizeof(struct atmllc)); #endif } /* * en_dmaprobe: helper function for en_attach. * * see how the card handles DMA by running a few DMA tests. we need * to figure out the largest number of bytes we can DMA in one burst * ("bestburstlen"), and if the starting address for a burst needs to * be aligned on any sort of boundary or not ("alburst"). 
* * typical findings: * sparc1: bestburstlen=4, alburst=0 (ick, broken DMA!) * sparc2: bestburstlen=64, alburst=1 * p166: bestburstlen=64, alburst=0 */ #if defined(__FreeBSD__) && defined(__i386__) #define NBURSTS 3 /* number of bursts to use for dmaprobe */ #define BOUNDARY 1024 /* test misaligned dma crossing the boundary. should be n * 64. at least 64*(NBURSTS+1). dell P6 with EDO DRAM has 1K boundary problem */ #endif STATIC void en_dmaprobe(sc) struct en_softc *sc; { #ifdef NBURSTS /* be careful. kernel stack is only 8K */ u_int8_t buffer[BOUNDARY * 2 + 64 * (NBURSTS + 1)]; #else u_int32_t srcbuf[64], dstbuf[64]; #endif u_int8_t *sp, *dp; int bestalgn, bestnotalgn, lcv, try; sc->alburst = 0; #ifdef NBURSTS /* setup src and dst buf at the end of the boundary */ sp = (u_int8_t *)roundup((uintptr_t)(void *)buffer, 64); while (((uintptr_t)(void *)sp & (BOUNDARY - 1)) != (BOUNDARY - 64)) sp += 64; dp = sp + BOUNDARY; /* * we can't dma across page boundary so that, if buf is at a page * boundary, move it to the next page. but still either src or dst * will be at the boundary, which should be ok.
*/ if ((((uintptr_t)(void *)sp + 64) & PAGE_MASK) == 0) sp += 64; if ((((uintptr_t)(void *)dp + 64) & PAGE_MASK) == 0) dp += 64; #else /* !NBURSTS */ sp = (u_int8_t *) srcbuf; while ((((unsigned long) sp) % MIDDMA_MAXBURST) != 0) sp += 4; dp = (u_int8_t *) dstbuf; while ((((unsigned long) dp) % MIDDMA_MAXBURST) != 0) dp += 4; #endif /* !NBURSTS */ bestalgn = bestnotalgn = en_dmaprobe_doit(sc, sp, dp, 0); for (lcv = 4 ; lcv < MIDDMA_MAXBURST ; lcv += 4) { try = en_dmaprobe_doit(sc, sp+lcv, dp+lcv, 0); #ifdef NBURSTS if (try < bestnotalgn) { bestnotalgn = try; break; } #else if (try < bestnotalgn) bestnotalgn = try; #endif } if (bestalgn != bestnotalgn) /* need bursts aligned */ sc->alburst = 1; sc->bestburstlen = bestalgn; sc->bestburstshift = en_log2(bestalgn); sc->bestburstmask = sc->bestburstlen - 1; /* must be power of 2 */ sc->bestburstcode = en_sz2b(bestalgn); #if 1 /* __FreeBSD__ */ /* * correct pci chipsets should be able to handle misaligned-64-byte DMA. * but there are too many broken chipsets around. we try to work around * by finding the best workable dma size, but still some broken machines * exhibit the problem later. so warn it here. */ if (bestalgn != 64 || sc->alburst != 0) { printf("%s: WARNING: DMA test detects a broken PCI chipset!\n", sc->sc_dev.dv_xname); printf(" trying to work around the problem... 
but if this doesn't\n"); printf(" work for you, you'd better switch to a newer motherboard.\n"); } #endif /* 1 */ return; } /* * en_dmaprobe_doit: do actual testing */ STATIC int en_dmaprobe_doit(sc, sp, dp, wmtry) struct en_softc *sc; u_int8_t *sp, *dp; int wmtry; { int lcv, retval = 4, cnt, count; u_int32_t reg, bcode, midvloc; /* * set up a 1k buffer at MID_BUFOFF */ if (sc->en_busreset) sc->en_busreset(sc); EN_WRITE(sc, MID_RESID, 0x0); /* reset card before touching RAM */ midvloc = ((MID_BUFOFF - MID_RAMOFF) / sizeof(u_int32_t)) >> MIDV_LOCTOPSHFT; EN_WRITE(sc, MIDX_PLACE(0), MIDX_MKPLACE(en_k2sz(1), midvloc)); EN_WRITE(sc, MID_VC(0), (midvloc << MIDV_LOCSHIFT) | (en_k2sz(1) << MIDV_SZSHIFT) | MIDV_TRASH); EN_WRITE(sc, MID_DST_RP(0), 0); EN_WRITE(sc, MID_WP_ST_CNT(0), 0); #ifdef NBURSTS for (lcv = 0 ; lcv < 64*NBURSTS; lcv++) /* set up sample data */ #else for (lcv = 0 ; lcv < 68 ; lcv++) /* set up sample data */ #endif sp[lcv] = lcv+1; EN_WRITE(sc, MID_MAST_CSR, MID_MCSR_ENDMA); /* enable DMA (only) */ sc->drq_chip = MID_DRQ_REG2A(EN_READ(sc, MID_DMA_RDRX)); sc->dtq_chip = MID_DTQ_REG2A(EN_READ(sc, MID_DMA_RDTX)); /* * try it now . . . DMA it out, then DMA it back in and compare * * note: in order to get the dma stuff to reverse directions it wants * the "end" flag set! since we are not dma'ing valid data we may * get an ident mismatch interrupt (which we will ignore). 
* * note: we've got two different tests rolled up in the same loop * if (wmtry) * then we are doing a wmaybe test and wmtry is a byte count * else we are doing a burst test */ for (lcv = 8 ; lcv <= MIDDMA_MAXBURST ; lcv = lcv * 2) { #ifdef EN_DEBUG printf("DMA test lcv=%d, sp=0x%lx, dp=0x%lx, wmtry=%d\n", lcv, (unsigned long)sp, (unsigned long)dp, wmtry); #endif /* zero SRAM and dest buffer */ for (cnt = 0 ; cnt < 1024; cnt += 4) EN_WRITE(sc, MID_BUFOFF+cnt, 0); /* zero memory */ #ifdef NBURSTS for (cnt = 0 ; cnt < 64*NBURSTS; cnt++) #else for (cnt = 0 ; cnt < 68 ; cnt++) #endif dp[cnt] = 0; if (wmtry) { count = (sc->bestburstlen - sizeof(u_int32_t)) / sizeof(u_int32_t); bcode = en_dmaplan[count].bcode; count = wmtry >> en_dmaplan[count].divshift; } else { bcode = en_sz2b(lcv); count = 1; } #ifdef NBURSTS /* build lcv-byte-DMA x NBURSTS */ if (sc->is_adaptec) EN_WRITE(sc, sc->dtq_chip, MID_MK_TXQ_ADP(lcv*NBURSTS, 0, MID_DMA_END, 0)); else EN_WRITE(sc, sc->dtq_chip, MID_MK_TXQ_ENI(count*NBURSTS, 0, MID_DMA_END, bcode)); EN_WRITE(sc, sc->dtq_chip+4, vtophys(sp)); EN_WRAPADD(MID_DTQOFF, MID_DTQEND, sc->dtq_chip, 8); EN_WRITE(sc, MID_DMA_WRTX, MID_DTQ_A2REG(sc->dtq_chip)); cnt = 1000; while (EN_READ(sc, MID_DMA_RDTX) != MID_DTQ_A2REG(sc->dtq_chip)) { DELAY(1); cnt--; if (cnt == 0) { printf("%s: unexpected timeout in tx DMA test\n", sc->sc_dev.dv_xname); /* printf(" alignment=0x%x, burst size=%d, dma addr reg=0x%x\n", (u_long)sp & 63, lcv, EN_READ(sc, MID_DMA_ADDR)); */ return(retval); /* timeout, give up */ } } #else /* !NBURSTS */ if (sc->is_adaptec) EN_WRITE(sc, sc->dtq_chip, MID_MK_TXQ_ADP(lcv, 0, MID_DMA_END, 0)); else EN_WRITE(sc, sc->dtq_chip, MID_MK_TXQ_ENI(count, 0, MID_DMA_END, bcode)); EN_WRITE(sc, sc->dtq_chip+4, vtophys(sp)); EN_WRITE(sc, MID_DMA_WRTX, MID_DTQ_A2REG(sc->dtq_chip+8)); cnt = 1000; while (EN_READ(sc, MID_DMA_RDTX) == MID_DTQ_A2REG(sc->dtq_chip)) { DELAY(1); cnt--; if (cnt == 0) { printf("%s: unexpected timeout in tx DMA test\n", 
sc->sc_dev.dv_xname); return(retval); /* timeout, give up */ } } EN_WRAPADD(MID_DTQOFF, MID_DTQEND, sc->dtq_chip, 8); #endif /* !NBURSTS */ reg = EN_READ(sc, MID_INTACK); if ((reg & MID_INT_DMA_TX) != MID_INT_DMA_TX) { printf("%s: unexpected status in tx DMA test: 0x%x\n", sc->sc_dev.dv_xname, reg); return(retval); } EN_WRITE(sc, MID_MAST_CSR, MID_MCSR_ENDMA); /* re-enable DMA (only) */ /* "return to sender..." address is known ... */ #ifdef NBURSTS /* build lcv-byte-DMA x NBURSTS */ if (sc->is_adaptec) EN_WRITE(sc, sc->drq_chip, MID_MK_RXQ_ADP(lcv*NBURSTS, 0, MID_DMA_END, 0)); else EN_WRITE(sc, sc->drq_chip, MID_MK_RXQ_ENI(count*NBURSTS, 0, MID_DMA_END, bcode)); EN_WRITE(sc, sc->drq_chip+4, vtophys(dp)); EN_WRAPADD(MID_DRQOFF, MID_DRQEND, sc->drq_chip, 8); EN_WRITE(sc, MID_DMA_WRRX, MID_DRQ_A2REG(sc->drq_chip)); cnt = 1000; while (EN_READ(sc, MID_DMA_RDRX) != MID_DRQ_A2REG(sc->drq_chip)) { DELAY(1); cnt--; if (cnt == 0) { printf("%s: unexpected timeout in rx DMA test\n", sc->sc_dev.dv_xname); return(retval); /* timeout, give up */ } } #else /* !NBURSTS */ if (sc->is_adaptec) EN_WRITE(sc, sc->drq_chip, MID_MK_RXQ_ADP(lcv, 0, MID_DMA_END, 0)); else EN_WRITE(sc, sc->drq_chip, MID_MK_RXQ_ENI(count, 0, MID_DMA_END, bcode)); EN_WRITE(sc, sc->drq_chip+4, vtophys(dp)); EN_WRITE(sc, MID_DMA_WRRX, MID_DRQ_A2REG(sc->drq_chip+8)); cnt = 1000; while (EN_READ(sc, MID_DMA_RDRX) == MID_DRQ_A2REG(sc->drq_chip)) { DELAY(1); cnt--; if (cnt == 0) { printf("%s: unexpected timeout in rx DMA test\n", sc->sc_dev.dv_xname); return(retval); /* timeout, give up */ } } EN_WRAPADD(MID_DRQOFF, MID_DRQEND, sc->drq_chip, 8); #endif /* !NBURSTS */ reg = EN_READ(sc, MID_INTACK); if ((reg & MID_INT_DMA_RX) != MID_INT_DMA_RX) { printf("%s: unexpected status in rx DMA test: 0x%x\n", sc->sc_dev.dv_xname, reg); return(retval); } EN_WRITE(sc, MID_MAST_CSR, MID_MCSR_ENDMA); /* re-enable DMA (only) */ if (wmtry) { return(bcmp(sp, dp, wmtry)); /* wmtry always exits here, no looping */ } #ifdef NBURSTS if 
(bcmp(sp, dp, lcv * NBURSTS)) { /* printf("DMA test failed! lcv=%d, sp=0x%x, dp=0x%x\n", lcv, sp, dp); */ return(retval); /* failed, use last value */ } #else if (bcmp(sp, dp, lcv)) return(retval); /* failed, use last value */ #endif retval = lcv; } return(retval); /* studly 64 byte DMA present! oh baby!! */ } /***********************************************************************/ /* * en_ioctl: handle ioctl requests * * NOTE: if you add an ioctl to set txspeed, you should choose a new * TX channel/slot. Choose the one with the lowest sc->txslot[slot].nref * value, subtract one from sc->txslot[0].nref, add one to the * sc->txslot[slot].nref, set sc->txvc2slot[vci] = slot, and then set * txspeed[vci]. */ STATIC int en_ioctl(ifp, cmd, data) struct ifnet *ifp; EN_IOCTL_CMDT cmd; caddr_t data; { struct en_softc *sc = (struct en_softc *) ifp->if_softc; struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; struct atm_pseudoioctl *api = (struct atm_pseudoioctl *)data; #ifdef NATM struct atm_rawioctl *ario = (struct atm_rawioctl *)data; int slot; #endif int s, error = 0; s = splnet(); switch (cmd) { case SIOCATMENA: /* enable circuit for recv */ error = en_rxctl(sc, api, 1); break; case SIOCATMDIS: /* disable circuit for recv */ error = en_rxctl(sc, api, 0); break; #ifdef NATM case SIOCXRAWATM: if ((slot = sc->rxvc2slot[ario->npcb->npcb_vci]) == RX_NONE) { error = EINVAL; break; } if (ario->rawvalue > EN_RXSZ*1024) ario->rawvalue = EN_RXSZ*1024; if (ario->rawvalue) { sc->rxslot[slot].oth_flags |= ENOTHER_RAW; sc->rxslot[slot].raw_threshold = ario->rawvalue; } else { sc->rxslot[slot].oth_flags &= (~ENOTHER_RAW); sc->rxslot[slot].raw_threshold = 0; } #ifdef EN_DEBUG printf("%s: rxvci%d: turn %s raw (boodi) mode\n", sc->sc_dev.dv_xname, ario->npcb->npcb_vci, (ario->rawvalue) ? 
"on" : "off"); #endif break; #endif case SIOCSIFADDR: ifp->if_flags |= IFF_UP; #if defined(INET) || defined(INET6) if (ifa->ifa_addr->sa_family == AF_INET || ifa->ifa_addr->sa_family == AF_INET6) { en_reset(sc); en_init(sc); ifa->ifa_rtrequest = atm_rtrequest; /* ??? */ break; } #endif /* INET */ /* what to do if not INET? */ en_reset(sc); en_init(sc); break; case SIOCGIFADDR: error = EINVAL; break; case SIOCSIFFLAGS: error = EINVAL; break; #if defined(SIOCSIFMTU) /* ??? copied from if_de */ #if !defined(ifr_mtu) #define ifr_mtu ifr_metric #endif case SIOCSIFMTU: /* * Set the interface MTU. */ #ifdef notsure if (ifr->ifr_mtu > ATMMTU) { error = EINVAL; break; } #endif ifp->if_mtu = ifr->ifr_mtu; /* XXXCDC: do we really need to reset on MTU size change? */ en_reset(sc); en_init(sc); break; #endif /* SIOCSIFMTU */ default: error = EINVAL; break; } splx(s); return error; } /* * en_rxctl: turn on and off VCs for recv. */ STATIC int en_rxctl(sc, pi, on) struct en_softc *sc; struct atm_pseudoioctl *pi; int on; { u_int s, vci, flags, slot; u_int32_t oldmode, newmode; vci = ATM_PH_VCI(&pi->aph); flags = ATM_PH_FLAGS(&pi->aph); #ifdef EN_DEBUG printf("%s: %s vpi=%d, vci=%d, flags=%d\n", sc->sc_dev.dv_xname, (on) ? "enable" : "disable", ATM_PH_VPI(&pi->aph), vci, flags); #endif if (ATM_PH_VPI(&pi->aph) || vci >= MID_N_VC) return(EINVAL); /* * turn on VCI! */ if (on) { if (sc->rxvc2slot[vci] != RX_NONE) return(EINVAL); for (slot = 0 ; slot < sc->en_nrx ; slot++) if (sc->rxslot[slot].oth_flags & ENOTHER_FREE) break; if (slot == sc->en_nrx) return(ENOSPC); sc->rxvc2slot[vci] = slot; sc->rxslot[slot].rxhand = NULL; oldmode = sc->rxslot[slot].mode; newmode = (flags & ATM_PH_AAL5) ? 
MIDV_AAL5 : MIDV_NOAAL; sc->rxslot[slot].mode = MIDV_SETMODE(oldmode, newmode); sc->rxslot[slot].atm_vci = vci; sc->rxslot[slot].atm_flags = flags; sc->rxslot[slot].oth_flags = 0; sc->rxslot[slot].rxhand = pi->rxhand; if (sc->rxslot[slot].indma.ifq_head || sc->rxslot[slot].q.ifq_head) panic("en_rxctl: left over mbufs on enable"); sc->txspeed[vci] = 0; /* full speed to start */ sc->txvc2slot[vci] = 0; /* init value */ sc->txslot[0].nref++; /* bump reference count */ en_loadvc(sc, vci); /* does debug printf for us */ return(0); } /* * turn off VCI */ if (sc->rxvc2slot[vci] == RX_NONE) return(EINVAL); slot = sc->rxvc2slot[vci]; if ((sc->rxslot[slot].oth_flags & (ENOTHER_FREE|ENOTHER_DRAIN)) != 0) return(EINVAL); s = splimp(); /* block out enintr() */ oldmode = EN_READ(sc, MID_VC(vci)); newmode = MIDV_SETMODE(oldmode, MIDV_TRASH) & ~MIDV_INSERVICE; EN_WRITE(sc, MID_VC(vci), (newmode | (oldmode & MIDV_INSERVICE))); /* halt in tracks, be careful to preserve inserivce bit */ DELAY(27); sc->rxslot[slot].rxhand = NULL; sc->rxslot[slot].mode = newmode; sc->txslot[sc->txvc2slot[vci]].nref--; sc->txspeed[vci] = 0; sc->txvc2slot[vci] = 0; /* if stuff is still going on we are going to have to drain it out */ if (sc->rxslot[slot].indma.ifq_head || sc->rxslot[slot].q.ifq_head || (sc->rxslot[slot].oth_flags & ENOTHER_SWSL) != 0) { sc->rxslot[slot].oth_flags |= ENOTHER_DRAIN; } else { sc->rxslot[slot].oth_flags = ENOTHER_FREE; sc->rxslot[slot].atm_vci = RX_NONE; sc->rxvc2slot[vci] = RX_NONE; } splx(s); /* enable enintr() */ #ifdef EN_DEBUG printf("%s: rx%d: VCI %d is now %s\n", sc->sc_dev.dv_xname, slot, vci, (sc->rxslot[slot].oth_flags & ENOTHER_DRAIN) ? "draining" : "free"); #endif return(0); } /***********************************************************************/ /* * en_reset: reset the board, throw away work in progress. * must en_init to recover. 
*/ void en_reset(sc) struct en_softc *sc; { struct mbuf *m; int lcv, slot; #ifdef EN_DEBUG printf("%s: reset\n", sc->sc_dev.dv_xname); #endif if (sc->en_busreset) sc->en_busreset(sc); EN_WRITE(sc, MID_RESID, 0x0); /* reset hardware */ /* * recv: dump any mbufs we are dma'ing into, if DRAINing, then a reset * will free us! */ for (lcv = 0 ; lcv < MID_N_VC ; lcv++) { if (sc->rxvc2slot[lcv] == RX_NONE) continue; slot = sc->rxvc2slot[lcv]; while (1) { _IF_DEQUEUE(&sc->rxslot[slot].indma, m); if (m == NULL) break; /* >>> exit 'while(1)' here <<< */ m_freem(m); } while (1) { _IF_DEQUEUE(&sc->rxslot[slot].q, m); if (m == NULL) break; /* >>> exit 'while(1)' here <<< */ m_freem(m); } sc->rxslot[slot].oth_flags &= ~ENOTHER_SWSL; if (sc->rxslot[slot].oth_flags & ENOTHER_DRAIN) { sc->rxslot[slot].oth_flags = ENOTHER_FREE; sc->rxvc2slot[lcv] = RX_NONE; #ifdef EN_DEBUG printf("%s: rx%d: VCI %d is now free\n", sc->sc_dev.dv_xname, slot, lcv); #endif } } /* * xmit: dump everything */ for (lcv = 0 ; lcv < EN_NTX ; lcv++) { while (1) { _IF_DEQUEUE(&sc->txslot[lcv].indma, m); if (m == NULL) break; /* >>> exit 'while(1)' here <<< */ m_freem(m); } while (1) { _IF_DEQUEUE(&sc->txslot[lcv].q, m); if (m == NULL) break; /* >>> exit 'while(1)' here <<< */ m_freem(m); } sc->txslot[lcv].mbsize = 0; } return; } /* * en_init: init board and sync the card with the data in the softc. */ STATIC void en_init(sc) struct en_softc *sc; { int vc, slot; u_int32_t loc; if ((sc->enif.if_flags & IFF_UP) == 0) { #ifdef EN_DEBUG printf("%s: going down\n", sc->sc_dev.dv_xname); #endif en_reset(sc); /* to be safe */ sc->enif.if_flags &= ~IFF_RUNNING; /* disable */ return; } #ifdef EN_DEBUG printf("%s: going up\n", sc->sc_dev.dv_xname); #endif sc->enif.if_flags |= IFF_RUNNING; /* enable */ if (sc->en_busreset) sc->en_busreset(sc); EN_WRITE(sc, MID_RESID, 0x0); /* reset */ /* * init obmem data structures: vc tab, dma q's, slist. 
* * note that we set drq_free/dtq_free to one less than the total number * of DTQ/DRQs present. we do this because the card uses the condition * (drq_chip == drq_us) to mean "list is empty"... but if you allow the * circular list to be completely full then (drq_chip == drq_us) [i.e. * the drq_us pointer will wrap all the way around]. by restricting * the number of active requests to (N - 1) we prevent the list from * becoming completely full. note that the card will sometimes give * us an interrupt for a DTQ/DRQ we have already processes... this helps * keep that interrupt from messing us up. */ for (vc = 0 ; vc < MID_N_VC ; vc++) en_loadvc(sc, vc); bzero(&sc->drq, sizeof(sc->drq)); sc->drq_free = MID_DRQ_N - 1; /* N - 1 */ sc->drq_chip = MID_DRQ_REG2A(EN_READ(sc, MID_DMA_RDRX)); EN_WRITE(sc, MID_DMA_WRRX, MID_DRQ_A2REG(sc->drq_chip)); /* ensure zero queue */ sc->drq_us = sc->drq_chip; bzero(&sc->dtq, sizeof(sc->dtq)); sc->dtq_free = MID_DTQ_N - 1; /* N - 1 */ sc->dtq_chip = MID_DTQ_REG2A(EN_READ(sc, MID_DMA_RDTX)); EN_WRITE(sc, MID_DMA_WRTX, MID_DRQ_A2REG(sc->dtq_chip)); /* ensure zero queue */ sc->dtq_us = sc->dtq_chip; sc->hwslistp = MID_SL_REG2A(EN_READ(sc, MID_SERV_WRITE)); sc->swsl_size = sc->swsl_head = sc->swsl_tail = 0; #ifdef EN_DEBUG printf("%s: drq free/chip: %d/0x%x, dtq free/chip: %d/0x%x, hwslist: 0x%x\n", sc->sc_dev.dv_xname, sc->drq_free, sc->drq_chip, sc->dtq_free, sc->dtq_chip, sc->hwslistp); #endif for (slot = 0 ; slot < EN_NTX ; slot++) { sc->txslot[slot].bfree = EN_TXSZ * 1024; EN_WRITE(sc, MIDX_READPTR(slot), 0); EN_WRITE(sc, MIDX_DESCSTART(slot), 0); loc = sc->txslot[slot].cur = sc->txslot[slot].start; loc = loc - MID_RAMOFF; loc = (loc & ~((EN_TXSZ*1024) - 1)) >> 2; /* mask, cvt to words */ loc = loc >> MIDV_LOCTOPSHFT; /* top 11 bits */ EN_WRITE(sc, MIDX_PLACE(slot), MIDX_MKPLACE(en_k2sz(EN_TXSZ), loc)); #ifdef EN_DEBUG printf("%s: tx%d: place 0x%x\n", sc->sc_dev.dv_xname, slot, (u_int)EN_READ(sc, MIDX_PLACE(slot))); #endif } /* * enable! 
*/ EN_WRITE(sc, MID_INTENA, MID_INT_TX|MID_INT_DMA_OVR|MID_INT_IDENT| MID_INT_LERR|MID_INT_DMA_ERR|MID_INT_DMA_RX|MID_INT_DMA_TX| MID_INT_SERVICE| /* >>> MID_INT_SUNI| XXXCDC<<< */ MID_INT_STATS); EN_WRITE(sc, MID_MAST_CSR, MID_SETIPL(sc->ipl)|MID_MCSR_ENDMA| MID_MCSR_ENTX|MID_MCSR_ENRX); } /* * en_loadvc: load a vc tab entry from a slot */ STATIC void en_loadvc(sc, vc) struct en_softc *sc; int vc; { int slot; u_int32_t reg = EN_READ(sc, MID_VC(vc)); reg = MIDV_SETMODE(reg, MIDV_TRASH); EN_WRITE(sc, MID_VC(vc), reg); DELAY(27); if ((slot = sc->rxvc2slot[vc]) == RX_NONE) return; /* no need to set CRC */ EN_WRITE(sc, MID_DST_RP(vc), 0); /* read pointer = 0, desc. start = 0 */ EN_WRITE(sc, MID_WP_ST_CNT(vc), 0); /* write pointer = 0 */ EN_WRITE(sc, MID_VC(vc), sc->rxslot[slot].mode); /* set mode, size, loc */ sc->rxslot[slot].cur = sc->rxslot[slot].start; #ifdef EN_DEBUG printf("%s: rx%d: assigned to VCI %d\n", sc->sc_dev.dv_xname, slot, vc); #endif } /* * en_start: start transmitting the next packet that needs to go out * if there is one. note that atm_output() has already splimp()'d us. */ STATIC void en_start(ifp) struct ifnet *ifp; { struct en_softc *sc = (struct en_softc *) ifp->if_softc; struct ifqueue *ifq = &ifp->if_snd; /* if INPUT QUEUE */ struct mbuf *m, *lastm, *prev; struct atm_pseudohdr *ap, *new_ap; int txchan, mlen, got, need, toadd, cellcnt, first; u_int32_t atm_vpi, atm_vci, atm_flags, *dat, aal; u_int8_t *cp; if ((ifp->if_flags & IFF_RUNNING) == 0) return; /* * remove everything from interface queue since we handle all queueing * locally ... */ while (1) { IF_DEQUEUE(ifq, m); if (m == NULL) return; /* EMPTY: >>> exit here <<< */ /* * calculate size of packet (in bytes) * also, if we are not doing transmit DMA we eliminate all stupid * (non-word) alignments here using en_mfix(). calls to en_mfix() * seem to be due to tcp retransmits for the most part. 
* * after this loop mlen total length of mbuf chain (including atm_ph), * and lastm is a pointer to the last mbuf on the chain. */ lastm = m; mlen = 0; prev = NULL; while (1) { /* no DMA? */ if ((!sc->is_adaptec && EN_ENIDMAFIX) || EN_NOTXDMA || !en_dma) { if ( ((uintptr_t)mtod(lastm, void *) % sizeof(u_int32_t)) != 0 || ((lastm->m_len % sizeof(u_int32_t)) != 0 && lastm->m_next)) { first = (lastm == m); if (en_mfix(sc, &lastm, prev) == 0) { /* failed? */ m_freem(m); m = NULL; break; } if (first) m = lastm; /* update */ } prev = lastm; } mlen += lastm->m_len; if (lastm->m_next == NULL) break; lastm = lastm->m_next; } if (m == NULL) /* happens only if mfix fails */ continue; ap = mtod(m, struct atm_pseudohdr *); atm_vpi = ATM_PH_VPI(ap); atm_vci = ATM_PH_VCI(ap); atm_flags = ATM_PH_FLAGS(ap) & ~(EN_OBHDR|EN_OBTRL); aal = ((atm_flags & ATM_PH_AAL5) != 0) ? MID_TBD_AAL5 : MID_TBD_NOAAL5; /* * check that vpi/vci is one we can use */ if (atm_vpi || atm_vci > MID_N_VC) { printf("%s: output vpi=%d, vci=%d out of card range, dropping...\n", sc->sc_dev.dv_xname, atm_vpi, atm_vci); m_freem(m); continue; } /* * computing how much padding we need on the end of the mbuf, then * see if we can put the TBD at the front of the mbuf where the * link header goes (well behaved protocols will reserve room for us). * last, check if room for PDU tail. * * got = number of bytes of data we have * cellcnt = number of cells in this mbuf * need = number of bytes of data + padding we need (excludes TBD) * toadd = number of bytes of data we need to add to end of mbuf, * [including AAL5 PDU, if AAL5] */ got = mlen - sizeof(struct atm_pseudohdr); toadd = (aal == MID_TBD_AAL5) ? 
MID_PDU_SIZE : 0; /* PDU */ cellcnt = (got + toadd + (MID_ATMDATASZ - 1)) / MID_ATMDATASZ; need = cellcnt * MID_ATMDATASZ; toadd = need - got; /* recompute, including zero padding */ #ifdef EN_DEBUG printf("%s: txvci%d: mlen=%d, got=%d, need=%d, toadd=%d, cell#=%d\n", sc->sc_dev.dv_xname, atm_vci, mlen, got, need, toadd, cellcnt); printf(" leading_space=%d, trailing_space=%d\n", (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(lastm)); #endif #ifdef EN_MBUF_OPT /* * note: external storage (M_EXT) can be shared between mbufs * to avoid copying (see m_copym()). this means that the same * data buffer could be shared by several mbufs, and thus it isn't * a good idea to try and write TBDs or PDUs to M_EXT data areas. */ if (M_LEADINGSPACE(m) >= MID_TBD_SIZE && (m->m_flags & M_EXT) == 0) { m->m_data -= MID_TBD_SIZE; m->m_len += MID_TBD_SIZE; mlen += MID_TBD_SIZE; new_ap = mtod(m, struct atm_pseudohdr *); *new_ap = *ap; /* move it back */ ap = new_ap; dat = ((u_int32_t *) ap) + 1; /* make sure the TBD is in proper byte order */ *dat++ = htonl(MID_TBD_MK1(aal, sc->txspeed[atm_vci], cellcnt)); *dat = htonl(MID_TBD_MK2(atm_vci, 0, 0)); atm_flags |= EN_OBHDR; } if (toadd && (lastm->m_flags & M_EXT) == 0 && M_TRAILINGSPACE(lastm) >= toadd) { cp = mtod(lastm, u_int8_t *) + lastm->m_len; lastm->m_len += toadd; mlen += toadd; if (aal == MID_TBD_AAL5) { bzero(cp, toadd - MID_PDU_SIZE); dat = (u_int32_t *)(cp + toadd - MID_PDU_SIZE); /* make sure the PDU is in proper byte order */ *dat = htonl(MID_PDU_MK1(0, 0, got)); } else { bzero(cp, toadd); } atm_flags |= EN_OBTRL; } ATM_PH_FLAGS(ap) = atm_flags; /* update EN_OBHDR/EN_OBTRL bits */ #endif /* EN_MBUF_OPT */ /* * get assigned channel (will be zero unless txspeed[atm_vci] is set) */ txchan = sc->txvc2slot[atm_vci]; if (sc->txslot[txchan].mbsize > EN_TXHIWAT) { EN_COUNT(sc->txmbovr); m_freem(m); #ifdef EN_DEBUG printf("%s: tx%d: buffer space shortage\n", sc->sc_dev.dv_xname, txchan); #endif continue; } sc->txslot[txchan].mbsize += 
mlen; #ifdef EN_DEBUG printf("%s: tx%d: VPI=%d, VCI=%d, FLAGS=0x%x, speed=0x%x\n", sc->sc_dev.dv_xname, txchan, atm_vpi, atm_vci, atm_flags, sc->txspeed[atm_vci]); printf(" adjusted mlen=%d, mbsize=%d\n", mlen, sc->txslot[txchan].mbsize); #endif _IF_ENQUEUE(&sc->txslot[txchan].q, m); en_txdma(sc, txchan); } /*NOTREACHED*/ } /* * en_mfix: fix a stupid mbuf */ #ifndef __FreeBSD__ STATIC int en_mfix(sc, mm, prev) struct en_softc *sc; struct mbuf **mm, *prev; { struct mbuf *m, *new; u_char *d, *cp; int off; struct mbuf *nxt; m = *mm; EN_COUNT(sc->mfix); /* count # of calls */ #ifdef EN_DEBUG printf("%s: mfix mbuf m_data=%p, m_len=%d\n", sc->sc_dev.dv_xname, m->m_data, m->m_len); #endif d = mtod(m, u_char *); off = ((unsigned long) d) % sizeof(u_int32_t); if (off) { if ((m->m_flags & M_EXT) == 0) { bcopy(d, d - off, m->m_len); /* ALIGN! (with costly data copy...) */ d -= off; m->m_data = (caddr_t)d; } else { /* can't write to an M_EXT mbuf since it may be shared */ MGET(new, M_DONTWAIT, MT_DATA); if (!new) { EN_COUNT(sc->mfixfail); return(0); } MCLGET(new, M_DONTWAIT); if ((new->m_flags & M_EXT) == 0) { m_free(new); EN_COUNT(sc->mfixfail); return(0); } bcopy(d, new->m_data, m->m_len); /* ALIGN! (with costly data copy...) 
*/ new->m_len = m->m_len; new->m_next = m->m_next; if (prev) prev->m_next = new; m_free(m); *mm = m = new; /* note: 'd' now invalid */ } } off = m->m_len % sizeof(u_int32_t); if (off == 0) return(1); d = mtod(m, u_char *) + m->m_len; off = sizeof(u_int32_t) - off; nxt = m->m_next; while (off--) { for ( ; nxt != NULL && nxt->m_len == 0 ; nxt = nxt->m_next) /*null*/; if (nxt == NULL) { /* out of data, zero fill */ *d++ = 0; continue; /* next "off" */ } cp = mtod(nxt, u_char *); *d++ = *cp++; m->m_len++; nxt->m_len--; nxt->m_data = (caddr_t)cp; } return(1); } #else /* __FreeBSD__ */ STATIC int en_makeexclusive(struct en_softc *, struct mbuf **, struct mbuf *); STATIC int en_makeexclusive(sc, mm, prev) struct en_softc *sc; struct mbuf **mm, *prev; { struct mbuf *m, *new; m = *mm; if (m->m_flags & M_EXT) { if (m->m_ext.ext_type != EXT_CLUSTER) { /* external buffer isn't an ordinary mbuf cluster! */ printf("%s: mfix: special buffer! can't make a copy!\n", sc->sc_dev.dv_xname); return (0); } if (MEXT_IS_REF(m)) { /* make a real copy of the M_EXT mbuf since it is shared */ MGET(new, M_DONTWAIT, MT_DATA); if (!new) { EN_COUNT(sc->mfixfail); return(0); } if (m->m_flags & M_PKTHDR) M_MOVE_PKTHDR(new, m); MCLGET(new, M_DONTWAIT); if ((new->m_flags & M_EXT) == 0) { m_free(new); EN_COUNT(sc->mfixfail); return(0); } bcopy(m->m_data, new->m_data, m->m_len); new->m_len = m->m_len; new->m_next = m->m_next; if (prev) prev->m_next = new; m_free(m); *mm = new; } else { /* the buffer is not shared, align the data offset using this buffer.
*/ u_char *d = mtod(m, u_char *); int off = ((uintptr_t)(void *)d) % sizeof(u_int32_t); if (off > 0) { bcopy(d, d - off, m->m_len); m->m_data = (caddr_t)d - off; } } } return (1); } STATIC int en_mfix(sc, mm, prev) struct en_softc *sc; struct mbuf **mm, *prev; { struct mbuf *m; u_char *d, *cp; int off; struct mbuf *nxt; m = *mm; EN_COUNT(sc->mfix); /* count # of calls */ #ifdef EN_DEBUG printf("%s: mfix mbuf m_data=%p, m_len=%d\n", sc->sc_dev.dv_xname, m->m_data, m->m_len); #endif d = mtod(m, u_char *); off = ((uintptr_t) (void *) d) % sizeof(u_int32_t); if (off) { if ((m->m_flags & M_EXT) == 0) { bcopy(d, d - off, m->m_len); /* ALIGN! (with costly data copy...) */ d -= off; m->m_data = (caddr_t)d; } else { /* can't write to an M_EXT mbuf since it may be shared */ if (en_makeexclusive(sc, &m, prev) == 0) return (0); *mm = m; /* note: 'd' now invalid */ } } off = m->m_len % sizeof(u_int32_t); if (off == 0) return(1); if (m->m_flags & M_EXT) { /* can't write to an M_EXT mbuf since it may be shared */ if (en_makeexclusive(sc, &m, prev) == 0) return (0); *mm = m; /* note: 'd' now invalid */ } d = mtod(m, u_char *) + m->m_len; off = sizeof(u_int32_t) - off; nxt = m->m_next; while (off--) { if (nxt != NULL && nxt->m_len == 0) { /* remove an empty mbuf. this avoids odd byte padding to an empty last mbuf. 
*/ m->m_next = nxt = m_free(nxt); } if (nxt == NULL) { /* out of data, zero fill */ *d++ = 0; continue; /* next "off" */ } cp = mtod(nxt, u_char *); *d++ = *cp++; m->m_len++; nxt->m_len--; nxt->m_data = (caddr_t)cp; } if (nxt != NULL && nxt->m_len == 0) m->m_next = m_free(nxt); return(1); } #endif /* __FreeBSD__ */ /* * en_txdma: start transmit DMA, if possible */ STATIC void en_txdma(sc, chan) struct en_softc *sc; int chan; { struct mbuf *tmp; struct atm_pseudohdr *ap; struct en_launch launch; int datalen = 0, dtqneed, len, ncells; u_int8_t *cp; struct ifnet *ifp; #ifdef EN_DEBUG printf("%s: tx%d: starting...\n", sc->sc_dev.dv_xname, chan); #endif /* * note: now that txlaunch handles non-word aligned/sized requests * the only time you can safely set launch.nodma is if you've en_mfix()'d * the mbuf chain. this happens only if EN_NOTXDMA || !en_dma. */ launch.nodma = (EN_NOTXDMA || !en_dma); again: /* * get an mbuf waiting for DMA */ launch.t = sc->txslot[chan].q.ifq_head; /* peek at head of queue */ if (launch.t == NULL) { #ifdef EN_DEBUG printf("%s: tx%d: ...done!\n", sc->sc_dev.dv_xname, chan); #endif return; /* >>> exit here if no data waiting for DMA <<< */ } /* * get flags, vci * * note: launch.need = # bytes we need to get on the card * dtqneed = # of DTQs we need for this packet * launch.mlen = # of bytes in mbuf chain (<= launch.need) */ ap = mtod(launch.t, struct atm_pseudohdr *); launch.atm_vci = ATM_PH_VCI(ap); launch.atm_flags = ATM_PH_FLAGS(ap); launch.aal = ((launch.atm_flags & ATM_PH_AAL5) != 0) ? MID_TBD_AAL5 : MID_TBD_NOAAL5; /* * XXX: have to recompute the length again, even though we already did * it in en_start(). might as well compute dtqneed here as well, so * this isn't that bad.
*/ if ((launch.atm_flags & EN_OBHDR) == 0) { dtqneed = 1; /* header still needs to be added */ launch.need = MID_TBD_SIZE; /* not includeded with mbuf */ } else { dtqneed = 0; /* header on-board, dma with mbuf */ launch.need = 0; } launch.mlen = 0; for (tmp = launch.t ; tmp != NULL ; tmp = tmp->m_next) { len = tmp->m_len; launch.mlen += len; cp = mtod(tmp, u_int8_t *); if (tmp == launch.t) { len -= sizeof(struct atm_pseudohdr); /* don't count this! */ cp += sizeof(struct atm_pseudohdr); } launch.need += len; if (len == 0) continue; /* atm_pseudohdr alone in first mbuf */ dtqneed += en_dqneed(sc, (caddr_t) cp, len, 1); } if ((launch.need % sizeof(u_int32_t)) != 0) dtqneed++; /* need DTQ to FLUSH internal buffer */ if ((launch.atm_flags & EN_OBTRL) == 0) { if (launch.aal == MID_TBD_AAL5) { datalen = launch.need - MID_TBD_SIZE; launch.need += MID_PDU_SIZE; /* AAL5: need PDU tail */ } dtqneed++; /* need to work on the end a bit */ } /* * finish calculation of launch.need (need to figure out how much padding * we will need). launch.need includes MID_TBD_SIZE, but we need to * remove that to so we can round off properly. we have to add * MID_TBD_SIZE back in after calculating ncells. */ launch.need = roundup(launch.need - MID_TBD_SIZE, MID_ATMDATASZ); ncells = launch.need / MID_ATMDATASZ; launch.need += MID_TBD_SIZE; if (launch.need > EN_TXSZ * 1024) { printf("%s: tx%d: packet larger than xmit buffer (%d > %d)\n", sc->sc_dev.dv_xname, chan, launch.need, EN_TXSZ * 1024); goto dequeue_drop; } /* * note: don't use the entire buffer space. if WRTX becomes equal * to RDTX, the transmitter stops assuming the buffer is empty! --kjc */ if (launch.need >= sc->txslot[chan].bfree) { EN_COUNT(sc->txoutspace); #ifdef EN_DEBUG printf("%s: tx%d: out of transmit space\n", sc->sc_dev.dv_xname, chan); #endif return; /* >>> exit here if out of obmem buffer space <<< */ } /* * ensure we have enough dtqs to go, if not, wait for more. 
*/ if (launch.nodma) { dtqneed = 1; } if (dtqneed > sc->dtq_free) { sc->need_dtqs = 1; EN_COUNT(sc->txdtqout); #ifdef EN_DEBUG printf("%s: tx%d: out of transmit DTQs\n", sc->sc_dev.dv_xname, chan); #endif return; /* >>> exit here if out of dtqs <<< */ } /* * it is a go, commit! dequeue mbuf start working on the xfer. */ _IF_DEQUEUE(&sc->txslot[chan].q, tmp); #ifdef EN_DIAG if (launch.t != tmp) panic("en dequeue"); #endif /* EN_DIAG */ /* * launch! */ EN_COUNT(sc->launch); ifp = &sc->enif; ifp->if_opackets++; if ((launch.atm_flags & EN_OBHDR) == 0) { EN_COUNT(sc->lheader); /* store tbd1/tbd2 in host byte order */ launch.tbd1 = MID_TBD_MK1(launch.aal, sc->txspeed[launch.atm_vci], ncells); launch.tbd2 = MID_TBD_MK2(launch.atm_vci, 0, 0); } if ((launch.atm_flags & EN_OBTRL) == 0 && launch.aal == MID_TBD_AAL5) { EN_COUNT(sc->ltail); launch.pdu1 = MID_PDU_MK1(0, 0, datalen); /* host byte order */ } en_txlaunch(sc, chan, &launch); #if NBPF > 0 if (ifp->if_bpf) { /* * adjust the top of the mbuf to skip the pseudo atm header * (and TBD, if present) before passing the packet to bpf, * restore it afterwards. */ int size = sizeof(struct atm_pseudohdr); if (launch.atm_flags & EN_OBHDR) size += MID_TBD_SIZE; launch.t->m_data += size; launch.t->m_len -= size; BPF_MTAP(ifp, launch.t); launch.t->m_data -= size; launch.t->m_len += size; } #endif /* NBPF > 0 */ /* * do some housekeeping and get the next packet */ sc->txslot[chan].bfree -= launch.need; _IF_ENQUEUE(&sc->txslot[chan].indma, launch.t); goto again; /* * END of txdma loop! */ /* * error handles */ dequeue_drop: _IF_DEQUEUE(&sc->txslot[chan].q, tmp); if (launch.t != tmp) panic("en dequeue drop"); m_freem(launch.t); sc->txslot[chan].mbsize -= launch.mlen; goto again; } /* * en_txlaunch: launch an mbuf into the dma pool! 
*/

/*
 * Copies/DMAs one packet (described by *l, already resource-checked by
 * en_txdma) into the channel's on-board transmit buffer, inserting the
 * TBD header, AAL5 PDU trailer and pad words by hand when they are not
 * already on-board.  Advances txslot[chan].cur past everything written.
 * The caller has already verified buffer space and DTQ availability.
 */
STATIC void en_txlaunch(sc, chan, l)

struct en_softc *sc;
int chan;
struct en_launch *l;

{
  struct mbuf *tmp;
  u_int32_t cur = sc->txslot[chan].cur,
	    start = sc->txslot[chan].start,
	    stop = sc->txslot[chan].stop,
	    dma, *data, *datastop, count, bcode;
  int pad, addtail, need, len, needalign, cnt, end, mx;

  /*
   * vars:
   *   need = # bytes card still needs (decr. to zero)
   *   len = # of bytes left in current mbuf
   *   cur = our current pointer
   *   dma = last place we programmed into the DMA
   *   data = pointer into data area of mbuf that needs to go next
   *   cnt = # of bytes to transfer in this DTQ
   *   bcode/count = DMA burst code, and chip's version of cnt
   *
   *   a single buffer can require up to 5 DTQs depending on its size
   *   and alignment requirements.  the 5 possible requests are:
   *   [1] 1, 2, or 3 byte DMA to align src data pointer to word boundary
   *   [2] alburst DMA to align src data pointer to bestburstlen
   *   [3] 1 or more bestburstlen DMAs
   *   [4] clean up burst (to last word boundary)
   *   [5] 1, 2, or 3 byte final clean up DMA
   */

  need = l->need;
  dma = cur;
  addtail = (l->atm_flags & EN_OBTRL) == 0;	/* add a tail? */

#ifdef EN_DIAG
  /* NOTE(review): "trasmit" typo is in the original log string; left as-is */
  if ((need - MID_TBD_SIZE) % MID_ATMDATASZ)
    printf("%s: tx%d: bogus trasmit needs (%d)\n", sc->sc_dev.dv_xname, chan,
	need);
#endif
#ifdef EN_DEBUG
  printf("%s: tx%d: launch mbuf %p! cur=0x%x[%d], need=%d, addtail=%d\n",
	sc->sc_dev.dv_xname, chan, l->t, cur, (cur-start)/4, need, addtail);
  count = EN_READ(sc, MIDX_PLACE(chan));
  printf(" HW: base_address=0x%x, size=%d, read=%d, descstart=%d\n",
	(u_int)MIDX_BASE(count), MIDX_SZ(count),
	(int)EN_READ(sc, MIDX_READPTR(chan)),
	(int)EN_READ(sc, MIDX_DESCSTART(chan)));
#endif

  /*
   * do we need to insert the TBD by hand?
   * note that tbd1/tbd2/pdu1 are in host byte order.
   */

  if ((l->atm_flags & EN_OBHDR) == 0) {
#ifdef EN_DEBUG
    printf("%s: tx%d: insert header 0x%x 0x%x\n", sc->sc_dev.dv_xname,
	chan, l->tbd1, l->tbd2);
#endif
    EN_WRITE(sc, cur, l->tbd1);
    EN_WRAPADD(start, stop, cur, 4);
    EN_WRITE(sc, cur, l->tbd2);
    EN_WRAPADD(start, stop, cur, 4);
    need -= 8;
  }

  /*
   * now do the mbufs...
   */

  for (tmp = l->t ; tmp != NULL ; tmp = tmp->m_next) {

    /* get pointer to data and length */
    data = mtod(tmp, u_int32_t *);
    len = tmp->m_len;
    if (tmp == l->t) {
      /* skip the atm_pseudohdr in the first mbuf */
      data += sizeof(struct atm_pseudohdr)/sizeof(u_int32_t);
      len -= sizeof(struct atm_pseudohdr);
    }

    /* now, determine if we should copy it */
    if (l->nodma || (len < EN_MINDMA &&
       (len % 4) == 0 && ((uintptr_t) (void *) data % 4) == 0 &&
       (cur % 4) == 0)) {

      /*
       * roundup len: the only time this will change the value of len
       * is when l->nodma is true, tmp is the last mbuf, and there is
       * a non-word number of bytes to transmit.  in this case it is
       * safe to round up because we've en_mfix'd the mbuf (so the first
       * byte is word aligned there must be enough free bytes at the end
       * to round off to the next word boundary)...
       */
      len = roundup(len, sizeof(u_int32_t));
      datastop = data + (len / sizeof(u_int32_t));

      /* copy loop: preserve byte order!!!  use WRITEDAT */
      while (data != datastop) {
	EN_WRITEDAT(sc, cur, *data);
	data++;
	EN_WRAPADD(start, stop, cur, 4);
      }

      need -= len;

#ifdef EN_DEBUG
      printf("%s: tx%d: copied %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, len, need, cur);
#endif

      continue;		/* continue on to next mbuf */
    }

    /* going to do DMA, first make sure the dtq is in sync. */
    if (dma != cur) {
      EN_DTQADD(sc, WORD_IDX(start,cur), chan, MIDDMA_JK, 0, 0, 0);
#ifdef EN_DEBUG
      printf("%s: tx%d: dtq_sync: advance pointer to %d\n",
		sc->sc_dev.dv_xname, chan, cur);
#endif
    }

    /*
     * if this is the last buffer, and it looks like we are going to need to
     * flush the internal buffer, can we extend the length of this mbuf to
     * avoid the FLUSH?
     */

    if (tmp->m_next == NULL) {
      cnt = (need - len) % sizeof(u_int32_t);
      if (cnt && M_TRAILINGSPACE(tmp) >= cnt)
	len += cnt;			/* pad for FLUSH */
    }

#if !defined(MIDWAY_ENIONLY)

    /*
     * the adaptec DMA engine is smart and handles everything for us.
     */

    if (sc->is_adaptec) {
      /* need to DMA "len" bytes out to card */
      need -= len;
      EN_WRAPADD(start, stop, cur, len);
#ifdef EN_DEBUG
      printf("%s: tx%d: adp_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, len, need, cur);
#endif
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, len, chan, 0, vtophys(data), l->mlen, end);
      if (end)
	goto done;
      dma = cur;	/* update dma pointer */
      continue;
    }
#endif /* !MIDWAY_ENIONLY */

#if !defined(MIDWAY_ADPONLY)

    /*
     * the ENI DMA engine is not so smart and need more help from us
     */

    /* do we need to do a DMA op to align to word boundary? */
    needalign = (uintptr_t) (void *) data % sizeof(u_int32_t);
    if (needalign) {
      EN_COUNT(sc->headbyte);
      cnt = sizeof(u_int32_t) - needalign;
      if (cnt == 2 && len >= cnt) {
	count = 1;
	bcode = MIDDMA_2BYTE;
      } else {
	cnt = min(cnt, len);		/* prevent overflow */
	count = cnt;
	bcode = MIDDMA_BYTE;
      }
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: tx%d: small al_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, cnt, need, cur);
#endif
      len -= cnt;
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, count, chan, bcode, vtophys(data), l->mlen, end);
      if (end)
	goto done;
      data = (u_int32_t *) ((u_char *)data + cnt);
    }

    /* do we need to do a DMA op to align? */
    if (sc->alburst &&
	(needalign = (((uintptr_t) (void *) data) & sc->bestburstmask)) != 0 &&
	len >= sizeof(u_int32_t)) {
      cnt = sc->bestburstlen - needalign;
      mx = len & ~(sizeof(u_int32_t)-1);	/* don't go past end */
      if (cnt > mx) {
	cnt = mx;
	count = cnt / sizeof(u_int32_t);
	bcode = MIDDMA_WORD;
      } else {
	count = cnt / sizeof(u_int32_t);
	bcode = en_dmaplan[count].bcode;
	count = cnt >> en_dmaplan[count].divshift;
      }
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: tx%d: al_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, cnt, need, cur);
#endif
      len -= cnt;
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, count, chan, bcode, vtophys(data), l->mlen, end);
      if (end)
	goto done;
      data = (u_int32_t *) ((u_char *)data + cnt);
    }

    /* do we need to do a max-sized burst? */
    if (len >= sc->bestburstlen) {
      count = len >> sc->bestburstshift;
      cnt = count << sc->bestburstshift;
      bcode = sc->bestburstcode;
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: tx%d: best_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, cnt, need, cur);
#endif
      len -= cnt;
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, count, chan, bcode, vtophys(data), l->mlen, end);
      if (end)
	goto done;
      data = (u_int32_t *) ((u_char *)data + cnt);
    }

    /* do we need to do a cleanup burst? */
    cnt = len & ~(sizeof(u_int32_t)-1);
    if (cnt) {
      count = cnt / sizeof(u_int32_t);
      bcode = en_dmaplan[count].bcode;
      count = cnt >> en_dmaplan[count].divshift;
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: tx%d: cleanup_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, cnt, need, cur);
#endif
      len -= cnt;
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, count, chan, bcode, vtophys(data), l->mlen, end);
      if (end)
	goto done;
      data = (u_int32_t *) ((u_char *)data + cnt);
    }

    /* any word fragments left? */
    if (len) {
      EN_COUNT(sc->tailbyte);
      if (len == 2) {
	count = 1;
	bcode = MIDDMA_2BYTE;		/* use 2byte mode */
      } else {
	count = len;
	bcode = MIDDMA_BYTE;		/* use 1 byte mode */
      }
      need -= len;
      EN_WRAPADD(start, stop, cur, len);
#ifdef EN_DEBUG
      printf("%s: tx%d: byte cleanup_dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, len, need, cur);
#endif
      end = (need == 0) ? MID_DMA_END : 0;
      EN_DTQADD(sc, count, chan, bcode, vtophys(data), l->mlen, end);
      if (end)
	goto done;
    }

    dma = cur;		/* update dma pointer */

#endif /* !MIDWAY_ADPONLY */

  } /* next mbuf, please */

  /*
   * all mbuf data has been copied out to the obmem (or set up to be DMAd).
   * if the trailer or padding needs to be put in, do it now.
   *
   * NOTE: experimental results reveal the following fact:
   *   if you DMA "X" bytes to the card, where X is not a multiple of 4,
   *   then the card will internally buffer the last (X % 4) bytes (in
   *   hopes of getting (4 - (X % 4)) more bytes to make a complete word).
   *   it is imporant to make sure we don't leave any important data in
   *   this internal buffer because it is discarded on the last (end) DTQ.
   *   one way to do this is to DMA in (4 - (X % 4)) more bytes to flush
   *   the darn thing out.
   */

  if (addtail) {

    pad = need % sizeof(u_int32_t);
    if (pad) {
      /*
       * FLUSH internal data buffer.  pad out with random data from the front
       * of the mbuf chain...
       */
      bcode = (sc->is_adaptec) ? 0 : MIDDMA_BYTE;
      EN_COUNT(sc->tailflush);
      EN_WRAPADD(start, stop, cur, pad);
      EN_DTQADD(sc, pad, chan, bcode, vtophys(l->t->m_data), 0, 0);
      need -= pad;
#ifdef EN_DEBUG
      printf("%s: tx%d: pad/FLUSH dma %d bytes (%d left, cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, pad, need, cur);
#endif
    }

    /* copy data */
    pad = need / sizeof(u_int32_t);	/* round *down* */
    if (l->aal == MID_TBD_AAL5)
      pad -= 2;				/* reserve room for the PDU trailer */
#ifdef EN_DEBUG
    printf("%s: tx%d: padding %d bytes (cur now 0x%x)\n",
		sc->sc_dev.dv_xname, chan, (int)(pad * sizeof(u_int32_t)),
		cur);
#endif
    while (pad--) {
      EN_WRITEDAT(sc, cur, 0);	/* no byte order issues with zero */
      EN_WRAPADD(start, stop, cur, 4);
    }
    if (l->aal == MID_TBD_AAL5) {
      EN_WRITE(sc, cur, l->pdu1);	/* in host byte order */
      EN_WRAPADD(start, stop, cur, 8);
    }
  }

  if (addtail || dma != cur) {
    /* write final descritor */
    EN_DTQADD(sc, WORD_IDX(start,cur), chan, MIDDMA_JK, 0,
				l->mlen, MID_DMA_END);
    /* dma = cur; */	/* not necessary since we are done */
  }

done:
  /* update current pointer */
  sc->txslot[chan].cur = cur;
#ifdef EN_DEBUG
  printf("%s: tx%d: DONE! cur now = 0x%x\n", sc->sc_dev.dv_xname, chan, cur);
#endif

  return;
}

/*
 * interrupt handler
 */

EN_INTR_TYPE en_intr(arg)

void *arg;

{
  struct en_softc *sc = (struct en_softc *) arg;
  struct mbuf *m;
  struct atm_pseudohdr ah;
  struct ifnet *ifp;
  u_int32_t reg, kick, val, mask, chip, vci, slot, dtq, drq;
  int lcv, idx, need_softserv = 0;

  /* reading MID_INTACK acknowledges (clears) the pending interrupts */
  reg = EN_READ(sc, MID_INTACK);
  if ((reg & MID_INT_ANY) == 0)
    EN_INTR_RET(0);	/* not us */

#ifdef EN_DEBUG
  printf("%s: interrupt=0x%b\n", sc->sc_dev.dv_xname, reg, MID_INTBITS);
#endif

  /*
   * unexpected errors that need a reset
   */

  if ((reg & (MID_INT_IDENT|MID_INT_LERR|MID_INT_DMA_ERR|MID_INT_SUNI)) != 0) {
    printf("%s: unexpected interrupt=0x%b, resetting card\n",
	sc->sc_dev.dv_xname, reg, MID_INTBITS);
#ifdef EN_DEBUG
#ifdef DDB
#ifdef __FreeBSD__
    Debugger("en: unexpected error");
#else
    Debugger();
#endif
#endif	/* DDB */
    sc->enif.if_flags &= ~IFF_RUNNING; /* FREEZE!
*/
#else
    en_reset(sc);
    en_init(sc);
#endif
    EN_INTR_RET(1);	/* for us */
  }

  /*******************
   * xmit interrupts *
   ******************/

  kick = 0;				/* bitmask of channels to kick */
  if (reg & MID_INT_TX) {		/* TX done! */

    /*
     * check for tx complete, if detected then this means that some space
     * has come free on the card.  we must account for it and arrange to
     * kick the channel to life (in case it is stalled waiting on the card).
     */

    for (mask = 1, lcv = 0 ; lcv < EN_NTX ; lcv++, mask = mask * 2) {
      if (reg & MID_TXCHAN(lcv)) {
	kick = kick | mask;	/* want to kick later */
	val = EN_READ(sc, MIDX_READPTR(lcv));	/* current read pointer */
	val = (val * sizeof(u_int32_t)) + sc->txslot[lcv].start;
						/* convert to offset */
	if (val > sc->txslot[lcv].cur)
	  sc->txslot[lcv].bfree = val - sc->txslot[lcv].cur;
	else
	  sc->txslot[lcv].bfree = (val + (EN_TXSZ*1024)) - sc->txslot[lcv].cur;
#ifdef EN_DEBUG
	printf("%s: tx%d: trasmit done. %d bytes now free in buffer\n",
		sc->sc_dev.dv_xname, lcv, sc->txslot[lcv].bfree);
#endif
      }
    }
  }

  if (reg & MID_INT_DMA_TX) {		/* TX DMA done! */

    /*
     * check for TX DMA complete, if detected then this means that some DTQs
     * are now free.  it also means some indma mbufs can be freed.
     * if we needed DTQs, kick all channels.
     */

    val = EN_READ(sc, MID_DMA_RDTX);	/* chip's current location */
    idx = MID_DTQ_A2REG(sc->dtq_chip);	/* where we last saw chip */

    if (sc->need_dtqs) {
      kick = MID_NTX_CH - 1;	/* assume power of 2, kick all! */
      sc->need_dtqs = 0;	/* recalculated in "kick" loop below */
#ifdef EN_DEBUG
      printf("%s: cleared need DTQ condition\n", sc->sc_dev.dv_xname);
#endif
    }

    /* walk the DTQ ring from our last sync point to the chip's position */
    while (idx != val) {
      sc->dtq_free++;
      if ((dtq = sc->dtq[idx]) != 0) {
	sc->dtq[idx] = 0;	/* don't forget to zero it out when done */
	slot = EN_DQ_SLOT(dtq);
	_IF_DEQUEUE(&sc->txslot[slot].indma, m);
	if (!m) panic("enintr: dtqsync");
	sc->txslot[slot].mbsize -= EN_DQ_LEN(dtq);
#ifdef EN_DEBUG
	printf("%s: tx%d: free %d dma bytes, mbsize now %d\n",
		sc->sc_dev.dv_xname, slot, EN_DQ_LEN(dtq),
		sc->txslot[slot].mbsize);
#endif
	m_freem(m);
      }
      EN_WRAPADD(0, MID_DTQ_N, idx, 1);
    };
    sc->dtq_chip = MID_DTQ_REG2A(val);	/* sync softc */
  }

  /*
   * kick xmit channels as needed
   */

  if (kick) {
#ifdef EN_DEBUG
    printf("%s: tx kick mask = 0x%x\n", sc->sc_dev.dv_xname, kick);
#endif
    for (mask = 1, lcv = 0 ; lcv < EN_NTX ; lcv++, mask = mask * 2) {
      if ((kick & mask) && sc->txslot[lcv].q.ifq_head) {
	en_txdma(sc, lcv);	/* kick it! */
      }
    }		/* for each slot */
  }		/* if kick */

  /*******************
   * recv interrupts *
   ******************/

  /*
   * check for RX DMA complete, and pass the data "upstairs"
   */

  if (reg & MID_INT_DMA_RX) {
    val = EN_READ(sc, MID_DMA_RDRX);	/* chip's current location */
    idx = MID_DRQ_A2REG(sc->drq_chip);	/* where we last saw chip */

    while (idx != val) {
      sc->drq_free++;
      if ((drq = sc->drq[idx]) != 0) {
	sc->drq[idx] = 0;	/* don't forget to zero it out when done */
	slot = EN_DQ_SLOT(drq);
	if (EN_DQ_LEN(drq) == 0) {	/* "JK" trash DMA? */
	  m = NULL;
	} else {
	  _IF_DEQUEUE(&sc->rxslot[slot].indma, m);
	  if (!m) panic("enintr: drqsync: %s: lost mbuf in slot %d!",
			sc->sc_dev.dv_xname, slot);
	}
	/* do something with this mbuf */
	if (sc->rxslot[slot].oth_flags & ENOTHER_DRAIN) {  /* drain? */
	  if (m) m_freem(m);
	  vci = sc->rxslot[slot].atm_vci;
	  if (sc->rxslot[slot].indma.ifq_head == NULL &&
		sc->rxslot[slot].q.ifq_head == NULL &&
		(EN_READ(sc, MID_VC(vci)) & MIDV_INSERVICE) == 0 &&
		(sc->rxslot[slot].oth_flags & ENOTHER_SWSL) == 0) {
	    sc->rxslot[slot].oth_flags = ENOTHER_FREE;	/* done drain */
	    sc->rxslot[slot].atm_vci = RX_NONE;
	    sc->rxvc2slot[vci] = RX_NONE;
#ifdef EN_DEBUG
	    printf("%s: rx%d: VCI %d now free\n", sc->sc_dev.dv_xname,
			slot, vci);
#endif
	  }
	} else if (m != NULL) {
	  ATM_PH_FLAGS(&ah) = sc->rxslot[slot].atm_flags;
	  ATM_PH_VPI(&ah) = 0;
	  ATM_PH_SETVCI(&ah, sc->rxslot[slot].atm_vci);
#ifdef EN_DEBUG
	  printf("%s: rx%d: rxvci%d: atm_input, mbuf %p, len %d, hand %p\n",
		sc->sc_dev.dv_xname, slot, sc->rxslot[slot].atm_vci, m,
		EN_DQ_LEN(drq), sc->rxslot[slot].rxhand);
#endif
	  ifp = &sc->enif;
	  ifp->if_ipackets++;
#if NBPF > 0
	  if (ifp->if_bpf)
	    BPF_MTAP(ifp, m);
#endif
	  atm_input(ifp, &ah, m, sc->rxslot[slot].rxhand);
	}
      }
      EN_WRAPADD(0, MID_DRQ_N, idx, 1);
    };
    sc->drq_chip = MID_DRQ_REG2A(val);	/* sync softc */

    if (sc->need_drqs) {	/* true if we had a DRQ shortage */
      need_softserv = 1;
      sc->need_drqs = 0;
#ifdef EN_DEBUG
      printf("%s: cleared need DRQ condition\n", sc->sc_dev.dv_xname);
#endif
    }
  }

  /*
   * handle service interrupts
   */

  if (reg & MID_INT_SERVICE) {
    chip = MID_SL_REG2A(EN_READ(sc, MID_SERV_WRITE));

    while (sc->hwslistp != chip) {

      /* fetch and remove it from hardware service list */
      vci = EN_READ(sc, sc->hwslistp);
      EN_WRAPADD(MID_SLOFF, MID_SLEND, sc->hwslistp, 4);/* advance hw ptr */
      slot = sc->rxvc2slot[vci];
      if (slot == RX_NONE) {
#ifdef EN_DEBUG
	printf("%s: unexpected rx interrupt on VCI %d\n",
		sc->sc_dev.dv_xname, vci);
#endif
	EN_WRITE(sc, MID_VC(vci), MIDV_TRASH);	/* rx off, damn it!
*
 */

/*
 * Pulls slots off the software service list (swslist) and, for each one,
 * drains newly-arrived data from the card's receive buffer: allocates
 * mbufs, copies or DMAs the data in, strips RBD/cell headers as
 * appropriate, and hands finished chains to the RX DMA-complete path via
 * rxslot[slot].indma.  Runs until the swslist is empty or we run out of
 * mbufs/DRQs (in which case state is parked on rxslot[slot].q for later).
 */
STATIC void en_service(sc)

struct en_softc *sc;

{
  struct mbuf *m, *tmp;
  u_int32_t cur, dstart, rbd, pdu, *sav, dma, bcode, count, *data, *datastop;
  u_int32_t start, stop, cnt, needalign;
  int slot, raw, aal5, llc, vci, fill, mlen, tlen, drqneed, need, needfill,
      end;

  aal5 = 0;		/* Silence gcc (only read after !raw sets it) */

next_vci:
  if (sc->swsl_size == 0) {
#ifdef EN_DEBUG
    printf("%s: en_service done\n", sc->sc_dev.dv_xname);
#endif
    return;	/* >>> exit here if swsl now empty <<< */
  }

  /*
   * get slot/vci to service
   */

  slot = sc->swslist[sc->swsl_head];
  vci = sc->rxslot[slot].atm_vci;
#ifdef EN_DIAG
  if (sc->rxvc2slot[vci] != slot) panic("en_service rx slot/vci sync");
#endif

  /*
   * determine our mode and if we've got any work to do
   */

  raw = sc->rxslot[slot].oth_flags & ENOTHER_RAW;
  start= sc->rxslot[slot].start;
  stop= sc->rxslot[slot].stop;
  cur = sc->rxslot[slot].cur;

#ifdef EN_DEBUG
  printf("%s: rx%d: service vci=%d raw=%d start/stop/cur=0x%x 0x%x 0x%x\n",
	sc->sc_dev.dv_xname, slot, vci, raw, start, stop, cur);
#endif

same_vci:
  dstart = MIDV_DSTART(EN_READ(sc, MID_DST_RP(vci)));
  dstart = (dstart * sizeof(u_int32_t)) + start;

  /* check to see if there is any data at all */
  if (dstart == cur) {
defer:		/* defer processing */
    EN_WRAPADD(0, MID_SL_N, sc->swsl_head, 1);
    sc->rxslot[slot].oth_flags &= ~ENOTHER_SWSL;
    sc->swsl_size--;
				/* >>> remove from swslist <<< */
#ifdef EN_DEBUG
    printf("%s: rx%d: remove vci %d from swslist\n",
	sc->sc_dev.dv_xname, slot, vci);
#endif
    goto next_vci;
  }

  /*
   * figure out how many bytes we need
   * [mlen = # bytes to go in mbufs, fill = # bytes to dump (MIDDMA_JK)]
   */

  if (raw) {

    /* raw mode (aka boodi mode) */
    fill = 0;
    if (dstart > cur)
      mlen = dstart - cur;
    else
      mlen = (dstart + (EN_RXSZ*1024)) - cur;

    if (mlen < sc->rxslot[slot].raw_threshold)
      goto defer;		/* too little data to deal with */

  } else {

    /* normal mode */
    aal5 = (sc->rxslot[slot].atm_flags & ATM_PH_AAL5);
    llc = (aal5 && (sc->rxslot[slot].atm_flags & ATM_PH_LLCSNAP)) ? 1 : 0;
    rbd = EN_READ(sc, cur);
    if (MID_RBD_ID(rbd) != MID_RBD_STDID)
      panic("en_service: id mismatch");

    if (rbd & MID_RBD_T) {
      mlen = 0;			/* we've got trash */
      fill = MID_RBD_SIZE;
      EN_COUNT(sc->ttrash);
#ifdef EN_DEBUG
      printf("RX overflow lost %d cells!\n", MID_RBD_CNT(rbd));
#endif
    } else if (!aal5) {
      mlen = MID_RBD_SIZE + MID_CHDR_SIZE + MID_ATMDATASZ; /* 1 cell (ick!) */
      fill = 0;
    } else {
      struct ifnet *ifp;

      tlen = (MID_RBD_CNT(rbd) * MID_ATMDATASZ) + MID_RBD_SIZE;
      pdu = cur + tlen - MID_PDU_SIZE;
      if (pdu >= stop)
	pdu -= (EN_RXSZ*1024);
      pdu = EN_READ(sc, pdu);	/* get PDU in correct byte order */
      fill = tlen - MID_RBD_SIZE - MID_PDU_LEN(pdu);
      if (fill < 0 || (rbd & MID_RBD_CRCERR) != 0) {
	static int first = 1;

	if (first) {
	  printf("%s: %s, dropping frame\n", sc->sc_dev.dv_xname,
		(rbd & MID_RBD_CRCERR) ?
		"CRC error" : "invalid AAL5 PDU length");
	  printf("%s: got %d cells (%d bytes), AAL5 len is %d bytes (pdu=0x%x)\n",
		sc->sc_dev.dv_xname, MID_RBD_CNT(rbd),
		tlen - MID_RBD_SIZE, MID_PDU_LEN(pdu), pdu);
#ifndef EN_DEBUG
	  printf("CRC error report disabled from now on!\n");
	  first = 0;
#endif
	}
	fill = tlen;
	ifp = &sc->enif;
	ifp->if_ierrors++;
      }
      mlen = tlen - fill;
    }

  }

  /*
   * now allocate mbufs for mlen bytes of data, if out of mbufs, trash all
   *
   * notes:
   *  1. it is possible that we've already allocated an mbuf for this pkt
   *     but ran out of DRQs, in which case we saved the allocated mbuf on
   *     "q".
   *  2. if we save an mbuf in "q" we store the "cur" (pointer) in the front
   *     of the mbuf as an identity (that we can check later), and we also
   *     store drqneed (so we don't have to recompute it).
   *  3. after this block of code, if m is still NULL then we ran out of mbufs
   */

  m = sc->rxslot[slot].q.ifq_head;
  drqneed = 1;
  if (m) {
    sav = mtod(m, u_int32_t *);
    if (sav[0] != cur) {
#ifdef EN_DEBUG
      printf("%s: rx%d: q'ed mbuf %p not ours\n",
		sc->sc_dev.dv_xname, slot, m);
#endif
      m = NULL;			/* wasn't ours */
      EN_COUNT(sc->rxqnotus);
    } else {
      EN_COUNT(sc->rxqus);
      _IF_DEQUEUE(&sc->rxslot[slot].q, m);
      drqneed = sav[1];
#ifdef EN_DEBUG
      printf("%s: rx%d: recovered q'ed mbuf %p (drqneed=%d)\n",
	sc->sc_dev.dv_xname, slot, m, drqneed);
#endif
    }
  }

  if (mlen != 0 && m == NULL) {
    m = en_mget(sc, mlen, &drqneed);		/* allocate! */
    if (m == NULL) {
      fill += mlen;
      mlen = 0;
      EN_COUNT(sc->rxmbufout);
#ifdef EN_DEBUG
      printf("%s: rx%d: out of mbufs\n", sc->sc_dev.dv_xname, slot);
#endif
    }
#ifdef EN_DEBUG
    printf("%s: rx%d: allocate mbuf %p, mlen=%d, drqneed=%d\n",
	sc->sc_dev.dv_xname, slot, m, mlen, drqneed);
#endif
  }

#ifdef EN_DEBUG
  printf("%s: rx%d: VCI %d, mbuf_chain %p, mlen %d, fill %d\n",
	sc->sc_dev.dv_xname, slot, vci, m, mlen, fill);
#endif

  /*
   * now check to see if we've got the DRQs needed.  if we are out of
   * DRQs we must quit (saving our mbuf, if we've got one).
   */

  needfill = (fill) ? 1 : 0;
  if (drqneed + needfill > sc->drq_free) {
    sc->need_drqs = 1;	/* flag condition */
    if (m == NULL) {
      EN_COUNT(sc->rxoutboth);
#ifdef EN_DEBUG
      printf("%s: rx%d: out of DRQs *and* mbufs!\n", sc->sc_dev.dv_xname, slot);
#endif
      return;	/* >>> exit here if out of both mbufs and DRQs <<< */
    }
    sav = mtod(m, u_int32_t *);
    sav[0] = cur;
    sav[1] = drqneed;
    _IF_ENQUEUE(&sc->rxslot[slot].q, m);
    EN_COUNT(sc->rxdrqout);
#ifdef EN_DEBUG
    printf("%s: rx%d: out of DRQs\n", sc->sc_dev.dv_xname, slot);
#endif
    return;	/* >>> exit here if out of DRQs <<< */
  }

  /*
   * at this point all resources have been allocated and we are commited
   * to servicing this slot.
   *
   * dma = last location we told chip about
   * cur = current location
   * mlen = space in the mbuf we want
   * need = bytes to xfer in (decrs to zero)
   * fill = how much fill we need
   * tlen = how much data to transfer to this mbuf
   * cnt/bcode/count =
   *
   * 'needfill' not used after this point
   */

  dma = cur;		/* dma = last location we told chip about */
  need = roundup(mlen, sizeof(u_int32_t));
  fill = fill - (need - mlen);	/* note: may invalidate 'needfill' */

  for (tmp = m ; tmp != NULL && need > 0 ; tmp = tmp->m_next) {
    tlen = roundup(tmp->m_len, sizeof(u_int32_t)); /* m_len set by en_mget */
    data = mtod(tmp, u_int32_t *);

#ifdef EN_DEBUG
    printf("%s: rx%d: load mbuf %p, m_len=%d, m_data=%p, tlen=%d\n",
	sc->sc_dev.dv_xname, slot, tmp, tmp->m_len, tmp->m_data, tlen);
#endif

    /* copy data */
    if (EN_NORXDMA || !en_dma || tlen < EN_MINDMA) {
      datastop = (u_int32_t *)((u_char *) data + tlen);
      /* copy loop: preserve byte order!!!  use READDAT */
      while (data != datastop) {
	*data = EN_READDAT(sc, cur);
	data++;
	EN_WRAPADD(start, stop, cur, 4);
      }
      need -= tlen;
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: copied %d bytes (%d left)\n",
		sc->sc_dev.dv_xname, slot, vci, tlen, need);
#endif
      continue;
    }

    /* DMA data (check to see if we need to sync DRQ first) */
    if (dma != cur) {
      EN_DRQADD(sc, WORD_IDX(start,cur), vci, MIDDMA_JK, 0, 0, 0, 0);
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: drq_sync: advance pointer to %d\n",
		sc->sc_dev.dv_xname, slot, vci, cur);
#endif
    }

#if !defined(MIDWAY_ENIONLY)

    /*
     * the adaptec DMA engine is smart and handles everything for us.
     */

    if (sc->is_adaptec) {
      need -= tlen;
      EN_WRAPADD(start, stop, cur, tlen);
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: adp_dma %d bytes (%d left)\n",
		sc->sc_dev.dv_xname, slot, vci, tlen, need);
#endif
      end = (need == 0 && !fill) ? MID_DMA_END : 0;
      EN_DRQADD(sc, tlen, vci, 0, vtophys(data), mlen, slot, end);
      if (end)
	goto done;
      dma = cur;	/* update dma pointer */
      continue;
    }
#endif /* !MIDWAY_ENIONLY */

#if !defined(MIDWAY_ADPONLY)

    /*
     * the ENI DMA engine is not so smart and need more help from us
     */

    /* do we need to do a DMA op to align? */
    if (sc->alburst &&
      (needalign = (((uintptr_t) (void *) data) & sc->bestburstmask)) != 0) {
      cnt = sc->bestburstlen - needalign;
      if (cnt > tlen) {
	cnt = tlen;
	count = cnt / sizeof(u_int32_t);
	bcode = MIDDMA_WORD;
      } else {
	count = cnt / sizeof(u_int32_t);
	bcode = en_dmaplan[count].bcode;
	count = cnt >> en_dmaplan[count].divshift;
      }
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: al_dma %d bytes (%d left)\n",
		sc->sc_dev.dv_xname, slot, vci, cnt, need);
#endif
      tlen -= cnt;
      end = (need == 0 && !fill) ? MID_DMA_END : 0;
      EN_DRQADD(sc, count, vci, bcode, vtophys(data), mlen, slot, end);
      if (end)
	goto done;
      data = (u_int32_t *)((u_char *) data + cnt);
    }

    /* do we need a max-sized burst? */
    if (tlen >= sc->bestburstlen) {
      count = tlen >> sc->bestburstshift;
      cnt = count << sc->bestburstshift;
      bcode = sc->bestburstcode;
      need -= cnt;
      EN_WRAPADD(start, stop, cur, cnt);
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: best_dma %d bytes (%d left)\n",
		sc->sc_dev.dv_xname, slot, vci, cnt, need);
#endif
      tlen -= cnt;
      end = (need == 0 && !fill) ? MID_DMA_END : 0;
      EN_DRQADD(sc, count, vci, bcode, vtophys(data), mlen, slot, end);
      if (end)
	goto done;
      data = (u_int32_t *)((u_char *) data + cnt);
    }

    /* do we need to do a cleanup burst? */
    if (tlen) {
      count = tlen / sizeof(u_int32_t);
      bcode = en_dmaplan[count].bcode;
      count = tlen >> en_dmaplan[count].divshift;
      need -= tlen;
      EN_WRAPADD(start, stop, cur, tlen);
#ifdef EN_DEBUG
      printf("%s: rx%d: vci%d: cleanup_dma %d bytes (%d left)\n",
		sc->sc_dev.dv_xname, slot, vci, tlen, need);
#endif
      end = (need == 0 && !fill) ? MID_DMA_END : 0;
      EN_DRQADD(sc, count, vci, bcode, vtophys(data), mlen, slot, end);
      if (end)
	goto done;
    }

    dma = cur;		/* update dma pointer */

#endif /* !MIDWAY_ADPONLY */

  }

  /* skip the end */
  if (fill || dma != cur) {
#ifdef EN_DEBUG
    if (fill)
      printf("%s: rx%d: vci%d: skipping %d bytes of fill\n",
		sc->sc_dev.dv_xname, slot, vci, fill);
    else
      printf("%s: rx%d: vci%d: syncing chip from 0x%x to 0x%x [cur]\n",
		sc->sc_dev.dv_xname, slot, vci, dma, cur);
#endif
    EN_WRAPADD(start, stop, cur, fill);
    EN_DRQADD(sc, WORD_IDX(start,cur), vci, MIDDMA_JK, 0, mlen,
					slot, MID_DMA_END);
    /* dma = cur; */	/* not necessary since we are done */
  }

  /*
   * done, remove stuff we don't want to pass up:
   *   raw mode (boodi mode): pass everything up for later processing
   *   aal5: remove RBD
   *   aal0: remove RBD + cell header
   */

done:
  if (m) {
    if (!raw) {
      cnt = MID_RBD_SIZE;
      if (!aal5) cnt += MID_CHDR_SIZE;
      m->m_len -= cnt;				/* chop! */
      m->m_pkthdr.len -= cnt;
      m->m_data += cnt;
    }
    _IF_ENQUEUE(&sc->rxslot[slot].indma, m);
  }
  sc->rxslot[slot].cur = cur;		/* update master copy of 'cur' */

#ifdef EN_DEBUG
  printf("%s: rx%d: vci%d: DONE! cur now =0x%x\n",
	sc->sc_dev.dv_xname, slot, vci, cur);
#endif

  goto same_vci;	/* get next packet in this slot */
}

#ifdef EN_DDBHOOK
/*
 * functions we can call from ddb
 */

/*
 * en_dump: dump the state
 */

#define END_SWSL	0x00000040		/* swsl state */
#define END_DRQ		0x00000020		/* drq state */
#define END_DTQ		0x00000010		/* dtq state */
#define END_RX		0x00000008		/* rx state */
#define END_TX		0x00000004		/* tx state */
#define END_MREGS	0x00000002		/* registers */
#define END_STATS	0x00000001		/* dump stats */

#define END_BITS "\20\7SWSL\6DRQ\5DTQ\4RX\3TX\2MREGS\1STATS"

/* Do not staticize - meant for calling from DDB!
*/

/*
 * Dump driver/card state to the console (DDB helper, not static on purpose).
 *
 * unit  = unit number to dump, or -1 for all units
 * level = bitmask of END_* flags selecting which sections to print
 *
 * Always returns 0 so it can be invoked conveniently from ddb.
 */
int en_dump(unit, level)

int unit, level;

{
  struct en_softc *sc;
  int lcv, cnt, slot;
  u_int32_t ptr, reg;

#ifdef __FreeBSD__
  devclass_t dc;
  int maxunit;

  dc = devclass_find("en");
  if (dc == NULL) {
    printf("en_dump: can't find devclass!\n");
    return 0;
  }
  maxunit = devclass_get_maxunit(dc);
  for (lcv = 0 ; lcv < maxunit ; lcv++) {
    sc = devclass_get_softc(dc, lcv);
#else
  for (lcv = 0 ; lcv < en_cd.cd_ndevs ; lcv++) {
    sc = (struct en_softc *) en_cd.cd_devs[lcv];
#endif
    if (sc == NULL) continue;
    if (unit != -1 && unit != lcv)
      continue;

    printf("dumping device %s at level 0x%b\n", sc->sc_dev.dv_xname, level,
			END_BITS);

    if (sc->dtq_us == 0) {
      /*
       * NOTE(review): this message text appears to have been lost in
       * extraction (likely originally said the hardware is not attached /
       * initialized) -- confirm against repository history before editing.
       */
      printf("\n");
      continue;
    }

    if (level & END_STATS) {
      /* NOTE(review): "trasmit"/"unusal" typos below are in the original
       * log strings and are preserved byte-for-byte. */
      printf(" en_stats:\n");
      printf(" %d mfix (%d failed); %d/%d head/tail byte DMAs, %d flushes\n",
	   sc->mfix, sc->mfixfail, sc->headbyte, sc->tailbyte, sc->tailflush);
      printf(" %d rx dma overflow interrupts\n", sc->dmaovr);
      printf(" %d times we ran out of TX space and stalled\n",
							sc->txoutspace);
      printf(" %d times we ran out of DTQs\n", sc->txdtqout);
      printf(" %d times we launched a packet\n", sc->launch);
      printf(" %d times we launched without on-board header\n", sc->lheader);
      printf(" %d times we launched without on-board tail\n", sc->ltail);
      printf(" %d times we pulled the hw service list\n", sc->hwpull);
      printf(" %d times we pushed a vci on the sw service list\n", sc->swadd);
      printf(" %d times RX pulled an mbuf from Q that wasn't ours\n",
							 sc->rxqnotus);
      printf(" %d times RX pulled a good mbuf from Q\n", sc->rxqus);
      printf(" %d times we ran out of mbufs *and* DRQs\n", sc->rxoutboth);
      printf(" %d times we ran out of DRQs\n", sc->rxdrqout);
      printf(" %d trasmit packets dropped due to mbsize\n", sc->txmbovr);
      printf(" %d cells trashed due to turned off rxvc\n", sc->vtrash);
      printf(" %d cells trashed due to totally full buffer\n", sc->otrash);
      printf(" %d cells trashed due almost full buffer\n", sc->ttrash);
      printf(" %d rx mbuf allocation failures\n", sc->rxmbufout);
#ifdef NATM
      printf(" %d drops at natmintrq\n", natmintrq.ifq_drops);
#ifdef NATM_STAT
      printf(" natmintr so_rcv: ok/drop cnt: %d/%d, ok/drop bytes: %d/%d\n",
	natm_sookcnt, natm_sodropcnt, natm_sookbytes, natm_sodropbytes);
#endif
#endif
    }

    if (level & END_MREGS) {
      printf("mregs:\n");
      printf("resid = 0x%x\n", EN_READ(sc, MID_RESID));
      printf("interrupt status = 0x%b\n",
				(int)EN_READ(sc, MID_INTSTAT), MID_INTBITS);
      printf("interrupt enable = 0x%b\n",
				(int)EN_READ(sc, MID_INTENA), MID_INTBITS);
      printf("mcsr = 0x%b\n", (int)EN_READ(sc, MID_MAST_CSR), MID_MCSRBITS);
      printf("serv_write = [chip=%u] [us=%u]\n", EN_READ(sc, MID_SERV_WRITE),
			MID_SL_A2REG(sc->hwslistp));
      printf("dma addr = 0x%x\n", EN_READ(sc, MID_DMA_ADDR));
      printf("DRQ: chip[rd=0x%x,wr=0x%x], sc[chip=0x%x,us=0x%x]\n",
	MID_DRQ_REG2A(EN_READ(sc, MID_DMA_RDRX)),
	MID_DRQ_REG2A(EN_READ(sc, MID_DMA_WRRX)), sc->drq_chip, sc->drq_us);
      printf("DTQ: chip[rd=0x%x,wr=0x%x], sc[chip=0x%x,us=0x%x]\n",
	MID_DTQ_REG2A(EN_READ(sc, MID_DMA_RDTX)),
	MID_DTQ_REG2A(EN_READ(sc, MID_DMA_WRTX)), sc->dtq_chip, sc->dtq_us);

      printf(" unusal txspeeds: ");
      for (cnt = 0 ; cnt < MID_N_VC ; cnt++)
	if (sc->txspeed[cnt])
	  printf(" vci%d=0x%x", cnt, sc->txspeed[cnt]);
      printf("\n");

      printf(" rxvc slot mappings: ");
      for (cnt = 0 ; cnt < MID_N_VC ; cnt++)
	if (sc->rxvc2slot[cnt] != RX_NONE)
	  printf(" %d->%d", cnt, sc->rxvc2slot[cnt]);
      printf("\n");
    }

    if (level & END_TX) {
      printf("tx:\n");
      for (slot = 0 ; slot < EN_NTX; slot++) {
	printf("tx%d: start/stop/cur=0x%x/0x%x/0x%x [%d] ", slot,
	  sc->txslot[slot].start, sc->txslot[slot].stop, sc->txslot[slot].cur,
		(sc->txslot[slot].cur - sc->txslot[slot].start)/4);
	printf("mbsize=%d, bfree=%d\n", sc->txslot[slot].mbsize,
		sc->txslot[slot].bfree);
	printf("txhw: base_address=0x%x, size=%u, read=%u, descstart=%u\n",
	  (u_int)MIDX_BASE(EN_READ(sc, MIDX_PLACE(slot))),
	  MIDX_SZ(EN_READ(sc, MIDX_PLACE(slot))),
	  EN_READ(sc, MIDX_READPTR(slot)), EN_READ(sc, MIDX_DESCSTART(slot)));
      }
    }

    if (level & END_RX) {
      printf(" recv slots:\n");
      for (slot = 0 ; slot < sc->en_nrx; slot++) {
	printf("rx%d: vci=%d: start/stop/cur=0x%x/0x%x/0x%x ", slot,
	  sc->rxslot[slot].atm_vci, sc->rxslot[slot].start,
	  sc->rxslot[slot].stop, sc->rxslot[slot].cur);
	printf("mode=0x%x, atm_flags=0x%x, oth_flags=0x%x\n",
	  sc->rxslot[slot].mode, sc->rxslot[slot].atm_flags,
	  sc->rxslot[slot].oth_flags);
	printf("RXHW: mode=0x%x, DST_RP=0x%x, WP_ST_CNT=0x%x\n",
	  EN_READ(sc, MID_VC(sc->rxslot[slot].atm_vci)),
	  EN_READ(sc, MID_DST_RP(sc->rxslot[slot].atm_vci)),
	  EN_READ(sc, MID_WP_ST_CNT(sc->rxslot[slot].atm_vci)));
      }
    }

    if (level & END_DTQ) {
      printf(" dtq [need_dtqs=%d,dtq_free=%d]:\n",
					sc->need_dtqs, sc->dtq_free);
      ptr = sc->dtq_chip;
      while (ptr != sc->dtq_us) {
	reg = EN_READ(sc, ptr);
	printf("\t0x%x=[cnt=%d, chan=%d, end=%d, type=%d @ 0x%x]\n",
	    sc->dtq[MID_DTQ_A2REG(ptr)], MID_DMA_CNT(reg),
	    MID_DMA_TXCHAN(reg), (reg & MID_DMA_END) != 0,
	    MID_DMA_TYPE(reg), EN_READ(sc, ptr+4));
	EN_WRAPADD(MID_DTQOFF, MID_DTQEND, ptr, 8);
      }
    }

    if (level & END_DRQ) {
      printf(" drq [need_drqs=%d,drq_free=%d]:\n",
					sc->need_drqs, sc->drq_free);
      ptr = sc->drq_chip;
      while (ptr != sc->drq_us) {
	reg = EN_READ(sc, ptr);
	printf("\t0x%x=[cnt=%d, chan=%d, end=%d, type=%d @ 0x%x]\n",
	  sc->drq[MID_DRQ_A2REG(ptr)], MID_DMA_CNT(reg),
	  MID_DMA_RXVCI(reg), (reg & MID_DMA_END) != 0,
	  MID_DMA_TYPE(reg), EN_READ(sc, ptr+4));
	EN_WRAPADD(MID_DRQOFF, MID_DRQEND, ptr, 8);
      }
    }

    if (level & END_SWSL) {
      printf(" swslist [size=%d]: ", sc->swsl_size);
      for (cnt = sc->swsl_head ; cnt != sc->swsl_tail ;
			cnt = (cnt + 1) % MID_SL_N)
	printf("0x%x ", sc->swslist[cnt]);
      printf("\n");
    }
  }
  return(0);
}

/*
 * en_dumpmem: dump the memory
 */

/* Do not staticize - meant for calling from DDB!
*/ int en_dumpmem(unit, addr, len) int unit, addr, len; { struct en_softc *sc; u_int32_t reg; #ifdef __FreeBSD__ devclass_t dc; dc = devclass_find("en"); if (dc == NULL) { printf("en_dumpmem: can't find devclass!\n"); return 0; } sc = devclass_get_softc(dc, unit); #else if (unit < 0 || unit > en_cd.cd_ndevs || (sc = (struct en_softc *) en_cd.cd_devs[unit]) == NULL) { printf("invalid unit number: %d\n", unit); return(0); } #endif addr = addr & ~3; if (addr < MID_RAMOFF || addr + len*4 > MID_MAXOFF || len <= 0) { printf("invalid addr/len number: %d, %d\n", addr, len); return(0); } printf("dumping %d words starting at offset 0x%x\n", len, addr); while (len--) { reg = EN_READ(sc, addr); printf("mem[0x%x] = 0x%x\n", addr, reg); addr += 4; } return(0); } #endif Index: head/sys/dev/hifn/hifn7751.c =================================================================== --- head/sys/dev/hifn/hifn7751.c (revision 108465) +++ head/sys/dev/hifn/hifn7751.c (revision 108466) @@ -1,2637 +1,2638 @@ /* $FreeBSD$ */ /* $OpenBSD: hifn7751.c,v 1.120 2002/05/17 00:33:34 deraadt Exp $ */ /* * Invertex AEON / Hifn 7751 driver * Copyright (c) 1999 Invertex Inc. All rights reserved. * Copyright (c) 1999 Theo de Raadt * Copyright (c) 2000-2001 Network Security Technologies, Inc. * http://www.netsec.net * * This driver is based on a previous driver by Invertex, for which they * requested: Please send any comments, feedback, bug-fixes, or feature * requests to software@invertex.com. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. * */ #define HIFN_DEBUG /* * Driver for the Hifn 7751 encryption processor. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Prototypes and count for the pci_device structure */ static int hifn_probe(device_t); static int hifn_attach(device_t); static int hifn_detach(device_t); static int hifn_suspend(device_t); static int hifn_resume(device_t); static void hifn_shutdown(device_t); static device_method_t hifn_methods[] = { /* Device interface */ DEVMETHOD(device_probe, hifn_probe), DEVMETHOD(device_attach, hifn_attach), DEVMETHOD(device_detach, hifn_detach), DEVMETHOD(device_suspend, hifn_suspend), DEVMETHOD(device_resume, hifn_resume), DEVMETHOD(device_shutdown, hifn_shutdown), /* bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), { 0, 0 } }; static driver_t hifn_driver = { "hifn", hifn_methods, sizeof (struct hifn_softc) }; static devclass_t hifn_devclass; DRIVER_MODULE(hifn, pci, hifn_driver, hifn_devclass, 0, 0); MODULE_DEPEND(hifn, crypto, 1, 1, 1); static void hifn_reset_board(struct hifn_softc *, int); static void hifn_reset_puc(struct hifn_softc *); static void hifn_puc_wait(struct hifn_softc *); static int hifn_enable_crypto(struct hifn_softc *); static void hifn_set_retry(struct hifn_softc *sc); static void hifn_init_dma(struct hifn_softc *); static void hifn_init_pci_registers(struct hifn_softc *); static int hifn_sramsize(struct hifn_softc *); static int hifn_dramsize(struct hifn_softc *); static int hifn_ramtype(struct hifn_softc *); static void hifn_sessions(struct hifn_softc *); static void hifn_intr(void *); static u_int hifn_write_command(struct hifn_command *, u_int8_t *); static u_int32_t hifn_next_signature(u_int32_t a, u_int cnt); static int hifn_newsession(void *, u_int32_t *, struct cryptoini *); static int hifn_freesession(void *, u_int64_t); static int hifn_process(void *, struct 
cryptop *, int); static void hifn_callback(struct hifn_softc *, struct hifn_command *, u_int8_t *); static int hifn_crypto(struct hifn_softc *, struct hifn_command *, struct cryptop *, int); static int hifn_readramaddr(struct hifn_softc *, int, u_int8_t *); static int hifn_writeramaddr(struct hifn_softc *, int, u_int8_t *); static int hifn_dmamap_load_src(struct hifn_softc *, struct hifn_command *); static int hifn_dmamap_load_dst(struct hifn_softc *, struct hifn_command *); static int hifn_init_pubrng(struct hifn_softc *); static void hifn_rng(void *); static void hifn_tick(void *); static void hifn_abort(struct hifn_softc *); static void hifn_alloc_slot(struct hifn_softc *, int *, int *, int *, int *); static void hifn_write_reg_0(struct hifn_softc *, bus_size_t, u_int32_t); static void hifn_write_reg_1(struct hifn_softc *, bus_size_t, u_int32_t); static __inline__ u_int32_t READ_REG_0(struct hifn_softc *sc, bus_size_t reg) { u_int32_t v = bus_space_read_4(sc->sc_st0, sc->sc_sh0, reg); sc->sc_bar0_lastreg = (bus_size_t) -1; return (v); } #define WRITE_REG_0(sc, reg, val) hifn_write_reg_0(sc, reg, val) static __inline__ u_int32_t READ_REG_1(struct hifn_softc *sc, bus_size_t reg) { u_int32_t v = bus_space_read_4(sc->sc_st1, sc->sc_sh1, reg); sc->sc_bar1_lastreg = (bus_size_t) -1; return (v); } #define WRITE_REG_1(sc, reg, val) hifn_write_reg_1(sc, reg, val) #ifdef HIFN_DEBUG static int hifn_debug = 0; SYSCTL_INT(_debug, OID_AUTO, hifn, CTLFLAG_RW, &hifn_debug, 0, "Hifn driver debugging printfs"); #endif static struct hifn_stats hifnstats; SYSCTL_STRUCT(_kern, OID_AUTO, hifn_stats, CTLFLAG_RD, &hifnstats, hifn_stats, "Hifn driver statistics"); static int hifn_maxbatch = 2; /* XXX tune based on part+sys speed */ SYSCTL_INT(_kern, OID_AUTO, hifn_maxbatch, CTLFLAG_RW, &hifn_maxbatch, 0, "Hifn driver: max ops to batch w/o interrupt"); /* * Probe for a supported device. The PCI vendor and device * IDs are used to detect devices we know how to handle. 
*/ static int hifn_probe(device_t dev) { if (pci_get_vendor(dev) == PCI_VENDOR_INVERTEX && pci_get_device(dev) == PCI_PRODUCT_INVERTEX_AEON) return (0); if (pci_get_vendor(dev) == PCI_VENDOR_HIFN && (pci_get_device(dev) == PCI_PRODUCT_HIFN_7751 || pci_get_device(dev) == PCI_PRODUCT_HIFN_7951 || pci_get_device(dev) == PCI_PRODUCT_HIFN_7811)) return (0); if (pci_get_vendor(dev) == PCI_VENDOR_NETSEC && pci_get_device(dev) == PCI_PRODUCT_NETSEC_7751) return (0); return (ENXIO); } static void hifn_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *paddr = (bus_addr_t*) arg; *paddr = segs->ds_addr; } static const char* hifn_partname(struct hifn_softc *sc) { /* XXX sprintf numbers when not decoded */ switch (pci_get_vendor(sc->sc_dev)) { case PCI_VENDOR_HIFN: switch (pci_get_device(sc->sc_dev)) { case PCI_PRODUCT_HIFN_6500: return "Hifn 6500"; case PCI_PRODUCT_HIFN_7751: return "Hifn 7751"; case PCI_PRODUCT_HIFN_7811: return "Hifn 7811"; case PCI_PRODUCT_HIFN_7951: return "Hifn 7951"; } return "Hifn unknown-part"; case PCI_VENDOR_INVERTEX: switch (pci_get_device(sc->sc_dev)) { case PCI_PRODUCT_INVERTEX_AEON: return "Invertex AEON"; } return "Invertex unknown-part"; case PCI_VENDOR_NETSEC: switch (pci_get_device(sc->sc_dev)) { case PCI_PRODUCT_NETSEC_7751: return "NetSec 7751"; } return "NetSec unknown-part"; } return "Unknown-vendor unknown-part"; } /* * Attach an interface that successfully probed. */ static int hifn_attach(device_t dev) { struct hifn_softc *sc = device_get_softc(dev); u_int32_t cmd; caddr_t kva; int rseg, rid; char rbase; u_int16_t ena, rev; KASSERT(sc != NULL, ("hifn_attach: null software carrier!")); bzero(sc, sizeof (*sc)); sc->sc_dev = dev; mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "crypto driver", MTX_DEF); /* XXX handle power management */ /* * The 7951 has a random number generator and * public key support; note this. 
*/ if (pci_get_vendor(dev) == PCI_VENDOR_HIFN && pci_get_device(dev) == PCI_PRODUCT_HIFN_7951) sc->sc_flags = HIFN_HAS_RNG | HIFN_HAS_PUBLIC; /* * The 7811 has a random number generator and * we also note it's identity 'cuz of some quirks. */ if (pci_get_vendor(dev) == PCI_VENDOR_HIFN && pci_get_device(dev) == PCI_PRODUCT_HIFN_7811) sc->sc_flags |= HIFN_IS_7811 | HIFN_HAS_RNG; /* * Configure support for memory-mapped access to * registers and for DMA operations. */ #define PCIM_ENA (PCIM_CMD_MEMEN|PCIM_CMD_BUSMASTEREN) cmd = pci_read_config(dev, PCIR_COMMAND, 4); cmd |= PCIM_ENA; pci_write_config(dev, PCIR_COMMAND, cmd, 4); cmd = pci_read_config(dev, PCIR_COMMAND, 4); if ((cmd & PCIM_ENA) != PCIM_ENA) { device_printf(dev, "failed to enable %s\n", (cmd & PCIM_ENA) == 0 ? "memory mapping & bus mastering" : (cmd & PCIM_CMD_MEMEN) == 0 ? "memory mapping" : "bus mastering"); goto fail_pci; } #undef PCIM_ENA /* * Setup PCI resources. Note that we record the bus * tag and handle for each register mapping, this is * used by the READ_REG_0, WRITE_REG_0, READ_REG_1, * and WRITE_REG_1 macros throughout the driver. */ rid = HIFN_BAR0; sc->sc_bar0res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, ~0, 1, RF_ACTIVE); if (sc->sc_bar0res == NULL) { device_printf(dev, "cannot map bar%d register space\n", 0); goto fail_pci; } sc->sc_st0 = rman_get_bustag(sc->sc_bar0res); sc->sc_sh0 = rman_get_bushandle(sc->sc_bar0res); sc->sc_bar0_lastreg = (bus_size_t) -1; rid = HIFN_BAR1; sc->sc_bar1res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, ~0, 1, RF_ACTIVE); if (sc->sc_bar1res == NULL) { device_printf(dev, "cannot map bar%d register space\n", 1); goto fail_io0; } sc->sc_st1 = rman_get_bustag(sc->sc_bar1res); sc->sc_sh1 = rman_get_bushandle(sc->sc_bar1res); sc->sc_bar1_lastreg = (bus_size_t) -1; hifn_set_retry(sc); /* * Setup the area where the Hifn DMA's descriptors * and associated data structures. 
*/ if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment,boundary */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HIFN_MAX_DMALEN, /* maxsize */ MAX_SCATTER, /* nsegments */ HIFN_MAX_SEGLEN, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ &sc->sc_dmat)) { device_printf(dev, "cannot allocate DMA tag\n"); goto fail_io1; } if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &sc->sc_dmamap)) { device_printf(dev, "cannot create dma map\n"); bus_dma_tag_destroy(sc->sc_dmat); goto fail_io1; } if (bus_dmamem_alloc(sc->sc_dmat, (void**) &kva, BUS_DMA_NOWAIT, &sc->sc_dmamap)) { device_printf(dev, "cannot alloc dma buffer\n"); bus_dmamap_destroy(sc->sc_dmat, sc->sc_dmamap); bus_dma_tag_destroy(sc->sc_dmat); goto fail_io1; } if (bus_dmamap_load(sc->sc_dmat, sc->sc_dmamap, kva, sizeof (*sc->sc_dma), hifn_dmamap_cb, &sc->sc_dma_physaddr, BUS_DMA_NOWAIT)) { device_printf(dev, "cannot load dma map\n"); bus_dmamem_free(sc->sc_dmat, kva, sc->sc_dmamap); bus_dmamap_destroy(sc->sc_dmat, sc->sc_dmamap); bus_dma_tag_destroy(sc->sc_dmat); goto fail_io1; } sc->sc_dma = (struct hifn_dma *)kva; bzero(sc->sc_dma, sizeof(*sc->sc_dma)); KASSERT(sc->sc_st0 != NULL, ("hifn_attach: null bar0 tag!")); KASSERT(sc->sc_sh0 != NULL, ("hifn_attach: null bar0 handle!")); KASSERT(sc->sc_st1 != NULL, ("hifn_attach: null bar1 tag!")); KASSERT(sc->sc_sh1 != NULL, ("hifn_attach: null bar1 handle!")); /* * Reset the board and do the ``secret handshake'' * to enable the crypto support. Then complete the * initialization procedure by setting up the interrupt * and hooking in to the system crypto support so we'll * get used for system services like the crypto device, * IPsec, RNG device, etc. 
*/ hifn_reset_board(sc, 0); if (hifn_enable_crypto(sc) != 0) { device_printf(dev, "crypto enabling failed\n"); goto fail_mem; } hifn_reset_puc(sc); hifn_init_dma(sc); hifn_init_pci_registers(sc); if (hifn_ramtype(sc)) goto fail_mem; if (sc->sc_drammodel == 0) hifn_sramsize(sc); else hifn_dramsize(sc); /* * Workaround for NetSec 7751 rev A: half ram size because two * of the address lines were left floating */ if (pci_get_vendor(dev) == PCI_VENDOR_NETSEC && pci_get_device(dev) == PCI_PRODUCT_NETSEC_7751 && pci_get_revid(dev) == 0x61) /*XXX???*/ sc->sc_ramsize >>= 1; /* * Arrange the interrupt line. */ rid = 0; sc->sc_irq = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 1, RF_SHAREABLE|RF_ACTIVE); if (sc->sc_irq == NULL) { device_printf(dev, "could not map interrupt\n"); goto fail_mem; } /* * NB: Network code assumes we are blocked with splimp() * so make sure the IRQ is marked appropriately. */ if (bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET, hifn_intr, sc, &sc->sc_intrhand)) { device_printf(dev, "could not setup interrupt\n"); goto fail_intr2; } hifn_sessions(sc); /* * NB: Keep only the low 16 bits; this masks the chip id * from the 7951. */ rev = READ_REG_1(sc, HIFN_1_REVID) & 0xffff; rseg = sc->sc_ramsize / 1024; rbase = 'K'; if (sc->sc_ramsize >= (1024 * 1024)) { rbase = 'M'; rseg /= 1024; } device_printf(sc->sc_dev, "%s, rev %u, %d%cB %cram, %u sessions\n", hifn_partname(sc), rev, rseg, rbase, sc->sc_drammodel ? 
'd' : 's', sc->sc_maxses); sc->sc_cid = crypto_get_driverid(0); if (sc->sc_cid < 0) { device_printf(dev, "could not get crypto driver id\n"); goto fail_intr; } WRITE_REG_0(sc, HIFN_0_PUCNFG, READ_REG_0(sc, HIFN_0_PUCNFG) | HIFN_PUCNFG_CHIPID); ena = READ_REG_0(sc, HIFN_0_PUSTAT) & HIFN_PUSTAT_CHIPENA; switch (ena) { case HIFN_PUSTAT_ENA_2: crypto_register(sc->sc_cid, CRYPTO_3DES_CBC, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); crypto_register(sc->sc_cid, CRYPTO_ARC4, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); /*FALLTHROUGH*/ case HIFN_PUSTAT_ENA_1: crypto_register(sc->sc_cid, CRYPTO_MD5, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); crypto_register(sc->sc_cid, CRYPTO_SHA1, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); crypto_register(sc->sc_cid, CRYPTO_DES_CBC, 0, 0, hifn_newsession, hifn_freesession, hifn_process, sc); break; } bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); if (sc->sc_flags & (HIFN_HAS_PUBLIC | HIFN_HAS_RNG)) hifn_init_pubrng(sc); /* NB: 1 means the callout runs w/o Giant locked */ callout_init(&sc->sc_tickto, 1); callout_reset(&sc->sc_tickto, hz, hifn_tick, sc); return (0); fail_intr: bus_teardown_intr(dev, sc->sc_irq, sc->sc_intrhand); fail_intr2: /* XXX don't store rid */ bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); fail_mem: bus_dmamap_unload(sc->sc_dmat, sc->sc_dmamap); bus_dmamem_free(sc->sc_dmat, sc->sc_dma, sc->sc_dmamap); bus_dmamap_destroy(sc->sc_dmat, sc->sc_dmamap); bus_dma_tag_destroy(sc->sc_dmat); /* Turn off DMA polling */ WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); fail_io1: bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR1, sc->sc_bar1res); fail_io0: 
bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR0, sc->sc_bar0res); fail_pci: mtx_destroy(&sc->sc_mtx); return (ENXIO); } /* * Detach an interface that successfully probed. */ static int hifn_detach(device_t dev) { struct hifn_softc *sc = device_get_softc(dev); KASSERT(sc != NULL, ("hifn_detach: null software carrier!")); HIFN_LOCK(sc); /*XXX other resources */ callout_stop(&sc->sc_tickto); callout_stop(&sc->sc_rngto); /* Turn off DMA polling */ WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); crypto_unregister_all(sc->sc_cid); bus_generic_detach(dev); /*XXX should be no children, right? */ bus_teardown_intr(dev, sc->sc_irq, sc->sc_intrhand); /* XXX don't store rid */ bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); bus_dmamap_unload(sc->sc_dmat, sc->sc_dmamap); bus_dmamem_free(sc->sc_dmat, sc->sc_dma, sc->sc_dmamap); bus_dmamap_destroy(sc->sc_dmat, sc->sc_dmamap); bus_dma_tag_destroy(sc->sc_dmat); bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR1, sc->sc_bar1res); bus_release_resource(dev, SYS_RES_MEMORY, HIFN_BAR0, sc->sc_bar0res); HIFN_UNLOCK(sc); mtx_destroy(&sc->sc_mtx); return (0); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static void hifn_shutdown(device_t dev) { #ifdef notyet hifn_stop(device_get_softc(dev)); #endif } /* * Device suspend routine. Stop the interface and save some PCI * settings in case the BIOS doesn't restore them properly on * resume. 
*/
static int
hifn_suspend(device_t dev)
{
    struct hifn_softc *sc = device_get_softc(dev);
#ifdef notyet
    int i;

    hifn_stop(sc);
    /*
     * Save the BAR mappings, BIOS address, interrupt line, and
     * cache-line/latency settings so resume can restore them if
     * the BIOS does not.
     */
    for (i = 0; i < 5; i++)
        sc->saved_maps[i] = pci_read_config(dev, PCIR_MAPS + i * 4, 4);
    sc->saved_biosaddr = pci_read_config(dev, PCIR_BIOS, 4);
    sc->saved_intline = pci_read_config(dev, PCIR_INTLINE, 1);
    sc->saved_cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
    sc->saved_lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
#endif
    /* Only the suspended flag is maintained until the code above works. */
    sc->sc_suspended = 1;

    return (0);
}

/*
 * Device resume routine.  Restore some PCI settings in case the BIOS
 * doesn't, re-enable busmastering, and restart the interface if
 * appropriate.
 */
static int
hifn_resume(device_t dev)
{
    struct hifn_softc *sc = device_get_softc(dev);
#ifdef notyet
    int i;

    /* better way to do this? */
    for (i = 0; i < 5; i++)
        pci_write_config(dev, PCIR_MAPS + i * 4, sc->saved_maps[i], 4);
    pci_write_config(dev, PCIR_BIOS, sc->saved_biosaddr, 4);
    pci_write_config(dev, PCIR_INTLINE, sc->saved_intline, 1);
    pci_write_config(dev, PCIR_CACHELNSZ, sc->saved_cachelnsz, 1);
    pci_write_config(dev, PCIR_LATTIMER, sc->saved_lattimer, 1);

    /* reenable busmastering */
    pci_enable_busmaster(dev);
    pci_enable_io(dev, HIFN_RES);

    /* reinitialize interface if necessary */
    /*
     * NOTE(review): this "notyet" code appears copy-pasted from the
     * rl(4) driver -- neither "ifp" nor rl_init() is defined in this
     * driver; it would not compile if enabled.  Verify before enabling.
     */
    if (ifp->if_flags & IFF_UP)
        rl_init(sc);
#endif
    sc->sc_suspended = 0;

    return (0);
}

/*
 * Reset and enable the public-key unit and the RNG, as indicated by
 * sc_flags (HIFN_HAS_PUBLIC / HIFN_HAS_RNG), and start the periodic
 * RNG harvest callout.  Returns 0 on success, 1 if the public key
 * engine fails to come out of reset.
 */
static int
hifn_init_pubrng(struct hifn_softc *sc)
{
    u_int32_t r;
    int i;

    if ((sc->sc_flags & HIFN_IS_7811) == 0) {
        /* Reset 7951 public key/rng engine */
        WRITE_REG_1(sc, HIFN_1_PUB_RESET,
            READ_REG_1(sc, HIFN_1_PUB_RESET) | HIFN_PUBRST_RESET);

        /* Poll (up to 100ms) for the reset bit to self-clear. */
        for (i = 0; i < 100; i++) {
            DELAY(1000);
            if ((READ_REG_1(sc, HIFN_1_PUB_RESET) &
                HIFN_PUBRST_RESET) == 0)
                break;
        }

        if (i == 100) {
            device_printf(sc->sc_dev, "public key init failed\n");
            return (1);
        }
    }

    /* Enable the rng, if available */
    if (sc->sc_flags & HIFN_HAS_RNG) {
        if (sc->sc_flags & HIFN_IS_7811) {
            /*
             * 7811: if the RNG is already enabled, disable it
             * first so the new configuration takes effect.
             */
            r = READ_REG_1(sc, HIFN_1_7811_RNGENA);
            if (r & HIFN_7811_RNGENA_ENA) {
                r &= ~HIFN_7811_RNGENA_ENA;
                WRITE_REG_1(sc, HIFN_1_7811_RNGENA, r);
            }
            WRITE_REG_1(sc, HIFN_1_7811_RNGCFG,
                HIFN_7811_RNGCFG_DEFL);
            r |= HIFN_7811_RNGENA_ENA;
            WRITE_REG_1(sc, HIFN_1_7811_RNGENA, r);
        } else
            WRITE_REG_1(sc, HIFN_1_RNG_CONFIG,
                READ_REG_1(sc, HIFN_1_RNG_CONFIG) |
                HIFN_RNGCFG_ENA);

        /* First RNG read is discarded -- see hifn_rng(). */
        sc->sc_rngfirst = 1;
        /* Harvest at ~100Hz, or every tick if hz is slower. */
        if (hz >= 100)
            sc->sc_rnghz = hz / 100;
        else
            sc->sc_rnghz = 1;
        /* NB: 1 means the callout runs w/o Giant locked */
        callout_init(&sc->sc_rngto, 1);
        callout_reset(&sc->sc_rngto, sc->sc_rnghz, hifn_rng, sc);
    }

    /* Enable public key engine, if available */
    if (sc->sc_flags & HIFN_HAS_PUBLIC) {
        WRITE_REG_1(sc, HIFN_1_PUB_IEN, HIFN_PUBIEN_DONE);
        sc->sc_dmaier |= HIFN_DMAIER_PUBDONE;
        WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier);
    }

    return (0);
}

/*
 * Periodic callout: pull words from the hardware RNG and feed them to
 * random_harvest(), then reschedule itself at sc_rnghz.  The very first
 * read after enabling is discarded.  On a 7811 FIFO underflow the RNG
 * is abandoned (no reschedule).
 */
static void
hifn_rng(void *vsc)
{
#define RANDOM_BITS(n) (n)*sizeof (u_int32_t), (n)*sizeof (u_int32_t)*NBBY, 0
    struct hifn_softc *sc = vsc;
    u_int32_t sts, num[2];
    int i;

    if (sc->sc_flags & HIFN_IS_7811) {
        /* Drain up to five pairs of words per invocation. */
        for (i = 0; i < 5; i++) {
            sts = READ_REG_1(sc, HIFN_1_7811_RNGSTS);
            if (sts & HIFN_7811_RNGSTS_UFL) {
                device_printf(sc->sc_dev,
                    "RNG underflow: disabling\n");
                return;
            }
            if ((sts & HIFN_7811_RNGSTS_RDY) == 0)
                break;

            /*
             * There are at least two words in the RNG FIFO
             * at this point.
             */
            num[0] = READ_REG_1(sc, HIFN_1_7811_RNGDAT);
            num[1] = READ_REG_1(sc, HIFN_1_7811_RNGDAT);
            /* NB: discard first data read */
            if (sc->sc_rngfirst)
                sc->sc_rngfirst = 0;
            else
                random_harvest(num, RANDOM_BITS(2), RANDOM_PURE);
        }
    } else {
        num[0] = READ_REG_1(sc, HIFN_1_RNG_DATA);

        /* NB: discard first data read */
        if (sc->sc_rngfirst)
            sc->sc_rngfirst = 0;
        else
            random_harvest(num, RANDOM_BITS(1), RANDOM_PURE);
    }

    callout_reset(&sc->sc_rngto, sc->sc_rnghz, hifn_rng, sc);
#undef RANDOM_BITS
}

/*
 * Spin (up to 5000 x 1us) waiting for the processing unit's reset bit
 * to clear; complain if it never does.
 */
static void
hifn_puc_wait(struct hifn_softc *sc)
{
    int i;

    for (i = 5000; i > 0; i--) {
        DELAY(1);
        if (!(READ_REG_0(sc, HIFN_0_PUCTRL) & HIFN_PUCTRL_RESET))
            break;
    }
    if (!i)
        device_printf(sc->sc_dev, "proc unit did not reset\n");
}

/*
 * Reset the processing unit.
*/ static void hifn_reset_puc(struct hifn_softc *sc) { /* Reset processing unit */ WRITE_REG_0(sc, HIFN_0_PUCTRL, HIFN_PUCTRL_DMAENA); hifn_puc_wait(sc); } /* * Set the Retry and TRDY registers; note that we set them to * zero because the 7811 locks up when forced to retry (section * 3.6 of "Specification Update SU-0014-04". Not clear if we * should do this for all Hifn parts, but it doesn't seem to hurt. */ static void hifn_set_retry(struct hifn_softc *sc) { /* NB: RETRY only responds to 8-bit reads/writes */ pci_write_config(sc->sc_dev, HIFN_RETRY_TIMEOUT, 0, 1); pci_write_config(sc->sc_dev, HIFN_TRDY_TIMEOUT, 0, 4); } /* * Resets the board. Values in the regesters are left as is * from the reset (i.e. initial values are assigned elsewhere). */ static void hifn_reset_board(struct hifn_softc *sc, int full) { u_int32_t reg; /* * Set polling in the DMA configuration register to zero. 0x7 avoids * resetting the board and zeros out the other fields. */ WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); /* * Now that polling has been disabled, we have to wait 1 ms * before resetting the board. 
*/
    DELAY(1000);

    /* Reset the DMA unit */
    if (full) {
        WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MODE);
        DELAY(1000);
    } else {
        WRITE_REG_1(sc, HIFN_1_DMA_CNFG,
            HIFN_DMACNFG_MODE | HIFN_DMACNFG_MSTRESET);
        hifn_reset_puc(sc);
    }

    KASSERT(sc->sc_dma != NULL, ("hifn_reset_board: null DMA tag!"));
    bzero(sc->sc_dma, sizeof(*sc->sc_dma));

    /* Bring dma unit out of reset */
    WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET |
        HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE);

    hifn_puc_wait(sc);
    hifn_set_retry(sc);

    if (sc->sc_flags & HIFN_IS_7811) {
        /* Poll (up to 1000 x 1ms) for the 7811's CRAM init to finish. */
        for (reg = 0; reg < 1000; reg++) {
            if (READ_REG_1(sc, HIFN_1_7811_MIPSRST) &
                HIFN_MIPSRST_CRAMINIT)
                break;
            DELAY(1000);
        }
        if (reg == 1000)
            printf(": cram init timeout\n");
    }
}

/*
 * Advance the unlock-sequence generator by cnt steps.  Each step
 * computes the parity of the bits selected by mask 0x80080125 and
 * feeds it back into a left shift of 'a' (a shift-register-with-
 * feedback step).  Used by hifn_enable_crypto() to derive the values
 * written during the "secret handshake".
 */
static u_int32_t
hifn_next_signature(u_int32_t a, u_int cnt)
{
    int i;
    u_int32_t v;

    for (i = 0; i < cnt; i++) {

        /* get the parity */
        v = a & 0x80080125;
        v ^= v >> 16;
        v ^= v >> 8;
        v ^= v >> 4;
        v ^= v >> 2;
        v ^= v >> 1;

        a = (v & 1) ^ (a << 1);
    }

    return a;
}

/* Maps a PCI vendor/product pair to the 13-byte unlock key for that card. */
struct pci2id {
    u_short     pci_vendor;
    u_short     pci_prod;
    char        card_id[13];
};
static struct pci2id pci2id[] = {
    {
        PCI_VENDOR_HIFN,
        PCI_PRODUCT_HIFN_7951,
        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00 }
    }, {
        PCI_VENDOR_NETSEC,
        PCI_PRODUCT_NETSEC_7751,
        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00 }
    }, {
        PCI_VENDOR_INVERTEX,
        PCI_PRODUCT_INVERTEX_AEON,
        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00 }
    }, {
        PCI_VENDOR_HIFN,
        PCI_PRODUCT_HIFN_7811,
        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00 }
    }, {
        /*
         * Other vendors share this PCI ID as well, such as
         * http://www.powercrypt.com, and obviously they also
         * use the same key.
         */
        PCI_VENDOR_HIFN,
        PCI_PRODUCT_HIFN_7751,
        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
          0x00, 0x00, 0x00, 0x00 }
    },
};

/*
 * Checks to see if crypto is already enabled.  If crypto isn't enabled,
 * "hifn_enable_crypto" is called to enable it.
The check is important, * as enabling crypto twice will lock the board. */ static int hifn_enable_crypto(struct hifn_softc *sc) { u_int32_t dmacfg, ramcfg, encl, addr, i; char *offtbl = NULL; for (i = 0; i < sizeof(pci2id)/sizeof(pci2id[0]); i++) { if (pci2id[i].pci_vendor == pci_get_vendor(sc->sc_dev) && pci2id[i].pci_prod == pci_get_device(sc->sc_dev)) { offtbl = pci2id[i].card_id; break; } } if (offtbl == NULL) { device_printf(sc->sc_dev, "Unknown card!\n"); return (1); } ramcfg = READ_REG_0(sc, HIFN_0_PUCNFG); dmacfg = READ_REG_1(sc, HIFN_1_DMA_CNFG); /* * The RAM config register's encrypt level bit needs to be set before * every read performed on the encryption level register. */ WRITE_REG_0(sc, HIFN_0_PUCNFG, ramcfg | HIFN_PUCNFG_CHIPID); encl = READ_REG_0(sc, HIFN_0_PUSTAT) & HIFN_PUSTAT_CHIPENA; /* * Make sure we don't re-unlock. Two unlocks kills chip until the * next reboot. */ if (encl == HIFN_PUSTAT_ENA_1 || encl == HIFN_PUSTAT_ENA_2) { #ifdef HIFN_DEBUG if (hifn_debug) device_printf(sc->sc_dev, "Strong crypto already enabled!\n"); #endif goto report; } if (encl != 0 && encl != HIFN_PUSTAT_ENA_0) { #ifdef HIFN_DEBUG if (hifn_debug) device_printf(sc->sc_dev, "Unknown encryption level 0x%x\n", encl); #endif return 1; } WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_UNLOCK | HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE); DELAY(1000); addr = READ_REG_1(sc, HIFN_UNLOCK_SECRET1); DELAY(1000); WRITE_REG_1(sc, HIFN_UNLOCK_SECRET2, 0); DELAY(1000); for (i = 0; i <= 12; i++) { addr = hifn_next_signature(addr, offtbl[i] + 0x101); WRITE_REG_1(sc, HIFN_UNLOCK_SECRET2, addr); DELAY(1000); } WRITE_REG_0(sc, HIFN_0_PUCNFG, ramcfg | HIFN_PUCNFG_CHIPID); encl = READ_REG_0(sc, HIFN_0_PUSTAT) & HIFN_PUSTAT_CHIPENA; #ifdef HIFN_DEBUG if (hifn_debug) { if (encl != HIFN_PUSTAT_ENA_1 && encl != HIFN_PUSTAT_ENA_2) device_printf(sc->sc_dev, "Engine is permanently " "locked until next system reset!\n"); else device_printf(sc->sc_dev, "Engine enabled " 
"successfully!\n"); } #endif report: WRITE_REG_0(sc, HIFN_0_PUCNFG, ramcfg); WRITE_REG_1(sc, HIFN_1_DMA_CNFG, dmacfg); switch (encl) { case HIFN_PUSTAT_ENA_1: case HIFN_PUSTAT_ENA_2: break; case HIFN_PUSTAT_ENA_0: default: device_printf(sc->sc_dev, "disabled"); break; } return 0; } /* * Give initial values to the registers listed in the "Register Space" * section of the HIFN Software Development reference manual. */ static void hifn_init_pci_registers(struct hifn_softc *sc) { /* write fixed values needed by the Initialization registers */ WRITE_REG_0(sc, HIFN_0_PUCTRL, HIFN_PUCTRL_DMAENA); WRITE_REG_0(sc, HIFN_0_FIFOCNFG, HIFN_FIFOCNFG_THRESHOLD); WRITE_REG_0(sc, HIFN_0_PUIER, HIFN_PUIER_DSTOVER); /* write all 4 ring address registers */ WRITE_REG_1(sc, HIFN_1_DMA_CRAR, sc->sc_dma_physaddr + offsetof(struct hifn_dma, cmdr[0])); WRITE_REG_1(sc, HIFN_1_DMA_SRAR, sc->sc_dma_physaddr + offsetof(struct hifn_dma, srcr[0])); WRITE_REG_1(sc, HIFN_1_DMA_DRAR, sc->sc_dma_physaddr + offsetof(struct hifn_dma, dstr[0])); WRITE_REG_1(sc, HIFN_1_DMA_RRAR, sc->sc_dma_physaddr + offsetof(struct hifn_dma, resr[0])); DELAY(2000); /* write status register */ WRITE_REG_1(sc, HIFN_1_DMA_CSR, HIFN_DMACSR_D_CTRL_DIS | HIFN_DMACSR_R_CTRL_DIS | HIFN_DMACSR_S_CTRL_DIS | HIFN_DMACSR_C_CTRL_DIS | HIFN_DMACSR_D_ABORT | HIFN_DMACSR_D_DONE | HIFN_DMACSR_D_LAST | HIFN_DMACSR_D_WAIT | HIFN_DMACSR_D_OVER | HIFN_DMACSR_R_ABORT | HIFN_DMACSR_R_DONE | HIFN_DMACSR_R_LAST | HIFN_DMACSR_R_WAIT | HIFN_DMACSR_R_OVER | HIFN_DMACSR_S_ABORT | HIFN_DMACSR_S_DONE | HIFN_DMACSR_S_LAST | HIFN_DMACSR_S_WAIT | HIFN_DMACSR_C_ABORT | HIFN_DMACSR_C_DONE | HIFN_DMACSR_C_LAST | HIFN_DMACSR_C_WAIT | HIFN_DMACSR_ENGINE | ((sc->sc_flags & HIFN_HAS_PUBLIC) ? HIFN_DMACSR_PUBDONE : 0) | ((sc->sc_flags & HIFN_IS_7811) ? 
HIFN_DMACSR_ILLW | HIFN_DMACSR_ILLR : 0)); sc->sc_d_busy = sc->sc_r_busy = sc->sc_s_busy = sc->sc_c_busy = 0; sc->sc_dmaier |= HIFN_DMAIER_R_DONE | HIFN_DMAIER_C_ABORT | HIFN_DMAIER_D_OVER | HIFN_DMAIER_R_OVER | HIFN_DMAIER_S_ABORT | HIFN_DMAIER_D_ABORT | HIFN_DMAIER_R_ABORT | ((sc->sc_flags & HIFN_IS_7811) ? HIFN_DMAIER_ILLW | HIFN_DMAIER_ILLR : 0); sc->sc_dmaier &= ~HIFN_DMAIER_C_WAIT; WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier); WRITE_REG_0(sc, HIFN_0_PUCNFG, HIFN_PUCNFG_COMPSING | HIFN_PUCNFG_DRFR_128 | HIFN_PUCNFG_TCALLPHASES | HIFN_PUCNFG_TCDRVTOTEM | HIFN_PUCNFG_BUS32 | (sc->sc_drammodel ? HIFN_PUCNFG_DRAM : HIFN_PUCNFG_SRAM)); WRITE_REG_0(sc, HIFN_0_PUISR, HIFN_PUISR_DSTOVER); WRITE_REG_1(sc, HIFN_1_DMA_CNFG, HIFN_DMACNFG_MSTRESET | HIFN_DMACNFG_DMARESET | HIFN_DMACNFG_MODE | HIFN_DMACNFG_LAST | ((HIFN_POLL_FREQUENCY << 16 ) & HIFN_DMACNFG_POLLFREQ) | ((HIFN_POLL_SCALAR << 8) & HIFN_DMACNFG_POLLINVAL)); } /* * The maximum number of sessions supported by the card * is dependent on the amount of context ram, which * encryption algorithms are enabled, and how compression * is configured. This should be configured before this * routine is called. */ static void hifn_sessions(struct hifn_softc *sc) { u_int32_t pucnfg; int ctxsize; pucnfg = READ_REG_0(sc, HIFN_0_PUCNFG); if (pucnfg & HIFN_PUCNFG_COMPSING) { if (pucnfg & HIFN_PUCNFG_ENCCNFG) ctxsize = 128; else ctxsize = 512; sc->sc_maxses = 1 + ((sc->sc_ramsize - 32768) / ctxsize); } else sc->sc_maxses = sc->sc_ramsize / 16384; if (sc->sc_maxses > 2048) sc->sc_maxses = 2048; } /* * Determine ram type (sram or dram). Board should be just out of a reset * state when this is called. 
*/
static int
hifn_ramtype(struct hifn_softc *sc)
{
    u_int8_t data[8], dataexpect[8];
    int i;

    /*
     * Write-then-read a test pattern at offset 0: if the readback
     * does not match, the part has DRAM (sc_drammodel = 1), otherwise
     * SRAM is assumed.  Two complementary patterns (0x55, 0xaa) are
     * tried so stuck bits in either direction are detected.
     */
    for (i = 0; i < sizeof(data); i++)
        data[i] = dataexpect[i] = 0x55;
    if (hifn_writeramaddr(sc, 0, data))
        return (-1);
    if (hifn_readramaddr(sc, 0, data))
        return (-1);
    if (bcmp(data, dataexpect, sizeof(data)) != 0) {
        sc->sc_drammodel = 1;
        return (0);
    }

    for (i = 0; i < sizeof(data); i++)
        data[i] = dataexpect[i] = 0xaa;
    if (hifn_writeramaddr(sc, 0, data))
        return (-1);
    if (hifn_readramaddr(sc, 0, data))
        return (-1);
    if (bcmp(data, dataexpect, sizeof(data)) != 0) {
        sc->sc_drammodel = 1;
        return (0);
    }

    return (0);
}

#define HIFN_SRAM_MAX           (32 << 20)
#define HIFN_SRAM_STEP_SIZE     16384
#define HIFN_SRAM_GRANULARITY   (HIFN_SRAM_MAX / HIFN_SRAM_STEP_SIZE)

/*
 * Probe the size of the SRAM by writing each 16KB step's index into
 * the RAM (from the top down, so aliased addresses are overwritten by
 * lower ones), then reading back from the bottom up; sc_ramsize ends
 * at the last step that read back correctly.  Always returns 0.
 */
static int
hifn_sramsize(struct hifn_softc *sc)
{
    u_int32_t a;
    u_int8_t data[8];
    u_int8_t dataexpect[sizeof(data)];
    int32_t i;

    for (i = 0; i < sizeof(data); i++)
        data[i] = dataexpect[i] = i ^ 0x5a;

    /* Write high-to-low so aliases resolve to the lowest address. */
    for (i = HIFN_SRAM_GRANULARITY - 1; i >= 0; i--) {
        a = i * HIFN_SRAM_STEP_SIZE;
        /* NB: only sizeof(i) == 4 of the 8 pattern bytes get the index. */
        bcopy(&i, data, sizeof(i));
        hifn_writeramaddr(sc, a, data);
    }

    for (i = 0; i < HIFN_SRAM_GRANULARITY; i++) {
        a = i * HIFN_SRAM_STEP_SIZE;
        bcopy(&i, dataexpect, sizeof(i));
        if (hifn_readramaddr(sc, a, data) < 0)
            return (0);
        if (bcmp(data, dataexpect, sizeof(data)) != 0)
            return (0);
        sc->sc_ramsize = a + HIFN_SRAM_STEP_SIZE;
    }

    return (0);
}

/*
 * XXX For dram boards, one should really try all of the
 * HIFN_PUCNFG_DSZ_*'s.  This just assumes that PUCNFG
 * is already set up correctly.
 */
static int
hifn_dramsize(struct hifn_softc *sc)
{
	u_int32_t cnfg;

	/* Decode the DRAM size field of PUCNFG: 2^(field+18) bytes. */
	cnfg = READ_REG_0(sc, HIFN_0_PUCNFG) & HIFN_PUCNFG_DRAMMASK;
	sc->sc_ramsize = 1 << ((cnfg >> 13) + 18);
	return (0);
}

/*
 * Grab one slot from each of the four descriptor rings, wrapping via
 * the trailing JUMP descriptor when an index reaches the end of its
 * ring.  The chosen indices are returned through cmdp/srcp/dstp/resp
 * and the "k" (cleanup) indices are advanced to match.
 */
static void
hifn_alloc_slot(struct hifn_softc *sc, int *cmdp, int *srcp, int *dstp, int *resp)
{
	struct hifn_dma *dma = sc->sc_dma;

	if (dma->cmdi == HIFN_D_CMD_RSIZE) {
		dma->cmdi = 0;
		dma->cmdr[HIFN_D_CMD_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_CMDR_SYNC(sc, HIFN_D_CMD_RSIZE,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	}
	*cmdp = dma->cmdi++;
	dma->cmdk = dma->cmdi;

	if (dma->srci == HIFN_D_SRC_RSIZE) {
		dma->srci = 0;
		dma->srcr[HIFN_D_SRC_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_SRCR_SYNC(sc, HIFN_D_SRC_RSIZE,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	}
	*srcp = dma->srci++;
	dma->srck = dma->srci;

	if (dma->dsti == HIFN_D_DST_RSIZE) {
		dma->dsti = 0;
		dma->dstr[HIFN_D_DST_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_DSTR_SYNC(sc, HIFN_D_DST_RSIZE,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	}
	*dstp = dma->dsti++;
	dma->dstk = dma->dsti;

	if (dma->resi == HIFN_D_RES_RSIZE) {
		dma->resi = 0;
		dma->resr[HIFN_D_RES_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_RESR_SYNC(sc, HIFN_D_RES_RSIZE,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	}
	*resp = dma->resi++;
	dma->resk = dma->resi;
}

/*
 * Write 8 bytes (data) to external RAM at addr using a "write RAM"
 * (3 << 13) base command and the scratch test_src buffer in the DMA
 * descriptor area.  Returns 0 on success, -1 if the result descriptor
 * never completed.
 */
static int
hifn_writeramaddr(struct hifn_softc *sc, int addr, u_int8_t *data)
{
	struct hifn_dma *dma = sc->sc_dma;
	hifn_base_command_t wc;
	const u_int32_t masks = HIFN_D_VALID | HIFN_D_LAST | HIFN_D_MASKDONEIRQ;
	int r, cmdi, resi, srci, dsti;

	/* addr is split: high bits in session_num, low 14 in dest count. */
	wc.masks = htole16(3 << 13);
	wc.session_num = htole16(addr >> 14);
	wc.total_source_count = htole16(8);
	wc.total_dest_count = htole16(addr & 0x3fff);

	hifn_alloc_slot(sc, &cmdi, &srci, &dsti, &resi);

	WRITE_REG_1(sc, HIFN_1_DMA_CSR,
	    HIFN_DMACSR_C_CTRL_ENA | HIFN_DMACSR_S_CTRL_ENA |
	    HIFN_DMACSR_D_CTRL_ENA | HIFN_DMACSR_R_CTRL_ENA);

	/* build write command */
	bzero(dma->command_bufs[cmdi], HIFN_MAX_COMMAND);
	*(hifn_base_command_t *)dma->command_bufs[cmdi] = wc;
	bcopy(data, &dma->test_src, sizeof(dma->test_src));

	dma->srcr[srci].p = htole32(sc->sc_dma_physaddr
	    + offsetof(struct hifn_dma, test_src));
	dma->dstr[dsti].p = htole32(sc->sc_dma_physaddr
	    + offsetof(struct hifn_dma, test_dst));

	dma->cmdr[cmdi].l = htole32(16 | masks);
	dma->srcr[srci].l = htole32(8 | masks);
	dma->dstr[dsti].l = htole32(4 | masks);
	dma->resr[resi].l = htole32(4 | masks);

	bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/* Poll (up to 10000 x 10us) for the result descriptor to complete. */
	for (r = 10000; r >= 0; r--) {
		DELAY(10);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if ((dma->resr[resi].l & htole32(HIFN_D_VALID)) == 0)
			break;
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	}

	if (r == 0) {
		device_printf(sc->sc_dev, "writeramaddr -- "
		    "result[%d](addr %d) still valid\n", resi, addr);
		r = -1;
		/*
		 * NOTE(review): this early return skips the DMA-disable
		 * write below, unlike hifn_readramaddr() which falls
		 * through -- verify this asymmetry is intentional.
		 */
		return (-1);
	} else
		r = 0;

	WRITE_REG_1(sc, HIFN_1_DMA_CSR,
	    HIFN_DMACSR_C_CTRL_DIS | HIFN_DMACSR_S_CTRL_DIS |
	    HIFN_DMACSR_D_CTRL_DIS | HIFN_DMACSR_R_CTRL_DIS);

	return (r);
}

/*
 * Read 8 bytes from external RAM at addr into data, using a "read RAM"
 * (2 << 13) base command and the scratch test_src/test_dst buffers in
 * the DMA descriptor area.  Returns 0 on success, -1 if the result
 * descriptor never completed.
 */
static int
hifn_readramaddr(struct hifn_softc *sc, int addr, u_int8_t *data)
{
	struct hifn_dma *dma = sc->sc_dma;
	hifn_base_command_t rc;
	const u_int32_t masks = HIFN_D_VALID | HIFN_D_LAST | HIFN_D_MASKDONEIRQ;
	int r, cmdi, srci, dsti, resi;

	/* addr split: high bits in session_num, low 14 in source count. */
	rc.masks = htole16(2 << 13);
	rc.session_num = htole16(addr >> 14);
	rc.total_source_count = htole16(addr & 0x3fff);
	rc.total_dest_count = htole16(8);

	hifn_alloc_slot(sc, &cmdi, &srci, &dsti, &resi);

	WRITE_REG_1(sc, HIFN_1_DMA_CSR,
	    HIFN_DMACSR_C_CTRL_ENA | HIFN_DMACSR_S_CTRL_ENA |
	    HIFN_DMACSR_D_CTRL_ENA | HIFN_DMACSR_R_CTRL_ENA);

	bzero(dma->command_bufs[cmdi], HIFN_MAX_COMMAND);
	*(hifn_base_command_t *)dma->command_bufs[cmdi] = rc;

	dma->srcr[srci].p = htole32(sc->sc_dma_physaddr +
	    offsetof(struct hifn_dma, test_src));
	dma->test_src = 0;
	dma->dstr[dsti].p = htole32(sc->sc_dma_physaddr +
	    offsetof(struct hifn_dma, test_dst));
	dma->test_dst = 0;
	dma->cmdr[cmdi].l = htole32(8 | masks);
	dma->srcr[srci].l = htole32(8 | masks);
	dma->dstr[dsti].l = htole32(8 | masks);
	dma->resr[resi].l = htole32(HIFN_MAX_RESULT | masks);

	bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/* Poll (up to 10000 x 10us) for the result descriptor to complete. */
	for (r = 10000; r >= 0; r--) {
		DELAY(10);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if ((dma->resr[resi].l & htole32(HIFN_D_VALID)) == 0)
			break;
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	}

	if (r == 0) {
		device_printf(sc->sc_dev, "readramaddr -- "
		    "result[%d](addr %d) still valid\n", resi, addr);
		r = -1;
	} else {
		r = 0;
		bcopy(&dma->test_dst, data, sizeof(dma->test_dst));
	}

	WRITE_REG_1(sc, HIFN_1_DMA_CSR,
	    HIFN_DMACSR_C_CTRL_DIS | HIFN_DMACSR_S_CTRL_DIS |
	    HIFN_DMACSR_D_CTRL_DIS | HIFN_DMACSR_R_CTRL_DIS);

	return (r);
}

/*
 * Initialize the descriptor rings.
 */
static void
hifn_init_dma(struct hifn_softc *sc)
{
	struct hifn_dma *dma = sc->sc_dma;
	int i;

	hifn_set_retry(sc);

	/* initialize static pointer values */
	for (i = 0; i < HIFN_D_CMD_RSIZE; i++)
		dma->cmdr[i].p = htole32(sc->sc_dma_physaddr +
		    offsetof(struct hifn_dma, command_bufs[i][0]));
	for (i = 0; i < HIFN_D_RES_RSIZE; i++)
		dma->resr[i].p = htole32(sc->sc_dma_physaddr +
		    offsetof(struct hifn_dma, result_bufs[i][0]));

	/* Trailing JUMP descriptors point each ring back to its start. */
	dma->cmdr[HIFN_D_CMD_RSIZE].p =
	    htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, cmdr[0]));
	dma->srcr[HIFN_D_SRC_RSIZE].p =
	    htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, srcr[0]));
	dma->dstr[HIFN_D_DST_RSIZE].p =
	    htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, dstr[0]));
	dma->resr[HIFN_D_RES_RSIZE].p =
	    htole32(sc->sc_dma_physaddr + offsetof(struct hifn_dma, resr[0]));

	dma->cmdu = dma->srcu = dma->dstu = dma->resu = 0;
	dma->cmdi = dma->srci = dma->dsti = dma->resi = 0;
	dma->cmdk = dma->srck = dma->dstk = dma->resk = 0;
}

/*
 * Writes out the raw command buffer space.
 * Returns the
 * command buffer size.
 */
static u_int
hifn_write_command(struct hifn_command *cmd, u_int8_t *buf)
{
#define	MIN(a,b)	((a)<(b)?(a):(b))
	u_int8_t *buf_pos;
	hifn_base_command_t *base_cmd;
	hifn_mac_command_t *mac_cmd;
	hifn_crypt_command_t *cry_cmd;
	int using_mac, using_crypt, len;
	u_int32_t dlen, slen;

	buf_pos = buf;
	using_mac = cmd->base_masks & HIFN_BASE_CMD_MAC;
	using_crypt = cmd->base_masks & HIFN_BASE_CMD_CRYPT;

	/* Base command: totals split into low 16 bits plus high nibbles. */
	base_cmd = (hifn_base_command_t *)buf_pos;
	base_cmd->masks = htole16(cmd->base_masks);
	slen = cmd->src_mapsize;
	if (cmd->sloplen)
		/* Slop tail is replaced by one u_int32_t staged in the ring. */
		dlen = cmd->dst_mapsize - cmd->sloplen + sizeof(u_int32_t);
	else
		dlen = cmd->dst_mapsize;
	base_cmd->total_source_count = htole16(slen & HIFN_BASE_CMD_LENMASK_LO);
	base_cmd->total_dest_count = htole16(dlen & HIFN_BASE_CMD_LENMASK_LO);
	dlen >>= 16;
	slen >>= 16;
	base_cmd->session_num = htole16(cmd->session_num |
	    ((slen << HIFN_BASE_CMD_SRCLEN_S) & HIFN_BASE_CMD_SRCLEN_M) |
	    ((dlen << HIFN_BASE_CMD_DSTLEN_S) & HIFN_BASE_CMD_DSTLEN_M));
	buf_pos += sizeof(hifn_base_command_t);

	if (using_mac) {
		mac_cmd = (hifn_mac_command_t *)buf_pos;
		dlen = cmd->maccrd->crd_len;
		mac_cmd->source_count = htole16(dlen & 0xffff);
		dlen >>= 16;
		mac_cmd->masks = htole16(cmd->mac_masks |
		    ((dlen << HIFN_MAC_CMD_SRCLEN_S) & HIFN_MAC_CMD_SRCLEN_M));
		mac_cmd->header_skip = htole16(cmd->maccrd->crd_skip);
		mac_cmd->reserved = 0;
		buf_pos += sizeof(hifn_mac_command_t);
	}

	if (using_crypt) {
		cry_cmd = (hifn_crypt_command_t *)buf_pos;
		dlen = cmd->enccrd->crd_len;
		cry_cmd->source_count = htole16(dlen & 0xffff);
		dlen >>= 16;
		cry_cmd->masks = htole16(cmd->cry_masks |
		    ((dlen << HIFN_CRYPT_CMD_SRCLEN_S) & HIFN_CRYPT_CMD_SRCLEN_M));
		cry_cmd->header_skip = htole16(cmd->enccrd->crd_skip);
		cry_cmd->reserved = 0;
		buf_pos += sizeof(hifn_crypt_command_t);
	}

	/* Optional new MAC key follows the command structures. */
	if (using_mac && cmd->mac_masks & HIFN_MAC_CMD_NEW_KEY) {
		bcopy(cmd->mac, buf_pos, HIFN_MAC_KEY_LENGTH);
		buf_pos += HIFN_MAC_KEY_LENGTH;
	}

	if (using_crypt && cmd->cry_masks & HIFN_CRYPT_CMD_NEW_KEY) {
		switch (cmd->cry_masks & HIFN_CRYPT_CMD_ALG_MASK) {
		case HIFN_CRYPT_CMD_ALG_3DES:
			bcopy(cmd->ck, buf_pos, HIFN_3DES_KEY_LENGTH);
			buf_pos += HIFN_3DES_KEY_LENGTH;
			break;
		case HIFN_CRYPT_CMD_ALG_DES:
			/*
			 * NOTE(review): copies HIFN_DES_KEY_LENGTH bytes
			 * but advances by cklen -- equivalent only when
			 * cklen == HIFN_DES_KEY_LENGTH; confirm.
			 */
			bcopy(cmd->ck, buf_pos, HIFN_DES_KEY_LENGTH);
			buf_pos += cmd->cklen;
			break;
		case HIFN_CRYPT_CMD_ALG_RC4:
			/* Replicate the key to fill the 256-byte RC4 area. */
			len = 256;
			do {
				int clen;

				clen = MIN(cmd->cklen, len);
				bcopy(cmd->ck, buf_pos, clen);
				len -= clen;
				buf_pos += clen;
			} while (len > 0);
			bzero(buf_pos, 4);
			buf_pos += 4;
			break;
		}
	}

	if (using_crypt && cmd->cry_masks & HIFN_CRYPT_CMD_NEW_IV) {
		bcopy(cmd->iv, buf_pos, HIFN_IV_LENGTH);
		buf_pos += HIFN_IV_LENGTH;
	}

	/* Pure pass-through commands still need 8 bytes of padding. */
	if ((cmd->base_masks & (HIFN_BASE_CMD_MAC|HIFN_BASE_CMD_CRYPT)) == 0) {
		bzero(buf_pos, 8);
		buf_pos += 8;
	}

	return (buf_pos - buf);
#undef MIN
}

/*
 * Check that all segments of a DMA operand are longword (4-byte)
 * aligned; all but the last segment must also have a length that is
 * a multiple of 4.  Returns 1 if aligned, 0 otherwise.
 */
static int
hifn_dmamap_aligned(struct hifn_operand *op)
{
	int i;

	for (i = 0; i < op->nsegs; i++) {
		if (op->segs[i].ds_addr & 3)
			return (0);
		if ((i != (op->nsegs - 1)) && (op->segs[i].ds_len & 3))
			return (0);
	}
	return (1);
}

/*
 * Load the destination operand's segments into the dst descriptor
 * ring.  When sloplen is non-zero the final (unaligned) bytes are
 * redirected into the per-command slop word in the DMA area.
 */
static int
hifn_dmamap_load_dst(struct hifn_softc *sc, struct hifn_command *cmd)
{
	struct hifn_dma *dma = sc->sc_dma;
	struct hifn_operand *dst = &cmd->dst;
	u_int32_t p, l;
	int idx, used = 0, i;

	idx = dma->dsti;
	for (i = 0; i < dst->nsegs - 1; i++) {
		dma->dstr[idx].p = htole32(dst->segs[i].ds_addr);
		dma->dstr[idx].l = htole32(HIFN_D_VALID |
		    HIFN_D_MASKDONEIRQ | dst->segs[i].ds_len);
		HIFN_DSTR_SYNC(sc, idx,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		used++;

		if (++idx == HIFN_D_DST_RSIZE) {
			dma->dstr[idx].l = htole32(HIFN_D_VALID |
			    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
			HIFN_DSTR_SYNC(sc, idx,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			idx = 0;
		}
	}

	if (cmd->sloplen == 0) {
		p = dst->segs[i].ds_addr;
		l = HIFN_D_VALID | HIFN_D_MASKDONEIRQ | HIFN_D_LAST |
		    dst->segs[i].ds_len;
	} else {
		p = sc->sc_dma_physaddr +
		    offsetof(struct hifn_dma, slop[cmd->slopidx]);
		l = HIFN_D_VALID | HIFN_D_MASKDONEIRQ | HIFN_D_LAST |
		    sizeof(u_int32_t);

		if ((dst->segs[i].ds_len - cmd->sloplen) != 0) {
			dma->dstr[idx].p =
			    htole32(dst->segs[i].ds_addr);
			dma->dstr[idx].l = htole32(HIFN_D_VALID |
			    HIFN_D_MASKDONEIRQ |
			    (dst->segs[i].ds_len - cmd->sloplen));
			HIFN_DSTR_SYNC(sc, idx,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			used++;

			if (++idx == HIFN_D_DST_RSIZE) {
				dma->dstr[idx].l = htole32(HIFN_D_VALID |
				    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
				HIFN_DSTR_SYNC(sc, idx,
				    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
				idx = 0;
			}
		}
	}

	/* Final (LAST-marked) destination descriptor. */
	dma->dstr[idx].p = htole32(p);
	dma->dstr[idx].l = htole32(l);
	HIFN_DSTR_SYNC(sc, idx, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	used++;

	if (++idx == HIFN_D_DST_RSIZE) {
		dma->dstr[idx].l = htole32(HIFN_D_VALID | HIFN_D_JUMP |
		    HIFN_D_MASKDONEIRQ);
		HIFN_DSTR_SYNC(sc, idx,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		idx = 0;
	}

	dma->dsti = idx;
	dma->dstu += used;
	return (idx);
}

/*
 * Load the source operand's segments into the src descriptor ring;
 * the last segment is marked HIFN_D_LAST.  Returns the next free
 * source ring index.
 */
static int
hifn_dmamap_load_src(struct hifn_softc *sc, struct hifn_command *cmd)
{
	struct hifn_dma *dma = sc->sc_dma;
	struct hifn_operand *src = &cmd->src;
	int idx, i;
	u_int32_t last = 0;

	idx = dma->srci;
	for (i = 0; i < src->nsegs; i++) {
		if (i == src->nsegs - 1)
			last = HIFN_D_LAST;

		dma->srcr[idx].p = htole32(src->segs[i].ds_addr);
		dma->srcr[idx].l = htole32(src->segs[i].ds_len |
		    HIFN_D_VALID | HIFN_D_MASKDONEIRQ | last);
		HIFN_SRCR_SYNC(sc, idx,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);

		if (++idx == HIFN_D_SRC_RSIZE) {
			dma->srcr[idx].l = htole32(HIFN_D_VALID |
			    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
			HIFN_SRCR_SYNC(sc, HIFN_D_SRC_RSIZE,
			    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
			idx = 0;
		}
	}
	dma->srci = idx;
	dma->srcu += src->nsegs;
	return (idx);
}

/*
 * bus_dmamap_load callback: record the segment list and total mapped
 * size into the hifn_operand.
 */
static void
hifn_op_cb(void* arg, bus_dma_segment_t *seg, int nsegs, bus_size_t mapsize, int error)
{
	struct hifn_operand *op = arg;

	KASSERT(nsegs <= MAX_SCATTER,
	    ("hifn_op_cb: too many DMA segments (%u > %u) "
	     "returned when mapping operand", nsegs, MAX_SCATTER));
	op->mapsize = mapsize;
	op->nsegs = nsegs;
	bcopy(seg, op->segs, nsegs * sizeof (seg[0]));
}

/*
 * Dispatch one crypto operation to the card: map the source (and, if
 * needed, a realigned destination), then post command, source, result
 * and destination descriptors.  Returns 0 on success, ERESTART when
 * ring/mbuf resources are exhausted (caller requeues), or an errno.
 */
static int
hifn_crypto(struct hifn_softc *sc, struct hifn_command *cmd,
    struct cryptop *crp, int hint)
{
	struct hifn_dma *dma = sc->sc_dma;
	u_int32_t cmdlen;
	int cmdi, resi, err = 0;

	/*
	 * need 1 cmd, and 1 res
	 *
	 * NB: check this first since it's easy.
	 */
	if ((dma->cmdu + 1) > HIFN_D_CMD_RSIZE ||
	    (dma->resu + 1) > HIFN_D_RES_RSIZE) {
#ifdef HIFN_DEBUG
		if (hifn_debug) {
			device_printf(sc->sc_dev,
			    "cmd/result exhaustion, cmdu %u resu %u\n",
			    dma->cmdu, dma->resu);
		}
#endif
		hifnstats.hst_nomem_cr++;
		return (ERESTART);
	}

	if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &cmd->src_map)) {
		hifnstats.hst_nomem_map++;
		return (ENOMEM);
	}

	if (crp->crp_flags & CRYPTO_F_IMBUF) {
		if (bus_dmamap_load_mbuf(sc->sc_dmat, cmd->src_map,
		    cmd->src_m, hifn_op_cb, &cmd->src, BUS_DMA_NOWAIT)) {
			hifnstats.hst_nomem_load++;
			err = ENOMEM;
			goto err_srcmap1;
		}
	} else if (crp->crp_flags & CRYPTO_F_IOV) {
		if (bus_dmamap_load_uio(sc->sc_dmat, cmd->src_map,
		    cmd->src_io, hifn_op_cb, &cmd->src, BUS_DMA_NOWAIT)) {
			hifnstats.hst_nomem_load++;
			err = ENOMEM;
			goto err_srcmap1;
		}
	} else {
		err = EINVAL;
		goto err_srcmap1;
	}

	if (hifn_dmamap_aligned(&cmd->src)) {
		/* Aligned: destination shares the source mapping. */
		cmd->sloplen = cmd->src_mapsize & 3;
		cmd->dst = cmd->src;
	} else {
		if (crp->crp_flags & CRYPTO_F_IOV) {
			err = EINVAL;
			goto err_srcmap;
		} else if (crp->crp_flags & CRYPTO_F_IMBUF) {
			int totlen, len;
			struct mbuf *m, *m0, *mlast;

			KASSERT(cmd->dst_m == cmd->src_m,
			    ("hifn_crypto: dst_m initialized improperly"));
			hifnstats.hst_unaligned++;
			/*
			 * Source is not aligned on a longword boundary.
			 * Copy the data to insure alignment.  If we fail
			 * to allocate mbufs or clusters while doing this
			 * we return ERESTART so the operation is requeued
			 * at the crypto later, but only if there are
			 * ops already posted to the hardware; otherwise we
			 * have no guarantee that we'll be re-entered.
*/ totlen = cmd->src_mapsize; if (cmd->src_m->m_flags & M_PKTHDR) { len = MHLEN; MGETHDR(m0, M_DONTWAIT, MT_DATA); + if (m0 && !m_dup_pkthdr(m0, cmd->src_m, M_DONTWAIT)) { + m_free(m0); + m0 = NULL; + } } else { len = MLEN; MGET(m0, M_DONTWAIT, MT_DATA); } if (m0 == NULL) { hifnstats.hst_nomem_mbuf++; err = dma->cmdu ? ERESTART : ENOMEM; goto err_srcmap; - } - if (len == MHLEN) { - M_COPY_PKTHDR(m0, cmd->src_m); } if (totlen >= MINCLSIZE) { MCLGET(m0, M_DONTWAIT); if ((m0->m_flags & M_EXT) == 0) { hifnstats.hst_nomem_mcl++; err = dma->cmdu ? ERESTART : ENOMEM; m_freem(m0); goto err_srcmap; } len = MCLBYTES; } totlen -= len; m0->m_pkthdr.len = m0->m_len = len; mlast = m0; while (totlen > 0) { MGET(m, M_DONTWAIT, MT_DATA); if (m == NULL) { hifnstats.hst_nomem_mbuf++; err = dma->cmdu ? ERESTART : ENOMEM; m_freem(m0); goto err_srcmap; } len = MLEN; if (totlen >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { hifnstats.hst_nomem_mcl++; err = dma->cmdu ? ERESTART : ENOMEM; mlast->m_next = m; m_freem(m0); goto err_srcmap; } len = MCLBYTES; } m->m_len = len; m0->m_pkthdr.len += len; totlen -= len; mlast->m_next = m; mlast = m; } cmd->dst_m = m0; } } if (cmd->dst_map == NULL) { if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &cmd->dst_map)) { hifnstats.hst_nomem_map++; err = ENOMEM; goto err_srcmap; } if (crp->crp_flags & CRYPTO_F_IMBUF) { if (bus_dmamap_load_mbuf(sc->sc_dmat, cmd->dst_map, cmd->dst_m, hifn_op_cb, &cmd->dst, BUS_DMA_NOWAIT)) { hifnstats.hst_nomem_map++; err = ENOMEM; goto err_dstmap1; } } else if (crp->crp_flags & CRYPTO_F_IOV) { if (bus_dmamap_load_uio(sc->sc_dmat, cmd->dst_map, cmd->dst_io, hifn_op_cb, &cmd->dst, BUS_DMA_NOWAIT)) { hifnstats.hst_nomem_load++; err = ENOMEM; goto err_dstmap1; } } } #ifdef HIFN_DEBUG if (hifn_debug) { device_printf(sc->sc_dev, "Entering cmd: stat %8x ien %8x u %d/%d/%d/%d n %d/%d\n", READ_REG_1(sc, HIFN_1_DMA_CSR), READ_REG_1(sc, HIFN_1_DMA_IER), dma->cmdu, dma->srcu, dma->dstu, dma->resu, 
		    cmd->src_nsegs, cmd->dst_nsegs);
	}
#endif

	if (cmd->src_map == cmd->dst_map) {
		bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
		    BUS_DMASYNC_PREWRITE|BUS_DMASYNC_PREREAD);
	} else {
		bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
		    BUS_DMASYNC_PREWRITE);
		bus_dmamap_sync(sc->sc_dmat, cmd->dst_map,
		    BUS_DMASYNC_PREREAD);
	}

	/*
	 * need N src, and N dst
	 */
	if ((dma->srcu + cmd->src_nsegs) > HIFN_D_SRC_RSIZE ||
	    (dma->dstu + cmd->dst_nsegs + 1) > HIFN_D_DST_RSIZE) {
#ifdef HIFN_DEBUG
		if (hifn_debug) {
			device_printf(sc->sc_dev,
			    "src/dst exhaustion, srcu %u+%u dstu %u+%u\n",
			    dma->srcu, cmd->src_nsegs,
			    dma->dstu, cmd->dst_nsegs);
		}
#endif
		hifnstats.hst_nomem_sd++;
		err = ERESTART;
		goto err_dstmap;
	}

	if (dma->cmdi == HIFN_D_CMD_RSIZE) {
		dma->cmdi = 0;
		dma->cmdr[HIFN_D_CMD_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_CMDR_SYNC(sc, HIFN_D_CMD_RSIZE,
		    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	}
	cmdi = dma->cmdi++;
	cmdlen = hifn_write_command(cmd, dma->command_bufs[cmdi]);
	HIFN_CMD_SYNC(sc, cmdi, BUS_DMASYNC_PREWRITE);

	/* .p for command/result already set */
	dma->cmdr[cmdi].l = htole32(cmdlen | HIFN_D_VALID | HIFN_D_LAST |
	    HIFN_D_MASKDONEIRQ);
	HIFN_CMDR_SYNC(sc, cmdi,
	    BUS_DMASYNC_PREWRITE | BUS_DMASYNC_PREREAD);
	dma->cmdu++;
	if (sc->sc_c_busy == 0) {
		WRITE_REG_1(sc, HIFN_1_DMA_CSR, HIFN_DMACSR_C_CTRL_ENA);
		sc->sc_c_busy = 1;
	}

	/*
	 * We don't worry about missing an interrupt (which a "command wait"
	 * interrupt salvages us from), unless there is more than one command
	 * in the queue.
	 */
	if (dma->cmdu > 1) {
		sc->sc_dmaier |= HIFN_DMAIER_C_WAIT;
		WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier);
	}

	hifnstats.hst_ipackets++;
	hifnstats.hst_ibytes += cmd->src_mapsize;

	hifn_dmamap_load_src(sc, cmd);
	if (sc->sc_s_busy == 0) {
		WRITE_REG_1(sc, HIFN_1_DMA_CSR, HIFN_DMACSR_S_CTRL_ENA);
		sc->sc_s_busy = 1;
	}

	/*
	 * Unlike other descriptors, we don't mask done interrupt from
	 * result descriptor.
	 */
#ifdef HIFN_DEBUG
	if (hifn_debug)
		printf("load res\n");
#endif
	if (dma->resi == HIFN_D_RES_RSIZE) {
		dma->resi = 0;
		dma->resr[HIFN_D_RES_RSIZE].l = htole32(HIFN_D_VALID |
		    HIFN_D_JUMP | HIFN_D_MASKDONEIRQ);
		HIFN_RESR_SYNC(sc, HIFN_D_RES_RSIZE,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	}
	resi = dma->resi++;
	KASSERT(dma->hifn_commands[resi] == NULL,
	    ("hifn_crypto: command slot %u busy", resi));
	dma->hifn_commands[resi] = cmd;
	HIFN_RES_SYNC(sc, resi, BUS_DMASYNC_PREREAD);
	/* Batch (mask done IRQ) when the caller hints more work is coming. */
	if ((hint & CRYPTO_HINT_MORE) && sc->sc_curbatch < hifn_maxbatch) {
		dma->resr[resi].l = htole32(HIFN_MAX_RESULT |
		    HIFN_D_VALID | HIFN_D_LAST | HIFN_D_MASKDONEIRQ);
		sc->sc_curbatch++;
		if (sc->sc_curbatch > hifnstats.hst_maxbatch)
			hifnstats.hst_maxbatch = sc->sc_curbatch;
		hifnstats.hst_totbatch++;
	} else {
		dma->resr[resi].l = htole32(HIFN_MAX_RESULT |
		    HIFN_D_VALID | HIFN_D_LAST);
		sc->sc_curbatch = 0;
	}
	HIFN_RESR_SYNC(sc, resi,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	dma->resu++;
	if (sc->sc_r_busy == 0) {
		WRITE_REG_1(sc, HIFN_1_DMA_CSR, HIFN_DMACSR_R_CTRL_ENA);
		sc->sc_r_busy = 1;
	}

	if (cmd->sloplen)
		cmd->slopidx = resi;

	hifn_dmamap_load_dst(sc, cmd);
	if (sc->sc_d_busy == 0) {
		WRITE_REG_1(sc, HIFN_1_DMA_CSR, HIFN_DMACSR_D_CTRL_ENA);
		sc->sc_d_busy = 1;
	}

#ifdef HIFN_DEBUG
	if (hifn_debug) {
		device_printf(sc->sc_dev, "command: stat %8x ier %8x\n",
		    READ_REG_1(sc, HIFN_1_DMA_CSR),
		    READ_REG_1(sc, HIFN_1_DMA_IER));
	}
#endif

	sc->sc_active = 5;
	KASSERT(err == 0, ("hifn_crypto: success with error %u", err));
	return (err);		/* success */

err_dstmap:
	if (cmd->src_map != cmd->dst_map)
		bus_dmamap_unload(sc->sc_dmat, cmd->dst_map);
err_dstmap1:
	if (cmd->src_map != cmd->dst_map)
		bus_dmamap_destroy(sc->sc_dmat, cmd->dst_map);
err_srcmap:
	if (crp->crp_flags & CRYPTO_F_IMBUF) {
		if (cmd->src_m != cmd->dst_m)
			m_freem(cmd->dst_m);
	}
	bus_dmamap_unload(sc->sc_dmat, cmd->src_map);
err_srcmap1:
	bus_dmamap_destroy(sc->sc_dmat, cmd->src_map);
	return (err);
}

/*
 * Once-a-second watchdog: after ~5 idle seconds (sc_active counts
 * down), disable any DMA engines whose rings have drained.
 */
static void
hifn_tick(void* vsc)
{
	struct hifn_softc *sc = vsc;

	HIFN_LOCK(sc);
	if (sc->sc_active == 0) {
		struct hifn_dma *dma = sc->sc_dma;
		u_int32_t r = 0;

		if (dma->cmdu == 0 && sc->sc_c_busy) {
			sc->sc_c_busy = 0;
			r |= HIFN_DMACSR_C_CTRL_DIS;
		}
		if (dma->srcu == 0 && sc->sc_s_busy) {
			sc->sc_s_busy = 0;
			r |= HIFN_DMACSR_S_CTRL_DIS;
		}
		if (dma->dstu == 0 && sc->sc_d_busy) {
			sc->sc_d_busy = 0;
			r |= HIFN_DMACSR_D_CTRL_DIS;
		}
		if (dma->resu == 0 && sc->sc_r_busy) {
			sc->sc_r_busy = 0;
			r |= HIFN_DMACSR_R_CTRL_DIS;
		}
		if (r)
			WRITE_REG_1(sc, HIFN_1_DMA_CSR, r);
	} else
		sc->sc_active--;
	HIFN_UNLOCK(sc);
	callout_reset(&sc->sc_tickto, hz, hifn_tick, sc);
}

/*
 * Interrupt handler: acknowledge DMA status, handle overruns/aborts,
 * and reap completed result/src/cmd descriptors.
 */
static void
hifn_intr(void *arg)
{
	struct hifn_softc *sc = arg;
	struct hifn_dma *dma;
	u_int32_t dmacsr, restart;
	int i, u;

	HIFN_LOCK(sc);
	dma = sc->sc_dma;

	dmacsr = READ_REG_1(sc, HIFN_1_DMA_CSR);
#ifdef HIFN_DEBUG
	if (hifn_debug) {
		device_printf(sc->sc_dev,
		    "irq: stat %08x ien %08x damier %08x i %d/%d/%d/%d k %d/%d/%d/%d u %d/%d/%d/%d\n",
		    dmacsr, READ_REG_1(sc, HIFN_1_DMA_IER), sc->sc_dmaier,
		    dma->cmdi, dma->srci, dma->dsti, dma->resi,
		    dma->cmdk, dma->srck, dma->dstk, dma->resk,
		    dma->cmdu, dma->srcu, dma->dstu, dma->resu);
	}
#endif

	/* Nothing in the DMA unit interrupted */
	if ((dmacsr & sc->sc_dmaier) == 0) {
		hifnstats.hst_noirq++;
		HIFN_UNLOCK(sc);
		return;
	}

	WRITE_REG_1(sc, HIFN_1_DMA_CSR, dmacsr & sc->sc_dmaier);

	if ((sc->sc_flags & HIFN_HAS_PUBLIC) &&
	    (dmacsr & HIFN_DMACSR_PUBDONE))
		WRITE_REG_1(sc, HIFN_1_PUB_STATUS,
		    READ_REG_1(sc, HIFN_1_PUB_STATUS) | HIFN_PUBSTS_DONE);

	restart = dmacsr & (HIFN_DMACSR_D_OVER | HIFN_DMACSR_R_OVER);
	if (restart)
		device_printf(sc->sc_dev, "overrun %x\n", dmacsr);

	if (sc->sc_flags & HIFN_IS_7811) {
		if (dmacsr & HIFN_DMACSR_ILLR)
			device_printf(sc->sc_dev, "illegal read\n");
		if (dmacsr & HIFN_DMACSR_ILLW)
			device_printf(sc->sc_dev, "illegal write\n");
	}

	restart = dmacsr & (HIFN_DMACSR_C_ABORT | HIFN_DMACSR_S_ABORT |
	    HIFN_DMACSR_D_ABORT | HIFN_DMACSR_R_ABORT);
	if (restart) {
		device_printf(sc->sc_dev, "abort, resetting.\n");
		hifnstats.hst_abort++;
		hifn_abort(sc);
		HIFN_UNLOCK(sc);
		return;
	}

	if ((dmacsr & HIFN_DMACSR_C_WAIT) && (dma->cmdu == 0)) {
		/*
		 * If no slots to process and we receive a "waiting on
		 * command" interrupt, we disable the "waiting on command"
		 * (by clearing it).
		 */
		sc->sc_dmaier &= ~HIFN_DMAIER_C_WAIT;
		WRITE_REG_1(sc, HIFN_1_DMA_IER, sc->sc_dmaier);
	}

	/* clear the rings */
	i = dma->resk; u = dma->resu;
	while (u != 0) {
		HIFN_RESR_SYNC(sc, i,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if (dma->resr[i].l & htole32(HIFN_D_VALID)) {
			/* Still owned by hardware; stop reaping. */
			HIFN_RESR_SYNC(sc, i,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			break;
		}

		if (i != HIFN_D_RES_RSIZE) {
			struct hifn_command *cmd;
			u_int8_t *macbuf = NULL;

			HIFN_RES_SYNC(sc, i, BUS_DMASYNC_POSTREAD);
			cmd = dma->hifn_commands[i];
			KASSERT(cmd != NULL,
			    ("hifn_intr: null command slot %u", i));
			dma->hifn_commands[i] = NULL;

			if (cmd->base_masks & HIFN_BASE_CMD_MAC) {
				/* MAC digest starts 12 bytes into result. */
				macbuf = dma->result_bufs[i];
				macbuf += 12;
			}

			hifn_callback(sc, cmd, macbuf);
			hifnstats.hst_opackets++;
			u--;
		}

		if (++i == (HIFN_D_RES_RSIZE + 1))
			i = 0;
	}
	dma->resk = i; dma->resu = u;

	i = dma->srck; u = dma->srcu;
	while (u != 0) {
		if (i == HIFN_D_SRC_RSIZE)
			i = 0;
		HIFN_SRCR_SYNC(sc, i,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if (dma->srcr[i].l & htole32(HIFN_D_VALID)) {
			HIFN_SRCR_SYNC(sc, i,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			break;
		}
		i++, u--;
	}
	dma->srck = i; dma->srcu = u;

	i = dma->cmdk; u = dma->cmdu;
	while (u != 0) {
		HIFN_CMDR_SYNC(sc, i,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if (dma->cmdr[i].l & htole32(HIFN_D_VALID)) {
			HIFN_CMDR_SYNC(sc, i,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			break;
		}
		if (i != HIFN_D_CMD_RSIZE) {
			u--;
			HIFN_CMD_SYNC(sc, i, BUS_DMASYNC_POSTWRITE);
		}
		if (++i == (HIFN_D_CMD_RSIZE + 1))
			i = 0;
	}
	dma->cmdk = i; dma->cmdu = u;

	if (sc->sc_needwakeup) {		/* XXX check high watermark */
		int wakeup = sc->sc_needwakeup & (CRYPTO_SYMQ|CRYPTO_ASYMQ);
#ifdef HIFN_DEBUG
		if (hifn_debug)
			device_printf(sc->sc_dev,
			    "wakeup crypto (%x) u %d/%d/%d/%d\n",
			    sc->sc_needwakeup,
			    dma->cmdu, dma->srcu, dma->dstu, dma->resu);
#endif
		sc->sc_needwakeup &= ~wakeup;
		crypto_unblock(sc->sc_cid, wakeup);
	}
	HIFN_UNLOCK(sc);
}

/*
 * Allocate a new 'session' and return an encoded session id.  'sidp'
 * contains our registration id, and should contain an encoded session
 * id on successful allocation.
 */
static int
hifn_newsession(void *arg, u_int32_t *sidp, struct cryptoini *cri)
{
	struct cryptoini *c;
	struct hifn_softc *sc = arg;
	int i, mac = 0, cry = 0;

	KASSERT(sc != NULL, ("hifn_newsession: null softc"));
	if (sidp == NULL || cri == NULL || sc == NULL)
		return (EINVAL);

	/* Find a free session slot. */
	for (i = 0; i < sc->sc_maxses; i++)
		if (sc->sc_sessions[i].hs_state == HS_STATE_FREE)
			break;
	if (i == sc->sc_maxses)
		return (ENOMEM);

	/* At most one MAC and one cipher descriptor are accepted. */
	for (c = cri; c != NULL; c = c->cri_next) {
		switch (c->cri_alg) {
		case CRYPTO_MD5:
		case CRYPTO_SHA1:
		case CRYPTO_MD5_HMAC:
		case CRYPTO_SHA1_HMAC:
			if (mac)
				return (EINVAL);
			mac = 1;
			break;
		case CRYPTO_DES_CBC:
		case CRYPTO_3DES_CBC:
			/* XXX this may read fewer, does it matter? */
			read_random(sc->sc_sessions[i].hs_iv, HIFN_IV_LENGTH);
			/*FALLTHROUGH*/
		case CRYPTO_ARC4:
			if (cry)
				return (EINVAL);
			cry = 1;
			break;
		default:
			return (EINVAL);
		}
	}
	if (mac == 0 && cry == 0)
		return (EINVAL);

	*sidp = HIFN_SID(device_get_unit(sc->sc_dev), i);
	sc->sc_sessions[i].hs_state = HS_STATE_USED;

	return (0);
}

/*
 * Deallocate a session.
 * XXX this routine should run a zero'd mac/encrypt key into context ram.
 * XXX to blow away any keys already stored there.
 */
static int
hifn_freesession(void *arg, u_int64_t tid)
{
	struct hifn_softc *sc = arg;
	int session;
	u_int32_t sid = ((u_int32_t) tid) & 0xffffffff;

	KASSERT(sc != NULL, ("hifn_freesession: null softc"));
	if (sc == NULL)
		return (EINVAL);

	session = HIFN_SESSION(sid);
	if (session >= sc->sc_maxses)
		return (EINVAL);

	bzero(&sc->sc_sessions[session], sizeof(sc->sc_sessions[session]));
	return (0);
}

/*
 * opencrypto dispatch entry point: validate the request, classify the
 * descriptors into at most one cipher and one MAC, set up IVs/keys in
 * a hifn_command, and hand it to hifn_crypto().
 */
static int
hifn_process(void *arg, struct cryptop *crp, int hint)
{
	struct hifn_softc *sc = arg;
	struct hifn_command *cmd = NULL;
	int session, err;
	struct cryptodesc *crd1, *crd2, *maccrd, *enccrd;

	if (crp == NULL || crp->crp_callback == NULL) {
		hifnstats.hst_invalid++;
		return (EINVAL);
	}
	session = HIFN_SESSION(crp->crp_sid);

	if (sc == NULL || session >= sc->sc_maxses) {
		err = EINVAL;
		goto errout;
	}

	cmd = malloc(sizeof(struct hifn_command), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (cmd == NULL) {
		hifnstats.hst_nomem++;
		err = ENOMEM;
		goto errout;
	}

	if (crp->crp_flags & CRYPTO_F_IMBUF) {
		cmd->src_m = (struct mbuf *)crp->crp_buf;
		cmd->dst_m = (struct mbuf *)crp->crp_buf;
	} else if (crp->crp_flags & CRYPTO_F_IOV) {
		cmd->src_io = (struct uio *)crp->crp_buf;
		cmd->dst_io = (struct uio *)crp->crp_buf;
	} else {
		err = EINVAL;
		goto errout;	/* XXX we don't handle contiguous buffers! */
	}

	crd1 = crp->crp_desc;
	if (crd1 == NULL) {
		err = EINVAL;
		goto errout;
	}
	crd2 = crd1->crd_next;

	if (crd2 == NULL) {
		/* Single descriptor: either MAC-only or cipher-only. */
		if (crd1->crd_alg == CRYPTO_MD5_HMAC ||
		    crd1->crd_alg == CRYPTO_SHA1_HMAC ||
		    crd1->crd_alg == CRYPTO_SHA1 ||
		    crd1->crd_alg == CRYPTO_MD5) {
			maccrd = crd1;
			enccrd = NULL;
		} else if (crd1->crd_alg == CRYPTO_DES_CBC ||
		    crd1->crd_alg == CRYPTO_3DES_CBC ||
		    crd1->crd_alg == CRYPTO_ARC4) {
			if ((crd1->crd_flags & CRD_F_ENCRYPT) == 0)
				cmd->base_masks |= HIFN_BASE_CMD_DECODE;
			maccrd = NULL;
			enccrd = crd1;
		} else {
			err = EINVAL;
			goto errout;
		}
	} else {
		/*
		 * Two descriptors: the chip only supports MAC-then-decrypt
		 * and encrypt-then-MAC orderings.
		 */
		if ((crd1->crd_alg == CRYPTO_MD5_HMAC ||
		    crd1->crd_alg == CRYPTO_SHA1_HMAC ||
		    crd1->crd_alg == CRYPTO_MD5 ||
		    crd1->crd_alg == CRYPTO_SHA1) &&
		    (crd2->crd_alg == CRYPTO_DES_CBC ||
		     crd2->crd_alg == CRYPTO_3DES_CBC ||
		     crd2->crd_alg == CRYPTO_ARC4) &&
		    ((crd2->crd_flags & CRD_F_ENCRYPT) == 0)) {
			cmd->base_masks = HIFN_BASE_CMD_DECODE;
			maccrd = crd1;
			enccrd = crd2;
		} else if ((crd1->crd_alg == CRYPTO_DES_CBC ||
		    crd1->crd_alg == CRYPTO_ARC4 ||
		    crd1->crd_alg == CRYPTO_3DES_CBC) &&
		    (crd2->crd_alg == CRYPTO_MD5_HMAC ||
		     crd2->crd_alg == CRYPTO_SHA1_HMAC ||
		     crd2->crd_alg == CRYPTO_MD5 ||
		     crd2->crd_alg == CRYPTO_SHA1) &&
		    (crd1->crd_flags & CRD_F_ENCRYPT)) {
			enccrd = crd1;
			maccrd = crd2;
		} else {
			/*
			 * We cannot order the 7751 as requested
			 */
			err = EINVAL;
			goto errout;
		}
	}

	if (enccrd) {
		cmd->enccrd = enccrd;
		cmd->base_masks |= HIFN_BASE_CMD_CRYPT;
		switch (enccrd->crd_alg) {
		case CRYPTO_ARC4:
			cmd->cry_masks |= HIFN_CRYPT_CMD_ALG_RC4;
			/* Direction change forces the RC4 state reload. */
			if ((enccrd->crd_flags & CRD_F_ENCRYPT) !=
			    sc->sc_sessions[session].hs_prev_op)
				sc->sc_sessions[session].hs_state =
				    HS_STATE_USED;
			break;
		case CRYPTO_DES_CBC:
			cmd->cry_masks |= HIFN_CRYPT_CMD_ALG_DES |
			    HIFN_CRYPT_CMD_MODE_CBC |
			    HIFN_CRYPT_CMD_NEW_IV;
			break;
		case CRYPTO_3DES_CBC:
			cmd->cry_masks |= HIFN_CRYPT_CMD_ALG_3DES |
			    HIFN_CRYPT_CMD_MODE_CBC |
			    HIFN_CRYPT_CMD_NEW_IV;
			break;
		default:
			err = EINVAL;
			goto errout;
		}
		if (enccrd->crd_alg != CRYPTO_ARC4) {
			if (enccrd->crd_flags & CRD_F_ENCRYPT) {
				/* Encrypt: pick IV, optionally inject it. */
				if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
					bcopy(enccrd->crd_iv, cmd->iv,
					    HIFN_IV_LENGTH);
				else
					bcopy(sc->sc_sessions[session].hs_iv,
					    cmd->iv, HIFN_IV_LENGTH);

				if ((enccrd->crd_flags & CRD_F_IV_PRESENT)
				    == 0) {
					if (crp->crp_flags & CRYPTO_F_IMBUF)
						m_copyback(cmd->src_m,
						    enccrd->crd_inject,
						    HIFN_IV_LENGTH, cmd->iv);
					else if (crp->crp_flags & CRYPTO_F_IOV)
						cuio_copyback(cmd->src_io,
						    enccrd->crd_inject,
						    HIFN_IV_LENGTH, cmd->iv);
				}
			} else {
				/* Decrypt: IV comes from caller or buffer. */
				if (enccrd->crd_flags & CRD_F_IV_EXPLICIT)
					bcopy(enccrd->crd_iv, cmd->iv,
					    HIFN_IV_LENGTH);
				else if (crp->crp_flags & CRYPTO_F_IMBUF)
					m_copydata(cmd->src_m,
					    enccrd->crd_inject,
					    HIFN_IV_LENGTH, cmd->iv);
				else if (crp->crp_flags & CRYPTO_F_IOV)
					cuio_copydata(cmd->src_io,
					    enccrd->crd_inject,
					    HIFN_IV_LENGTH, cmd->iv);
			}
		}

		cmd->ck = enccrd->crd_key;
		cmd->cklen = enccrd->crd_klen >> 3;

		if (sc->sc_sessions[session].hs_state == HS_STATE_USED)
			cmd->cry_masks |= HIFN_CRYPT_CMD_NEW_KEY;
	}

	if (maccrd) {
		cmd->maccrd = maccrd;
		cmd->base_masks |= HIFN_BASE_CMD_MAC;

		switch (maccrd->crd_alg) {
		case CRYPTO_MD5:
			cmd->mac_masks |= HIFN_MAC_CMD_ALG_MD5 |
			    HIFN_MAC_CMD_RESULT | HIFN_MAC_CMD_MODE_HASH |
			    HIFN_MAC_CMD_POS_IPSEC;
			break;
		case CRYPTO_MD5_HMAC:
			cmd->mac_masks |= HIFN_MAC_CMD_ALG_MD5 |
			    HIFN_MAC_CMD_RESULT | HIFN_MAC_CMD_MODE_HMAC |
			    HIFN_MAC_CMD_POS_IPSEC | HIFN_MAC_CMD_TRUNC;
			break;
		case CRYPTO_SHA1:
			cmd->mac_masks |= HIFN_MAC_CMD_ALG_SHA1 |
			    HIFN_MAC_CMD_RESULT | HIFN_MAC_CMD_MODE_HASH |
			    HIFN_MAC_CMD_POS_IPSEC;
			break;
		case CRYPTO_SHA1_HMAC:
			cmd->mac_masks |= HIFN_MAC_CMD_ALG_SHA1 |
			    HIFN_MAC_CMD_RESULT | HIFN_MAC_CMD_MODE_HMAC |
			    HIFN_MAC_CMD_POS_IPSEC | HIFN_MAC_CMD_TRUNC;
			break;
		}

		/* HMAC key is zero-padded to the full key area. */
		if ((maccrd->crd_alg == CRYPTO_SHA1_HMAC ||
		     maccrd->crd_alg == CRYPTO_MD5_HMAC) &&
		    sc->sc_sessions[session].hs_state == HS_STATE_USED) {
			cmd->mac_masks |= HIFN_MAC_CMD_NEW_KEY;
			bcopy(maccrd->crd_key, cmd->mac, maccrd->crd_klen >> 3);
			bzero(cmd->mac + (maccrd->crd_klen >> 3),
			    HIFN_MAC_KEY_LENGTH - (maccrd->crd_klen >> 3));
		}
	}

	cmd->crp = crp;
	cmd->session_num = session;
	cmd->softc = sc;

	err = hifn_crypto(sc, cmd, crp, hint);
	if (!err) {
		if (enccrd)
			sc->sc_sessions[session].hs_prev_op =
				enccrd->crd_flags & CRD_F_ENCRYPT;
		if (sc->sc_sessions[session].hs_state == HS_STATE_USED)
			sc->sc_sessions[session].hs_state = HS_STATE_KEY;
		return 0;
	} else if (err == ERESTART) {
		/*
		 * There weren't enough resources to dispatch the request
		 * to the part.  Notify the caller so they'll requeue this
		 * request and resubmit it again soon.
		 */
#ifdef HIFN_DEBUG
		if (hifn_debug)
			device_printf(sc->sc_dev, "requeue request\n");
#endif
		free(cmd, M_DEVBUF);
		sc->sc_needwakeup |= CRYPTO_SYMQ;
		return (err);
	}

errout:
	if (cmd != NULL)
		free(cmd, M_DEVBUF);
	if (err == EINVAL)
		hifnstats.hst_invalid++;
	else
		hifnstats.hst_nomem++;
	crp->crp_etype = err;
	crypto_done(crp);
	return (err);
}

/*
 * Abort handling after a DMA abort interrupt: salvage any results
 * that did complete, fail the rest with ENOMEM, then reset and
 * reinitialize the board.
 */
static void
hifn_abort(struct hifn_softc *sc)
{
	struct hifn_dma *dma = sc->sc_dma;
	struct hifn_command *cmd;
	struct cryptop *crp;
	int i, u;

	i = dma->resk; u = dma->resu;
	while (u != 0) {
		cmd = dma->hifn_commands[i];
		KASSERT(cmd != NULL, ("hifn_abort: null command slot %u", i));
		dma->hifn_commands[i] = NULL;
		crp = cmd->crp;

		if ((dma->resr[i].l & htole32(HIFN_D_VALID)) == 0) {
			/* Salvage what we can. */
			u_int8_t *macbuf;

			if (cmd->base_masks & HIFN_BASE_CMD_MAC) {
				macbuf = dma->result_bufs[i];
				macbuf += 12;
			} else
				macbuf = NULL;
			hifnstats.hst_opackets++;
			hifn_callback(sc, cmd, macbuf);
		} else {
			if (cmd->src_map == cmd->dst_map) {
				bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
				    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);
			} else {
				bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_sync(sc->sc_dmat, cmd->dst_map,
				    BUS_DMASYNC_POSTREAD);
			}

			if (cmd->src_m != cmd->dst_m) {
				m_freem(cmd->src_m);
				crp->crp_buf = (caddr_t)cmd->dst_m;
			}

			/* non-shared buffers cannot be restarted */
			if (cmd->src_map != cmd->dst_map) {
				/*
				 * XXX should be EAGAIN, delayed until
				 * after the reset.
				 */
				crp->crp_etype = ENOMEM;
				bus_dmamap_unload(sc->sc_dmat, cmd->dst_map);
				bus_dmamap_destroy(sc->sc_dmat, cmd->dst_map);
			} else
				crp->crp_etype = ENOMEM;

			bus_dmamap_unload(sc->sc_dmat, cmd->src_map);
			bus_dmamap_destroy(sc->sc_dmat, cmd->src_map);

			free(cmd, M_DEVBUF);
			if (crp->crp_etype != EAGAIN)
				crypto_done(crp);
		}

		if (++i == HIFN_D_RES_RSIZE)
			i = 0;
		u--;
	}
	dma->resk = i; dma->resu = u;

	/* Force upload of key next time */
	for (i = 0; i < sc->sc_maxses; i++)
		if (sc->sc_sessions[i].hs_state == HS_STATE_KEY)
			sc->sc_sessions[i].hs_state = HS_STATE_USED;

	hifn_reset_board(sc, 1);
	hifn_init_dma(sc);
	hifn_init_pci_registers(sc);
}

/*
 * Completion path for one finished command: sync/unmap the operand
 * buffers, copy back slop bytes, save the next-IV for CBC encrypt,
 * deliver the MAC digest, and call crypto_done().
 */
static void
hifn_callback(struct hifn_softc *sc, struct hifn_command *cmd, u_int8_t *macbuf)
{
	struct hifn_dma *dma = sc->sc_dma;
	struct cryptop *crp = cmd->crp;
	struct cryptodesc *crd;
	struct mbuf *m;
	int totlen, i, u;

	if (cmd->src_map == cmd->dst_map) {
		bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
		    BUS_DMASYNC_POSTWRITE | BUS_DMASYNC_POSTREAD);
	} else {
		bus_dmamap_sync(sc->sc_dmat, cmd->src_map,
		    BUS_DMASYNC_POSTWRITE);
		bus_dmamap_sync(sc->sc_dmat, cmd->dst_map,
		    BUS_DMASYNC_POSTREAD);
	}

	if (crp->crp_flags & CRYPTO_F_IMBUF) {
		if (cmd->src_m != cmd->dst_m) {
			/* Realigned copy: trim dst chain to source length. */
			crp->crp_buf = (caddr_t)cmd->dst_m;
			totlen = cmd->src_mapsize;
			for (m = cmd->dst_m; m != NULL; m = m->m_next) {
				if (totlen < m->m_len) {
					m->m_len = totlen;
					totlen = 0;
				} else
					totlen -= m->m_len;
			}
			cmd->dst_m->m_pkthdr.len = cmd->src_m->m_pkthdr.len;
			m_freem(cmd->src_m);
		}
	}

	if (cmd->sloplen != 0) {
		/* Copy the staged slop word back into the data buffer. */
		if (crp->crp_flags & CRYPTO_F_IMBUF)
			m_copyback((struct mbuf *)crp->crp_buf,
			    cmd->src_mapsize - cmd->sloplen,
			    cmd->sloplen, (caddr_t)&dma->slop[cmd->slopidx]);
		else if (crp->crp_flags & CRYPTO_F_IOV)
			cuio_copyback((struct uio *)crp->crp_buf,
			    cmd->src_mapsize - cmd->sloplen,
			    cmd->sloplen, (caddr_t)&dma->slop[cmd->slopidx]);
	}

	i = dma->dstk; u = dma->dstu;
	while (u != 0) {
		if (i == HIFN_D_DST_RSIZE)
			i = 0;
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if (dma->dstr[i].l & htole32(HIFN_D_VALID)) {
			bus_dmamap_sync(sc->sc_dmat, sc->sc_dmamap,
			    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
			break;
		}
		i++, u--;
	}
	dma->dstk = i; dma->dstu = u;

	hifnstats.hst_obytes += cmd->dst_mapsize;

	/* For CBC encrypt, remember the last ciphertext block as next IV. */
	if ((cmd->base_masks & (HIFN_BASE_CMD_CRYPT | HIFN_BASE_CMD_DECODE)) ==
	    HIFN_BASE_CMD_CRYPT) {
		for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
			if (crd->crd_alg != CRYPTO_DES_CBC &&
			    crd->crd_alg != CRYPTO_3DES_CBC)
				continue;
			if (crp->crp_flags & CRYPTO_F_IMBUF)
				m_copydata((struct mbuf *)crp->crp_buf,
				    crd->crd_skip + crd->crd_len - HIFN_IV_LENGTH,
				    HIFN_IV_LENGTH,
				    cmd->softc->sc_sessions[cmd->session_num].hs_iv);
			else if (crp->crp_flags & CRYPTO_F_IOV) {
				cuio_copydata((struct uio *)crp->crp_buf,
				    crd->crd_skip + crd->crd_len - HIFN_IV_LENGTH,
				    HIFN_IV_LENGTH,
				    cmd->softc->sc_sessions[cmd->session_num].hs_iv);
			}
			break;
		}
	}

	if (macbuf != NULL) {
		for (crd = crp->crp_desc; crd; crd = crd->crd_next) {
			int len;

			if (crd->crd_alg == CRYPTO_MD5)
				len = 16;
			else if (crd->crd_alg == CRYPTO_SHA1)
				len = 20;
			else if (crd->crd_alg == CRYPTO_MD5_HMAC ||
			    crd->crd_alg == CRYPTO_SHA1_HMAC)
				len = 12;
			else
				continue;

			if (crp->crp_flags & CRYPTO_F_IMBUF)
				m_copyback((struct mbuf *)crp->crp_buf,
				    crd->crd_inject, len, macbuf);
			else if ((crp->crp_flags & CRYPTO_F_IOV) && crp->crp_mac)
				bcopy((caddr_t)macbuf, crp->crp_mac, len);
			break;
		}
	}

	if (cmd->src_map != cmd->dst_map) {
		bus_dmamap_unload(sc->sc_dmat, cmd->dst_map);
		bus_dmamap_destroy(sc->sc_dmat, cmd->dst_map);
	}
	bus_dmamap_unload(sc->sc_dmat, cmd->src_map);
	bus_dmamap_destroy(sc->sc_dmat, cmd->src_map);
	free(cmd, M_DEVBUF);
	crypto_done(crp);
}

/*
 * 7811 PB3 rev/2 parts lock-up on burst writes to Group 0
 * and Group 1 registers; avoid conditions that could create
 * burst writes by doing a read in between the writes.
 *
 * NB: The read we interpose is always to the same register;
 *     we do this because reading from an arbitrary (e.g. last)
 *     register may not always work.
*/
/*
 * hifn_write_reg_0:
 *
 * Write a BAR0 (Group 0) register.  On 7811 parts, when this write
 * would immediately follow a write to the adjacent lower register
 * (lastreg == reg - 4), interpose a read of HIFN_0_PUCNFG first so
 * the two writes cannot be combined into a burst (see note above).
 */
static void
hifn_write_reg_0(struct hifn_softc *sc, bus_size_t reg, u_int32_t val)
{
	if (sc->sc_flags & HIFN_IS_7811) {
		/* Break up a potential back-to-back adjacent write. */
		if (sc->sc_bar0_lastreg == reg - 4)
			bus_space_read_4(sc->sc_st0, sc->sc_sh0, HIFN_0_PUCNFG);
		/* Remember the last register written for the next call. */
		sc->sc_bar0_lastreg = reg;
	}
	bus_space_write_4(sc->sc_st0, sc->sc_sh0, reg, val);
}

/*
 * hifn_write_reg_1:
 *
 * Write a BAR1 (Group 1) register; same 7811 burst-avoidance as
 * hifn_write_reg_0, with HIFN_1_REVID as the interposed read.
 */
static void
hifn_write_reg_1(struct hifn_softc *sc, bus_size_t reg, u_int32_t val)
{
	if (sc->sc_flags & HIFN_IS_7811) {
		if (sc->sc_bar1_lastreg == reg - 4)
			bus_space_read_4(sc->sc_st1, sc->sc_sh1, HIFN_1_REVID);
		sc->sc_bar1_lastreg = reg;
	}
	bus_space_write_4(sc->sc_st1, sc->sc_sh1, reg, val);
}
Index: head/sys/dev/ubsec/ubsec.c
===================================================================
--- head/sys/dev/ubsec/ubsec.c	(revision 108465)
+++ head/sys/dev/ubsec/ubsec.c	(revision 108466)
@@ -1,2763 +1,2765 @@
/* $FreeBSD$ */
/* $OpenBSD: ubsec.c,v 1.115 2002/09/24 18:33:26 jason Exp $ */

/*
 * Copyright (c) 2000 Jason L. Wright (jason@thought.net)
 * Copyright (c) 2000 Theo de Raadt (deraadt@openbsd.org)
 * Copyright (c) 2001 Patrik Lindergren (patrik@ipunplugged.com)
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Jason L. Wright
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. * */ #define UBSEC_DEBUG /* * uBsec 5[56]01, 58xx hardware crypto accelerator */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* grr, #defines for gratuitous incompatibility in queue.h */ #define SIMPLEQ_HEAD STAILQ_HEAD #define SIMPLEQ_ENTRY STAILQ_ENTRY #define SIMPLEQ_INIT STAILQ_INIT #define SIMPLEQ_INSERT_TAIL STAILQ_INSERT_TAIL #define SIMPLEQ_EMPTY STAILQ_EMPTY #define SIMPLEQ_FIRST STAILQ_FIRST #define SIMPLEQ_REMOVE_HEAD STAILQ_REMOVE_HEAD_UNTIL /* ditto for endian.h */ #define letoh16(x) le16toh(x) #define letoh32(x) le32toh(x) #include #include /* * Prototypes and count for the pci_device structure */ static int ubsec_probe(device_t); static int ubsec_attach(device_t); static int ubsec_detach(device_t); static int ubsec_suspend(device_t); static int ubsec_resume(device_t); static void ubsec_shutdown(device_t); static device_method_t ubsec_methods[] = { /* 
Device interface */ DEVMETHOD(device_probe, ubsec_probe), DEVMETHOD(device_attach, ubsec_attach), DEVMETHOD(device_detach, ubsec_detach), DEVMETHOD(device_suspend, ubsec_suspend), DEVMETHOD(device_resume, ubsec_resume), DEVMETHOD(device_shutdown, ubsec_shutdown), /* bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_driver_added, bus_generic_driver_added), { 0, 0 } }; static driver_t ubsec_driver = { "ubsec", ubsec_methods, sizeof (struct ubsec_softc) }; static devclass_t ubsec_devclass; DRIVER_MODULE(ubsec, pci, ubsec_driver, ubsec_devclass, 0, 0); MODULE_DEPEND(ubsec, crypto, 1, 1, 1); static void ubsec_intr(void *); static int ubsec_newsession(void *, u_int32_t *, struct cryptoini *); static int ubsec_freesession(void *, u_int64_t); static int ubsec_process(void *, struct cryptop *, int); static void ubsec_callback(struct ubsec_softc *, struct ubsec_q *); static int ubsec_feed(struct ubsec_softc *); static void ubsec_mcopy(struct mbuf *, struct mbuf *, int, int); static void ubsec_callback2(struct ubsec_softc *, struct ubsec_q2 *); static int ubsec_feed2(struct ubsec_softc *); static void ubsec_rng(void *); static int ubsec_dma_malloc(struct ubsec_softc *, bus_size_t, struct ubsec_dma_alloc *, int); static void ubsec_dma_free(struct ubsec_softc *, struct ubsec_dma_alloc *); static int ubsec_dmamap_aligned(struct ubsec_operand *op); static void ubsec_reset_board(struct ubsec_softc *sc); static void ubsec_init_board(struct ubsec_softc *sc); static void ubsec_init_pciregs(device_t dev); static void ubsec_totalreset(struct ubsec_softc *sc); static int ubsec_free_q(struct ubsec_softc *sc, struct ubsec_q *q); static int ubsec_kprocess(void*, struct cryptkop *, int); static int ubsec_kprocess_modexp_hw(struct ubsec_softc *, struct cryptkop *, int); static int ubsec_kprocess_modexp_sw(struct ubsec_softc *, struct cryptkop *, int); static int ubsec_kprocess_rsapriv(struct ubsec_softc *, struct cryptkop *, int); static void 
ubsec_kfree(struct ubsec_softc *, struct ubsec_q2 *); static int ubsec_ksigbits(struct crparam *); static void ubsec_kshift_r(u_int, u_int8_t *, u_int, u_int8_t *, u_int); static void ubsec_kshift_l(u_int, u_int8_t *, u_int, u_int8_t *, u_int); #ifdef UBSEC_DEBUG static void ubsec_dump_pb(volatile struct ubsec_pktbuf *); static void ubsec_dump_mcr(struct ubsec_mcr *); static void ubsec_dump_ctx2(struct ubsec_ctx_keyop *); static int ubsec_debug = 0; SYSCTL_INT(_debug, OID_AUTO, ubsec, CTLFLAG_RW, &ubsec_debug, 0, "UBSEC driver debugging printfs"); #endif #define READ_REG(sc,r) \ bus_space_read_4((sc)->sc_st, (sc)->sc_sh, (r)) #define WRITE_REG(sc,reg,val) \ bus_space_write_4((sc)->sc_st, (sc)->sc_sh, reg, val) #define SWAP32(x) (x) = htole32(ntohl((x))) #define HTOLE32(x) (x) = htole32(x) struct ubsec_stats ubsecstats; SYSCTL_STRUCT(_kern, OID_AUTO, ubsec_stats, CTLFLAG_RD, &ubsecstats, ubsec_stats, "Broadcom driver statistics"); static int ubsec_maxbatch = 2; /* XXX tune based on part+sys speed */ SYSCTL_INT(_kern, OID_AUTO, ubsec_maxbatch, CTLFLAG_RW, &ubsec_maxbatch, 0, "Broadcom driver: max ops to batch w/o interrupt"); static int ubsec_probe(device_t dev) { if (pci_get_vendor(dev) == PCI_VENDOR_BLUESTEEL && (pci_get_device(dev) == PCI_PRODUCT_BLUESTEEL_5501 || pci_get_device(dev) == PCI_PRODUCT_BLUESTEEL_5601)) return (0); if (pci_get_vendor(dev) == PCI_VENDOR_BROADCOM && (pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5805 || pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5820 || pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5821 || pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5822)) return (0); return (ENXIO); } static const char* ubsec_partname(struct ubsec_softc *sc) { /* XXX sprintf numbers when not decoded */ switch (pci_get_vendor(sc->sc_dev)) { case PCI_VENDOR_BROADCOM: switch (pci_get_device(sc->sc_dev)) { case PCI_PRODUCT_BROADCOM_5805: return "Broadcom 5805"; case PCI_PRODUCT_BROADCOM_5820: return "Broadcom 5820"; case PCI_PRODUCT_BROADCOM_5821: return 
"Broadcom 5821"; case PCI_PRODUCT_BROADCOM_5822: return "Broadcom 5822"; } return "Broadcom unknown-part"; case PCI_VENDOR_BLUESTEEL: switch (pci_get_device(sc->sc_dev)) { case PCI_PRODUCT_BLUESTEEL_5601: return "Bluesteel 5601"; } return "Bluesteel unknown-part"; } return "Unknown-vendor unknown-part"; } static int ubsec_attach(device_t dev) { struct ubsec_softc *sc = device_get_softc(dev); struct ubsec_dma *dmap; u_int32_t cmd, i; int rid; KASSERT(sc != NULL, ("ubsec_attach: null software carrier!")); bzero(sc, sizeof (*sc)); sc->sc_dev = dev; mtx_init(&sc->sc_mtx, device_get_nameunit(dev), "crypto driver", MTX_DEF); SIMPLEQ_INIT(&sc->sc_queue); SIMPLEQ_INIT(&sc->sc_qchip); SIMPLEQ_INIT(&sc->sc_queue2); SIMPLEQ_INIT(&sc->sc_qchip2); SIMPLEQ_INIT(&sc->sc_q2free); /* XXX handle power management */ sc->sc_statmask = BS_STAT_MCR1_DONE | BS_STAT_DMAERR; if (pci_get_vendor(dev) == PCI_VENDOR_BLUESTEEL && pci_get_device(dev) == PCI_PRODUCT_BLUESTEEL_5601) sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG; if (pci_get_vendor(dev) == PCI_VENDOR_BROADCOM && pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5805) sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG; if (pci_get_vendor(dev) == PCI_VENDOR_BROADCOM && pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5820) sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX | UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY; if (pci_get_vendor(dev) == PCI_VENDOR_BROADCOM && (pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5821 || pci_get_device(dev) == PCI_PRODUCT_BROADCOM_5822)) { /* NB: the 5821/5822 defines some additional status bits */ sc->sc_statmask |= BS_STAT_MCR1_ALLEMPTY | BS_STAT_MCR2_ALLEMPTY; sc->sc_flags |= UBS_FLAGS_KEY | UBS_FLAGS_RNG | UBS_FLAGS_LONGCTX | UBS_FLAGS_HWNORM | UBS_FLAGS_BIGKEY; } /* XXX no PK key support until we sort out the bus_dma stuff */ sc->sc_flags &= ~UBS_FLAGS_KEY; cmd = pci_read_config(dev, PCIR_COMMAND, 4); cmd |= PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, cmd, 4); cmd = 
pci_read_config(dev, PCIR_COMMAND, 4); if (!(cmd & PCIM_CMD_MEMEN)) { device_printf(dev, "failed to enable memory mapping\n"); goto bad; } if (!(cmd & PCIM_CMD_BUSMASTEREN)) { device_printf(dev, "failed to enable bus mastering\n"); goto bad; } /* * Setup memory-mapping of PCI registers. */ rid = BS_BAR; sc->sc_sr = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, ~0, 1, RF_ACTIVE); if (sc->sc_sr == NULL) { device_printf(dev, "cannot map register space\n"); goto bad; } sc->sc_st = rman_get_bustag(sc->sc_sr); sc->sc_sh = rman_get_bushandle(sc->sc_sr); /* * Arrange interrupt line. */ rid = 0; sc->sc_irq = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 1, RF_SHAREABLE|RF_ACTIVE); if (sc->sc_irq == NULL) { device_printf(dev, "could not map interrupt\n"); goto bad; } /* * NB: Network code assumes we are blocked with splimp() * so make sure the IRQ is mapped appropriately. */ if (bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_NET, ubsec_intr, sc, &sc->sc_ih)) { device_printf(dev, "could not establish interrupt\n"); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr); goto bad; } sc->sc_cid = crypto_get_driverid(0); if (sc->sc_cid < 0) { device_printf(dev, "could not get crypto driver id\n"); bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr); goto bad; } /* * Setup DMA descriptor area. 
*/ if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ 0x3ffff, /* maxsize XXX */ UBS_MAX_SCATTER, /* nsegments */ 0xffff, /* maxsegsize XXX */ BUS_DMA_ALLOCNOW, /* flags */ &sc->sc_dmat)) { device_printf(dev, "cannot allocate DMA tag\n"); crypto_unregister_all(sc->sc_cid); bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr); goto bad; } SIMPLEQ_INIT(&sc->sc_freequeue); dmap = sc->sc_dmaa; for (i = 0; i < UBS_MAX_NQUEUE; i++, dmap++) { struct ubsec_q *q; q = (struct ubsec_q *)malloc(sizeof(struct ubsec_q), M_DEVBUF, M_NOWAIT); if (q == NULL) { device_printf(dev, "cannot allocate queue buffers\n"); break; } if (ubsec_dma_malloc(sc, sizeof(struct ubsec_dmachunk), &dmap->d_alloc, 0)) { device_printf(dev, "cannot allocate dma buffers\n"); free(q, M_DEVBUF); break; } dmap->d_dma = (struct ubsec_dmachunk *)dmap->d_alloc.dma_vaddr; q->q_dma = dmap; sc->sc_queuea[i] = q; SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next); } device_printf(sc->sc_dev, "%s\n", ubsec_partname(sc)); crypto_register(sc->sc_cid, CRYPTO_3DES_CBC, 0, 0, ubsec_newsession, ubsec_freesession, ubsec_process, sc); crypto_register(sc->sc_cid, CRYPTO_DES_CBC, 0, 0, ubsec_newsession, ubsec_freesession, ubsec_process, sc); crypto_register(sc->sc_cid, CRYPTO_MD5_HMAC, 0, 0, ubsec_newsession, ubsec_freesession, ubsec_process, sc); crypto_register(sc->sc_cid, CRYPTO_SHA1_HMAC, 0, 0, ubsec_newsession, ubsec_freesession, ubsec_process, sc); /* * Reset Broadcom chip */ ubsec_reset_board(sc); /* * Init Broadcom specific PCI settings */ ubsec_init_pciregs(dev); /* * Init Broadcom chip */ ubsec_init_board(sc); #ifndef UBSEC_NO_RNG if (sc->sc_flags & UBS_FLAGS_RNG) { sc->sc_statmask |= BS_STAT_MCR2_DONE; if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr), &sc->sc_rng.rng_q.q_mcr, 0)) 
goto skip_rng; if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_rngbypass), &sc->sc_rng.rng_q.q_ctx, 0)) { ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr); goto skip_rng; } if (ubsec_dma_malloc(sc, sizeof(u_int32_t) * UBSEC_RNG_BUFSIZ, &sc->sc_rng.rng_buf, 0)) { ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_ctx); ubsec_dma_free(sc, &sc->sc_rng.rng_q.q_mcr); goto skip_rng; } if (hz >= 100) sc->sc_rnghz = hz / 100; else sc->sc_rnghz = 1; /* NB: 1 means the callout runs w/o Giant locked */ callout_init(&sc->sc_rngto, 1); callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc); skip_rng: ; } #endif /* UBSEC_NO_RNG */ if (sc->sc_flags & UBS_FLAGS_KEY) { sc->sc_statmask |= BS_STAT_MCR2_DONE; crypto_kregister(sc->sc_cid, CRK_MOD_EXP, 0, ubsec_kprocess, sc); #if 0 crypto_kregister(sc->sc_cid, CRK_MOD_EXP_CRT, 0, ubsec_kprocess, sc); #endif } return (0); bad: mtx_destroy(&sc->sc_mtx); return (ENXIO); } /* * Detach a device that successfully probed. */ static int ubsec_detach(device_t dev) { struct ubsec_softc *sc = device_get_softc(dev); KASSERT(sc != NULL, ("ubsec_detach: null software carrier")); UBSEC_LOCK(sc); callout_stop(&sc->sc_rngto); crypto_unregister_all(sc->sc_cid); bus_generic_detach(dev); bus_teardown_intr(dev, sc->sc_irq, sc->sc_ih); bus_release_resource(dev, SYS_RES_IRQ, 0, sc->sc_irq); bus_dma_tag_destroy(sc->sc_dmat); bus_release_resource(dev, SYS_RES_MEMORY, BS_BAR, sc->sc_sr); UBSEC_UNLOCK(sc); mtx_destroy(&sc->sc_mtx); return (0); } /* * Stop all chip i/o so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ static void ubsec_shutdown(device_t dev) { #ifdef notyet ubsec_stop(device_get_softc(dev)); #endif } /* * Device suspend routine. 
*/
/*
 * Device suspend routine: record the suspended state only; actually
 * stopping the device is not yet implemented (see "notyet" below).
 */
static int
ubsec_suspend(device_t dev)
{
	struct ubsec_softc *sc = device_get_softc(dev);

	KASSERT(sc != NULL, ("ubsec_suspend: null software carrier"));
#ifdef notyet
	/* XXX stop the device and save PCI settings */
#endif
	sc->sc_suspended = 1;

	return (0);
}

/*
 * Device resume routine: clear the suspended flag; restarting the
 * device is not yet implemented.
 */
static int
ubsec_resume(device_t dev)
{
	struct ubsec_softc *sc = device_get_softc(dev);

	KASSERT(sc != NULL, ("ubsec_resume: null software carrier"));
#ifdef notyet
	/* XXX restore PCI settings and start the device */
#endif
	sc->sc_suspended = 0;

	return (0);
}

/*
 * UBSEC Interrupt routine
 *
 * Ack the interrupt, complete any finished MCR1 (packet) and MCR2
 * (key setup / rng) requests, reset the board on a DMA error, and
 * unblock the crypto layer if we previously reported ourselves full.
 */
static void
ubsec_intr(void *arg)
{
	struct ubsec_softc *sc = arg;
	volatile u_int32_t stat;
	struct ubsec_q *q;
	struct ubsec_dma *dmap;
	int npkts = 0, i;

	UBSEC_LOCK(sc);

	stat = READ_REG(sc, BS_STAT);
	stat &= sc->sc_statmask;
	if (stat == 0) {
		/* Not our interrupt. */
		UBSEC_UNLOCK(sc);
		return;
	}

	WRITE_REG(sc, BS_STAT, stat);		/* IACK */

	/*
	 * Check to see if we have any packets waiting for us
	 */
	if ((stat & BS_STAT_MCR1_DONE)) {
		while (!SIMPLEQ_EMPTY(&sc->sc_qchip)) {
			q = SIMPLEQ_FIRST(&sc->sc_qchip);
			dmap = q->q_dma;

			/* Stop at the first MCR the chip has not finished. */
			if ((dmap->d_dma->d_mcr.mcr_flags &
			    htole16(UBS_MCR_DONE)) == 0)
				break;

			SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip, q, q_next);

			npkts = q->q_nstacked_mcrs;
			/*
			 * search for further sc_qchip ubsec_q's that share
			 * the same MCR, and complete them too, they must be
			 * at the top.
			 */
			for (i = 0; i < npkts; i++) {
				if(q->q_stacked_mcr[i]) {
					ubsec_callback(sc, q->q_stacked_mcr[i]);
					ubsecstats.hst_opackets++;
				} else {
					break;
				}
			}

			ubsec_callback(sc, q);
			ubsecstats.hst_opackets++;
		}

		/*
		 * Don't send any more packets to the chip if there has
		 * been a DMAERR.
		 */
		if (!(stat & BS_STAT_DMAERR))
			ubsec_feed(sc);
	}

	/*
	 * Check to see if we have any key setups/rng's waiting for us
	 */
	if ((sc->sc_flags & (UBS_FLAGS_KEY|UBS_FLAGS_RNG)) &&
	    (stat & BS_STAT_MCR2_DONE)) {
		struct ubsec_q2 *q2;
		struct ubsec_mcr *mcr;

		while (!SIMPLEQ_EMPTY(&sc->sc_qchip2)) {
			q2 = SIMPLEQ_FIRST(&sc->sc_qchip2);

			bus_dmamap_sync(sc->sc_dmat, q2->q_mcr.dma_map,
			    BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE);

			mcr = (struct ubsec_mcr *)q2->q_mcr.dma_vaddr;
			if ((mcr->mcr_flags & htole16(UBS_MCR_DONE)) == 0) {
				/* Not done yet; re-arm the map and stop. */
				bus_dmamap_sync(sc->sc_dmat,
				    q2->q_mcr.dma_map,
				    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
				break;
			}
			SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip2, q2, q_next);
			ubsec_callback2(sc, q2);
			/*
			 * Don't send any more packets to the chip if there
			 * has been a DMAERR.
			 */
			if (!(stat & BS_STAT_DMAERR))
				ubsec_feed2(sc);
		}
	}

	/*
	 * Check to see if we got any DMA Error
	 */
	if (stat & BS_STAT_DMAERR) {
#ifdef UBSEC_DEBUG
		if (ubsec_debug) {
			volatile u_int32_t a = READ_REG(sc, BS_ERR);

			printf("dmaerr %s@%08x\n",
			    (a & BS_ERR_READ) ? "read" : "write",
			    a & BS_ERR_ADDR);
		}
#endif /* UBSEC_DEBUG */
		ubsecstats.hst_dmaerr++;
		ubsec_totalreset(sc);
		ubsec_feed(sc);
	}

	if (sc->sc_needwakeup) {		/* XXX check high watermark */
		int wakeup = sc->sc_needwakeup & (CRYPTO_SYMQ|CRYPTO_ASYMQ);
#ifdef UBSEC_DEBUG
		if (ubsec_debug)
			device_printf(sc->sc_dev, "wakeup crypto (%x)\n",
				sc->sc_needwakeup);
#endif /* UBSEC_DEBUG */
		sc->sc_needwakeup &= ~wakeup;
		crypto_unblock(sc->sc_cid, wakeup);
	}

	UBSEC_UNLOCK(sc);
}

/*
 * ubsec_feed() - aggregate and post requests to chip
 *
 * When two or more requests are queued, stack up to UBS_MAX_AGGR of
 * them behind a single MCR so the chip completes them with a single
 * interrupt; otherwise (feed1) post queued requests one at a time.
 */
static int
ubsec_feed(struct ubsec_softc *sc)
{
	struct ubsec_q *q, *q2;
	int npkts, i;
	void *v;
	u_int32_t stat;

	npkts = sc->sc_nqueue;
	if (npkts > UBS_MAX_AGGR)
		npkts = UBS_MAX_AGGR;
	if (npkts > ubsecstats.hst_maxbatch)
		ubsecstats.hst_maxbatch = npkts;
	if (npkts < 2)
		goto feed1;
	ubsecstats.hst_totbatch += npkts-1;

	if ((stat = READ_REG(sc, BS_STAT)) &
	    (BS_STAT_MCR1_FULL | BS_STAT_DMAERR)) {
		if(stat & BS_STAT_DMAERR) {
			ubsec_totalreset(sc);
			ubsecstats.hst_dmaerr++;
		}
		return (0);
	}

#ifdef UBSEC_DEBUG
	if (ubsec_debug)
		printf("merging %d records\n", npkts);
#endif /* UBSEC_DEBUG */

	q = SIMPLEQ_FIRST(&sc->sc_queue);
	SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, q, q_next);
	--sc->sc_nqueue;

	bus_dmamap_sync(sc->sc_dmat, q->q_src_map, BUS_DMASYNC_PREWRITE);
	if (q->q_dst_map != NULL)
		bus_dmamap_sync(sc->sc_dmat, q->q_dst_map, BUS_DMASYNC_PREREAD);

	q->q_nstacked_mcrs = npkts - 1;		/* Number of packets stacked */

	for (i = 0; i < q->q_nstacked_mcrs; i++) {
		q2 = SIMPLEQ_FIRST(&sc->sc_queue);
		bus_dmamap_sync(sc->sc_dmat, q2->q_src_map,
		    BUS_DMASYNC_PREWRITE);
		if (q2->q_dst_map != NULL)
			bus_dmamap_sync(sc->sc_dmat,
			    q2->q_dst_map, BUS_DMASYNC_PREREAD);
		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, q2, q_next);
		--sc->sc_nqueue;

		/*
		 * Append q2's MCR payload (everything past the common
		 * header) to the lead request's MCR "add" slot i.
		 */
		v = (void*)(((char *)&q2->q_dma->d_dma->d_mcr) +
		    sizeof(struct ubsec_mcr) - sizeof(struct ubsec_mcr_add));
		bcopy(v, &q->q_dma->d_dma->d_mcradd[i],
		    sizeof(struct ubsec_mcr_add));
		q->q_stacked_mcr[i] = q2;
	}
	q->q_dma->d_dma->d_mcr.mcr_pkts = htole16(npkts);
	SIMPLEQ_INSERT_TAIL(&sc->sc_qchip, q, q_next);
	bus_dmamap_sync(sc->sc_dmat, q->q_dma->d_alloc.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/* Hand the aggregated MCR's physical address to the chip. */
	WRITE_REG(sc, BS_MCR1, q->q_dma->d_alloc.dma_paddr +
	    offsetof(struct ubsec_dmachunk, d_mcr));
	return (0);

feed1:
	while (!SIMPLEQ_EMPTY(&sc->sc_queue)) {
		if ((stat = READ_REG(sc, BS_STAT)) &
		    (BS_STAT_MCR1_FULL | BS_STAT_DMAERR)) {
			if(stat & BS_STAT_DMAERR) {
				ubsec_totalreset(sc);
				ubsecstats.hst_dmaerr++;
			}
			break;
		}

		q = SIMPLEQ_FIRST(&sc->sc_queue);

		bus_dmamap_sync(sc->sc_dmat, q->q_src_map,
		    BUS_DMASYNC_PREWRITE);
		if (q->q_dst_map != NULL)
			bus_dmamap_sync(sc->sc_dmat, q->q_dst_map,
			    BUS_DMASYNC_PREREAD);
		bus_dmamap_sync(sc->sc_dmat, q->q_dma->d_alloc.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		WRITE_REG(sc, BS_MCR1, q->q_dma->d_alloc.dma_paddr +
		    offsetof(struct ubsec_dmachunk, d_mcr));
#ifdef UBSEC_DEBUG
		if (ubsec_debug)
			printf("feed: q->chip %p %08x stat %08x\n",
			    q, (u_int32_t)vtophys(&q->q_dma->d_dma->d_mcr),
			    stat);
#endif /* UBSEC_DEBUG */
		SIMPLEQ_REMOVE_HEAD(&sc->sc_queue, q, q_next);
		--sc->sc_nqueue;
		SIMPLEQ_INSERT_TAIL(&sc->sc_qchip, q, q_next);
	}
	return (0);
}

/*
 * Allocate a new 'session' and return an encoded session id.  'sidp'
 * contains our registration id, and should contain an encoded session
 * id on successful allocation.
*/
/*
 * Allocate a crypto session supporting at most one MAC descriptor
 * (MD5/SHA1 HMAC) and one cipher descriptor (DES/3DES CBC).  Session
 * state (3DES key words, precomputed HMAC inner/outer hash state,
 * initial IV) lives in the softc's session array, which grows on
 * demand.  On success writes the encoded session id to *sidp and
 * returns 0; otherwise returns an errno value.
 */
static int
ubsec_newsession(void *arg, u_int32_t *sidp, struct cryptoini *cri)
{
	struct cryptoini *c, *encini = NULL, *macini = NULL;
	struct ubsec_softc *sc = arg;
	struct ubsec_session *ses = NULL;
	MD5_CTX md5ctx;
	SHA1_CTX sha1ctx;
	int i, sesn;

	KASSERT(sc != NULL, ("ubsec_newsession: null softc"));
	if (sidp == NULL || cri == NULL || sc == NULL)
		return (EINVAL);

	/* Sort descriptors into at most one MAC and one cipher. */
	for (c = cri; c != NULL; c = c->cri_next) {
		if (c->cri_alg == CRYPTO_MD5_HMAC ||
		    c->cri_alg == CRYPTO_SHA1_HMAC) {
			if (macini)
				return (EINVAL);
			macini = c;
		} else if (c->cri_alg == CRYPTO_DES_CBC ||
		    c->cri_alg == CRYPTO_3DES_CBC) {
			if (encini)
				return (EINVAL);
			encini = c;
		} else
			return (EINVAL);
	}
	if (encini == NULL && macini == NULL)
		return (EINVAL);

	if (sc->sc_sessions == NULL) {
		/* First session ever: allocate a one-entry array. */
		ses = sc->sc_sessions = (struct ubsec_session *)malloc(
		    sizeof(struct ubsec_session), M_DEVBUF, M_NOWAIT);
		if (ses == NULL)
			return (ENOMEM);
		sesn = 0;
		sc->sc_nsessions = 1;
	} else {
		/* Reuse a free slot if one exists... */
		for (sesn = 0; sesn < sc->sc_nsessions; sesn++) {
			if (sc->sc_sessions[sesn].ses_used == 0) {
				ses = &sc->sc_sessions[sesn];
				break;
			}
		}

		if (ses == NULL) {
			/* ...otherwise grow the array by one entry. */
			sesn = sc->sc_nsessions;
			ses = (struct ubsec_session *)malloc((sesn + 1) *
			    sizeof(struct ubsec_session), M_DEVBUF, M_NOWAIT);
			if (ses == NULL)
				return (ENOMEM);
			bcopy(sc->sc_sessions, ses, sesn *
			    sizeof(struct ubsec_session));
			/* Zeroize the old array (holds keys) before free. */
			bzero(sc->sc_sessions, sesn *
			    sizeof(struct ubsec_session));
			free(sc->sc_sessions, M_DEVBUF);
			sc->sc_sessions = ses;
			ses = &sc->sc_sessions[sesn];
			sc->sc_nsessions++;
		}
	}

	bzero(ses, sizeof(struct ubsec_session));
	ses->ses_used = 1;
	if (encini) {
		/* get an IV, network byte order */
		/* XXX may read fewer than requested */
		read_random(ses->ses_iv, sizeof(ses->ses_iv));

		/* Go ahead and compute key in ubsec's byte order */
		if (encini->cri_alg == CRYPTO_DES_CBC) {
			/*
			 * Single DES: replicate the 8-byte key into all
			 * three 3DES key slots (word offsets 0, 2, 4),
			 * i.e. EDE with K1 == K2 == K3.
			 */
			bcopy(encini->cri_key, &ses->ses_deskey[0], 8);
			bcopy(encini->cri_key, &ses->ses_deskey[2], 8);
			bcopy(encini->cri_key, &ses->ses_deskey[4], 8);
		} else
			bcopy(encini->cri_key, ses->ses_deskey, 24);

		SWAP32(ses->ses_deskey[0]);
		SWAP32(ses->ses_deskey[1]);
		SWAP32(ses->ses_deskey[2]);
		SWAP32(ses->ses_deskey[3]);
		SWAP32(ses->ses_deskey[4]);
		SWAP32(ses->ses_deskey[5]);
	}

	if (macini) {
		/*
		 * Precompute the HMAC inner hash state: hash the
		 * ipad-XORed key padded out to the HMAC block length.
		 */
		for (i = 0; i < macini->cri_klen / 8; i++)
			macini->cri_key[i] ^= HMAC_IPAD_VAL;

		if (macini->cri_alg == CRYPTO_MD5_HMAC) {
			MD5Init(&md5ctx);
			MD5Update(&md5ctx, macini->cri_key,
			    macini->cri_klen / 8);
			MD5Update(&md5ctx, hmac_ipad_buffer,
			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
			bcopy(md5ctx.state, ses->ses_hminner,
			    sizeof(md5ctx.state));
		} else {
			SHA1Init(&sha1ctx);
			SHA1Update(&sha1ctx, macini->cri_key,
			    macini->cri_klen / 8);
			SHA1Update(&sha1ctx, hmac_ipad_buffer,
			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
			bcopy(sha1ctx.h.b32, ses->ses_hminner,
			    sizeof(sha1ctx.h.b32));
		}

		/* Flip the key from ipad-XORed to opad-XORed form. */
		for (i = 0; i < macini->cri_klen / 8; i++)
			macini->cri_key[i] ^= (HMAC_IPAD_VAL ^ HMAC_OPAD_VAL);

		/* Precompute the HMAC outer hash state likewise. */
		if (macini->cri_alg == CRYPTO_MD5_HMAC) {
			MD5Init(&md5ctx);
			MD5Update(&md5ctx, macini->cri_key,
			    macini->cri_klen / 8);
			MD5Update(&md5ctx, hmac_opad_buffer,
			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
			bcopy(md5ctx.state, ses->ses_hmouter,
			    sizeof(md5ctx.state));
		} else {
			SHA1Init(&sha1ctx);
			SHA1Update(&sha1ctx, macini->cri_key,
			    macini->cri_klen / 8);
			SHA1Update(&sha1ctx, hmac_opad_buffer,
			    HMAC_BLOCK_LEN - (macini->cri_klen / 8));
			bcopy(sha1ctx.h.b32, ses->ses_hmouter,
			    sizeof(sha1ctx.h.b32));
		}

		/* Restore the caller's key to its original value. */
		for (i = 0; i < macini->cri_klen / 8; i++)
			macini->cri_key[i] ^= HMAC_OPAD_VAL;
	}

	*sidp = UBSEC_SID(device_get_unit(sc->sc_dev), sesn);
	return (0);
}

/*
 * Deallocate a session.
*/ static int ubsec_freesession(void *arg, u_int64_t tid) { struct ubsec_softc *sc = arg; int session; u_int32_t sid = ((u_int32_t) tid) & 0xffffffff; KASSERT(sc != NULL, ("ubsec_freesession: null softc")); if (sc == NULL) return (EINVAL); session = UBSEC_SESSION(sid); if (session >= sc->sc_nsessions) return (EINVAL); bzero(&sc->sc_sessions[session], sizeof(sc->sc_sessions[session])); return (0); } static void ubsec_op_cb(void *arg, bus_dma_segment_t *seg, int nsegs, bus_size_t mapsize, int error) { struct ubsec_operand *op = arg; KASSERT(nsegs <= UBS_MAX_SCATTER, ("Too many DMA segments returned when mapping operand")); #ifdef UBSEC_DEBUG if (ubsec_debug) printf("ubsec_op_cb: mapsize %u nsegs %d\n", (u_int) mapsize, nsegs); #endif op->mapsize = mapsize; op->nsegs = nsegs; bcopy(seg, op->segs, nsegs * sizeof (seg[0])); } static int ubsec_process(void *arg, struct cryptop *crp, int hint) { struct ubsec_q *q = NULL; int err = 0, i, j, nicealign; struct ubsec_softc *sc = arg; struct cryptodesc *crd1, *crd2, *maccrd, *enccrd; int encoffset = 0, macoffset = 0, cpskip, cpoffset; int sskip, dskip, stheend, dtheend; int16_t coffset; struct ubsec_session *ses; struct ubsec_pktctx ctx; struct ubsec_dma *dmap = NULL; if (crp == NULL || crp->crp_callback == NULL || sc == NULL) { ubsecstats.hst_invalid++; return (EINVAL); } if (UBSEC_SESSION(crp->crp_sid) >= sc->sc_nsessions) { ubsecstats.hst_invalid++; return (EINVAL); } UBSEC_LOCK(sc); if (SIMPLEQ_EMPTY(&sc->sc_freequeue)) { ubsecstats.hst_queuefull++; sc->sc_needwakeup |= CRYPTO_SYMQ; UBSEC_UNLOCK(sc); return (ERESTART); } q = SIMPLEQ_FIRST(&sc->sc_freequeue); SIMPLEQ_REMOVE_HEAD(&sc->sc_freequeue, q, q_next); UBSEC_UNLOCK(sc); dmap = q->q_dma; /* Save dma pointer */ bzero(q, sizeof(struct ubsec_q)); bzero(&ctx, sizeof(ctx)); q->q_sesn = UBSEC_SESSION(crp->crp_sid); q->q_dma = dmap; ses = &sc->sc_sessions[q->q_sesn]; if (crp->crp_flags & CRYPTO_F_IMBUF) { q->q_src_m = (struct mbuf *)crp->crp_buf; q->q_dst_m = (struct mbuf 
*)crp->crp_buf; } else if (crp->crp_flags & CRYPTO_F_IOV) { q->q_src_io = (struct uio *)crp->crp_buf; q->q_dst_io = (struct uio *)crp->crp_buf; } else { ubsecstats.hst_invalid++; err = EINVAL; goto errout; /* XXX we don't handle contiguous blocks! */ } bzero(&dmap->d_dma->d_mcr, sizeof(struct ubsec_mcr)); dmap->d_dma->d_mcr.mcr_pkts = htole16(1); dmap->d_dma->d_mcr.mcr_flags = 0; q->q_crp = crp; crd1 = crp->crp_desc; if (crd1 == NULL) { ubsecstats.hst_invalid++; err = EINVAL; goto errout; } crd2 = crd1->crd_next; if (crd2 == NULL) { if (crd1->crd_alg == CRYPTO_MD5_HMAC || crd1->crd_alg == CRYPTO_SHA1_HMAC) { maccrd = crd1; enccrd = NULL; } else if (crd1->crd_alg == CRYPTO_DES_CBC || crd1->crd_alg == CRYPTO_3DES_CBC) { maccrd = NULL; enccrd = crd1; } else { ubsecstats.hst_invalid++; err = EINVAL; goto errout; } } else { if ((crd1->crd_alg == CRYPTO_MD5_HMAC || crd1->crd_alg == CRYPTO_SHA1_HMAC) && (crd2->crd_alg == CRYPTO_DES_CBC || crd2->crd_alg == CRYPTO_3DES_CBC) && ((crd2->crd_flags & CRD_F_ENCRYPT) == 0)) { maccrd = crd1; enccrd = crd2; } else if ((crd1->crd_alg == CRYPTO_DES_CBC || crd1->crd_alg == CRYPTO_3DES_CBC) && (crd2->crd_alg == CRYPTO_MD5_HMAC || crd2->crd_alg == CRYPTO_SHA1_HMAC) && (crd1->crd_flags & CRD_F_ENCRYPT)) { enccrd = crd1; maccrd = crd2; } else { /* * We cannot order the ubsec as requested */ ubsecstats.hst_invalid++; err = EINVAL; goto errout; } } if (enccrd) { encoffset = enccrd->crd_skip; ctx.pc_flags |= htole16(UBS_PKTCTX_ENC_3DES); if (enccrd->crd_flags & CRD_F_ENCRYPT) { q->q_flags |= UBSEC_QFLAGS_COPYOUTIV; if (enccrd->crd_flags & CRD_F_IV_EXPLICIT) bcopy(enccrd->crd_iv, ctx.pc_iv, 8); else { ctx.pc_iv[0] = ses->ses_iv[0]; ctx.pc_iv[1] = ses->ses_iv[1]; } if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) { if (crp->crp_flags & CRYPTO_F_IMBUF) m_copyback(q->q_src_m, enccrd->crd_inject, 8, (caddr_t)ctx.pc_iv); else if (crp->crp_flags & CRYPTO_F_IOV) cuio_copyback(q->q_src_io, enccrd->crd_inject, 8, (caddr_t)ctx.pc_iv); } } else { 
ctx.pc_flags |= htole16(UBS_PKTCTX_INBOUND); if (enccrd->crd_flags & CRD_F_IV_EXPLICIT) bcopy(enccrd->crd_iv, ctx.pc_iv, 8); else if (crp->crp_flags & CRYPTO_F_IMBUF) m_copydata(q->q_src_m, enccrd->crd_inject, 8, (caddr_t)ctx.pc_iv); else if (crp->crp_flags & CRYPTO_F_IOV) cuio_copydata(q->q_src_io, enccrd->crd_inject, 8, (caddr_t)ctx.pc_iv); } ctx.pc_deskey[0] = ses->ses_deskey[0]; ctx.pc_deskey[1] = ses->ses_deskey[1]; ctx.pc_deskey[2] = ses->ses_deskey[2]; ctx.pc_deskey[3] = ses->ses_deskey[3]; ctx.pc_deskey[4] = ses->ses_deskey[4]; ctx.pc_deskey[5] = ses->ses_deskey[5]; SWAP32(ctx.pc_iv[0]); SWAP32(ctx.pc_iv[1]); } if (maccrd) { macoffset = maccrd->crd_skip; if (maccrd->crd_alg == CRYPTO_MD5_HMAC) ctx.pc_flags |= htole16(UBS_PKTCTX_AUTH_MD5); else ctx.pc_flags |= htole16(UBS_PKTCTX_AUTH_SHA1); for (i = 0; i < 5; i++) { ctx.pc_hminner[i] = ses->ses_hminner[i]; ctx.pc_hmouter[i] = ses->ses_hmouter[i]; HTOLE32(ctx.pc_hminner[i]); HTOLE32(ctx.pc_hmouter[i]); } } if (enccrd && maccrd) { /* * ubsec cannot handle packets where the end of encryption * and authentication are not the same, or where the * encrypted part begins before the authenticated part. 
*/ if ((encoffset + enccrd->crd_len) != (macoffset + maccrd->crd_len)) { ubsecstats.hst_lenmismatch++; err = EINVAL; goto errout; } if (enccrd->crd_skip < maccrd->crd_skip) { ubsecstats.hst_skipmismatch++; err = EINVAL; goto errout; } sskip = maccrd->crd_skip; cpskip = dskip = enccrd->crd_skip; stheend = maccrd->crd_len; dtheend = enccrd->crd_len; coffset = enccrd->crd_skip - maccrd->crd_skip; cpoffset = cpskip + dtheend; #ifdef UBSEC_DEBUG if (ubsec_debug) { printf("mac: skip %d, len %d, inject %d\n", maccrd->crd_skip, maccrd->crd_len, maccrd->crd_inject); printf("enc: skip %d, len %d, inject %d\n", enccrd->crd_skip, enccrd->crd_len, enccrd->crd_inject); printf("src: skip %d, len %d\n", sskip, stheend); printf("dst: skip %d, len %d\n", dskip, dtheend); printf("ubs: coffset %d, pktlen %d, cpskip %d, cpoffset %d\n", coffset, stheend, cpskip, cpoffset); } #endif } else { cpskip = dskip = sskip = macoffset + encoffset; dtheend = stheend = (enccrd)?enccrd->crd_len:maccrd->crd_len; cpoffset = cpskip + dtheend; coffset = 0; } ctx.pc_offset = htole16(coffset >> 2); if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &q->q_src_map)) { ubsecstats.hst_nomap++; err = ENOMEM; goto errout; } if (crp->crp_flags & CRYPTO_F_IMBUF) { if (bus_dmamap_load_mbuf(sc->sc_dmat, q->q_src_map, q->q_src_m, ubsec_op_cb, &q->q_src, BUS_DMA_NOWAIT) != 0) { bus_dmamap_destroy(sc->sc_dmat, q->q_src_map); q->q_src_map = NULL; ubsecstats.hst_noload++; err = ENOMEM; goto errout; } } else if (crp->crp_flags & CRYPTO_F_IOV) { if (bus_dmamap_load_uio(sc->sc_dmat, q->q_src_map, q->q_src_io, ubsec_op_cb, &q->q_src, BUS_DMA_NOWAIT) != 0) { bus_dmamap_destroy(sc->sc_dmat, q->q_src_map); q->q_src_map = NULL; ubsecstats.hst_noload++; err = ENOMEM; goto errout; } } nicealign = ubsec_dmamap_aligned(&q->q_src); dmap->d_dma->d_mcr.mcr_pktlen = htole16(stheend); #ifdef UBSEC_DEBUG if (ubsec_debug) printf("src skip: %d nicealign: %u\n", sskip, nicealign); #endif for (i = j = 0; i < q->q_src_nsegs; i++) { struct 
ubsec_pktbuf *pb; bus_size_t packl = q->q_src_segs[i].ds_len; bus_addr_t packp = q->q_src_segs[i].ds_addr; if (sskip >= packl) { sskip -= packl; continue; } packl -= sskip; packp += sskip; sskip = 0; if (packl > 0xfffc) { err = EIO; goto errout; } if (j == 0) pb = &dmap->d_dma->d_mcr.mcr_ipktbuf; else pb = &dmap->d_dma->d_sbuf[j - 1]; pb->pb_addr = htole32(packp); if (stheend) { if (packl > stheend) { pb->pb_len = htole32(stheend); stheend = 0; } else { pb->pb_len = htole32(packl); stheend -= packl; } } else pb->pb_len = htole32(packl); if ((i + 1) == q->q_src_nsegs) pb->pb_next = 0; else pb->pb_next = htole32(dmap->d_alloc.dma_paddr + offsetof(struct ubsec_dmachunk, d_sbuf[j])); j++; } if (enccrd == NULL && maccrd != NULL) { dmap->d_dma->d_mcr.mcr_opktbuf.pb_addr = 0; dmap->d_dma->d_mcr.mcr_opktbuf.pb_len = 0; dmap->d_dma->d_mcr.mcr_opktbuf.pb_next = htole32(dmap->d_alloc.dma_paddr + offsetof(struct ubsec_dmachunk, d_macbuf[0])); #ifdef UBSEC_DEBUG if (ubsec_debug) printf("opkt: %x %x %x\n", dmap->d_dma->d_mcr.mcr_opktbuf.pb_addr, dmap->d_dma->d_mcr.mcr_opktbuf.pb_len, dmap->d_dma->d_mcr.mcr_opktbuf.pb_next); #endif } else { if (crp->crp_flags & CRYPTO_F_IOV) { if (!nicealign) { ubsecstats.hst_iovmisaligned++; err = EINVAL; goto errout; } if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &q->q_dst_map)) { ubsecstats.hst_nomap++; err = ENOMEM; goto errout; } if (bus_dmamap_load_uio(sc->sc_dmat, q->q_dst_map, q->q_dst_io, ubsec_op_cb, &q->q_dst, BUS_DMA_NOWAIT) != 0) { bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map); q->q_dst_map = NULL; ubsecstats.hst_noload++; err = ENOMEM; goto errout; } } else if (crp->crp_flags & CRYPTO_F_IMBUF) { if (nicealign) { q->q_dst = q->q_src; } else { int totlen, len; struct mbuf *m, *top, **mp; ubsecstats.hst_unaligned++; totlen = q->q_src_mapsize; if (q->q_src_m->m_flags & M_PKTHDR) { len = MHLEN; MGETHDR(m, M_DONTWAIT, MT_DATA); + if (m && !m_dup_pkthdr(m, q->q_src_m, M_DONTWAIT)) { + m_free(m); + m = NULL; + } } else { len = MLEN; 
MGET(m, M_DONTWAIT, MT_DATA); } if (m == NULL) { ubsecstats.hst_nombuf++; err = sc->sc_nqueue ? ERESTART : ENOMEM; goto errout; } - if (len == MHLEN) - M_COPY_PKTHDR(m, q->q_src_m); if (totlen >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); ubsecstats.hst_nomcl++; err = sc->sc_nqueue ? ERESTART : ENOMEM; goto errout; } len = MCLBYTES; } m->m_len = len; top = NULL; mp = ⊤ while (totlen > 0) { if (top) { MGET(m, M_DONTWAIT, MT_DATA); if (m == NULL) { m_freem(top); ubsecstats.hst_nombuf++; err = sc->sc_nqueue ? ERESTART : ENOMEM; goto errout; } len = MLEN; } if (top && totlen >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) { *mp = m; m_freem(top); ubsecstats.hst_nomcl++; err = sc->sc_nqueue ? ERESTART : ENOMEM; goto errout; } len = MCLBYTES; } m->m_len = len = min(totlen, len); totlen -= len; *mp = m; mp = &m->m_next; } q->q_dst_m = top; ubsec_mcopy(q->q_src_m, q->q_dst_m, cpskip, cpoffset); if (bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &q->q_dst_map) != 0) { ubsecstats.hst_nomap++; err = ENOMEM; goto errout; } if (bus_dmamap_load_mbuf(sc->sc_dmat, q->q_dst_map, q->q_dst_m, ubsec_op_cb, &q->q_dst, BUS_DMA_NOWAIT) != 0) { bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map); q->q_dst_map = NULL; ubsecstats.hst_noload++; err = ENOMEM; goto errout; } } } else { ubsecstats.hst_invalid++; err = EINVAL; goto errout; } #ifdef UBSEC_DEBUG if (ubsec_debug) printf("dst skip: %d\n", dskip); #endif for (i = j = 0; i < q->q_dst_nsegs; i++) { struct ubsec_pktbuf *pb; bus_size_t packl = q->q_dst_segs[i].ds_len; bus_addr_t packp = q->q_dst_segs[i].ds_addr; if (dskip >= packl) { dskip -= packl; continue; } packl -= dskip; packp += dskip; dskip = 0; if (packl > 0xfffc) { err = EIO; goto errout; } if (j == 0) pb = &dmap->d_dma->d_mcr.mcr_opktbuf; else pb = &dmap->d_dma->d_dbuf[j - 1]; pb->pb_addr = htole32(packp); if (dtheend) { if (packl > dtheend) { pb->pb_len = htole32(dtheend); dtheend = 0; } else { pb->pb_len = htole32(packl); 
dtheend -= packl; } } else pb->pb_len = htole32(packl); if ((i + 1) == q->q_dst_nsegs) { if (maccrd) pb->pb_next = htole32(dmap->d_alloc.dma_paddr + offsetof(struct ubsec_dmachunk, d_macbuf[0])); else pb->pb_next = 0; } else pb->pb_next = htole32(dmap->d_alloc.dma_paddr + offsetof(struct ubsec_dmachunk, d_dbuf[j])); j++; } } dmap->d_dma->d_mcr.mcr_cmdctxp = htole32(dmap->d_alloc.dma_paddr + offsetof(struct ubsec_dmachunk, d_ctx)); if (sc->sc_flags & UBS_FLAGS_LONGCTX) { struct ubsec_pktctx_long *ctxl; ctxl = (struct ubsec_pktctx_long *)(dmap->d_alloc.dma_vaddr + offsetof(struct ubsec_dmachunk, d_ctx)); /* transform small context into long context */ ctxl->pc_len = htole16(sizeof(struct ubsec_pktctx_long)); ctxl->pc_type = htole16(UBS_PKTCTX_TYPE_IPSEC); ctxl->pc_flags = ctx.pc_flags; ctxl->pc_offset = ctx.pc_offset; for (i = 0; i < 6; i++) ctxl->pc_deskey[i] = ctx.pc_deskey[i]; for (i = 0; i < 5; i++) ctxl->pc_hminner[i] = ctx.pc_hminner[i]; for (i = 0; i < 5; i++) ctxl->pc_hmouter[i] = ctx.pc_hmouter[i]; ctxl->pc_iv[0] = ctx.pc_iv[0]; ctxl->pc_iv[1] = ctx.pc_iv[1]; } else bcopy(&ctx, dmap->d_alloc.dma_vaddr + offsetof(struct ubsec_dmachunk, d_ctx), sizeof(struct ubsec_pktctx)); UBSEC_LOCK(sc); SIMPLEQ_INSERT_TAIL(&sc->sc_queue, q, q_next); sc->sc_nqueue++; ubsecstats.hst_ipackets++; ubsecstats.hst_ibytes += dmap->d_alloc.dma_size; if ((hint & CRYPTO_HINT_MORE) == 0 || sc->sc_nqueue >= ubsec_maxbatch) ubsec_feed(sc); UBSEC_UNLOCK(sc); return (0); errout: if (q != NULL) { if ((q->q_dst_m != NULL) && (q->q_src_m != q->q_dst_m)) m_freem(q->q_dst_m); if (q->q_dst_map != NULL && q->q_dst_map != q->q_src_map) { bus_dmamap_unload(sc->sc_dmat, q->q_dst_map); bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map); } if (q->q_src_map != NULL) { bus_dmamap_unload(sc->sc_dmat, q->q_src_map); bus_dmamap_destroy(sc->sc_dmat, q->q_src_map); } UBSEC_LOCK(sc); SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next); UBSEC_UNLOCK(sc); } if (err != ERESTART) { crp->crp_etype = err; 
crypto_done(crp); } else { sc->sc_needwakeup |= CRYPTO_SYMQ; } return (err); } static void ubsec_callback(struct ubsec_softc *sc, struct ubsec_q *q) { struct cryptop *crp = (struct cryptop *)q->q_crp; struct cryptodesc *crd; struct ubsec_dma *dmap = q->q_dma; bus_dmamap_sync(sc->sc_dmat, dmap->d_alloc.dma_map, BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE); if (q->q_dst_map != NULL && q->q_dst_map != q->q_src_map) { bus_dmamap_sync(sc->sc_dmat, q->q_dst_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->sc_dmat, q->q_dst_map); bus_dmamap_destroy(sc->sc_dmat, q->q_dst_map); } bus_dmamap_sync(sc->sc_dmat, q->q_src_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, q->q_src_map); bus_dmamap_destroy(sc->sc_dmat, q->q_src_map); if ((crp->crp_flags & CRYPTO_F_IMBUF) && (q->q_src_m != q->q_dst_m)) { m_freem(q->q_src_m); crp->crp_buf = (caddr_t)q->q_dst_m; } ubsecstats.hst_obytes += ((struct mbuf *)crp->crp_buf)->m_len; /* copy out IV for future use */ if (q->q_flags & UBSEC_QFLAGS_COPYOUTIV) { for (crd = crp->crp_desc; crd; crd = crd->crd_next) { if (crd->crd_alg != CRYPTO_DES_CBC && crd->crd_alg != CRYPTO_3DES_CBC) continue; if (crp->crp_flags & CRYPTO_F_IMBUF) m_copydata((struct mbuf *)crp->crp_buf, crd->crd_skip + crd->crd_len - 8, 8, (caddr_t)sc->sc_sessions[q->q_sesn].ses_iv); else if (crp->crp_flags & CRYPTO_F_IOV) { cuio_copydata((struct uio *)crp->crp_buf, crd->crd_skip + crd->crd_len - 8, 8, (caddr_t)sc->sc_sessions[q->q_sesn].ses_iv); } break; } } for (crd = crp->crp_desc; crd; crd = crd->crd_next) { if (crd->crd_alg != CRYPTO_MD5_HMAC && crd->crd_alg != CRYPTO_SHA1_HMAC) continue; if (crp->crp_flags & CRYPTO_F_IMBUF) m_copyback((struct mbuf *)crp->crp_buf, crd->crd_inject, 12, (caddr_t)dmap->d_dma->d_macbuf); else if (crp->crp_flags & CRYPTO_F_IOV && crp->crp_mac) bcopy((caddr_t)dmap->d_dma->d_macbuf, crp->crp_mac, 12); break; } SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next); crypto_done(crp); } static void ubsec_mcopy(struct mbuf *srcm, struct mbuf 
*dstm, int hoffset, int toffset) { int i, j, dlen, slen; caddr_t dptr, sptr; j = 0; sptr = srcm->m_data; slen = srcm->m_len; dptr = dstm->m_data; dlen = dstm->m_len; while (1) { for (i = 0; i < min(slen, dlen); i++) { if (j < hoffset || j >= toffset) *dptr++ = *sptr++; slen--; dlen--; j++; } if (slen == 0) { srcm = srcm->m_next; if (srcm == NULL) return; sptr = srcm->m_data; slen = srcm->m_len; } if (dlen == 0) { dstm = dstm->m_next; if (dstm == NULL) return; dptr = dstm->m_data; dlen = dstm->m_len; } } } /* * feed the key generator, must be called at splimp() or higher. */ static int ubsec_feed2(struct ubsec_softc *sc) { struct ubsec_q2 *q; while (!SIMPLEQ_EMPTY(&sc->sc_queue2)) { if (READ_REG(sc, BS_STAT) & BS_STAT_MCR2_FULL) break; q = SIMPLEQ_FIRST(&sc->sc_queue2); bus_dmamap_sync(sc->sc_dmat, q->q_mcr.dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, q->q_ctx.dma_map, BUS_DMASYNC_PREWRITE); WRITE_REG(sc, BS_MCR2, q->q_mcr.dma_paddr); SIMPLEQ_REMOVE_HEAD(&sc->sc_queue2, q, q_next); --sc->sc_nqueue2; SIMPLEQ_INSERT_TAIL(&sc->sc_qchip2, q, q_next); } return (0); } /* * Callback for handling random numbers */ static void ubsec_callback2(struct ubsec_softc *sc, struct ubsec_q2 *q) { struct cryptkop *krp; struct ubsec_ctx_keyop *ctx; ctx = (struct ubsec_ctx_keyop *)q->q_ctx.dma_vaddr; bus_dmamap_sync(sc->sc_dmat, q->q_ctx.dma_map, BUS_DMASYNC_POSTWRITE); switch (q->q_type) { #ifndef UBSEC_NO_RNG case UBS_CTXOP_RNGBYPASS: { struct ubsec_q2_rng *rng = (struct ubsec_q2_rng *)q; bus_dmamap_sync(sc->sc_dmat, rng->rng_buf.dma_map, BUS_DMASYNC_POSTREAD); random_harvest(rng->rng_buf.dma_vaddr, UBSEC_RNG_BUFSIZ*sizeof (u_int32_t), UBSEC_RNG_BUFSIZ*sizeof (u_int32_t)*NBBY, 0, RANDOM_PURE); rng->rng_used = 0; callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc); break; } #endif case UBS_CTXOP_MODEXP: { struct ubsec_q2_modexp *me = (struct ubsec_q2_modexp *)q; u_int rlen, clen; krp = me->me_krp; rlen = (me->me_modbits + 7) / 8; clen = 
(krp->krp_param[krp->krp_iparams].crp_nbits + 7) / 8; bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map, BUS_DMASYNC_POSTWRITE); if (clen < rlen) krp->krp_status = E2BIG; else { if (sc->sc_flags & UBS_FLAGS_HWNORM) { bzero(krp->krp_param[krp->krp_iparams].crp_p, (krp->krp_param[krp->krp_iparams].crp_nbits + 7) / 8); bcopy(me->me_C.dma_vaddr, krp->krp_param[krp->krp_iparams].crp_p, (me->me_modbits + 7) / 8); } else ubsec_kshift_l(me->me_shiftbits, me->me_C.dma_vaddr, me->me_normbits, krp->krp_param[krp->krp_iparams].crp_p, krp->krp_param[krp->krp_iparams].crp_nbits); } crypto_kdone(krp); /* bzero all potentially sensitive data */ bzero(me->me_E.dma_vaddr, me->me_E.dma_size); bzero(me->me_M.dma_vaddr, me->me_M.dma_size); bzero(me->me_C.dma_vaddr, me->me_C.dma_size); bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size); /* Can't free here, so put us on the free list. */ SIMPLEQ_INSERT_TAIL(&sc->sc_q2free, &me->me_q, q_next); break; } case UBS_CTXOP_RSAPRIV: { struct ubsec_q2_rsapriv *rp = (struct ubsec_q2_rsapriv *)q; u_int len; krp = rp->rpr_krp; bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgin.dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgout.dma_map, BUS_DMASYNC_POSTREAD); len = (krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT].crp_nbits + 7) / 8; bcopy(rp->rpr_msgout.dma_vaddr, krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT].crp_p, len); crypto_kdone(krp); bzero(rp->rpr_msgin.dma_vaddr, rp->rpr_msgin.dma_size); bzero(rp->rpr_msgout.dma_vaddr, rp->rpr_msgout.dma_size); bzero(rp->rpr_q.q_ctx.dma_vaddr, rp->rpr_q.q_ctx.dma_size); /* Can't free here, so put us on the free list. 
*/ SIMPLEQ_INSERT_TAIL(&sc->sc_q2free, &rp->rpr_q, q_next); break; } default: device_printf(sc->sc_dev, "unknown ctx op: %x\n", letoh16(ctx->ctx_op)); break; } } #ifndef UBSEC_NO_RNG static void ubsec_rng(void *vsc) { struct ubsec_softc *sc = vsc; struct ubsec_q2_rng *rng = &sc->sc_rng; struct ubsec_mcr *mcr; struct ubsec_ctx_rngbypass *ctx; UBSEC_LOCK(sc); if (rng->rng_used) { UBSEC_UNLOCK(sc); return; } sc->sc_nqueue2++; if (sc->sc_nqueue2 >= UBS_MAX_NQUEUE) goto out; mcr = (struct ubsec_mcr *)rng->rng_q.q_mcr.dma_vaddr; ctx = (struct ubsec_ctx_rngbypass *)rng->rng_q.q_ctx.dma_vaddr; mcr->mcr_pkts = htole16(1); mcr->mcr_flags = 0; mcr->mcr_cmdctxp = htole32(rng->rng_q.q_ctx.dma_paddr); mcr->mcr_ipktbuf.pb_addr = mcr->mcr_ipktbuf.pb_next = 0; mcr->mcr_ipktbuf.pb_len = 0; mcr->mcr_reserved = mcr->mcr_pktlen = 0; mcr->mcr_opktbuf.pb_addr = htole32(rng->rng_buf.dma_paddr); mcr->mcr_opktbuf.pb_len = htole32(((sizeof(u_int32_t) * UBSEC_RNG_BUFSIZ)) & UBS_PKTBUF_LEN); mcr->mcr_opktbuf.pb_next = 0; ctx->rbp_len = htole16(sizeof(struct ubsec_ctx_rngbypass)); ctx->rbp_op = htole16(UBS_CTXOP_RNGBYPASS); rng->rng_q.q_type = UBS_CTXOP_RNGBYPASS; bus_dmamap_sync(sc->sc_dmat, rng->rng_buf.dma_map, BUS_DMASYNC_PREREAD); SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &rng->rng_q, q_next); rng->rng_used = 1; ubsec_feed2(sc); ubsecstats.hst_rng++; UBSEC_UNLOCK(sc); return; out: /* * Something weird happened, generate our own call back. 
*/ sc->sc_nqueue2--; UBSEC_UNLOCK(sc); callout_reset(&sc->sc_rngto, sc->sc_rnghz, ubsec_rng, sc); } #endif /* UBSEC_NO_RNG */ static void ubsec_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *paddr = (bus_addr_t*) arg; *paddr = segs->ds_addr; } static int ubsec_dma_malloc( struct ubsec_softc *sc, bus_size_t size, struct ubsec_dma_alloc *dma, int mapflags ) { int r; r = bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &dma->dma_map); if (r != 0) goto fail_0; r = bus_dmamem_alloc(sc->sc_dmat, (void**) &dma->dma_vaddr, BUS_DMA_NOWAIT, &dma->dma_map); if (r != 0) goto fail_1; r = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_vaddr, size, ubsec_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); if (r != 0) goto fail_2; dma->dma_size = size; return (0); fail_2: bus_dmamap_unload(sc->sc_dmat, dma->dma_map); fail_1: bus_dmamem_free(sc->sc_dmat, dma->dma_vaddr, dma->dma_map); fail_0: bus_dmamap_destroy(sc->sc_dmat, dma->dma_map); dma->dma_map = NULL; return (r); } static void ubsec_dma_free(struct ubsec_softc *sc, struct ubsec_dma_alloc *dma) { bus_dmamap_unload(sc->sc_dmat, dma->dma_map); bus_dmamem_free(sc->sc_dmat, dma->dma_vaddr, dma->dma_map); bus_dmamap_destroy(sc->sc_dmat, dma->dma_map); } /* * Resets the board. Values in the regesters are left as is * from the reset (i.e. initial values are assigned elsewhere). */ static void ubsec_reset_board(struct ubsec_softc *sc) { volatile u_int32_t ctrl; ctrl = READ_REG(sc, BS_CTRL); ctrl |= BS_CTRL_RESET; WRITE_REG(sc, BS_CTRL, ctrl); /* * Wait aprox. 
30 PCI clocks = 900 ns = 0.9 us */ DELAY(10); } /* * Init Broadcom registers */ static void ubsec_init_board(struct ubsec_softc *sc) { u_int32_t ctrl; ctrl = READ_REG(sc, BS_CTRL); ctrl &= ~(BS_CTRL_BE32 | BS_CTRL_BE64); ctrl |= BS_CTRL_LITTLE_ENDIAN | BS_CTRL_MCR1INT; if (sc->sc_flags & (UBS_FLAGS_KEY|UBS_FLAGS_RNG)) ctrl |= BS_CTRL_MCR2INT; else ctrl &= ~BS_CTRL_MCR2INT; if (sc->sc_flags & UBS_FLAGS_HWNORM) ctrl &= ~BS_CTRL_SWNORM; WRITE_REG(sc, BS_CTRL, ctrl); } /* * Init Broadcom PCI registers */ static void ubsec_init_pciregs(device_t dev) { #if 0 u_int32_t misc; misc = pci_conf_read(pc, pa->pa_tag, BS_RTY_TOUT); misc = (misc & ~(UBS_PCI_RTY_MASK << UBS_PCI_RTY_SHIFT)) | ((UBS_DEF_RTY & 0xff) << UBS_PCI_RTY_SHIFT); misc = (misc & ~(UBS_PCI_TOUT_MASK << UBS_PCI_TOUT_SHIFT)) | ((UBS_DEF_TOUT & 0xff) << UBS_PCI_TOUT_SHIFT); pci_conf_write(pc, pa->pa_tag, BS_RTY_TOUT, misc); #endif /* * This will set the cache line size to 1, this will * force the BCM58xx chip just to do burst read/writes. * Cache line read/writes are to slow */ pci_write_config(dev, PCIR_CACHELNSZ, UBS_DEF_CACHELINE, 1); } /* * Clean up after a chip crash. 
* It is assumed that the caller in splimp() */ static void ubsec_cleanchip(struct ubsec_softc *sc) { struct ubsec_q *q; while (!SIMPLEQ_EMPTY(&sc->sc_qchip)) { q = SIMPLEQ_FIRST(&sc->sc_qchip); SIMPLEQ_REMOVE_HEAD(&sc->sc_qchip, q, q_next); ubsec_free_q(sc, q); } } /* * free a ubsec_q * It is assumed that the caller is within spimp() */ static int ubsec_free_q(struct ubsec_softc *sc, struct ubsec_q *q) { struct ubsec_q *q2; struct cryptop *crp; int npkts; int i; npkts = q->q_nstacked_mcrs; for (i = 0; i < npkts; i++) { if(q->q_stacked_mcr[i]) { q2 = q->q_stacked_mcr[i]; if ((q2->q_dst_m != NULL) && (q2->q_src_m != q2->q_dst_m)) m_freem(q2->q_dst_m); crp = (struct cryptop *)q2->q_crp; SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q2, q_next); crp->crp_etype = EFAULT; crypto_done(crp); } else { break; } } /* * Free header MCR */ if ((q->q_dst_m != NULL) && (q->q_src_m != q->q_dst_m)) m_freem(q->q_dst_m); crp = (struct cryptop *)q->q_crp; SIMPLEQ_INSERT_TAIL(&sc->sc_freequeue, q, q_next); crp->crp_etype = EFAULT; crypto_done(crp); return(0); } /* * Routine to reset the chip and clean up. 
* It is assumed that the caller is in splimp() */ static void ubsec_totalreset(struct ubsec_softc *sc) { ubsec_reset_board(sc); ubsec_init_board(sc); ubsec_cleanchip(sc); } static int ubsec_dmamap_aligned(struct ubsec_operand *op) { int i; for (i = 0; i < op->nsegs; i++) { if (op->segs[i].ds_addr & 3) return (0); if ((i != (op->nsegs - 1)) && (op->segs[i].ds_len & 3)) return (0); } return (1); } static void ubsec_kfree(struct ubsec_softc *sc, struct ubsec_q2 *q) { switch (q->q_type) { case UBS_CTXOP_MODEXP: { struct ubsec_q2_modexp *me = (struct ubsec_q2_modexp *)q; ubsec_dma_free(sc, &me->me_q.q_mcr); ubsec_dma_free(sc, &me->me_q.q_ctx); ubsec_dma_free(sc, &me->me_M); ubsec_dma_free(sc, &me->me_E); ubsec_dma_free(sc, &me->me_C); ubsec_dma_free(sc, &me->me_epb); free(me, M_DEVBUF); break; } case UBS_CTXOP_RSAPRIV: { struct ubsec_q2_rsapriv *rp = (struct ubsec_q2_rsapriv *)q; ubsec_dma_free(sc, &rp->rpr_q.q_mcr); ubsec_dma_free(sc, &rp->rpr_q.q_ctx); ubsec_dma_free(sc, &rp->rpr_msgin); ubsec_dma_free(sc, &rp->rpr_msgout); free(rp, M_DEVBUF); break; } default: device_printf(sc->sc_dev, "invalid kfree 0x%x\n", q->q_type); break; } } static int ubsec_kprocess(void *arg, struct cryptkop *krp, int hint) { struct ubsec_softc *sc = arg; int r; if (krp == NULL || krp->krp_callback == NULL) return (EINVAL); while (!SIMPLEQ_EMPTY(&sc->sc_q2free)) { struct ubsec_q2 *q; q = SIMPLEQ_FIRST(&sc->sc_q2free); SIMPLEQ_REMOVE_HEAD(&sc->sc_q2free, q, q_next); ubsec_kfree(sc, q); } switch (krp->krp_op) { case CRK_MOD_EXP: if (sc->sc_flags & UBS_FLAGS_HWNORM) r = ubsec_kprocess_modexp_hw(sc, krp, hint); else r = ubsec_kprocess_modexp_sw(sc, krp, hint); break; case CRK_MOD_EXP_CRT: return (ubsec_kprocess_rsapriv(sc, krp, hint)); default: device_printf(sc->sc_dev, "kprocess: invalid op 0x%x\n", krp->krp_op); krp->krp_status = EOPNOTSUPP; crypto_kdone(krp); return (0); } return (0); /* silence compiler */ } /* * Start computation of cr[C] = (cr[M] ^ cr[E]) mod cr[N] (sw normalization) */ 
static int ubsec_kprocess_modexp_sw(struct ubsec_softc *sc, struct cryptkop *krp, int hint) { struct ubsec_q2_modexp *me; struct ubsec_mcr *mcr; struct ubsec_ctx_modexp *ctx; struct ubsec_pktbuf *epb; int err = 0; u_int nbits, normbits, mbits, shiftbits, ebits; me = (struct ubsec_q2_modexp *)malloc(sizeof *me, M_DEVBUF, M_NOWAIT); if (me == NULL) { err = ENOMEM; goto errout; } bzero(me, sizeof *me); me->me_krp = krp; me->me_q.q_type = UBS_CTXOP_MODEXP; nbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_N]); if (nbits <= 512) normbits = 512; else if (nbits <= 768) normbits = 768; else if (nbits <= 1024) normbits = 1024; else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 1536) normbits = 1536; else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 2048) normbits = 2048; else { err = E2BIG; goto errout; } shiftbits = normbits - nbits; me->me_modbits = nbits; me->me_shiftbits = shiftbits; me->me_normbits = normbits; /* Sanity check: result bits must be >= true modulus bits. */ if (krp->krp_param[krp->krp_iparams].crp_nbits < nbits) { err = ERANGE; goto errout; } if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr), &me->me_q.q_mcr, 0)) { err = ENOMEM; goto errout; } mcr = (struct ubsec_mcr *)me->me_q.q_mcr.dma_vaddr; if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_modexp), &me->me_q.q_ctx, 0)) { err = ENOMEM; goto errout; } mbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_M]); if (mbits > nbits) { err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, normbits / 8, &me->me_M, 0)) { err = ENOMEM; goto errout; } ubsec_kshift_r(shiftbits, krp->krp_param[UBS_MODEXP_PAR_M].crp_p, mbits, me->me_M.dma_vaddr, normbits); if (ubsec_dma_malloc(sc, normbits / 8, &me->me_C, 0)) { err = ENOMEM; goto errout; } bzero(me->me_C.dma_vaddr, me->me_C.dma_size); ebits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_E]); if (ebits > nbits) { err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, normbits / 8, &me->me_E, 0)) { err = ENOMEM; goto errout; } ubsec_kshift_r(shiftbits, 
krp->krp_param[UBS_MODEXP_PAR_E].crp_p, ebits, me->me_E.dma_vaddr, normbits); if (ubsec_dma_malloc(sc, sizeof(struct ubsec_pktbuf), &me->me_epb, 0)) { err = ENOMEM; goto errout; } epb = (struct ubsec_pktbuf *)me->me_epb.dma_vaddr; epb->pb_addr = htole32(me->me_E.dma_paddr); epb->pb_next = 0; epb->pb_len = htole32(normbits / 8); #ifdef UBSEC_DEBUG if (ubsec_debug) { printf("Epb "); ubsec_dump_pb(epb); } #endif mcr->mcr_pkts = htole16(1); mcr->mcr_flags = 0; mcr->mcr_cmdctxp = htole32(me->me_q.q_ctx.dma_paddr); mcr->mcr_reserved = 0; mcr->mcr_pktlen = 0; mcr->mcr_ipktbuf.pb_addr = htole32(me->me_M.dma_paddr); mcr->mcr_ipktbuf.pb_len = htole32(normbits / 8); mcr->mcr_ipktbuf.pb_next = htole32(me->me_epb.dma_paddr); mcr->mcr_opktbuf.pb_addr = htole32(me->me_C.dma_paddr); mcr->mcr_opktbuf.pb_next = 0; mcr->mcr_opktbuf.pb_len = htole32(normbits / 8); #ifdef DIAGNOSTIC /* Misaligned output buffer will hang the chip. */ if ((letoh32(mcr->mcr_opktbuf.pb_addr) & 3) != 0) panic("%s: modexp invalid addr 0x%x\n", device_get_nameunit(sc->sc_dev), letoh32(mcr->mcr_opktbuf.pb_addr)); if ((letoh32(mcr->mcr_opktbuf.pb_len) & 3) != 0) panic("%s: modexp invalid len 0x%x\n", device_get_nameunit(sc->sc_dev), letoh32(mcr->mcr_opktbuf.pb_len)); #endif ctx = (struct ubsec_ctx_modexp *)me->me_q.q_ctx.dma_vaddr; bzero(ctx, sizeof(*ctx)); ubsec_kshift_r(shiftbits, krp->krp_param[UBS_MODEXP_PAR_N].crp_p, nbits, ctx->me_N, normbits); ctx->me_len = htole16((normbits / 8) + (4 * sizeof(u_int16_t))); ctx->me_op = htole16(UBS_CTXOP_MODEXP); ctx->me_E_len = htole16(nbits); ctx->me_N_len = htole16(nbits); #ifdef UBSEC_DEBUG if (ubsec_debug) { ubsec_dump_mcr(mcr); ubsec_dump_ctx2((struct ubsec_ctx_keyop *)ctx); } #endif /* * ubsec_feed2 will sync mcr and ctx, we just need to sync * everything else. 
*/ bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map, BUS_DMASYNC_PREREAD); bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map, BUS_DMASYNC_PREWRITE); /* Enqueue and we're done... */ UBSEC_LOCK(sc); SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &me->me_q, q_next); ubsec_feed2(sc); ubsecstats.hst_modexp++; UBSEC_UNLOCK(sc); return (0); errout: if (me != NULL) { if (me->me_q.q_mcr.dma_map != NULL) ubsec_dma_free(sc, &me->me_q.q_mcr); if (me->me_q.q_ctx.dma_map != NULL) { bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size); ubsec_dma_free(sc, &me->me_q.q_ctx); } if (me->me_M.dma_map != NULL) { bzero(me->me_M.dma_vaddr, me->me_M.dma_size); ubsec_dma_free(sc, &me->me_M); } if (me->me_E.dma_map != NULL) { bzero(me->me_E.dma_vaddr, me->me_E.dma_size); ubsec_dma_free(sc, &me->me_E); } if (me->me_C.dma_map != NULL) { bzero(me->me_C.dma_vaddr, me->me_C.dma_size); ubsec_dma_free(sc, &me->me_C); } if (me->me_epb.dma_map != NULL) ubsec_dma_free(sc, &me->me_epb); free(me, M_DEVBUF); } krp->krp_status = err; crypto_kdone(krp); return (0); } /* * Start computation of cr[C] = (cr[M] ^ cr[E]) mod cr[N] (hw normalization) */ static int ubsec_kprocess_modexp_hw(struct ubsec_softc *sc, struct cryptkop *krp, int hint) { struct ubsec_q2_modexp *me; struct ubsec_mcr *mcr; struct ubsec_ctx_modexp *ctx; struct ubsec_pktbuf *epb; int err = 0; u_int nbits, normbits, mbits, shiftbits, ebits; me = (struct ubsec_q2_modexp *)malloc(sizeof *me, M_DEVBUF, M_NOWAIT); if (me == NULL) { err = ENOMEM; goto errout; } bzero(me, sizeof *me); me->me_krp = krp; me->me_q.q_type = UBS_CTXOP_MODEXP; nbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_N]); if (nbits <= 512) normbits = 512; else if (nbits <= 768) normbits = 768; else if (nbits <= 1024) normbits = 1024; else if (sc->sc_flags & UBS_FLAGS_BIGKEY && nbits <= 1536) normbits = 1536; else if (sc->sc_flags & UBS_FLAGS_BIGKEY 
&& nbits <= 2048) normbits = 2048; else { err = E2BIG; goto errout; } shiftbits = normbits - nbits; /* XXX ??? */ me->me_modbits = nbits; me->me_shiftbits = shiftbits; me->me_normbits = normbits; /* Sanity check: result bits must be >= true modulus bits. */ if (krp->krp_param[krp->krp_iparams].crp_nbits < nbits) { err = ERANGE; goto errout; } if (ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr), &me->me_q.q_mcr, 0)) { err = ENOMEM; goto errout; } mcr = (struct ubsec_mcr *)me->me_q.q_mcr.dma_vaddr; if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_modexp), &me->me_q.q_ctx, 0)) { err = ENOMEM; goto errout; } mbits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_M]); if (mbits > nbits) { err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, normbits / 8, &me->me_M, 0)) { err = ENOMEM; goto errout; } bzero(me->me_M.dma_vaddr, normbits / 8); bcopy(krp->krp_param[UBS_MODEXP_PAR_M].crp_p, me->me_M.dma_vaddr, (mbits + 7) / 8); if (ubsec_dma_malloc(sc, normbits / 8, &me->me_C, 0)) { err = ENOMEM; goto errout; } bzero(me->me_C.dma_vaddr, me->me_C.dma_size); ebits = ubsec_ksigbits(&krp->krp_param[UBS_MODEXP_PAR_E]); if (ebits > nbits) { err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, normbits / 8, &me->me_E, 0)) { err = ENOMEM; goto errout; } bzero(me->me_E.dma_vaddr, normbits / 8); bcopy(krp->krp_param[UBS_MODEXP_PAR_E].crp_p, me->me_E.dma_vaddr, (ebits + 7) / 8); if (ubsec_dma_malloc(sc, sizeof(struct ubsec_pktbuf), &me->me_epb, 0)) { err = ENOMEM; goto errout; } epb = (struct ubsec_pktbuf *)me->me_epb.dma_vaddr; epb->pb_addr = htole32(me->me_E.dma_paddr); epb->pb_next = 0; epb->pb_len = htole32((ebits + 7) / 8); #ifdef UBSEC_DEBUG printf("Epb "); ubsec_dump_pb(epb); #endif mcr->mcr_pkts = htole16(1); mcr->mcr_flags = 0; mcr->mcr_cmdctxp = htole32(me->me_q.q_ctx.dma_paddr); mcr->mcr_reserved = 0; mcr->mcr_pktlen = 0; mcr->mcr_ipktbuf.pb_addr = htole32(me->me_M.dma_paddr); mcr->mcr_ipktbuf.pb_len = htole32(normbits / 8); mcr->mcr_ipktbuf.pb_next = 
htole32(me->me_epb.dma_paddr); mcr->mcr_opktbuf.pb_addr = htole32(me->me_C.dma_paddr); mcr->mcr_opktbuf.pb_next = 0; mcr->mcr_opktbuf.pb_len = htole32(normbits / 8); #ifdef DIAGNOSTIC /* Misaligned output buffer will hang the chip. */ if ((letoh32(mcr->mcr_opktbuf.pb_addr) & 3) != 0) panic("%s: modexp invalid addr 0x%x\n", device_get_nameunit(sc->sc_dev), letoh32(mcr->mcr_opktbuf.pb_addr)); if ((letoh32(mcr->mcr_opktbuf.pb_len) & 3) != 0) panic("%s: modexp invalid len 0x%x\n", device_get_nameunit(sc->sc_dev), letoh32(mcr->mcr_opktbuf.pb_len)); #endif ctx = (struct ubsec_ctx_modexp *)me->me_q.q_ctx.dma_vaddr; bzero(ctx, sizeof(*ctx)); bcopy(krp->krp_param[UBS_MODEXP_PAR_N].crp_p, ctx->me_N, (nbits + 7) / 8); ctx->me_len = htole16((normbits / 8) + (4 * sizeof(u_int16_t))); ctx->me_op = htole16(UBS_CTXOP_MODEXP); ctx->me_E_len = htole16(ebits); ctx->me_N_len = htole16(nbits); #ifdef UBSEC_DEBUG if (ubsec_debug) { ubsec_dump_mcr(mcr); ubsec_dump_ctx2((struct ubsec_ctx_keyop *)ctx); } #endif /* * ubsec_feed2 will sync mcr and ctx, we just need to sync * everything else. */ bus_dmamap_sync(sc->sc_dmat, me->me_M.dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_E.dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, me->me_C.dma_map, BUS_DMASYNC_PREREAD); bus_dmamap_sync(sc->sc_dmat, me->me_epb.dma_map, BUS_DMASYNC_PREWRITE); /* Enqueue and we're done... 
*/ UBSEC_LOCK(sc); SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &me->me_q, q_next); ubsec_feed2(sc); UBSEC_UNLOCK(sc); return (0); errout: if (me != NULL) { if (me->me_q.q_mcr.dma_map != NULL) ubsec_dma_free(sc, &me->me_q.q_mcr); if (me->me_q.q_ctx.dma_map != NULL) { bzero(me->me_q.q_ctx.dma_vaddr, me->me_q.q_ctx.dma_size); ubsec_dma_free(sc, &me->me_q.q_ctx); } if (me->me_M.dma_map != NULL) { bzero(me->me_M.dma_vaddr, me->me_M.dma_size); ubsec_dma_free(sc, &me->me_M); } if (me->me_E.dma_map != NULL) { bzero(me->me_E.dma_vaddr, me->me_E.dma_size); ubsec_dma_free(sc, &me->me_E); } if (me->me_C.dma_map != NULL) { bzero(me->me_C.dma_vaddr, me->me_C.dma_size); ubsec_dma_free(sc, &me->me_C); } if (me->me_epb.dma_map != NULL) ubsec_dma_free(sc, &me->me_epb); free(me, M_DEVBUF); } krp->krp_status = err; crypto_kdone(krp); return (0); } static int ubsec_kprocess_rsapriv(struct ubsec_softc *sc, struct cryptkop *krp, int hint) { struct ubsec_q2_rsapriv *rp = NULL; struct ubsec_mcr *mcr; struct ubsec_ctx_rsapriv *ctx; int err = 0; u_int padlen, msglen; msglen = ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_P]); padlen = ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_Q]); if (msglen > padlen) padlen = msglen; if (padlen <= 256) padlen = 256; else if (padlen <= 384) padlen = 384; else if (padlen <= 512) padlen = 512; else if (sc->sc_flags & UBS_FLAGS_BIGKEY && padlen <= 768) padlen = 768; else if (sc->sc_flags & UBS_FLAGS_BIGKEY && padlen <= 1024) padlen = 1024; else { err = E2BIG; goto errout; } if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_DP]) > padlen) { err = E2BIG; goto errout; } if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_DQ]) > padlen) { err = E2BIG; goto errout; } if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_PINV]) > padlen) { err = E2BIG; goto errout; } rp = (struct ubsec_q2_rsapriv *)malloc(sizeof *rp, M_DEVBUF, M_NOWAIT); if (rp == NULL) return (ENOMEM); bzero(rp, sizeof *rp); rp->rpr_krp = krp; rp->rpr_q.q_type = UBS_CTXOP_RSAPRIV; if 
(ubsec_dma_malloc(sc, sizeof(struct ubsec_mcr), &rp->rpr_q.q_mcr, 0)) { err = ENOMEM; goto errout; } mcr = (struct ubsec_mcr *)rp->rpr_q.q_mcr.dma_vaddr; if (ubsec_dma_malloc(sc, sizeof(struct ubsec_ctx_rsapriv), &rp->rpr_q.q_ctx, 0)) { err = ENOMEM; goto errout; } ctx = (struct ubsec_ctx_rsapriv *)rp->rpr_q.q_ctx.dma_vaddr; bzero(ctx, sizeof *ctx); /* Copy in p */ bcopy(krp->krp_param[UBS_RSAPRIV_PAR_P].crp_p, &ctx->rpr_buf[0 * (padlen / 8)], (krp->krp_param[UBS_RSAPRIV_PAR_P].crp_nbits + 7) / 8); /* Copy in q */ bcopy(krp->krp_param[UBS_RSAPRIV_PAR_Q].crp_p, &ctx->rpr_buf[1 * (padlen / 8)], (krp->krp_param[UBS_RSAPRIV_PAR_Q].crp_nbits + 7) / 8); /* Copy in dp */ bcopy(krp->krp_param[UBS_RSAPRIV_PAR_DP].crp_p, &ctx->rpr_buf[2 * (padlen / 8)], (krp->krp_param[UBS_RSAPRIV_PAR_DP].crp_nbits + 7) / 8); /* Copy in dq */ bcopy(krp->krp_param[UBS_RSAPRIV_PAR_DQ].crp_p, &ctx->rpr_buf[3 * (padlen / 8)], (krp->krp_param[UBS_RSAPRIV_PAR_DQ].crp_nbits + 7) / 8); /* Copy in pinv */ bcopy(krp->krp_param[UBS_RSAPRIV_PAR_PINV].crp_p, &ctx->rpr_buf[4 * (padlen / 8)], (krp->krp_param[UBS_RSAPRIV_PAR_PINV].crp_nbits + 7) / 8); msglen = padlen * 2; /* Copy in input message (aligned buffer/length). */ if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_MSGIN]) > msglen) { /* Is this likely? */ err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, (msglen + 7) / 8, &rp->rpr_msgin, 0)) { err = ENOMEM; goto errout; } bzero(rp->rpr_msgin.dma_vaddr, (msglen + 7) / 8); bcopy(krp->krp_param[UBS_RSAPRIV_PAR_MSGIN].crp_p, rp->rpr_msgin.dma_vaddr, (krp->krp_param[UBS_RSAPRIV_PAR_MSGIN].crp_nbits + 7) / 8); /* Prepare space for output message (aligned buffer/length). */ if (ubsec_ksigbits(&krp->krp_param[UBS_RSAPRIV_PAR_MSGOUT]) < msglen) { /* Is this likely? 
*/ err = E2BIG; goto errout; } if (ubsec_dma_malloc(sc, (msglen + 7) / 8, &rp->rpr_msgout, 0)) { err = ENOMEM; goto errout; } bzero(rp->rpr_msgout.dma_vaddr, (msglen + 7) / 8); mcr->mcr_pkts = htole16(1); mcr->mcr_flags = 0; mcr->mcr_cmdctxp = htole32(rp->rpr_q.q_ctx.dma_paddr); mcr->mcr_ipktbuf.pb_addr = htole32(rp->rpr_msgin.dma_paddr); mcr->mcr_ipktbuf.pb_next = 0; mcr->mcr_ipktbuf.pb_len = htole32(rp->rpr_msgin.dma_size); mcr->mcr_reserved = 0; mcr->mcr_pktlen = htole16(msglen); mcr->mcr_opktbuf.pb_addr = htole32(rp->rpr_msgout.dma_paddr); mcr->mcr_opktbuf.pb_next = 0; mcr->mcr_opktbuf.pb_len = htole32(rp->rpr_msgout.dma_size); #ifdef DIAGNOSTIC if (rp->rpr_msgin.dma_paddr & 3 || rp->rpr_msgin.dma_size & 3) { panic("%s: rsapriv: invalid msgin %x(0x%jx)", device_get_nameunit(sc->sc_dev), rp->rpr_msgin.dma_paddr, (uintmax_t)rp->rpr_msgin.dma_size); } if (rp->rpr_msgout.dma_paddr & 3 || rp->rpr_msgout.dma_size & 3) { panic("%s: rsapriv: invalid msgout %x(0x%jx)", device_get_nameunit(sc->sc_dev), rp->rpr_msgout.dma_paddr, (uintmax_t)rp->rpr_msgout.dma_size); } #endif ctx->rpr_len = (sizeof(u_int16_t) * 4) + (5 * (padlen / 8)); ctx->rpr_op = htole16(UBS_CTXOP_RSAPRIV); ctx->rpr_q_len = htole16(padlen); ctx->rpr_p_len = htole16(padlen); /* * ubsec_feed2 will sync mcr and ctx, we just need to sync * everything else. */ bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgin.dma_map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(sc->sc_dmat, rp->rpr_msgout.dma_map, BUS_DMASYNC_PREREAD); /* Enqueue and we're done... 
*/ UBSEC_LOCK(sc); SIMPLEQ_INSERT_TAIL(&sc->sc_queue2, &rp->rpr_q, q_next); ubsec_feed2(sc); ubsecstats.hst_modexpcrt++; UBSEC_UNLOCK(sc); return (0); errout: if (rp != NULL) { if (rp->rpr_q.q_mcr.dma_map != NULL) ubsec_dma_free(sc, &rp->rpr_q.q_mcr); if (rp->rpr_msgin.dma_map != NULL) { bzero(rp->rpr_msgin.dma_vaddr, rp->rpr_msgin.dma_size); ubsec_dma_free(sc, &rp->rpr_msgin); } if (rp->rpr_msgout.dma_map != NULL) { bzero(rp->rpr_msgout.dma_vaddr, rp->rpr_msgout.dma_size); ubsec_dma_free(sc, &rp->rpr_msgout); } free(rp, M_DEVBUF); } krp->krp_status = err; crypto_kdone(krp); return (0); } #ifdef UBSEC_DEBUG static void ubsec_dump_pb(volatile struct ubsec_pktbuf *pb) { printf("addr 0x%x (0x%x) next 0x%x\n", pb->pb_addr, pb->pb_len, pb->pb_next); } static void ubsec_dump_ctx2(struct ubsec_ctx_keyop *c) { printf("CTX (0x%x):\n", c->ctx_len); switch (letoh16(c->ctx_op)) { case UBS_CTXOP_RNGBYPASS: case UBS_CTXOP_RNGSHA1: break; case UBS_CTXOP_MODEXP: { struct ubsec_ctx_modexp *cx = (void *)c; int i, len; printf(" Elen %u, Nlen %u\n", letoh16(cx->me_E_len), letoh16(cx->me_N_len)); len = (cx->me_N_len + 7)/8; for (i = 0; i < len; i++) printf("%s%02x", (i == 0) ? " N: " : ":", cx->me_N[i]); printf("\n"); break; } default: printf("unknown context: %x\n", c->ctx_op); } printf("END CTX\n"); } static void ubsec_dump_mcr(struct ubsec_mcr *mcr) { volatile struct ubsec_mcr_add *ma; int i; printf("MCR:\n"); printf(" pkts: %u, flags 0x%x\n", letoh16(mcr->mcr_pkts), letoh16(mcr->mcr_flags)); ma = (volatile struct ubsec_mcr_add *)&mcr->mcr_cmdctxp; for (i = 0; i < letoh16(mcr->mcr_pkts); i++) { printf(" %d: ctx 0x%x len 0x%x rsvd 0x%x\n", i, letoh32(ma->mcr_cmdctxp), letoh16(ma->mcr_pktlen), letoh16(ma->mcr_reserved)); printf(" %d: ipkt ", i); ubsec_dump_pb(&ma->mcr_ipktbuf); printf(" %d: opkt ", i); ubsec_dump_pb(&ma->mcr_opktbuf); ma++; } printf("END MCR\n"); } #endif /* UBSEC_DEBUG */ /* * Return the number of significant bits of a big number. 
*/ static int ubsec_ksigbits(struct crparam *cr) { u_int plen = (cr->crp_nbits + 7) / 8; int i, sig = plen * 8; u_int8_t c, *p = cr->crp_p; for (i = plen - 1; i >= 0; i--) { c = p[i]; if (c != 0) { while ((c & 0x80) == 0) { sig--; c <<= 1; } break; } sig -= 8; } return (sig); } static void ubsec_kshift_r( u_int shiftbits, u_int8_t *src, u_int srcbits, u_int8_t *dst, u_int dstbits) { u_int slen, dlen; int i, si, di, n; slen = (srcbits + 7) / 8; dlen = (dstbits + 7) / 8; for (i = 0; i < slen; i++) dst[i] = src[i]; for (i = 0; i < dlen - slen; i++) dst[slen + i] = 0; n = shiftbits / 8; if (n != 0) { si = dlen - n - 1; di = dlen - 1; while (si >= 0) dst[di--] = dst[si--]; while (di >= 0) dst[di--] = 0; } n = shiftbits % 8; if (n != 0) { for (i = dlen - 1; i > 0; i--) dst[i] = (dst[i] << n) | (dst[i - 1] >> (8 - n)); dst[0] = dst[0] << n; } } static void ubsec_kshift_l( u_int shiftbits, u_int8_t *src, u_int srcbits, u_int8_t *dst, u_int dstbits) { int slen, dlen, i, n; slen = (srcbits + 7) / 8; dlen = (dstbits + 7) / 8; n = shiftbits / 8; for (i = 0; i < slen; i++) dst[i] = src[i + n]; for (i = 0; i < dlen - slen; i++) dst[slen + i] = 0; n = shiftbits % 8; if (n != 0) { for (i = 0; i < (dlen - 1); i++) dst[i] = (dst[i] >> n) | (dst[i + 1] << (8 - n)); dst[dlen - 1] = dst[dlen - 1] >> n; } } Index: head/sys/kern/uipc_mbuf.c =================================================================== --- head/sys/kern/uipc_mbuf.c (revision 108465) +++ head/sys/kern/uipc_mbuf.c (revision 108466) @@ -1,739 +1,779 @@ /* * Copyright (c) 1982, 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_mbuf.c 8.2 (Berkeley) 1/4/94 * $FreeBSD$ */ #include "opt_mac.h" #include "opt_param.h" #include #include #include #include #include #include #include #include #include #include int max_linkhdr; int max_protohdr; int max_hdr; int max_datalen; /* * sysctl(8) exported objects */ SYSCTL_DECL(_kern_ipc); SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW, &max_linkhdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW, &max_protohdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, ""); SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW, &max_datalen, 0, ""); /* - * Copy mbuf pkthdr from "from" to "to". + * "Move" mbuf pkthdr from "from" to "to". * "from" must have M_PKTHDR set, and "to" must be empty. - * aux pointer will be moved to "to". */ void -m_copy_pkthdr(struct mbuf *to, struct mbuf *from) +m_move_pkthdr(struct mbuf *to, struct mbuf *from) { #if 0 + /* see below for why these are not enabled */ KASSERT(to->m_flags & M_PKTHDR, - ("m_copy_pkthdr() called on non-header")); + ("m_move_pkthdr: called on non-header")); + KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), + ("m_move_pkthdr: to has tags")); #endif + KASSERT((to->m_flags & M_EXT) == 0, ("m_move_pkthdr: to has cluster")); #ifdef MAC if (to->m_flags & M_PKTHDR) mac_destroy_mbuf(to); #endif + to->m_flags = from->m_flags & M_COPYFLAGS; to->m_data = to->m_pktdat; + to->m_pkthdr = from->m_pkthdr; /* especially tags */ +#ifdef MAC + mac_init_mbuf(to, 1); /* XXXMAC no way to fail */ + mac_create_mbuf_from_mbuf(from, to); +#endif + SLIST_INIT(&from->m_pkthdr.tags); /* purge tags from src */ + from->m_flags &= ~M_PKTHDR; +} + +/* + * Duplicate "from"'s mbuf pkthdr in "to". + * "from" must have M_PKTHDR set, and "to" must be empty. + * In particular, this does a deep copy of the packet tags. 
+ */ +int +m_dup_pkthdr(struct mbuf *to, struct mbuf *from, int how) +{ + +#if 0 + /* + * The mbuf allocator only initializes the pkthdr + * when the mbuf is allocated with MGETHDR. Many users + * (e.g. m_copy*, m_prepend) use MGET and then + * smash the pkthdr as needed causing these + * assertions to trip. For now just disable them. + */ + KASSERT(to->m_flags & M_PKTHDR, ("m_dup_pkthdr: called on non-header")); + KASSERT(SLIST_EMPTY(&to->m_pkthdr.tags), ("m_dup_pkthdr: to has tags")); +#endif + KASSERT((to->m_flags & M_EXT) == 0, ("m_dup_pkthdr: to has cluster")); +#ifdef MAC + if (to->m_flags & M_PKTHDR) + mac_destroy_mbuf(to); +#endif to->m_flags = from->m_flags & M_COPYFLAGS; + to->m_data = to->m_pktdat; to->m_pkthdr = from->m_pkthdr; #ifdef MAC mac_init_mbuf(to, 1); /* XXXMAC no way to fail */ mac_create_mbuf_from_mbuf(from, to); #endif - SLIST_INIT(&from->m_pkthdr.tags); + SLIST_INIT(&to->m_pkthdr.tags); + return (m_tag_copy_chain(to, from, how)); } /* * Lesser-used path for M_PREPEND: * allocate new mbuf to prepend to chain, * copy junk along. */ struct mbuf * m_prepend(struct mbuf *m, int len, int how) { struct mbuf *mn; MGET(mn, how, m->m_type); if (mn == NULL) { m_freem(m); return (NULL); } if (m->m_flags & M_PKTHDR) { - M_COPY_PKTHDR(mn, m); + M_MOVE_PKTHDR(mn, m); #ifdef MAC mac_destroy_mbuf(m); #endif - m->m_flags &= ~M_PKTHDR; } mn->m_next = m; m = mn; if (len < MHLEN) MH_ALIGN(m, len); m->m_len = len; return (m); } /* * Make a copy of an mbuf chain starting "off0" bytes from the beginning, * continuing for "len" bytes. If len is M_COPYALL, copy to end of mbuf. * The wait parameter is a choice of M_TRYWAIT/M_DONTWAIT from caller. * Note that the copy is read-only, because clusters are not copied, * only their reference counts are incremented. 
*/ struct mbuf * m_copym(struct mbuf *m, int off0, int len, int wait) { struct mbuf *n, **np; int off = off0; struct mbuf *top; int copyhdr = 0; KASSERT(off >= 0, ("m_copym, negative off %d", off)); KASSERT(len >= 0, ("m_copym, negative len %d", len)); if (off == 0 && m->m_flags & M_PKTHDR) copyhdr = 1; while (off > 0) { KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } np = ⊤ top = 0; while (len > 0) { if (m == NULL) { KASSERT(len == M_COPYALL, ("m_copym, length > size of mbuf chain")); break; } MGET(n, wait, m->m_type); *np = n; if (n == NULL) goto nospace; if (copyhdr) { - M_COPY_PKTHDR(n, m); + if (!m_dup_pkthdr(n, m, wait)) + goto nospace; if (len == M_COPYALL) n->m_pkthdr.len -= off0; else n->m_pkthdr.len = len; copyhdr = 0; } n->m_len = min(len, m->m_len - off); if (m->m_flags & M_EXT) { n->m_data = m->m_data + off; n->m_ext = m->m_ext; n->m_flags |= M_EXT; MEXT_ADD_REF(m); } else bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t), (u_int)n->m_len); if (len != M_COPYALL) len -= n->m_len; off = 0; m = m->m_next; np = &n->m_next; } if (top == NULL) mbstat.m_mcfail++; /* XXX: No consistency. */ return (top); nospace: m_freem(top); mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } /* * Copy an entire packet, including header (which must be present). * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'. * Note that the copy is read-only, because clusters are not copied, * only their reference counts are incremented. * Preserve alignment of the first mbuf so if the creator has left * some room at the beginning (e.g. for inserting protocol headers) * the copies still have the room available. 
*/ struct mbuf * m_copypacket(struct mbuf *m, int how) { struct mbuf *top, *n, *o; MGET(n, how, m->m_type); top = n; if (n == NULL) goto nospace; - M_COPY_PKTHDR(n, m); + if (!m_dup_pkthdr(n, m, how)) + goto nospace; n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; n->m_ext = m->m_ext; n->m_flags |= M_EXT; MEXT_ADD_REF(m); } else { n->m_data = n->m_pktdat + (m->m_data - m->m_pktdat ); bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } m = m->m_next; while (m) { MGET(o, how, m->m_type); if (o == NULL) goto nospace; n->m_next = o; n = n->m_next; n->m_len = m->m_len; if (m->m_flags & M_EXT) { n->m_data = m->m_data; n->m_ext = m->m_ext; n->m_flags |= M_EXT; MEXT_ADD_REF(m); } else { bcopy(mtod(m, char *), mtod(n, char *), n->m_len); } m = m->m_next; } return top; nospace: m_freem(top); mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } /* * Copy data from an mbuf chain starting "off" bytes from the beginning, * continuing for "len" bytes, into the indicated buffer. */ void m_copydata(const struct mbuf *m, int off, int len, caddr_t cp) { u_int count; KASSERT(off >= 0, ("m_copydata, negative off %d", off)); KASSERT(len >= 0, ("m_copydata, negative len %d", len)); while (off > 0) { KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain")); if (off < m->m_len) break; off -= m->m_len; m = m->m_next; } while (len > 0) { KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain")); count = min(m->m_len - off, len); bcopy(mtod(m, caddr_t) + off, cp, count); len -= count; cp += count; off = 0; m = m->m_next; } } /* * Copy a packet header mbuf chain into a completely new chain, including * copying any mbuf clusters. Use this instead of m_copypacket() when * you need a writable copy of an mbuf chain. 
*/ struct mbuf * m_dup(struct mbuf *m, int how) { struct mbuf **p, *top = NULL; int remain, moff, nsize; /* Sanity check */ if (m == NULL) return (NULL); KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __func__)); /* While there's more data, get a new mbuf, tack it on, and fill it */ remain = m->m_pkthdr.len; moff = 0; p = ⊤ while (remain > 0 || top == NULL) { /* allow m->m_pkthdr.len == 0 */ struct mbuf *n; /* Get the next new mbuf */ MGET(n, how, m->m_type); if (n == NULL) goto nospace; if (top == NULL) { /* first one, must be PKTHDR */ - M_COPY_PKTHDR(n, m); + if (!m_dup_pkthdr(n, m, how)) + goto nospace; nsize = MHLEN; } else /* not the first one */ nsize = MLEN; if (remain >= MINCLSIZE) { MCLGET(n, how); if ((n->m_flags & M_EXT) == 0) { (void)m_free(n); goto nospace; } nsize = MCLBYTES; } n->m_len = 0; /* Link it into the new chain */ *p = n; p = &n->m_next; /* Copy data from original mbuf(s) into new mbuf */ while (n->m_len < nsize && m != NULL) { int chunk = min(nsize - n->m_len, m->m_len - moff); bcopy(m->m_data + moff, n->m_data + n->m_len, chunk); moff += chunk; n->m_len += chunk; remain -= chunk; if (moff == m->m_len) { m = m->m_next; moff = 0; } } /* Check correct total mbuf length */ KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL), ("%s: bogus m_pkthdr.len", __func__)); } return (top); nospace: m_freem(top); mbstat.m_mcfail++; /* XXX: No consistency. */ return (NULL); } /* * Concatenate mbuf chain n to m. * Both chains must be of the same type (e.g. MT_DATA). * Any m_pkthdr is not updated. 
*/ void m_cat(struct mbuf *m, struct mbuf *n) { while (m->m_next) m = m->m_next; while (n) { if (m->m_flags & M_EXT || m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { /* just join the two chains */ m->m_next = n; return; } /* splat the data from one into the other */ bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (u_int)n->m_len); m->m_len += n->m_len; n = m_free(n); } } void m_adj(struct mbuf *mp, int req_len) { int len = req_len; struct mbuf *m; int count; if ((m = mp) == NULL) return; if (len >= 0) { /* * Trim from head. */ while (m != NULL && len > 0) { if (m->m_len <= len) { len -= m->m_len; m->m_len = 0; m = m->m_next; } else { m->m_len -= len; m->m_data += len; len = 0; } } m = mp; if (mp->m_flags & M_PKTHDR) m->m_pkthdr.len -= (req_len - len); } else { /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ len = -len; count = 0; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len >= len) { m->m_len -= len; if (mp->m_flags & M_PKTHDR) mp->m_pkthdr.len -= len; return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ m = mp; if (m->m_flags & M_PKTHDR) m->m_pkthdr.len = count; for (; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; break; } count -= m->m_len; } while (m->m_next) (m = m->m_next) ->m_len = 0; } } /* * Rearange an mbuf chain so that len bytes are contiguous * and in the data area of an mbuf (so that mtod and dtom * will work for a structure of size len). Returns the resulting * mbuf chain on success, frees it and returns null on failure. 
* If there is room, it will add up to max_protohdr-len extra bytes to the * contiguous region in an attempt to avoid being called next time. */ struct mbuf * m_pullup(struct mbuf *n, int len) { struct mbuf *m; int count; int space; /* * If first mbuf has no cluster, and has room for len bytes * without shifting current data, pullup into it, * otherwise allocate a new mbuf to prepend to the chain. */ if ((n->m_flags & M_EXT) == 0 && n->m_data + len < &n->m_dat[MLEN] && n->m_next) { if (n->m_len >= len) return (n); m = n; n = n->m_next; len -= m->m_len; } else { if (len > MHLEN) goto bad; MGET(m, M_DONTWAIT, n->m_type); if (m == NULL) goto bad; m->m_len = 0; - if (n->m_flags & M_PKTHDR) { - M_COPY_PKTHDR(m, n); - n->m_flags &= ~M_PKTHDR; - } + if (n->m_flags & M_PKTHDR) + M_MOVE_PKTHDR(m, n); } space = &m->m_dat[MLEN] - (m->m_data + m->m_len); do { count = min(min(max(len, max_protohdr), space), n->m_len); bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len, (u_int)count); len -= count; m->m_len += count; n->m_len -= count; space -= count; if (n->m_len) n->m_data += count; else n = m_free(n); } while (len > 0 && n); if (len > 0) { (void) m_free(m); goto bad; } m->m_next = n; return (m); bad: m_freem(n); mbstat.m_mpfail++; /* XXX: No consistency. */ return (NULL); } /* * Partition an mbuf chain in two pieces, returning the tail -- * all but the first len0 bytes. In case of failure, it returns NULL and * attempts to restore the chain to its original state. * * Note that the resulting mbufs might be read-only, because the new * mbuf can end up sharing an mbuf cluster with the original mbuf if * the "breaking point" happens to lie within a cluster mbuf. Use the * M_WRITABLE() macro to check for this case. 
*/ struct mbuf * m_split(struct mbuf *m0, int len0, int wait) { struct mbuf *m, *n; u_int len = len0, remain; for (m = m0; m && len > m->m_len; m = m->m_next) len -= m->m_len; if (m == NULL) return (NULL); remain = m->m_len - len; if (m0->m_flags & M_PKTHDR) { MGETHDR(n, wait, m0->m_type); if (n == NULL) return (NULL); n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif; n->m_pkthdr.len = m0->m_pkthdr.len - len0; m0->m_pkthdr.len = len0; if (m->m_flags & M_EXT) goto extpacket; if (remain > MHLEN) { /* m can't be the lead packet */ MH_ALIGN(n, 0); n->m_next = m_split(m, len, wait); if (n->m_next == NULL) { (void) m_free(n); return (NULL); } else { n->m_len = 0; return (n); } } else MH_ALIGN(n, remain); } else if (remain == 0) { n = m->m_next; m->m_next = NULL; return (n); } else { MGET(n, wait, m->m_type); if (n == NULL) return (NULL); M_ALIGN(n, remain); } extpacket: if (m->m_flags & M_EXT) { n->m_flags |= M_EXT; n->m_ext = m->m_ext; MEXT_ADD_REF(m); n->m_data = m->m_data + len; } else { bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain); } n->m_len = remain; m->m_len = len; n->m_next = m->m_next; m->m_next = NULL; return (n); } /* * Routine to copy from device local memory into mbufs. * Note that `off' argument is offset into first mbuf of target chain from * which to begin copying the data to. */ struct mbuf * m_devget(char *buf, int totlen, int off, struct ifnet *ifp, void (*copy)(char *from, caddr_t to, u_int len)) { struct mbuf *m; struct mbuf *top = 0, **mp = ⊤ int len; if (off < 0 || off > MHLEN) return (NULL); MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return (NULL); m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = totlen; len = MHLEN; while (totlen > 0) { if (top) { MGET(m, M_DONTWAIT, MT_DATA); if (m == NULL) { m_freem(top); return (NULL); } len = MLEN; } if (totlen + off >= MINCLSIZE) { MCLGET(m, M_DONTWAIT); if (m->m_flags & M_EXT) len = MCLBYTES; } else { /* * Place initial small packet/header at end of mbuf. 
*/ if (top == NULL && totlen + off + max_linkhdr <= len) { m->m_data += max_linkhdr; len -= max_linkhdr; } } if (off) { m->m_data += off; len -= off; off = 0; } m->m_len = len = min(totlen, len); if (copy) copy(buf, mtod(m, caddr_t), (u_int)len); else bcopy(buf, mtod(m, caddr_t), (u_int)len); buf += len; *mp = m; mp = &m->m_next; totlen -= len; } return (top); } /* * Copy data from a buffer back into the indicated mbuf chain, * starting "off" bytes from the beginning, extending the mbuf * chain if necessary. */ void m_copyback(struct mbuf *m0, int off, int len, caddr_t cp) { int mlen; struct mbuf *m = m0, *n; int totlen = 0; if (m0 == NULL) return; while (off > (mlen = m->m_len)) { off -= mlen; totlen += mlen; if (m->m_next == NULL) { n = m_get_clrd(M_DONTWAIT, m->m_type); if (n == NULL) goto out; n->m_len = min(MLEN, len + off); m->m_next = n; } m = m->m_next; } while (len > 0) { mlen = min (m->m_len - off, len); bcopy(cp, off + mtod(m, caddr_t), (u_int)mlen); cp += mlen; len -= mlen; mlen += off; off = 0; totlen += mlen; if (len == 0) break; if (m->m_next == NULL) { n = m_get(M_DONTWAIT, m->m_type); if (n == NULL) break; n->m_len = min(MLEN, len); m->m_next = n; } m = m->m_next; } out: if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) m->m_pkthdr.len = totlen; } void m_print(const struct mbuf *m) { int len; const struct mbuf *m2; len = m->m_pkthdr.len; m2 = m; while (len) { printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-"); len -= m2->m_len; m2 = m2->m_next; } return; } u_int m_fixhdr(struct mbuf *m0) { u_int len; len = m_length(m0, NULL); m0->m_pkthdr.len = len; return (len); } u_int m_length(struct mbuf *m0, struct mbuf **last) { struct mbuf *m; u_int len; len = 0; for (m = m0; m != NULL; m = m->m_next) { len += m->m_len; if (m->m_next == NULL) break; } if (last != NULL) *last = m; return (len); } Index: head/sys/kern/uipc_mbuf2.c =================================================================== --- head/sys/kern/uipc_mbuf2.c 
(revision 108465) +++ head/sys/kern/uipc_mbuf2.c (revision 108466) @@ -1,456 +1,458 @@ /* $FreeBSD$ */ /* $KAME: uipc_mbuf2.c,v 1.31 2001/11/28 11:08:53 itojun Exp $ */ /* $NetBSD: uipc_mbuf.c,v 1.40 1999/04/01 00:23:25 thorpej Exp $ */ /* * Copyright (C) 1999 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_mbuf.c 8.4 (Berkeley) 2/14/95 */ /*#define PULLDOWN_DEBUG*/ #include #include #include #include #include #include #include MALLOC_DEFINE(M_PACKET_TAGS, "tag", "packet-attached information"); /* can't call it m_dup(), as freebsd[34] uses m_dup() with different arg */ static struct mbuf *m_dup1(struct mbuf *, int, int, int); /* * ensure that [off, off + len) is contiguous on the mbuf chain "m". * packet chain before "off" is kept untouched. * if offp == NULL, the target will start at on resulting chain. * if offp != NULL, the target will start at on resulting chain. * * on error return (NULL return value), original "m" will be freed. * * XXX: M_TRAILINGSPACE/M_LEADINGSPACE only permitted on writable ext_buf. */ struct mbuf * m_pulldown(struct mbuf *m, int off, int len, int *offp) { struct mbuf *n, *o; int hlen, tlen, olen; int writable; /* check invalid arguments. */ if (m == NULL) panic("m == NULL in m_pulldown()"); if (len > MCLBYTES) { m_freem(m); return NULL; /* impossible */ } #ifdef PULLDOWN_DEBUG { struct mbuf *t; printf("before:"); for (t = m; t; t = t->m_next) printf(" %d", t->m_len); printf("\n"); } #endif n = m; while (n != NULL && off > 0) { if (n->m_len > off) break; off -= n->m_len; n = n->m_next; } /* be sure to point non-empty mbuf */ while (n != NULL && n->m_len == 0) n = n->m_next; if (!n) { m_freem(m); return NULL; /* mbuf chain too short */ } /* * XXX: This code is flawed because it considers a "writable" mbuf * data region to require all of the following: * (i) mbuf _has_ to have M_EXT set; if it is just a regular * mbuf, it is still not considered "writable." * (ii) since mbuf has M_EXT, the ext_type _has_ to be * EXT_CLUSTER. Anything else makes it non-writable. * (iii) M_WRITABLE() must evaluate true. * Ideally, the requirement should only be (iii). * * If we're writable, we're sure we're writable, because the ref. count * cannot increase from 1, as that would require posession of mbuf * n by someone else (which is impossible). 
However, if we're _not_ * writable, we may eventually become writable )if the ref. count drops * to 1), but we'll fail to notice it unless we re-evaluate * M_WRITABLE(). For now, we only evaluate once at the beginning and * live with this. */ /* * XXX: This is dumb. If we're just a regular mbuf with no M_EXT, * then we're not "writable," according to this code. */ writable = 0; if ((n->m_flags & M_EXT) == 0 || (n->m_ext.ext_type == EXT_CLUSTER && M_WRITABLE(n))) writable = 1; /* * the target data is on . * if we got enough data on the mbuf "n", we're done. */ if ((off == 0 || offp) && len <= n->m_len - off && writable) goto ok; /* * when len <= n->m_len - off and off != 0, it is a special case. * len bytes from sits in single mbuf, but the caller does * not like the starting position (off). * chop the current mbuf into two pieces, set off to 0. */ if (len <= n->m_len - off) { o = m_dup1(n, off, n->m_len - off, M_DONTWAIT); if (o == NULL) { m_freem(m); return NULL; /* ENOBUFS */ } n->m_len = off; o->m_next = n->m_next; n->m_next = o; n = n->m_next; off = 0; goto ok; } /* * we need to take hlen from and tlen from m_next, 0>, * and construct contiguous mbuf with m_len == len. * note that hlen + tlen == len, and tlen > 0. */ hlen = n->m_len - off; tlen = len - hlen; /* * ensure that we have enough trailing data on mbuf chain. * if not, we can do nothing about the chain. */ olen = 0; for (o = n->m_next; o != NULL; o = o->m_next) olen += o->m_len; if (hlen + olen < len) { m_freem(m); return NULL; /* mbuf chain too short */ } /* * easy cases first. * we need to use m_copydata() to get data from m_next, 0>. 
*/ if ((off == 0 || offp) && M_TRAILINGSPACE(n) >= tlen && writable) { m_copydata(n->m_next, 0, tlen, mtod(n, caddr_t) + n->m_len); n->m_len += tlen; m_adj(n->m_next, tlen); goto ok; } if ((off == 0 || offp) && M_LEADINGSPACE(n->m_next) >= hlen && writable) { n->m_next->m_data -= hlen; n->m_next->m_len += hlen; bcopy(mtod(n, caddr_t) + off, mtod(n->m_next, caddr_t), hlen); n->m_len -= hlen; n = n->m_next; off = 0; goto ok; } /* * now, we need to do the hard way. don't m_copy as there's no room * on both end. */ MGET(o, M_DONTWAIT, m->m_type); if (o && len > MLEN) { MCLGET(o, M_DONTWAIT); if ((o->m_flags & M_EXT) == 0) { m_free(o); o = NULL; } } if (!o) { m_freem(m); return NULL; /* ENOBUFS */ } /* get hlen from into */ o->m_len = hlen; bcopy(mtod(n, caddr_t) + off, mtod(o, caddr_t), hlen); n->m_len -= hlen; /* get tlen from m_next, 0> into */ m_copydata(n->m_next, 0, tlen, mtod(o, caddr_t) + o->m_len); o->m_len += tlen; m_adj(n->m_next, tlen); o->m_next = n->m_next; n->m_next = o; n = o; off = 0; ok: #ifdef PULLDOWN_DEBUG { struct mbuf *t; printf("after:"); for (t = m; t; t = t->m_next) printf("%c%d", t == n ? '*' : ' ', t->m_len); printf(" (off=%d)\n", off); } #endif if (offp) *offp = off; return n; } static struct mbuf * m_dup1(struct mbuf *m, int off, int len, int wait) { struct mbuf *n; int l; int copyhdr; if (len > MCLBYTES) return NULL; if (off == 0 && (m->m_flags & M_PKTHDR) != 0) { copyhdr = 1; MGETHDR(n, wait, m->m_type); l = MHLEN; } else { copyhdr = 0; MGET(n, wait, m->m_type); l = MLEN; } if (n && len > l) { MCLGET(n, wait); if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } if (!n) return NULL; - if (copyhdr) - M_COPY_PKTHDR(n, m); + if (copyhdr && !m_dup_pkthdr(n, m, wait)) { + m_free(n); + return NULL; + } m_copydata(m, off, len, mtod(n, caddr_t)); return n; } /* Get a packet tag structure along with specified data following. 
 */
struct m_tag *
m_tag_alloc(u_int32_t cookie, int type, int len, int wait)
{
	struct m_tag *t;

	if (len < 0)
		return NULL;
	/* The caller's data area lives immediately after the header (t + 1). */
	t = malloc(len + sizeof(struct m_tag), M_PACKET_TAGS, wait);
	if (t == NULL)
		return NULL;
	t->m_tag_id = type;
	t->m_tag_len = len;
	t->m_tag_cookie = cookie;
	return t;
}

/* Free a packet tag. */
void
m_tag_free(struct m_tag *t)
{
	free(t, M_PACKET_TAGS);
}

/* Prepend a packet tag. */
void
m_tag_prepend(struct mbuf *m, struct m_tag *t)
{
	KASSERT(m && t, ("m_tag_prepend: null argument, m %p t %p", m, t));
	SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link);
}

/* Unlink a packet tag. */
void
m_tag_unlink(struct mbuf *m, struct m_tag *t)
{
	KASSERT(m && t, ("m_tag_unlink: null argument, m %p t %p", m, t));
	SLIST_REMOVE(&m->m_pkthdr.tags, t, m_tag, m_tag_link);
}

/* Unlink and free a packet tag. */
void
m_tag_delete(struct mbuf *m, struct m_tag *t)
{
	KASSERT(m && t, ("m_tag_delete: null argument, m %p t %p", m, t));
	m_tag_unlink(m, t);
	m_tag_free(t);
}

/* Unlink and free a packet tag chain, starting from given tag. */
void
m_tag_delete_chain(struct mbuf *m, struct m_tag *t)
{
	struct m_tag *p, *q;

	KASSERT(m, ("m_tag_delete_chain: null mbuf"));
	if (t != NULL)
		p = t;
	else
		p = SLIST_FIRST(&m->m_pkthdr.tags);
	if (p == NULL)
		return;
	/* Delete p's successors first so a freed link is never followed. */
	while ((q = SLIST_NEXT(p, m_tag_link)) != NULL)
		m_tag_delete(m, q);
	m_tag_delete(m, p);
}

/* Find a tag, starting from a given position. */
struct m_tag *
m_tag_locate(struct mbuf *m, u_int32_t cookie, int type, struct m_tag *t)
{
	struct m_tag *p;

	KASSERT(m, ("m_tag_locate: null mbuf"));
	if (t == NULL)
		p = SLIST_FIRST(&m->m_pkthdr.tags);
	else
		p = SLIST_NEXT(t, m_tag_link);	/* resume search after t */
	while (p != NULL) {
		if (p->m_tag_cookie == cookie && p->m_tag_id == type)
			return p;
		p = SLIST_NEXT(p, m_tag_link);
	}
	return NULL;
}

/* Copy a single tag.
*/ struct m_tag * -m_tag_copy(struct m_tag *t) +m_tag_copy(struct m_tag *t, int how) { struct m_tag *p; KASSERT(t, ("m_tag_copy: null tag")); - p = m_tag_alloc(t->m_tag_cookie, t->m_tag_id, t->m_tag_len, M_NOWAIT); + p = m_tag_alloc(t->m_tag_cookie, t->m_tag_id, t->m_tag_len, how); if (p == NULL) return (NULL); bcopy(t + 1, p + 1, t->m_tag_len); /* Copy the data */ return p; } /* * Copy two tag chains. The destination mbuf (to) loses any attached * tags even if the operation fails. This should not be a problem, as * m_tag_copy_chain() is typically called with a newly-allocated * destination mbuf. */ int -m_tag_copy_chain(struct mbuf *to, struct mbuf *from) +m_tag_copy_chain(struct mbuf *to, struct mbuf *from, int how) { struct m_tag *p, *t, *tprev = NULL; KASSERT(to && from, ("m_tag_copy_chain: null argument, to %p from %p", to, from)); m_tag_delete_chain(to, NULL); SLIST_FOREACH(p, &from->m_pkthdr.tags, m_tag_link) { - t = m_tag_copy(p); + t = m_tag_copy(p, how); if (t == NULL) { m_tag_delete_chain(to, NULL); return 0; } if (tprev == NULL) SLIST_INSERT_HEAD(&to->m_pkthdr.tags, t, m_tag_link); else { SLIST_INSERT_AFTER(tprev, t, m_tag_link); tprev = t; } } return 1; } /* Initialize tags on an mbuf. */ void m_tag_init(struct mbuf *m) { SLIST_INIT(&m->m_pkthdr.tags); } /* Get first tag in chain. */ struct m_tag * m_tag_first(struct mbuf *m) { return SLIST_FIRST(&m->m_pkthdr.tags); } /* Get next tag in chain. */ struct m_tag * m_tag_next(struct mbuf *m, struct m_tag *t) { return SLIST_NEXT(t, m_tag_link); } Index: head/sys/net/if_loop.c =================================================================== --- head/sys/net/if_loop.c (revision 108465) +++ head/sys/net/if_loop.c (revision 108466) @@ -1,458 +1,458 @@ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_loop.c 8.2 (Berkeley) 1/9/95 * $FreeBSD$ */ /* * Loopback interface driver for protocol testing and timing. 
*/ #include "opt_atalk.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipx.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #ifdef IPX #include #include #endif #ifdef INET6 #ifndef INET #include #endif #include #include #endif #ifdef NS #include #include #endif #ifdef NETATALK #include #include #endif #ifdef TINY_LOMTU #define LOMTU (1024+512) #elif defined(LARGE_LOMTU) #define LOMTU 131072 #else #define LOMTU 16384 #endif #define LONAME "lo" struct lo_softc { struct ifnet sc_if; /* network-visible interface */ LIST_ENTRY(lo_softc) sc_next; }; int loioctl(struct ifnet *, u_long, caddr_t); static void lortrequest(int, struct rtentry *, struct rt_addrinfo *); int looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, struct rtentry *rt); int lo_clone_create(struct if_clone *, int); void lo_clone_destroy(struct ifnet *); struct ifnet *loif = NULL; /* Used externally */ static MALLOC_DEFINE(M_LO, LONAME, "Loopback Interface"); static LIST_HEAD(lo_list, lo_softc) lo_list; struct if_clone lo_cloner = IF_CLONE_INITIALIZER(LONAME, lo_clone_create, lo_clone_destroy, 1, IF_MAXUNIT); void lo_clone_destroy(ifp) struct ifnet *ifp; { struct lo_softc *sc; sc = ifp->if_softc; /* XXX: destroying lo0 will lead to panics. 
*/ KASSERT(loif != ifp, ("%s: destroying lo0", __func__)); bpfdetach(ifp); if_detach(ifp); LIST_REMOVE(sc, sc_next); free(sc, M_LO); } int lo_clone_create(ifc, unit) struct if_clone *ifc; int unit; { struct lo_softc *sc; MALLOC(sc, struct lo_softc *, sizeof(*sc), M_LO, M_WAITOK | M_ZERO); sc->sc_if.if_name = LONAME; sc->sc_if.if_unit = unit; sc->sc_if.if_mtu = LOMTU; sc->sc_if.if_flags = IFF_LOOPBACK | IFF_MULTICAST; sc->sc_if.if_ioctl = loioctl; sc->sc_if.if_output = looutput; sc->sc_if.if_type = IFT_LOOP; sc->sc_if.if_snd.ifq_maxlen = ifqmaxlen; sc->sc_if.if_softc = sc; if_attach(&sc->sc_if); bpfattach(&sc->sc_if, DLT_NULL, sizeof(u_int)); LIST_INSERT_HEAD(&lo_list, sc, sc_next); if (loif == NULL) loif = &sc->sc_if; return (0); } static int loop_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: LIST_INIT(&lo_list); if_clone_attach(&lo_cloner); break; case MOD_UNLOAD: printf("loop module unload - not possible for this module type\n"); return EINVAL; } return 0; } static moduledata_t loop_mod = { "loop", loop_modevent, 0 }; DECLARE_MODULE(loop, loop_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); int looutput(ifp, m, dst, rt) struct ifnet *ifp; register struct mbuf *m; struct sockaddr *dst; register struct rtentry *rt; { if ((m->m_flags & M_PKTHDR) == 0) panic("looutput no HDR"); if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { m_freem(m); return (rt->rt_flags & RTF_BLACKHOLE ? 0 : rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); } /* * KAME requires that the packet to be contiguous on the * mbuf. We need to make that sure. * this kind of code should be avoided. * XXX: fails to join if interface MTU > MCLBYTES. jumbogram? */ if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { struct mbuf *n; + /* XXX MT_HEADER should be m->m_type */ MGETHDR(n, M_DONTWAIT, MT_HEADER); if (!n) goto contiguousfail; - MCLGET(n, M_DONTWAIT); - if (! 
(n->m_flags & M_EXT)) { - m_freem(n); - goto contiguousfail; - } - - m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t)); - n->m_pkthdr = m->m_pkthdr; - n->m_len = m->m_pkthdr.len; - SLIST_INIT(&m->m_pkthdr.tags); + M_MOVE_PKTHDR(n, m); #ifdef MAC /* * XXXMAC: Once we put labels in tags and proper * primitives are used for relocating mbuf header * data, this will no longer be required. */ m->m_pkthdr.label.l_flags &= ~MAC_FLAG_INITIALIZED; #endif + MCLGET(n, M_DONTWAIT); + if (! (n->m_flags & M_EXT)) { + m_freem(n); + goto contiguousfail; + } + + m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t)); + n->m_len = m->m_pkthdr.len; m_freem(m); m = n; } if (0) { contiguousfail: printf("looutput: mbuf allocation failed\n"); } ifp->if_opackets++; ifp->if_obytes += m->m_pkthdr.len; #if 1 /* XXX */ switch (dst->sa_family) { case AF_INET: case AF_INET6: case AF_IPX: case AF_NS: case AF_APPLETALK: break; default: printf("looutput: af=%d unexpected\n", dst->sa_family); m_freem(m); return (EAFNOSUPPORT); } #endif return(if_simloop(ifp, m, dst->sa_family, 0)); } /* * if_simloop() * * This function is to support software emulation of hardware loopback, * i.e., for interfaces with the IFF_SIMPLEX attribute. Since they can't * hear their own broadcasts, we create a copy of the packet that we * would normally receive via a hardware loopback. * * This function expects the packet to include the media header of length hlen. 
*/ int if_simloop(ifp, m, af, hlen) struct ifnet *ifp; struct mbuf *m; int af; int hlen; { int isr; struct ifqueue *inq = 0; KASSERT((m->m_flags & M_PKTHDR) != 0, ("if_simloop: no HDR")); m->m_pkthdr.rcvif = ifp; /* BPF write needs to be handled specially */ if (af == AF_UNSPEC) { KASSERT(m->m_len >= sizeof(int), ("if_simloop: m_len")); af = *(mtod(m, int *)); m->m_len -= sizeof(int); m->m_pkthdr.len -= sizeof(int); m->m_data += sizeof(int); } /* Let BPF see incoming packet */ if (ifp->if_bpf) { struct mbuf m0, *n = m; if (ifp->if_bpf->bif_dlt == DLT_NULL) { /* * We need to prepend the address family as * a four byte field. Cons up a dummy header * to pacify bpf. This is safe because bpf * will only read from the mbuf (i.e., it won't * try to free it or keep a pointer a to it). */ m0.m_next = m; m0.m_len = 4; m0.m_data = (char *)⁡ n = &m0; } BPF_MTAP(ifp, n); } /* Strip away media header */ if (hlen > 0) { m_adj(m, hlen); #if defined(__alpha__) || defined(__ia64__) || defined(__sparc64__) /* The alpha doesn't like unaligned data. 
* We move data down in the first mbuf */ if (mtod(m, vm_offset_t) & 3) { KASSERT(hlen >= 3, ("if_simloop: hlen too small")); bcopy(m->m_data, (char *)(mtod(m, vm_offset_t) - (mtod(m, vm_offset_t) & 3)), m->m_len); mtod(m,vm_offset_t) -= (mtod(m, vm_offset_t) & 3); } #endif } /* Deliver to upper layer protocol */ switch (af) { #ifdef INET case AF_INET: inq = &ipintrq; isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: m->m_flags |= M_LOOP; inq = &ip6intrq; isr = NETISR_IPV6; break; #endif #ifdef IPX case AF_IPX: inq = &ipxintrq; isr = NETISR_IPX; break; #endif #ifdef NS case AF_NS: inq = &nsintrq; isr = NETISR_NS; break; #endif #ifdef NETATALK case AF_APPLETALK: inq = &atintrq2; isr = NETISR_ATALK; break; #endif default: printf("if_simloop: can't handle af=%d\n", af); m_freem(m); return (EAFNOSUPPORT); } ifp->if_ipackets++; ifp->if_ibytes += m->m_pkthdr.len; (void) IF_HANDOFF(inq, m, NULL); schednetisr(isr); return (0); } /* ARGSUSED */ static void lortrequest(cmd, rt, info) int cmd; struct rtentry *rt; struct rt_addrinfo *info; { if (rt) { rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */ /* * For optimal performance, the send and receive buffers * should be at least twice the MTU plus a little more for * overhead. */ rt->rt_rmx.rmx_recvpipe = rt->rt_rmx.rmx_sendpipe = 3 * LOMTU; } } /* * Process an ioctl request. */ /* ARGSUSED */ int loioctl(ifp, cmd, data) register struct ifnet *ifp; u_long cmd; caddr_t data; { register struct ifaddr *ifa; register struct ifreq *ifr = (struct ifreq *)data; register int error = 0; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP | IFF_RUNNING; ifa = (struct ifaddr *)data; ifa->ifa_rtrequest = lortrequest; /* * Everything else is done at a higher level. 
*/ break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == 0) { error = EAFNOSUPPORT; /* XXX */ break; } switch (ifr->ifr_addr.sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: error = EAFNOSUPPORT; break; } break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; case SIOCSIFFLAGS: break; default: error = EINVAL; } return (error); } Index: head/sys/netatm/port.h =================================================================== --- head/sys/netatm/port.h (revision 108465) +++ head/sys/netatm/port.h (revision 108466) @@ -1,307 +1,306 @@ /* * * =================================== * HARP | Host ATM Research Platform * =================================== * * * This Host ATM Research Platform ("HARP") file (the "Software") is * made available by Network Computing Services, Inc. ("NetworkCS") * "AS IS". NetworkCS does not provide maintenance, improvements or * support of any kind. * * NETWORKCS MAKES NO WARRANTIES OR REPRESENTATIONS, EXPRESS OR IMPLIED, * INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF MERCHANTABILITY * AND FITNESS FOR A PARTICULAR PURPOSE, AS TO ANY ELEMENT OF THE * SOFTWARE OR ANY SUPPORT PROVIDED IN CONNECTION WITH THIS SOFTWARE. * In no event shall NetworkCS be responsible for any damages, including * but not limited to consequential damages, arising from or relating to * any use of the Software or related support. * * Copyright 1994-1998 Network Computing Services, Inc. * * Copies of this Software may be made, however, the above copyright * notice must be reproduced on all copies. * * @(#) $FreeBSD$ * */ /* * System Configuration * -------------------- * * Porting aides * */ #ifndef _NETATM_PORT_H #define _NETATM_PORT_H #ifdef _KERNEL /* * Kernel buffers * * KBuffer Typedef for a kernel buffer. * * KB_NEXT(bfr) Access next buffer in chain (r/w). * KB_LEN(bfr) Access length of data in this buffer (r/w). * KB_QNEXT(bfr) Access next buffer in queue (r/w). 
* * KB_ALLOC(bfr, size, flags, type) * Allocates a new kernel buffer of at least size bytes. * KB_ALLOCPKT(bfr, size, flags, type) * Allocates a new kernel packet header buffer of at * least size bytes. * KB_ALLOCEXT(bfr, size, flags, type) * Allocates a new kernel buffer with external storage * of at least size bytes. * KB_FREEONE(bfr, nxt) Free buffer bfr and set next buffer in chain in nxt. * KB_FREEALL(bfr) Free bfr's entire buffer chain. * KB_COPY(bfr, off, len, new, flags) * Copy len bytes of user data from buffer bfr starting at * byte offset off and return new buffer chain in new. * If len is KB_COPYALL, copy until end of chain. * KB_COPYDATA(bfr, off, len, datap) * Copy data from buffer bfr starting at byte offset off * for len bytes into the data area pointed to by datap. * Returns the number of bytes not copied to datap. * KB_PULLUP(bfr, n, new) * Get at least the first n bytes of data in the buffer * chain headed by bfr contiguous in the first buffer. * Returns the (potentially new) head of the chain in new. * On failure the chain is freed and NULL is returned. * KB_LINKHEAD(new, head) * Link the kernel buffer new at the head of the buffer * chain headed by head. If both new and head are * packet header buffers, new will become the packet * header for the chain. * KB_LINK(new, prev) * Link the kernel buffer new into the buffer chain * after the buffer prev. * KB_UNLINKHEAD(head, next) * Unlink the kernel buffer from the head of the buffer * chain headed by head. The buffer head will be freed * and the new chain head will be placed in next. * KB_UNLINK(old, prev, next) * Unlink the kernel buffer old with previous buffer prev * from its buffer chain. The following buffer in the * chain will be placed in next and the buffer old will * be freed. * KB_ISPKT(bfr) Tests whether bfr is a packet header buffer. * KB_ISEXT(bfr) Tests whether bfr has external storage. 
* KB_BFRSTART(bfr, x, t) * Sets x (cast to type t) to point to the start of the * buffer space in bfr. * KB_BFREND(bfr, x, t) * Sets x (cast to type t) to point one byte past the end * of the buffer space in bfr. * KB_BFRLEN(bfr) Returns length of buffer space in bfr. * KB_DATASTART(bfr, x, t) * Sets x (cast to type t) to point to the start of the * buffer data contained in bfr. * KB_DATAEND(bfr, x, t) * Sets x (cast to type t) to point one byte past the end * of the buffer data contained in bfr. * KB_HEADSET(bfr, n) Sets the start address for buffer data in buffer bfr to * n bytes from the beginning of the buffer space. * KB_HEADMOVE(bfr, n) Adjust buffer data controls to move data down (n > 0) * or up (n < 0) n bytes in the buffer bfr. * KB_HEADADJ(bfr, n) Adjust buffer data controls to add (n > 0) or subtract * (n < 0) n bytes of data to/from the beginning of bfr. * KB_TAILADJ(bfr, n) Adjust buffer data controls to add (n > 0) or subtract * (n < 0) n bytes of data to/from the end of bfr. * KB_TAILALIGN(bfr, n) Set buffer data controls to place an object of size n * at the end of bfr, longword aligned. * KB_HEADROOM(bfr, n) Set n to the amount of buffer space available before * the start of data in bfr. * KB_TAILROOM(bfr, n) Set n to the amount of buffer space available after * the end of data in bfr. * KB_PLENGET(bfr, n) Set n to bfr's packet length. * KB_PLENSET(bfr, n) Set bfr's packet length to n. * KB_PLENADJ(bfr, n) Adjust total packet length by n bytes. 
* */ #include typedef struct mbuf KBuffer; #define KB_F_WAIT M_TRYWAIT #define KB_F_NOWAIT M_DONTWAIT #define KB_T_HEADER MT_HEADER #define KB_T_DATA MT_DATA #define KB_COPYALL M_COPYALL #define KB_NEXT(bfr) (bfr)->m_next #define KB_LEN(bfr) (bfr)->m_len #define KB_QNEXT(bfr) (bfr)->m_nextpkt #define KB_ALLOC(bfr, size, flags, type) { \ if ((size) <= MLEN) { \ MGET((bfr), (flags), (type)); \ } else \ (bfr) = NULL; \ } #define KB_ALLOCPKT(bfr, size, flags, type) { \ if ((size) <= MHLEN) { \ MGETHDR((bfr), (flags), (type)); \ } else \ (bfr) = NULL; \ } #define KB_ALLOCEXT(bfr, size, flags, type) { \ if ((size) <= MCLBYTES) { \ MGET((bfr), (flags), (type)); \ if ((bfr) != NULL) { \ MCLGET((bfr), (flags)); \ if (((bfr)->m_flags & M_EXT) == 0) { \ m_freem((bfr)); \ (bfr) = NULL; \ } \ } \ } else \ (bfr) = NULL; \ } #define KB_FREEONE(bfr, nxt) { \ (nxt) = m_free(bfr); \ } #define KB_FREEALL(bfr) { \ m_freem(bfr); \ } #define KB_COPY(bfr, off, len, new, flags) { \ (new) = m_copym((bfr), (off), (len), (flags)); \ } #define KB_COPYDATA(bfr, off, len, datap) \ (m_copydata((bfr), (off), (len), (datap)), 0) #define KB_PULLUP(bfr, n, new) { \ (new) = m_pullup((bfr), (n)); \ } #define KB_LINKHEAD(new, head) { \ if ((head) && KB_ISPKT(new) && KB_ISPKT(head)) {\ - M_COPY_PKTHDR((new), (head)); \ - (head)->m_flags &= ~M_PKTHDR; \ + M_MOVE_PKTHDR((new), (head)); \ } \ (new)->m_next = (head); \ } #define KB_LINK(new, prev) { \ (new)->m_next = (prev)->m_next; \ (prev)->m_next = (new); \ } #define KB_UNLINKHEAD(head, next) { \ (next) = m_free((head)); \ (head) = NULL; \ } #define KB_UNLINK(old, prev, next) { \ (next) = m_free((old)); \ (old) = NULL; \ (prev)->m_next = (next); \ } #define KB_ISPKT(bfr) (((bfr)->m_flags & M_PKTHDR) != 0) #define KB_ISEXT(bfr) (((bfr)->m_flags & M_EXT) != 0) #define KB_BFRSTART(bfr, x, t) { \ if ((bfr)->m_flags & M_EXT) \ (x) = (t)((bfr)->m_ext.ext_buf); \ else if ((bfr)->m_flags & M_PKTHDR) \ (x) = (t)(&(bfr)->m_pktdat); \ else \ (x) = 
(t)((bfr)->m_dat); \ } #define KB_BFREND(bfr, x, t) { \ if ((bfr)->m_flags & M_EXT) \ (x) = (t)((bfr)->m_ext.ext_buf + (bfr)->m_ext.ext_size);\ else if ((bfr)->m_flags & M_PKTHDR) \ (x) = (t)(&(bfr)->m_pktdat + MHLEN); \ else \ (x) = (t)((bfr)->m_dat + MLEN); \ } #define KB_BFRLEN(bfr) \ (((bfr)->m_flags & M_EXT) ? (bfr)->m_ext.ext_size : \ (((bfr)->m_flags & M_PKTHDR) ? MHLEN : MLEN)) #define KB_DATASTART(bfr, x, t) { \ (x) = mtod((bfr), t); \ } #define KB_DATAEND(bfr, x, t) { \ (x) = (t)(mtod((bfr), caddr_t) + (bfr)->m_len); \ } #define KB_HEADSET(bfr, n) { \ if ((bfr)->m_flags & M_EXT) \ (bfr)->m_data = (bfr)->m_ext.ext_buf + (n); \ else if ((bfr)->m_flags & M_PKTHDR) \ (bfr)->m_data = (bfr)->m_pktdat + (n); \ else \ (bfr)->m_data = (bfr)->m_dat + (n); \ } #define KB_HEADMOVE(bfr, n) { \ (bfr)->m_data += (n); \ } #define KB_HEADADJ(bfr, n) { \ (bfr)->m_len += (n); \ (bfr)->m_data -= (n); \ } #define KB_TAILADJ(bfr, n) { \ (bfr)->m_len += (n); \ } #define KB_TAILALIGN(bfr, n) { \ (bfr)->m_len = (n); \ if ((bfr)->m_flags & M_EXT) \ (bfr)->m_data = (caddr_t)(((uintptr_t)(bfr)->m_ext.ext_buf \ + (bfr)->m_ext.ext_size - (n)) & ~(sizeof(long) - 1));\ else \ (bfr)->m_data = (caddr_t)(((uintptr_t)(bfr)->m_dat + MLEN - (n)) \ & ~(sizeof(long) - 1)); \ } #define KB_HEADROOM(bfr, n) { \ /* N = m_leadingspace(BFR) XXX */ \ (n) = ((bfr)->m_flags & M_EXT ? (bfr)->m_data - (bfr)->m_ext.ext_buf : \ (bfr)->m_flags & M_PKTHDR ? (bfr)->m_data - (bfr)->m_pktdat : \ (bfr)->m_data - (bfr)->m_dat); \ } #define KB_TAILROOM(bfr, n) { \ (n) = M_TRAILINGSPACE(bfr); \ } #define KB_PLENGET(bfr, n) { \ (n) = (bfr)->m_pkthdr.len; \ } #define KB_PLENSET(bfr, n) { \ (bfr)->m_pkthdr.len = (n); \ } #define KB_PLENADJ(bfr, n) { \ (bfr)->m_pkthdr.len += (n); \ } /* * Kernel time * * KTimeout_ret Typedef for timeout() function return * * KT_TIME(t) Sets t to the current time. 
* */ typedef void KTimeout_ret; #define KT_TIME(t) microtime(&t) #endif /* _KERNEL */ #ifndef NTOHL #if BYTE_ORDER == BIG_ENDIAN #define NTOHL(x) (x) #define NTOHS(x) (x) #define HTONL(x) (x) #define HTONS(x) (x) #else #define NTOHL(x) (x) = ntohl((u_long)(x)) #define NTOHS(x) (x) = ntohs((u_short)(x)) #define HTONL(x) (x) = htonl((u_long)(x)) #define HTONS(x) (x) = htons((u_short)(x)) #endif #endif /* NTOHL */ #ifndef MAX #define MAX(a,b) max((a),(b)) #endif #ifndef MIN #define MIN(a,b) min((a),(b)) #endif #endif /* _NETATM_PORT_H */ Index: head/sys/netinet/ip_input.c =================================================================== --- head/sys/netinet/ip_input.c (revision 108465) +++ head/sys/netinet/ip_input.c (revision 108466) @@ -1,2153 +1,2162 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 * $FreeBSD$ */ #include "opt_bootp.h" #include "opt_ipfw.h" #include "opt_ipdn.h" #include "opt_ipdivert.h" #include "opt_ipfilter.h" #include "opt_ipstealth.h" #include "opt_ipsec.h" #include "opt_mac.h" #include "opt_pfil_hooks.h" #include "opt_random_ip_id.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif #ifdef FAST_IPSEC #include #include #endif int rsvp_on = 0; int ipforwarding = 0; SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, &ipforwarding, 0, "Enable IP forwarding between interfaces"); static int ipsendredirects = 1; /* XXX */ SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW, &ipsendredirects, 0, "Enable sending IP redirects"); int ip_defttl = IPDEFTTL; SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW, &ip_defttl, 0, "Maximum TTL on IP packets"); static int ip_dosourceroute = 0; SYSCTL_INT(_net_inet_ip, IPCTL_SOURCEROUTE, sourceroute, CTLFLAG_RW, 
&ip_dosourceroute, 0, "Enable forwarding source routed IP packets"); static int ip_acceptsourceroute = 0; SYSCTL_INT(_net_inet_ip, IPCTL_ACCEPTSOURCEROUTE, accept_sourceroute, CTLFLAG_RW, &ip_acceptsourceroute, 0, "Enable accepting source routed IP packets"); static int ip_keepfaith = 0; SYSCTL_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW, &ip_keepfaith, 0, "Enable packet capture for FAITH IPv4->IPv6 translater daemon"); static int ip_nfragpackets = 0; static int ip_maxfragpackets; /* initialized in ip_init() */ SYSCTL_INT(_net_inet_ip, OID_AUTO, maxfragpackets, CTLFLAG_RW, &ip_maxfragpackets, 0, "Maximum number of IPv4 fragment reassembly queue entries"); static int ip_sendsourcequench = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW, &ip_sendsourcequench, 0, "Enable the transmission of source quench packets"); /* * XXX - Setting ip_checkinterface mostly implements the receive side of * the Strong ES model described in RFC 1122, but since the routing table * and transmit implementation do not implement the Strong ES model, * setting this to 1 results in an odd hybrid. * * XXX - ip_checkinterface currently must be disabled if you use ipnat * to translate the destination address to another local interface. * * XXX - ip_checkinterface must be disabled if you add IP aliases * to the loopback interface instead of the interface where the * packets for those addresses are received. 
*/ static int ip_checkinterface = 1; SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW, &ip_checkinterface, 0, "Verify packet arrives on correct interface"); #ifdef DIAGNOSTIC static int ipprintfs = 0; #endif static int ipqmaxlen = IFQ_MAXLEN; extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; struct in_ifaddrhead in_ifaddrhead; /* first inet address */ struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ u_long in_ifaddrhmask; /* mask for hash table */ SYSCTL_INT(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLFLAG_RW, &ipintrq.ifq_maxlen, 0, "Maximum size of the IP input queue"); SYSCTL_INT(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLFLAG_RD, &ipintrq.ifq_drops, 0, "Number of packets dropped from the IP input queue"); struct ipstat ipstat; SYSCTL_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, &ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); /* Packet reassembly stuff */ #define IPREASS_NHASH_LOG2 6 #define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2) #define IPREASS_HMASK (IPREASS_NHASH - 1) #define IPREASS_HASH(x,y) \ (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK) static TAILQ_HEAD(ipqhead, ipq) ipq[IPREASS_NHASH]; static int nipq = 0; /* total # of reass queues */ static int maxnipq; #ifdef IPCTL_DEFMTU SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, &ip_mtu, 0, "Default MTU"); #endif #ifdef IPSTEALTH static int ipstealth = 0; SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW, &ipstealth, 0, ""); #endif /* Firewall hooks */ ip_fw_chk_t *ip_fw_chk_ptr; int fw_enable = 1 ; int fw_one_pass = 1; /* Dummynet hooks */ ip_dn_io_t *ip_dn_io_ptr; /* * XXX this is ugly -- the following two global variables are * used to store packet state while it travels through the stack. * Note that the code even makes assumptions on the size and * alignment of fields inside struct ip_srcrt so e.g. adding some * fields will break the code. 
This needs to be fixed. * * We need to save the IP options in case a protocol wants to respond * to an incoming packet over the same route if the packet got here * using IP source routing. This allows connection establishment and * maintenance when the remote end is on a network that is not known * to us. */ static int ip_nhops = 0; static struct ip_srcrt { struct in_addr dst; /* final destination */ char nop; /* one NOP to align */ char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; } ip_srcrt; static void save_rte(u_char *, struct in_addr); static int ip_dooptions(struct mbuf *m, int, struct sockaddr_in *next_hop); static void ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop); static void ip_freef(struct ipqhead *, struct ipq *); static struct mbuf *ip_reass(struct mbuf *, struct ipqhead *, struct ipq *, u_int32_t *, u_int16_t *); static void ipintr(void); /* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. */ void ip_init() { register struct protosw *pr; register int i; TAILQ_INIT(&in_ifaddrhead); in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &in_ifaddrhmask); pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == 0) panic("ip_init"); for (i = 0; i < IPPROTO_MAX; i++) ip_protox[i] = pr - inetsw; for (pr = inetdomain.dom_protosw; pr < inetdomain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) ip_protox[pr->pr_protocol] = pr - inetsw; for (i = 0; i < IPREASS_NHASH; i++) TAILQ_INIT(&ipq[i]); maxnipq = nmbclusters / 4; ip_maxfragpackets = nmbclusters / 4; #ifndef RANDOM_IP_ID ip_id = time_second & 0xffff; #endif ipintrq.ifq_maxlen = ipqmaxlen; mtx_init(&ipintrq.ifq_mtx, "ip_inq", NULL, MTX_DEF); ipintrq_present = 1; register_netisr(NETISR_IP, ipintr); } /* * XXX watch out this one. 
It is perhaps used as a cache for
 * the most recently used route ? it is cleared in in_addroute()
 * when a new route is successfully created.
 */
struct route ipforward_rt;

/*
 * Ip input routine.  Checksum and byte swap header.  If fragmented
 * try to reassemble.  Process options.  Pass to next level.
 */
void
ip_input(struct mbuf *m)
{
	struct ip *ip;
	struct ipq *fp;
	struct in_ifaddr *ia = NULL;
	struct ifaddr *ifa;
	int    i, hlen, checkif;
	u_short sum;
	struct in_addr pkt_dst;
	u_int32_t divert_info = 0;		/* packet divert/tee info */
	struct ip_fw_args args;
#ifdef PFIL_HOOKS
	struct packet_filter_hook *pfh;
	struct mbuf *m0;
	int rv;
#endif /* PFIL_HOOKS */
#ifdef FAST_IPSEC
	struct m_tag *mtag;
	struct tdb_ident *tdbi;
	struct secpolicy *sp;
	int s, error;
#endif /* FAST_IPSEC */

	args.eh = NULL;
	args.oif = NULL;
	args.rule = NULL;
	args.divert_rule = 0;			/* divert cookie */
	args.next_hop = NULL;

	/* Grab info from MT_TAG mbufs prepended to the chain. */
	for (; m && m->m_type == MT_TAG; m = m->m_next) {
		switch(m->_m_tag_id) {
		default:
			printf("ip_input: unrecognised MT_TAG tag %d\n",
			    m->_m_tag_id);
			break;

		case PACKET_TAG_DUMMYNET:
			args.rule = ((struct dn_pkt *)m)->rule;
			break;

		case PACKET_TAG_DIVERT:
			args.divert_rule = (intptr_t)m->m_hdr.mh_data & 0xffff;
			break;

		case PACKET_TAG_IPFORWARD:
			args.next_hop = (struct sockaddr_in *)m->m_hdr.mh_data;
			break;
		}
	}

	KASSERT(m != NULL && (m->m_flags & M_PKTHDR) != 0,
	    ("ip_input: no HDR"));

	if (args.rule) {	/* dummynet already filtered us */
		ip = mtod(m, struct ip *);
		hlen = ip->ip_hl << 2;
		goto iphack ;
	}

	ipstat.ips_total++;

	if (m->m_pkthdr.len < sizeof(struct ip))
		goto tooshort;

	if (m->m_len < sizeof (struct ip) &&
	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
		ipstat.ips_toosmall++;
		return;
	}
	ip = mtod(m, struct ip *);

	if (ip->ip_v != IPVERSION) {
		ipstat.ips_badvers++;
		goto bad;
	}

	hlen = ip->ip_hl << 2;
	if (hlen < sizeof(struct ip)) {	/* minimum header length */
		ipstat.ips_badhlen++;
		goto bad;
	}
	if (hlen > m->m_len) {
		if ((m = m_pullup(m, hlen)) == 0) {
			ipstat.ips_badhlen++;
			return;
		}
		ip = mtod(m, struct ip *);
	}

	/* 127/8 must not appear on wire - RFC1122 */
	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
			ipstat.ips_badaddr++;
			goto bad;
		}
	}

	/* Prefer a hardware-verified checksum when the NIC supplied one. */
	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
	} else {
		if (hlen == sizeof(struct ip)) {
			sum = in_cksum_hdr(ip);
		} else {
			sum = in_cksum(m, hlen);
		}
	}
	if (sum) {
		ipstat.ips_badsum++;
		goto bad;
	}

	/*
	 * Convert fields to host representation.
	 */
	ip->ip_len = ntohs(ip->ip_len);
	if (ip->ip_len < hlen) {
		ipstat.ips_badlen++;
		goto bad;
	}
	ip->ip_off = ntohs(ip->ip_off);

	/*
	 * Check that the amount of data in the buffers
	 * is as at least much as the IP header would have us expect.
	 * Trim mbufs if longer than we expect.
	 * Drop packet if shorter than we expect.
	 */
	if (m->m_pkthdr.len < ip->ip_len) {
tooshort:
		ipstat.ips_tooshort++;
		goto bad;
	}
	if (m->m_pkthdr.len > ip->ip_len) {
		if (m->m_len == m->m_pkthdr.len) {
			m->m_len = ip->ip_len;
			m->m_pkthdr.len = ip->ip_len;
		} else
			m_adj(m, ip->ip_len - m->m_pkthdr.len);
	}

	/*
	 * IpHack's section.
	 * Right now when no processing on packet has done
	 * and it is still fresh out of network we do our black
	 * deals with it.
	 * - Firewall: deny/allow/divert
	 * - Xlate: translate packet's addr/port (NAT).
	 * - Pipe: pass pkt through dummynet.
	 * - Wrap: fake packet's addr/port <unimpl.>
	 * - Encapsulate: put it in another IP and send out. <unimp.>
	 */
iphack:
#ifdef PFIL_HOOKS
	/*
	 * Run through list of hooks for input packets.  If there are any
	 * filters which require that additional packets in the flow are
	 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
	 * Note that filters must _never_ set this flag, as another filter
	 * in the list may have previously cleared it.
	 */
	m0 = m;
	pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
	for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
		if (pfh->pfil_func) {
			rv = pfh->pfil_func(ip, hlen, m->m_pkthdr.rcvif, 0, &m0);
			if (rv)
				return;
			m = m0;
			if (m == NULL)
				return;
			ip = mtod(m, struct ip *);
		}
#endif /* PFIL_HOOKS */

	if (fw_enable && IPFW_LOADED) {
		/*
		 * If we've been forwarded from the output side, then
		 * skip the firewall a second time
		 */
		if (args.next_hop)
			goto ours;

		args.m = m;
		i = ip_fw_chk_ptr(&args);
		m = args.m;

		if ( (i & IP_FW_PORT_DENY_FLAG) || m == NULL) { /* drop */
			if (m)
				m_freem(m);
			return;
		}
		ip = mtod(m, struct ip *); /* just in case m changed */
		if (i == 0 && args.next_hop == NULL)	/* common case */
			goto pass;

		if (DUMMYNET_LOADED && (i & IP_FW_PORT_DYNT_FLAG) != 0) {
			/* Send packet to the appropriate pipe */
			ip_dn_io_ptr(m, i&0xffff, DN_TO_IP_IN, &args);
			return;
		}
#ifdef IPDIVERT
		if (i != 0 && (i & IP_FW_PORT_DYNT_FLAG) == 0) {
			/* Divert or tee packet */
			divert_info = i;
			goto ours;
		}
#endif
		if (i == 0 && args.next_hop != NULL)
			goto pass;
		/*
		 * if we get here, the packet must be dropped
		 */
		m_freem(m);
		return;
	}
pass:

	/*
	 * Process options and, if not destined for us,
	 * ship it on.  ip_dooptions returns 1 when an
	 * error was detected (causing an icmp message
	 * to be sent and the original packet to be freed).
	 */
	ip_nhops = 0;		/* for source routed packets */
	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0, args.next_hop))
		return;

        /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
         * matter if it is destined to another node, or whether it is
         * a multicast one, RSVP wants it! and prevents it from being forwarded
         * anywhere else. Also checks if the rsvp daemon is running before
	 * grabbing the packet.
         */
	if (rsvp_on && ip->ip_p==IPPROTO_RSVP)
		goto ours;

	/*
	 * Check our list of addresses, to see if the packet is for us.
	 * If we don't have any addresses, assume any unicast packet
	 * we receive might be for us (and let the upper layers deal
	 * with it).
	 */
	if (TAILQ_EMPTY(&in_ifaddrhead) &&
	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
		goto ours;

	/*
	 * Cache the destination address of the packet; this may be
	 * changed by use of 'ipfw fwd'.
	 */
	pkt_dst = args.next_hop ? args.next_hop->sin_addr : ip->ip_dst;

	/*
	 * Enable a consistency check between the destination address
	 * and the arrival interface for a unicast packet (the RFC 1122
	 * strong ES model) if IP forwarding is disabled and the packet
	 * is not locally generated and the packet is not subject to
	 * 'ipfw fwd'.
	 *
	 * XXX - Checking also should be disabled if the destination
	 * address is ipnat'ed to a different interface.
	 *
	 * XXX - Checking is incompatible with IP aliases added
	 * to the loopback interface instead of the interface where
	 * the packets are received.
	 */
	checkif = ip_checkinterface && (ipforwarding == 0) &&
	    m->m_pkthdr.rcvif != NULL &&
	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) &&
	    (args.next_hop == NULL);

	/*
	 * Check for exact addresses in the hash bucket.
	 */
	LIST_FOREACH(ia, INADDR_HASH(pkt_dst.s_addr), ia_hash) {
		/*
		 * If the address matches, verify that the packet
		 * arrived via the correct interface if checking is
		 * enabled.
		 */
		if (IA_SIN(ia)->sin_addr.s_addr == pkt_dst.s_addr &&
		    (!checkif || ia->ia_ifp == m->m_pkthdr.rcvif))
			goto ours;
	}
	/*
	 * Check for broadcast addresses.
	 *
	 * Only accept broadcast packets that arrive via the matching
	 * interface.  Reception of forwarded directed broadcasts would
	 * be handled via ip_forward() and ether_output() with the loopback
	 * into the stack for SIMPLEX interfaces handled by ether_output().
	 */
	if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
	        TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != AF_INET)
				continue;
			ia = ifatoia(ifa);
			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
			    pkt_dst.s_addr)
				goto ours;
			if (ia->ia_netbroadcast.s_addr == pkt_dst.s_addr)
				goto ours;
#ifdef BOOTP_COMPAT
			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY)
				goto ours;
#endif
		}
	}
	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
		struct in_multi *inm;
		if (ip_mrouter) {
			/*
			 * If we are acting as a multicast router, all
			 * incoming multicast packets are passed to the
			 * kernel-level multicast forwarding function.
			 * The packet is returned (relatively) intact; if
			 * ip_mforward() returns a non-zero value, the packet
			 * must be discarded, else it may be accepted below.
			 */
			if (ip_mforward &&
			    ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
				ipstat.ips_cantforward++;
				m_freem(m);
				return;
			}

			/*
			 * The process-level routing daemon needs to receive
			 * all multicast IGMP packets, whether or not this
			 * host belongs to their destination groups.
			 */
			if (ip->ip_p == IPPROTO_IGMP)
				goto ours;
			ipstat.ips_forward++;
		}
		/*
		 * See if we belong to the destination multicast group on the
		 * arrival interface.
		 */
		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
		if (inm == NULL) {
			ipstat.ips_notmember++;
			m_freem(m);
			return;
		}
		goto ours;
	}
	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
		goto ours;
	if (ip->ip_dst.s_addr == INADDR_ANY)
		goto ours;

	/*
	 * FAITH(Firewall Aided Internet Translator)
	 */
	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type == IFT_FAITH) {
		if (ip_keepfaith) {
			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
				goto ours;
		}
		m_freem(m);
		return;
	}

	/*
	 * Not for us; forward if possible and desirable.
	 */
	if (ipforwarding == 0) {
		ipstat.ips_cantforward++;
		m_freem(m);
	} else {
#ifdef IPSEC
		/*
		 * Enforce inbound IPsec SPD.
		 */
		if (ipsec4_in_reject(m, NULL)) {
			ipsecstat.in_polvio++;
			goto bad;
		}
#endif /* IPSEC */
#ifdef FAST_IPSEC
		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
		s = splnet();
		if (mtag != NULL) {
			tdbi = (struct tdb_ident *)(mtag + 1);
			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
		} else {
			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
						   IP_FORWARDING, &error);
		}
		if (sp == NULL) {	/* NB: can happen if error */
			splx(s);
			/*XXX error stat???*/
			DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
			goto bad;
		}

		/*
		 * Check security policy against packet attributes.
		 */
		error = ipsec_in_reject(sp, m);
		KEY_FREESP(&sp);
		splx(s);
		if (error) {
			ipstat.ips_cantforward++;
			goto bad;
		}
#endif /* FAST_IPSEC */
		ip_forward(m, 0, args.next_hop);
	}
	return;

ours:
#ifdef IPSTEALTH
	/*
	 * IPSTEALTH: Process non-routing options only
	 * if the packet is destined for us.
	 */
	if (ipstealth && hlen > sizeof (struct ip) &&
	    ip_dooptions(m, 1, args.next_hop))
		return;
#endif /* IPSTEALTH */

	/* Count the packet in the ip address stats */
	if (ia != NULL) {
		ia->ia_ifa.if_ipackets++;
		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
	}

	/*
	 * If offset or IP_MF are set, must reassemble.
	 * Otherwise, nothing need be done.
	 * (We could look in the reassembly queue to see
	 * if the packet was previously fragmented,
	 * but it's not worth the time; just let them time out.)
	 */
	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
		sum = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
		/*
		 * Look for queue of fragments
		 * of this datagram.
		 */
		TAILQ_FOREACH(fp, &ipq[sum], ipq_list)
			if (ip->ip_id == fp->ipq_id &&
			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
#ifdef MAC
			    mac_fragment_match(m, fp) &&
#endif
			    ip->ip_p == fp->ipq_p)
				goto found;

		fp = 0;

		/* check if there's a place for the new queue */
		if (nipq > maxnipq) {
			/*
			 * drop something from the tail of the current queue
			 * before proceeding further
			 */
			struct ipq *q = TAILQ_LAST(&ipq[sum], ipqhead);
			if (q == NULL) {   /* gak */
				for (i = 0; i < IPREASS_NHASH; i++) {
					struct ipq *r = TAILQ_LAST(&ipq[i], ipqhead);
					if (r) {
						ip_freef(&ipq[i], r);
						break;
					}
				}
			} else
				ip_freef(&ipq[sum], q);
		}
found:
		/*
		 * Adjust ip_len to not reflect header,
		 * convert offset of this to bytes.
		 */
		ip->ip_len -= hlen;
		if (ip->ip_off & IP_MF) {
			/*
			 * Make sure that fragments have a data length
			 * that's a non-zero multiple of 8 bytes.
			 */
			if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
				ipstat.ips_toosmall++; /* XXX */
				goto bad;
			}
			m->m_flags |= M_FRAG;
		} else
			m->m_flags &= ~M_FRAG;
		ip->ip_off <<= 3;

		/*
		 * Attempt reassembly; if it succeeds, proceed.
		 * ip_reass() will return a different mbuf, and update
		 * the divert info in divert_info and args.divert_rule.
		 */
		ipstat.ips_fragments++;
		m->m_pkthdr.header = ip;
		m = ip_reass(m,
		    &ipq[sum], fp, &divert_info, &args.divert_rule);
		if (m == 0)
			return;
		ipstat.ips_reassembled++;
		ip = mtod(m, struct ip *);
		/* Get the header length of the reassembled packet */
		hlen = ip->ip_hl << 2;
#ifdef IPDIVERT
		/* Restore original checksum before diverting packet */
		if (divert_info != 0) {
			ip->ip_len += hlen;
			ip->ip_len = htons(ip->ip_len);
			ip->ip_off = htons(ip->ip_off);
			ip->ip_sum = 0;
			if (hlen == sizeof(struct ip))
				ip->ip_sum = in_cksum_hdr(ip);
			else
				ip->ip_sum = in_cksum(m, hlen);
			ip->ip_off = ntohs(ip->ip_off);
			ip->ip_len = ntohs(ip->ip_len);
			ip->ip_len -= hlen;
		}
#endif
	} else
		ip->ip_len -= hlen;

#ifdef IPDIVERT
	/*
	 * Divert or tee packet to the divert protocol if required.
	 */
	if (divert_info != 0) {
		struct mbuf *clone = NULL;

		/* Clone packet if we're doing a 'tee' */
		if ((divert_info & IP_FW_PORT_TEE_FLAG) != 0)
			clone = m_dup(m, M_DONTWAIT);

		/* Restore packet header fields to original values */
		ip->ip_len += hlen;
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);

		/* Deliver packet to divert input routine */
		divert_packet(m, 1, divert_info & 0xffff,
		    args.divert_rule);
		ipstat.ips_delivered++;

		/* If 'tee', continue with original packet */
		if (clone == NULL)
			return;
		m = clone;
		ip = mtod(m, struct ip *);
		ip->ip_len += hlen;
		/*
		 * Jump backwards to complete processing of the
		 * packet.  But first clear divert_info to avoid
		 * entering this block again.
		 * We do not need to clear args.divert_rule
		 * or args.next_hop as they will not be used.
		 */
		divert_info = 0;
		goto pass;
	}
#endif

#ifdef IPSEC
	/*
	 * enforce IPsec policy checking if we are seeing last header.
	 * note that we do not visit this with protocols with pcb layer
	 * code - like udp/tcp/raw ip.
	 */
	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
	    ipsec4_in_reject(m, NULL)) {
		ipsecstat.in_polvio++;
		goto bad;
	}
#endif
#if FAST_IPSEC
	/*
	 * enforce IPsec policy checking if we are seeing last header.
	 * note that we do not visit this with protocols with pcb layer
	 * code - like udp/tcp/raw ip.
	 */
	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
		/*
		 * Check if the packet has already had IPsec processing
		 * done.  If so, then just pass it along.  This tag gets
		 * set during AH, ESP, etc. input handling, before the
		 * packet is returned to the ip input queue for delivery.
		 */
		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
		s = splnet();
		if (mtag != NULL) {
			tdbi = (struct tdb_ident *)(mtag + 1);
			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
		} else {
			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
						   IP_FORWARDING, &error);
		}
		if (sp != NULL) {
			/*
			 * Check security policy against packet attributes.
			 */
			error = ipsec_in_reject(sp, m);
			KEY_FREESP(&sp);
		} else {
			/* XXX error stat??? */
			error = EINVAL;
			DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
			goto bad;
		}
		splx(s);
		if (error)
			goto bad;
	}
#endif /* FAST_IPSEC */

	/*
	 * Switch out to protocol's input routine.
	 */
	ipstat.ips_delivered++;
	if (args.next_hop && ip->ip_p == IPPROTO_TCP) {
		/* TCP needs IPFORWARD info if available */
		struct m_hdr tag;

		tag.mh_type = MT_TAG;
		tag.mh_flags = PACKET_TAG_IPFORWARD;
		tag.mh_data = (caddr_t)args.next_hop;
		tag.mh_next = m;

		(*inetsw[ip_protox[ip->ip_p]].pr_input)(
			(struct mbuf *)&tag, hlen);
	} else
		(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
	return;
bad:
	m_freem(m);
}

/*
 * IP software interrupt routine - to go away sometime soon
 */
static void
ipintr(void)
{
	struct mbuf *m;

	while (1) {
		IF_DEQUEUE(&ipintrq, m);
		if (m == 0)
			return;
		ip_input(m);
	}
}

/*
 * Take incoming datagram fragment and try to reassemble it into
 * whole datagram.  If a chain for reassembly of this datagram already
 * exists, then it is given as fp; otherwise have to make a chain.
 *
 * When IPDIVERT enabled, keep additional state with each packet that
 * tells us if we need to divert or tee the packet we're building.
 * In particular, *divinfo includes the port and TEE flag,
 * *divert_rule is the number of the matching rule.
 */
static struct mbuf *
ip_reass(struct mbuf *m, struct ipqhead *head, struct ipq *fp,
	u_int32_t *divinfo, u_int16_t *divert_rule)
{
	struct ip *ip = mtod(m, struct ip *);
	register struct mbuf *p, *q, *nq;
	struct mbuf *t;
	int hlen = ip->ip_hl << 2;
	int i, next;

	/*
	 * Presence of header sizes in mbufs
	 * would confuse code below.
	 */
	m->m_data += hlen;
	m->m_len -= hlen;

	/*
	 * If first fragment to arrive, create a reassembly queue.
	 */
	if (fp == 0) {
		/*
		 * Enforce upper bound on number of fragmented packets
		 * for which we attempt reassembly;
		 * If maxfrag is 0, never accept fragments.
		 * If maxfrag is -1, accept all fragments without limitation.
		 */
		if ((ip_maxfragpackets >= 0) &&
		    (ip_nfragpackets >= ip_maxfragpackets))
			goto dropfrag;
		ip_nfragpackets++;
		/* The ipq itself lives in an MT_FTABLE mbuf. */
		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
			goto dropfrag;
		fp = mtod(t, struct ipq *);
#ifdef MAC
		mac_init_ipq(fp);
		mac_create_ipq(m, fp);
#endif
		TAILQ_INSERT_HEAD(head, fp, ipq_list);
		nipq++;
		fp->ipq_ttl = IPFRAGTTL;
		fp->ipq_p = ip->ip_p;
		fp->ipq_id = ip->ip_id;
		fp->ipq_src = ip->ip_src;
		fp->ipq_dst = ip->ip_dst;
		fp->ipq_frags = m;
		m->m_nextpkt = NULL;
#ifdef IPDIVERT
		fp->ipq_div_info = 0;
		fp->ipq_div_cookie = 0;
#endif
		goto inserted;
	} else {
#ifdef MAC
		mac_update_ipq(m, fp);
#endif
	}

	/* Each queued fragment keeps its ip header pointer in m_pkthdr.header. */
#define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))

	/*
	 * Find a segment which begins after this one does.
	 */
	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
		if (GETIP(q)->ip_off > ip->ip_off)
			break;

	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already.  If so, drop the data from the incoming
	 * segment.  If it provides all of our data, drop us, otherwise
	 * stick new segment in the proper place.
	 *
	 * If some of the data is dropped from the preceding
	 * segment, then it's checksum is invalidated.
	 */
	if (p) {
		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
		if (i > 0) {
			if (i >= ip->ip_len)
				goto dropfrag;
			m_adj(m, i);
			m->m_pkthdr.csum_flags = 0;
			ip->ip_off += i;
			ip->ip_len -= i;
		}
		m->m_nextpkt = p->m_nextpkt;
		p->m_nextpkt = m;
	} else {
		m->m_nextpkt = fp->ipq_frags;
		fp->ipq_frags = m;
	}

	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
	     q = nq) {
		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
		if (i < GETIP(q)->ip_len) {
			GETIP(q)->ip_len -= i;
			GETIP(q)->ip_off += i;
			m_adj(q, i);
			q->m_pkthdr.csum_flags = 0;
			break;
		}
		nq = q->m_nextpkt;
		m->m_nextpkt = nq;
		m_freem(q);
	}

inserted:

#ifdef IPDIVERT
	/*
	 * Transfer firewall instructions to the fragment structure.
	 * Only trust info in the fragment at offset 0.
	 */
	if (ip->ip_off == 0) {
		fp->ipq_div_info = *divinfo;
		fp->ipq_div_cookie = *divert_rule;
	}
	*divinfo = 0;
	*divert_rule = 0;
#endif

	/*
	 * Check for complete reassembly.
	 */
	next = 0;
	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
		if (GETIP(q)->ip_off != next)
			return (0);
		next += GETIP(q)->ip_len;
	}
	/* Make sure the last packet didn't have the IP_MF flag */
	if (p->m_flags & M_FRAG)
		return (0);

	/*
	 * Reassembly is complete.  Make sure the packet is a sane size.
	 */
	q = fp->ipq_frags;
	ip = GETIP(q);
	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
		ipstat.ips_toolong++;
		ip_freef(head, fp);
		return (0);
	}

	/*
	 * Concatenate fragments.
	 */
	m = q;
	t = m->m_next;
	m->m_next = 0;
	m_cat(m, t);
	nq = q->m_nextpkt;
	q->m_nextpkt = 0;
	for (q = nq; q != NULL; q = nq) {
		nq = q->m_nextpkt;
		q->m_nextpkt = NULL;
		/* Merge hardware checksum state across all fragments. */
		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
		m_cat(m, q);
	}

#ifdef MAC
	mac_create_datagram_from_ipq(fp, m);
	mac_destroy_ipq(fp);
#endif

#ifdef IPDIVERT
	/*
	 * Extract firewall instructions from the fragment structure.
	 */
	*divinfo = fp->ipq_div_info;
	*divert_rule = fp->ipq_div_cookie;
#endif

	/*
	 * Create header for new ip packet by
	 * modifying header of first packet;
	 * dequeue and discard fragment reassembly header.
	 * Make header visible.
	 */
	ip->ip_len = next;
	ip->ip_src = fp->ipq_src;
	ip->ip_dst = fp->ipq_dst;
	TAILQ_REMOVE(head, fp, ipq_list);
	nipq--;
	(void) m_free(dtom(fp));
	ip_nfragpackets--;
	m->m_len += (ip->ip_hl << 2);
	m->m_data -= (ip->ip_hl << 2);
	/* some debugging cruft by sklower, below, will go away soon */
	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
		m_fixhdr(m);
	return (m);

dropfrag:
#ifdef IPDIVERT
	*divinfo = 0;
	*divert_rule = 0;
#endif
	ipstat.ips_fragdropped++;
	m_freem(m);
	return (0);

#undef GETIP
}

/*
 * Free a fragment reassembly header and all
 * associated datagrams.
 */
static void
ip_freef(fhp, fp)
	struct ipqhead *fhp;
	struct ipq *fp;
{
	register struct mbuf *q;

	while (fp->ipq_frags) {
		q = fp->ipq_frags;
		fp->ipq_frags = q->m_nextpkt;
		m_freem(q);
	}
	TAILQ_REMOVE(fhp, fp, ipq_list);
	(void) m_free(dtom(fp));
	ip_nfragpackets--;
	nipq--;
}

/*
 * IP timer processing;
 * if a timer expires on a reassembly
 * queue, discard it.
 */
void
ip_slowtimo()
{
	register struct ipq *fp;
	int s = splnet();
	int i;

	for (i = 0; i < IPREASS_NHASH; i++) {
		for(fp = TAILQ_FIRST(&ipq[i]); fp;) {
			struct ipq *fpp;

			/* Advance before freeing: ip_freef invalidates fpp. */
			fpp = fp;
			fp = TAILQ_NEXT(fp, ipq_list);
			if(--fpp->ipq_ttl == 0) {
				ipstat.ips_fragtimeout++;
				ip_freef(&ipq[i], fpp);
			}
		}
	}
	/*
	 * If we are over the maximum number of fragments
	 * (due to the limit being lowered), drain off
	 * enough to get down to the new limit.
	 */
	for (i = 0; i < IPREASS_NHASH; i++) {
		if (ip_maxfragpackets >= 0) {
			while (ip_nfragpackets > ip_maxfragpackets &&
				!TAILQ_EMPTY(&ipq[i])) {
				ipstat.ips_fragdropped++;
				ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
			}
		}
	}
	ipflow_slowtimo();
	splx(s);
}

/*
 * Drain off all datagram fragments.
 */
void
ip_drain()
{
	int     i;

	for (i = 0; i < IPREASS_NHASH; i++) {
		while(!TAILQ_EMPTY(&ipq[i])) {
			ipstat.ips_fragdropped++;
			ip_freef(&ipq[i], TAILQ_FIRST(&ipq[i]));
		}
	}
	in_rtqdrain();
}

/*
 * Do option processing on a datagram,
 * possibly discarding it if bad options are encountered,
 * or forwarding it if source-routed.
 * The pass argument is used when operating in the IPSTEALTH
 * mode to tell what options to process:
 * [LS]SRR (pass 0) or the others (pass 1).
 * The reason for as many as two passes is that when doing IPSTEALTH,
 * non-routing options should be processed only if the packet is for us.
 * Returns 1 if packet has been forwarded/freed,
 * 0 if the packet should be processed further.
 */
static int
ip_dooptions(struct mbuf *m, int pass, struct sockaddr_in *next_hop)
{
	struct ip *ip = mtod(m, struct ip *);
	u_char *cp;
	struct in_ifaddr *ia;
	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
	struct in_addr *sin, dst;
	n_time ntime;
	struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };

	dst = ip->ip_dst;
	cp = (u_char *)(ip + 1);
	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[IPOPT_OPTVAL];
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP)
			optlen = 1;
		else {
			/* Validate the option length against what remains. */
			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			optlen = cp[IPOPT_OLEN];
			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
		}
		switch (opt) {

		default:
			break;

		/*
		 * Source routing with record.
		 * Find interface with current destination address.
		 * If none on this machine then drop if strictly routed,
		 * or do nothing if loosely routed.
		 * Record interface address and bring up next address
		 * component.  If strictly routed make sure next
		 * address is on directly accessible net.
		 */
		case IPOPT_LSRR:
		case IPOPT_SSRR:
#ifdef IPSTEALTH
			if (ipstealth && pass > 0)
				break;
#endif
			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			ipaddr.sin_addr = ip->ip_dst;
			ia = (struct in_ifaddr *)
				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
			if (ia == 0) {
				if (opt == IPOPT_SSRR) {
					type = ICMP_UNREACH;
					code = ICMP_UNREACH_SRCFAIL;
					goto bad;
				}
				if (!ip_dosourceroute)
					goto nosourcerouting;
				/*
				 * Loose routing, and not at next destination
				 * yet; nothing to do except forward.
				 */
				break;
			}
			off--;			/* 0 origin */
			if (off > optlen - (int)sizeof(struct in_addr)) {
				/*
				 * End of source route.  Should be for us.
				 */
				if (!ip_acceptsourceroute)
					goto nosourcerouting;
				save_rte(cp, ip->ip_src);
				break;
			}
#ifdef IPSTEALTH
			if (ipstealth)
				goto dropit;
#endif
			if (!ip_dosourceroute) {
				if (ipforwarding) {
					char buf[16]; /* aaa.bbb.ccc.ddd\0 */
					/*
					 * Acting as a router, so generate ICMP
					 */
nosourcerouting:
					strcpy(buf, inet_ntoa(ip->ip_dst));
					log(LOG_WARNING,
					    "attempted source route from %s to %s\n",
					    inet_ntoa(ip->ip_src), buf);
					type = ICMP_UNREACH;
					code = ICMP_UNREACH_SRCFAIL;
					goto bad;
				} else {
					/*
					 * Not acting as a router, so silently drop.
					 */
#ifdef IPSTEALTH
dropit:
#endif
					ipstat.ips_cantforward++;
					m_freem(m);
					return (1);
				}
			}

			/*
			 * locate outgoing interface
			 */
			(void)memcpy(&ipaddr.sin_addr, cp + off,
			    sizeof(ipaddr.sin_addr));

			if (opt == IPOPT_SSRR) {
#define	INA	struct in_ifaddr *
#define	SA	struct sockaddr *
			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
			} else
				ia = ip_rtaddr(ipaddr.sin_addr, &ipforward_rt);
			if (ia == 0) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_SRCFAIL;
				goto bad;
			}
			ip->ip_dst = ipaddr.sin_addr;
			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
			    sizeof(struct in_addr));
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			/*
			 * Let ip_intr's mcast routing check handle mcast pkts
			 */
			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
			break;

		case IPOPT_RR:
#ifdef IPSTEALTH
			if (ipstealth && pass == 0)
				break;
#endif
			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			/*
			 * If no space remains, ignore.
			 */
			off--;			/* 0 origin */
			if (off > optlen - (int)sizeof(struct in_addr))
				break;
			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
			    sizeof(ipaddr.sin_addr));
			/*
			 * locate outgoing interface; if we're the destination,
			 * use the incoming interface (should be same).
			 */
			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
			    (ia = ip_rtaddr(ipaddr.sin_addr, &ipforward_rt)) == 0) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_HOST;
				goto bad;
			}
			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
			    sizeof(struct in_addr));
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			break;

		case IPOPT_TS:
#ifdef IPSTEALTH
			if (ipstealth && pass == 0)
				break;
#endif
			code = cp - (u_char *)ip;
			if (optlen < 4 || optlen > 40) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < 5) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if (off > optlen - (int)sizeof(int32_t)) {
				/* Option full: bump the overflow counter. */
				cp[IPOPT_OFFSET + 1] += (1 << 4);
				if ((cp[IPOPT_OFFSET + 1] & 0xf0) == 0) {
					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
					goto bad;
				}
				break;
			}
			off--;				/* 0 origin */
			sin = (struct in_addr *)(cp + off);
			switch (cp[IPOPT_OFFSET + 1] & 0x0f) {

			case IPOPT_TS_TSONLY:
				break;

			case IPOPT_TS_TSANDADDR:
				if (off + sizeof(n_time) +
				    sizeof(struct in_addr) > optlen) {
					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
					goto bad;
				}
				ipaddr.sin_addr = dst;
				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
							    m->m_pkthdr.rcvif);
				if (ia == 0)
					continue;
				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
				    sizeof(struct in_addr));
				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
				off += sizeof(struct in_addr);
				break;

			case IPOPT_TS_PRESPEC:
				if (off + sizeof(n_time) +
				    sizeof(struct in_addr) > optlen) {
					code = &cp[IPOPT_OFFSET] - (u_char *)ip;
					goto bad;
				}
				(void)memcpy(&ipaddr.sin_addr, sin,
				    sizeof(struct in_addr));
				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
					continue;
				cp[IPOPT_OFFSET] += sizeof(struct in_addr);
				off += sizeof(struct in_addr);
				break;

			default:
				code = &cp[IPOPT_OFFSET + 1] - (u_char *)ip;
				goto bad;
			}
			ntime = iptime();
			(void)memcpy(cp + off, &ntime, sizeof(n_time));
			cp[IPOPT_OFFSET] += sizeof(n_time);
		}
	}
	if (forward && ipforwarding) {
		ip_forward(m, 1, next_hop);
		return (1);
	}
	return (0);
bad:
	icmp_error(m, type, code, 0, 0);
	ipstat.ips_badoptions++;
	return (1);
}

/*
 * Given address of next destination (final or next hop),
 * return
internet address info of interface to be used to get there.
 */
struct in_ifaddr *
ip_rtaddr(dst, rt)
	struct in_addr dst;
	struct route *rt;
{
	register struct sockaddr_in *sin;

	sin = (struct sockaddr_in *)&rt->ro_dst;

	/* Reuse the cached route only if it is up and for the same dst. */
	if (rt->ro_rt == 0 || !(rt->ro_rt->rt_flags & RTF_UP) ||
	    dst.s_addr != sin->sin_addr.s_addr) {
		if (rt->ro_rt) {
			RTFREE(rt->ro_rt);
			rt->ro_rt = 0;
		}
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = dst;

		rtalloc_ign(rt, RTF_PRCLONING);
	}
	if (rt->ro_rt == 0)
		return ((struct in_ifaddr *)0);
	return (ifatoia(rt->ro_rt->rt_ifa));
}

/*
 * Save incoming source route for use in replies,
 * to be picked up later by ip_srcroute if the receiver is interested.
 */
static void
save_rte(option, dst)
	u_char *option;
	struct in_addr dst;
{
	unsigned olen;

	olen = option[IPOPT_OLEN];
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf("save_rte: olen %d\n", olen);
#endif
	/* Silently ignore routes too long for the static ip_srcrt buffer. */
	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
		return;
	bcopy(option, ip_srcrt.srcopt, olen);
	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
	ip_srcrt.dst = dst;
}

/*
 * Retrieve incoming source route for use in replies,
 * in the same form used by setsockopt.
 * The first hop is placed before the options, will be removed later.
 */
struct mbuf *
ip_srcroute()
{
	register struct in_addr *p, *q;
	register struct mbuf *m;

	if (ip_nhops == 0)
		return ((struct mbuf *)0);
	m = m_get(M_DONTWAIT, MT_HEADER);
	if (m == 0)
		return ((struct mbuf *)0);

#define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))

	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
	    OPTSIZ;
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
#endif

	/*
	 * First save first hop for return route
	 */
	p = &ip_srcrt.route[ip_nhops - 1];
	*(mtod(m, struct in_addr *)) = *p--;
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf(" hops %lx", (u_long)ntohl(mtod(m, struct in_addr *)->s_addr));
#endif

	/*
	 * Copy option fields and padding (nop) to mbuf.
	 */
	ip_srcrt.nop = IPOPT_NOP;
	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
	    &ip_srcrt.nop, OPTSIZ);
	q = (struct in_addr *)(mtod(m, caddr_t) +
	    sizeof(struct in_addr) + OPTSIZ);
#undef OPTSIZ
	/*
	 * Record return path as an IP source route,
	 * reversing the path (pointers are now aligned).
	 */
	while (p >= ip_srcrt.route) {
#ifdef DIAGNOSTIC
		if (ipprintfs)
			printf(" %lx", (u_long)ntohl(q->s_addr));
#endif
		*q++ = *p--;
	}
	/*
	 * Last hop goes to final destination.
	 */
	*q = ip_srcrt.dst;
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf(" %lx\n", (u_long)ntohl(q->s_addr));
#endif
	return (m);
}

/*
 * Strip out IP options, at higher
 * level protocol in the kernel.
 * Second argument is buffer to which options
 * will be moved, and return value is their length.
 * XXX should be deleted; last arg currently ignored.
 */
void
ip_stripoptions(m, mopt)
	register struct mbuf *m;
	struct mbuf *mopt;
{
	register int i;
	struct ip *ip = mtod(m, struct ip *);
	register caddr_t opts;
	int olen;

	olen = (ip->ip_hl << 2) - sizeof (struct ip);
	opts = (caddr_t)(ip + 1);
	i = m->m_len - (sizeof (struct ip) + olen);
	/* Slide the payload down over the options. */
	bcopy(opts + olen, opts, (unsigned)i);
	m->m_len -= olen;
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len -= olen;
	ip->ip_v = IPVERSION;
	ip->ip_hl = sizeof(struct ip) >> 2;
}

/* Map PRC_* control codes to errno values. */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		0,		0,
	ENOPROTOOPT,	ECONNREFUSED
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advancing
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.
 */
static void
ip_forward(struct mbuf *m, int srcrt, struct sockaddr_in *next_hop)
{
	struct ip *ip = mtod(m, struct ip *);
	struct rtentry *rt;
	int error, type = 0, code = 0;
	struct mbuf *mcopy;
	n_long dest;
	struct in_addr pkt_dst;
	struct ifnet *destifp;
#if defined(IPSEC) || defined(FAST_IPSEC)
	struct ifnet dummyifp;
#endif

	dest = 0;
	/*
	 * Cache the destination address of the packet; this may be
	 * changed by use of 'ipfw fwd'.
	 */
	pkt_dst = next_hop ?
next_hop->sin_addr : ip->ip_dst; #ifdef DIAGNOSTIC if (ipprintfs) printf("forward: src %lx dst %lx ttl %x\n", (u_long)ip->ip_src.s_addr, (u_long)pkt_dst.s_addr, ip->ip_ttl); #endif if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(pkt_dst) == 0) { ipstat.ips_cantforward++; m_freem(m); return; } #ifdef IPSTEALTH if (!ipstealth) { #endif if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0); return; } #ifdef IPSTEALTH } #endif if (ip_rtaddr(pkt_dst, &ipforward_rt) == 0) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0); return; } else rt = ipforward_rt.ro_rt; /* * Save the IP header and at most 8 bytes of the payload, * in case we need to generate an ICMP message to the src. * * XXX this can be optimized a lot by saving the data in a local * buffer on the stack (72 bytes at most), and only allocating the * mbuf if really necessary. The vast majority of the packets * are forwarded without having to send an ICMP back (either * because unnecessary, or because rate limited), so we are * really we are wasting a lot of work here. * * We don't use m_copy() because it might return a reference * to a shared cluster. Both this function and ip_output() * assume exclusive access to the IP header in `m', so any * data in a cluster may change before we reach icmp_error(). */ MGET(mcopy, M_DONTWAIT, m->m_type); + if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) { + /* + * It's probably ok if the pkthdr dup fails (because + * the deep copy of the tag chain failed), but for now + * be conservative and just discard the copy since + * code below may some day want the tags. + */ + m_free(mcopy); + mcopy = NULL; + } if (mcopy != NULL) { - M_COPY_PKTHDR(mcopy, m); mcopy->m_len = imin((ip->ip_hl << 2) + 8, (int)ip->ip_len); m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); #ifdef MAC /* * XXXMAC: This will eventually become an explicit * labeling point. 
		 */
		mac_create_mbuf_from_mbuf(m, mcopy);
#endif
	}

#ifdef IPSTEALTH
	if (!ipstealth) {
#endif
		ip->ip_ttl -= IPTTLDEC;
#ifdef IPSTEALTH
	}
#endif

	/*
	 * If forwarding packet using same interface that it came in on,
	 * perhaps should send a redirect to sender to shortcut a hop.
	 * Only send redirect if source is sending directly to us,
	 * and if packet was not source routed (or has any options).
	 * Also, don't send redirect if forwarding using a default route
	 * or a route modified by a redirect.
	 */
	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
	    ipsendredirects && !srcrt && !next_hop) {
#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
		u_long src = ntohl(ip->ip_src.s_addr);

		if (RTA(rt) &&
		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
		    if (rt->rt_flags & RTF_GATEWAY)
			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
		    else
			dest = pkt_dst.s_addr;
		    /* Router requirements says to only send host redirects */
		    type = ICMP_REDIRECT;
		    code = ICMP_REDIRECT_HOST;
#ifdef DIAGNOSTIC
		    if (ipprintfs)
		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
#endif
		}
	}

    {
	struct m_hdr tag;

	if (next_hop) {
		/* Pass IPFORWARD info if available */

		tag.mh_type = MT_TAG;
		tag.mh_flags = PACKET_TAG_IPFORWARD;
		tag.mh_data = (caddr_t)next_hop;
		tag.mh_next = m;
		m = (struct mbuf *)&tag;
	}
	error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
			  IP_FORWARDING, 0, NULL);
    }
	if (error)
		ipstat.ips_cantforward++;
	else {
		ipstat.ips_forward++;
		if (type)
			ipstat.ips_redirectsent++;
		else {
			if (mcopy) {
				ipflow_create(&ipforward_rt, mcopy);
				m_freem(mcopy);
			}
			return;
		}
	}
	if (mcopy == NULL)
		return;
	destifp = NULL;

	/* Translate the ip_output() error into an ICMP type/code pair. */
	switch (error) {

	case 0:				/* forwarded, but need redirect */
		/* type, code set above */
		break;

	case ENETUNREACH:		/* shouldn't happen, checked above */
	case EHOSTUNREACH:
	case ENETDOWN:
	case EHOSTDOWN:
	default:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_HOST;
		break;

	case EMSGSIZE:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_NEEDFRAG;
#ifdef IPSEC /* * If the packet is routed over IPsec tunnel, tell the * originator the tunnel MTU. * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz * XXX quickhack!!! */ if (ipforward_rt.ro_rt) { struct secpolicy *sp = NULL; int ipsecerror; int ipsechdr; struct route *ro; sp = ipsec4_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND, IP_FORWARDING, &ipsecerror); if (sp == NULL) destifp = ipforward_rt.ro_rt->rt_ifp; else { /* count IPsec header size */ ipsechdr = ipsec4_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, NULL); /* * find the correct route for outer IPv4 * header, compute tunnel MTU. * * XXX BUG ALERT * The "dummyifp" code relies upon the fact * that icmp_error() touches only ifp->if_mtu. */ /*XXX*/ destifp = NULL; if (sp->req != NULL && sp->req->sav != NULL && sp->req->sav->sah != NULL) { ro = &sp->req->sav->sah->sa_route; if (ro->ro_rt && ro->ro_rt->rt_ifp) { dummyifp.if_mtu = ro->ro_rt->rt_ifp->if_mtu; dummyifp.if_mtu -= ipsechdr; destifp = &dummyifp; } } key_freesp(sp); } } #elif FAST_IPSEC /* * If the packet is routed over IPsec tunnel, tell the * originator the tunnel MTU. * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz * XXX quickhack!!! */ if (ipforward_rt.ro_rt) { struct secpolicy *sp = NULL; int ipsecerror; int ipsechdr; struct route *ro; sp = ipsec_getpolicybyaddr(mcopy, IPSEC_DIR_OUTBOUND, IP_FORWARDING, &ipsecerror); if (sp == NULL) destifp = ipforward_rt.ro_rt->rt_ifp; else { /* count IPsec header size */ ipsechdr = ipsec4_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, NULL); /* * find the correct route for outer IPv4 * header, compute tunnel MTU. * * XXX BUG ALERT * The "dummyifp" code relies upon the fact * that icmp_error() touches only ifp->if_mtu. 
*/ /*XXX*/ destifp = NULL; if (sp->req != NULL && sp->req->sav != NULL && sp->req->sav->sah != NULL) { ro = &sp->req->sav->sah->sa_route; if (ro->ro_rt && ro->ro_rt->rt_ifp) { dummyifp.if_mtu = ro->ro_rt->rt_ifp->if_mtu; dummyifp.if_mtu -= ipsechdr; destifp = &dummyifp; } } KEY_FREESP(&sp); } } #else /* !IPSEC && !FAST_IPSEC */ if (ipforward_rt.ro_rt) destifp = ipforward_rt.ro_rt->rt_ifp; #endif /*IPSEC*/ ipstat.ips_cantfrag++; break; case ENOBUFS: /* * A router should not generate ICMP_SOURCEQUENCH as * required in RFC1812 Requirements for IP Version 4 Routers. * Source quench could be a big problem under DoS attacks, * or if the underlying interface is rate-limited. * Those who need source quench packets may re-enable them * via the net.inet.ip.sendsourcequench sysctl. */ if (ip_sendsourcequench == 0) { m_freem(mcopy); return; } else { type = ICMP_SOURCEQUENCH; code = 0; } break; case EACCES: /* ipfw denied packet */ m_freem(mcopy); return; } icmp_error(mcopy, type, code, dest, destifp); } void ip_savecontrol(inp, mp, ip, m) register struct inpcb *inp; register struct mbuf **mp; register struct ip *ip; register struct mbuf *m; { if (inp->inp_socket->so_options & SO_TIMESTAMP) { struct timeval tv; microtime(&tv); *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), SCM_TIMESTAMP, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVDSTADDR) { *mp = sbcreatecontrol((caddr_t) &ip->ip_dst, sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #ifdef notyet /* XXX * Moving these out of udp_input() made them even more broken * than they already were. 
*/ /* options were tossed already */ if (inp->inp_flags & INP_RECVOPTS) { *mp = sbcreatecontrol((caddr_t) opts_deleted_above, sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } /* ip_srcroute doesn't do what we want here, need to fix */ if (inp->inp_flags & INP_RECVRETOPTS) { *mp = sbcreatecontrol((caddr_t) ip_srcroute(), sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #endif if (inp->inp_flags & INP_RECVIF) { struct ifnet *ifp; struct sdlbuf { struct sockaddr_dl sdl; u_char pad[32]; } sdlbuf; struct sockaddr_dl *sdp; struct sockaddr_dl *sdl2 = &sdlbuf.sdl; if (((ifp = m->m_pkthdr.rcvif)) && ( ifp->if_index && (ifp->if_index <= if_index))) { sdp = (struct sockaddr_dl *) (ifaddr_byindex(ifp->if_index)->ifa_addr); /* * Change our mind and don't try copy. */ if ((sdp->sdl_family != AF_LINK) || (sdp->sdl_len > sizeof(sdlbuf))) { goto makedummy; } bcopy(sdp, sdl2, sdp->sdl_len); } else { makedummy: sdl2->sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]); sdl2->sdl_family = AF_LINK; sdl2->sdl_index = 0; sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; } *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len, IP_RECVIF, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } } /* * XXX these routines are called from the upper part of the kernel. * They need to be locked when we remove Giant. * * They could also be moved to ip_mroute.c, since all the RSVP * handling is done there already. */ static int ip_rsvp_on; struct socket *ip_rsvpd; int ip_rsvp_init(struct socket *so) { if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) return EOPNOTSUPP; if (ip_rsvpd != NULL) return EADDRINUSE; ip_rsvpd = so; /* * This may seem silly, but we need to be sure we don't over-increment * the RSVP counter, in case something slips up. 
*/ if (!ip_rsvp_on) { ip_rsvp_on = 1; rsvp_on++; } return 0; } int ip_rsvp_done(void) { ip_rsvpd = NULL; /* * This may seem silly, but we need to be sure we don't over-decrement * the RSVP counter, in case something slips up. */ if (ip_rsvp_on) { ip_rsvp_on = 0; rsvp_on--; } return 0; } void rsvp_input(struct mbuf *m, int off) /* XXX must fixup manually */ { if (rsvp_input_p) { /* call the real one if loaded */ rsvp_input_p(m, off); return; } /* Can still get packets with rsvp_on = 0 if there is a local member * of the group to which the RSVP packet is addressed. But in this * case we want to throw the packet away. */ if (!rsvp_on) { m_freem(m); return; } if (ip_rsvpd != NULL) { rip_input(m, off); return; } /* Drop the packet */ m_freem(m); } Index: head/sys/netinet6/esp_input.c =================================================================== --- head/sys/netinet6/esp_input.c (revision 108465) +++ head/sys/netinet6/esp_input.c (revision 108466) @@ -1,986 +1,985 @@ /* $FreeBSD$ */ /* $KAME: esp_input.c,v 1.62 2002/01/07 11:39:57 kjc Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * RFC1827/2406 Encapsulated Security Payload. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #ifdef INET6 #include #include #include #include #include #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #include #include #include #define IPLEN_FLIPPED #define ESPMAXLEN \ (sizeof(struct esp) < sizeof(struct newesp) \ ? sizeof(struct newesp) : sizeof(struct esp)) #ifdef INET extern struct protosw inetsw[]; void esp4_input(m, off) struct mbuf *m; int off; { struct ip *ip; struct esp *esp; struct esptail esptail; u_int32_t spi; struct secasvar *sav = NULL; size_t taillen; u_int16_t nxt; const struct esp_algorithm *algo; int ivlen; size_t hlen; size_t esplen; int proto; /* sanity check for alignment. 
*/ if (off % 4 != 0 || m->m_pkthdr.len % 4 != 0) { ipseclog((LOG_ERR, "IPv4 ESP input: packet alignment problem " "(off=%d, pktlen=%d)\n", off, m->m_pkthdr.len)); ipsecstat.in_inval++; goto bad; } if (m->m_len < off + ESPMAXLEN) { m = m_pullup(m, off + ESPMAXLEN); if (!m) { ipseclog((LOG_DEBUG, "IPv4 ESP input: can't pullup in esp4_input\n")); ipsecstat.in_inval++; goto bad; } } ip = mtod(m, struct ip *); proto = ip->ip_p; esp = (struct esp *)(((u_int8_t *)ip) + off); #ifdef _IP_VHL hlen = IP_VHL_HL(ip->ip_vhl) << 2; #else hlen = ip->ip_hl << 2; #endif /* find the sassoc. */ spi = esp->esp_spi; if ((sav = key_allocsa(AF_INET, (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst, IPPROTO_ESP, spi)) == 0) { ipseclog((LOG_WARNING, "IPv4 ESP input: no key association found for spi %u\n", (u_int32_t)ntohl(spi))); ipsecstat.in_nosa++; goto bad; } KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp4_input called to allocate SA:%p\n", sav)); if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) { ipseclog((LOG_DEBUG, "IPv4 ESP input: non-mature/dying SA found for spi %u\n", (u_int32_t)ntohl(spi))); ipsecstat.in_badspi++; goto bad; } algo = esp_algorithm_lookup(sav->alg_enc); if (!algo) { ipseclog((LOG_DEBUG, "IPv4 ESP input: " "unsupported encryption algorithm for spi %u\n", (u_int32_t)ntohl(spi))); ipsecstat.in_badspi++; goto bad; } /* check if we have proper ivlen information */ ivlen = sav->ivlen; if (ivlen < 0) { ipseclog((LOG_ERR, "inproper ivlen in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); ipsecstat.in_inval++; goto bad; } if (!((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay && (sav->alg_auth && sav->key_auth))) goto noreplaycheck; if (sav->alg_auth == SADB_X_AALG_NULL || sav->alg_auth == SADB_AALG_NONE) goto noreplaycheck; /* * check for sequence number. 
*/ if (ipsec_chkreplay(ntohl(((struct newesp *)esp)->esp_seq), sav)) ; /* okey */ else { ipsecstat.in_espreplay++; ipseclog((LOG_WARNING, "replay packet in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); goto bad; } /* check ICV */ { u_char sum0[AH_MAXSUMSIZE]; u_char sum[AH_MAXSUMSIZE]; const struct ah_algorithm *sumalgo; size_t siz; sumalgo = ah_algorithm_lookup(sav->alg_auth); if (!sumalgo) goto noreplaycheck; siz = (((*sumalgo->sumsiz)(sav) + 3) & ~(4 - 1)); if (m->m_pkthdr.len < off + ESPMAXLEN + siz) { ipsecstat.in_inval++; goto bad; } if (AH_MAXSUMSIZE < siz) { ipseclog((LOG_DEBUG, "internal error: AH_MAXSUMSIZE must be larger than %lu\n", (u_long)siz)); ipsecstat.in_inval++; goto bad; } m_copydata(m, m->m_pkthdr.len - siz, siz, &sum0[0]); if (esp_auth(m, off, m->m_pkthdr.len - off - siz, sav, sum)) { ipseclog((LOG_WARNING, "auth fail in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); ipsecstat.in_espauthfail++; goto bad; } if (bcmp(sum0, sum, siz) != 0) { ipseclog((LOG_WARNING, "auth fail in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); ipsecstat.in_espauthfail++; goto bad; } /* strip off the authentication data */ m_adj(m, -siz); ip = mtod(m, struct ip *); #ifdef IPLEN_FLIPPED ip->ip_len = ip->ip_len - siz; #else ip->ip_len = htons(ntohs(ip->ip_len) - siz); #endif m->m_flags |= M_AUTHIPDGM; ipsecstat.in_espauthsucc++; } /* * update sequence number. */ if ((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay) { if (ipsec_updatereplay(ntohl(((struct newesp *)esp)->esp_seq), sav)) { ipsecstat.in_espreplay++; goto bad; } } noreplaycheck: /* process main esp header. 
*/ if (sav->flags & SADB_X_EXT_OLD) { /* RFC 1827 */ esplen = sizeof(struct esp); } else { /* RFC 2406 */ if (sav->flags & SADB_X_EXT_DERIV) esplen = sizeof(struct esp); else esplen = sizeof(struct newesp); } if (m->m_pkthdr.len < off + esplen + ivlen + sizeof(esptail)) { ipseclog((LOG_WARNING, "IPv4 ESP input: packet too short\n")); ipsecstat.in_inval++; goto bad; } if (m->m_len < off + esplen + ivlen) { m = m_pullup(m, off + esplen + ivlen); if (!m) { ipseclog((LOG_DEBUG, "IPv4 ESP input: can't pullup in esp4_input\n")); ipsecstat.in_inval++; goto bad; } } /* * pre-compute and cache intermediate key */ if (esp_schedule(algo, sav) != 0) { ipsecstat.in_inval++; goto bad; } /* * decrypt the packet. */ if (!algo->decrypt) panic("internal error: no decrypt function"); if ((*algo->decrypt)(m, off, sav, algo, ivlen)) { /* m is already freed */ m = NULL; ipseclog((LOG_ERR, "decrypt fail in IPv4 ESP input: %s\n", ipsec_logsastr(sav))); ipsecstat.in_inval++; goto bad; } ipsecstat.in_esphist[sav->alg_enc]++; m->m_flags |= M_DECRYPTED; /* * find the trailer of the ESP. */ m_copydata(m, m->m_pkthdr.len - sizeof(esptail), sizeof(esptail), (caddr_t)&esptail); nxt = esptail.esp_nxt; taillen = esptail.esp_padlen + sizeof(esptail); if (m->m_pkthdr.len < taillen || m->m_pkthdr.len - taillen < hlen) { /* ? */ ipseclog((LOG_WARNING, "bad pad length in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); ipsecstat.in_inval++; goto bad; } /* strip off the trailing pad area. */ m_adj(m, -taillen); #ifdef IPLEN_FLIPPED ip->ip_len = ip->ip_len - taillen; #else ip->ip_len = htons(ntohs(ip->ip_len) - taillen); #endif /* was it transmitted over the IPsec tunnel SA? */ if (ipsec4_tunnel_validate(m, off + esplen + ivlen, nxt, sav)) { /* * strip off all the headers that precedes ESP header. * IP4 xx ESP IP4' payload -> IP4' payload * * XXX more sanity checks * XXX relationship with gif? 
*/ u_int8_t tos; tos = ip->ip_tos; m_adj(m, off + esplen + ivlen); if (m->m_len < sizeof(*ip)) { m = m_pullup(m, sizeof(*ip)); if (!m) { ipsecstat.in_inval++; goto bad; } } ip = mtod(m, struct ip *); /* ECN consideration. */ ip_ecn_egress(ip4_ipsec_ecn, &tos, &ip->ip_tos); if (!key_checktunnelsanity(sav, AF_INET, (caddr_t)&ip->ip_src, (caddr_t)&ip->ip_dst)) { ipseclog((LOG_ERR, "ipsec tunnel address mismatch " "in IPv4 ESP input: %s %s\n", ipsec4_logpacketstr(ip, spi), ipsec_logsastr(sav))); ipsecstat.in_inval++; goto bad; } key_sa_recordxfer(sav, m); if (ipsec_addhist(m, IPPROTO_ESP, spi) != 0 || ipsec_addhist(m, IPPROTO_IPV4, 0) != 0) { ipsecstat.in_nomem++; goto bad; } if (! IF_HANDOFF(&ipintrq, m, NULL)) { ipsecstat.in_inval++; m = NULL; goto bad; } m = NULL; schednetisr(NETISR_IP); /* can be skipped but to make sure */ nxt = IPPROTO_DONE; } else { /* * strip off ESP header and IV. * even in m_pulldown case, we need to strip off ESP so that * we can always compute checksum for AH correctly. 
*/ size_t stripsiz; stripsiz = esplen + ivlen; ip = mtod(m, struct ip *); ovbcopy((caddr_t)ip, (caddr_t)(((u_char *)ip) + stripsiz), off); m->m_data += stripsiz; m->m_len -= stripsiz; m->m_pkthdr.len -= stripsiz; ip = mtod(m, struct ip *); #ifdef IPLEN_FLIPPED ip->ip_len = ip->ip_len - stripsiz; #else ip->ip_len = htons(ntohs(ip->ip_len) - stripsiz); #endif ip->ip_p = nxt; key_sa_recordxfer(sav, m); if (ipsec_addhist(m, IPPROTO_ESP, spi) != 0) { ipsecstat.in_nomem++; goto bad; } if (nxt != IPPROTO_DONE) { if ((inetsw[ip_protox[nxt]].pr_flags & PR_LASTHDR) != 0 && ipsec4_in_reject(m, NULL)) { ipsecstat.in_polvio++; goto bad; } (*inetsw[ip_protox[nxt]].pr_input)(m, off); } else m_freem(m); m = NULL; } if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp4_input call free SA:%p\n", sav)); key_freesav(sav); } ipsecstat.in_success++; return; bad: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp4_input call free SA:%p\n", sav)); key_freesav(sav); } if (m) m_freem(m); return; } #endif /* INET */ #ifdef INET6 int esp6_input(mp, offp, proto) struct mbuf **mp; int *offp, proto; { struct mbuf *m = *mp; int off = *offp; struct ip6_hdr *ip6; struct esp *esp; struct esptail esptail; u_int32_t spi; struct secasvar *sav = NULL; size_t taillen; u_int16_t nxt; const struct esp_algorithm *algo; int ivlen; size_t esplen; /* sanity check for alignment. 
*/ if (off % 4 != 0 || m->m_pkthdr.len % 4 != 0) { ipseclog((LOG_ERR, "IPv6 ESP input: packet alignment problem " "(off=%d, pktlen=%d)\n", off, m->m_pkthdr.len)); ipsec6stat.in_inval++; goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, ESPMAXLEN, IPPROTO_DONE); esp = (struct esp *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(esp, struct esp *, m, off, ESPMAXLEN); if (esp == NULL) { ipsec6stat.in_inval++; return IPPROTO_DONE; } #endif ip6 = mtod(m, struct ip6_hdr *); if (ntohs(ip6->ip6_plen) == 0) { ipseclog((LOG_ERR, "IPv6 ESP input: " "ESP with IPv6 jumbogram is not supported.\n")); ipsec6stat.in_inval++; goto bad; } /* find the sassoc. */ spi = esp->esp_spi; if ((sav = key_allocsa(AF_INET6, (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst, IPPROTO_ESP, spi)) == 0) { ipseclog((LOG_WARNING, "IPv6 ESP input: no key association found for spi %u\n", (u_int32_t)ntohl(spi))); ipsec6stat.in_nosa++; goto bad; } KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp6_input called to allocate SA:%p\n", sav)); if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) { ipseclog((LOG_DEBUG, "IPv6 ESP input: non-mature/dying SA found for spi %u\n", (u_int32_t)ntohl(spi))); ipsec6stat.in_badspi++; goto bad; } algo = esp_algorithm_lookup(sav->alg_enc); if (!algo) { ipseclog((LOG_DEBUG, "IPv6 ESP input: " "unsupported encryption algorithm for spi %u\n", (u_int32_t)ntohl(spi))); ipsec6stat.in_badspi++; goto bad; } /* check if we have proper ivlen information */ ivlen = sav->ivlen; if (ivlen < 0) { ipseclog((LOG_ERR, "inproper ivlen in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); ipsec6stat.in_badspi++; goto bad; } if (!((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay && (sav->alg_auth && sav->key_auth))) goto noreplaycheck; if (sav->alg_auth == SADB_X_AALG_NULL || sav->alg_auth == SADB_AALG_NONE) goto noreplaycheck; /* * check for sequence number. 
*/ if (ipsec_chkreplay(ntohl(((struct newesp *)esp)->esp_seq), sav)) ; /* okey */ else { ipsec6stat.in_espreplay++; ipseclog((LOG_WARNING, "replay packet in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); goto bad; } /* check ICV */ { u_char sum0[AH_MAXSUMSIZE]; u_char sum[AH_MAXSUMSIZE]; const struct ah_algorithm *sumalgo; size_t siz; sumalgo = ah_algorithm_lookup(sav->alg_auth); if (!sumalgo) goto noreplaycheck; siz = (((*sumalgo->sumsiz)(sav) + 3) & ~(4 - 1)); if (m->m_pkthdr.len < off + ESPMAXLEN + siz) { ipsecstat.in_inval++; goto bad; } if (AH_MAXSUMSIZE < siz) { ipseclog((LOG_DEBUG, "internal error: AH_MAXSUMSIZE must be larger than %lu\n", (u_long)siz)); ipsec6stat.in_inval++; goto bad; } m_copydata(m, m->m_pkthdr.len - siz, siz, &sum0[0]); if (esp_auth(m, off, m->m_pkthdr.len - off - siz, sav, sum)) { ipseclog((LOG_WARNING, "auth fail in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); ipsec6stat.in_espauthfail++; goto bad; } if (bcmp(sum0, sum, siz) != 0) { ipseclog((LOG_WARNING, "auth fail in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); ipsec6stat.in_espauthfail++; goto bad; } /* strip off the authentication data */ m_adj(m, -siz); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - siz); m->m_flags |= M_AUTHIPDGM; ipsec6stat.in_espauthsucc++; } /* * update sequence number. */ if ((sav->flags & SADB_X_EXT_OLD) == 0 && sav->replay) { if (ipsec_updatereplay(ntohl(((struct newesp *)esp)->esp_seq), sav)) { ipsec6stat.in_espreplay++; goto bad; } } noreplaycheck: /* process main esp header. 
*/ if (sav->flags & SADB_X_EXT_OLD) { /* RFC 1827 */ esplen = sizeof(struct esp); } else { /* RFC 2406 */ if (sav->flags & SADB_X_EXT_DERIV) esplen = sizeof(struct esp); else esplen = sizeof(struct newesp); } if (m->m_pkthdr.len < off + esplen + ivlen + sizeof(esptail)) { ipseclog((LOG_WARNING, "IPv6 ESP input: packet too short\n")); ipsec6stat.in_inval++; goto bad; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, esplen + ivlen, IPPROTO_DONE); /* XXX */ #else IP6_EXTHDR_GET(esp, struct esp *, m, off, esplen + ivlen); if (esp == NULL) { ipsec6stat.in_inval++; m = NULL; goto bad; } #endif ip6 = mtod(m, struct ip6_hdr *); /* set it again just in case */ /* * pre-compute and cache intermediate key */ if (esp_schedule(algo, sav) != 0) { ipsec6stat.in_inval++; goto bad; } /* * decrypt the packet. */ if (!algo->decrypt) panic("internal error: no decrypt function"); if ((*algo->decrypt)(m, off, sav, algo, ivlen)) { /* m is already freed */ m = NULL; ipseclog((LOG_ERR, "decrypt fail in IPv6 ESP input: %s\n", ipsec_logsastr(sav))); ipsec6stat.in_inval++; goto bad; } ipsec6stat.in_esphist[sav->alg_enc]++; m->m_flags |= M_DECRYPTED; /* * find the trailer of the ESP. */ m_copydata(m, m->m_pkthdr.len - sizeof(esptail), sizeof(esptail), (caddr_t)&esptail); nxt = esptail.esp_nxt; taillen = esptail.esp_padlen + sizeof(esptail); if (m->m_pkthdr.len < taillen || m->m_pkthdr.len - taillen < sizeof(struct ip6_hdr)) { /* ? */ ipseclog((LOG_WARNING, "bad pad length in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); ipsec6stat.in_inval++; goto bad; } /* strip off the trailing pad area. */ m_adj(m, -taillen); ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - taillen); /* was it transmitted over the IPsec tunnel SA? */ if (ipsec6_tunnel_validate(m, off + esplen + ivlen, nxt, sav)) { /* * strip off all the headers that precedes ESP header. * IP6 xx ESP IP6' payload -> IP6' payload * * XXX more sanity checks * XXX relationship with gif? 
*/ u_int32_t flowinfo; /* net endian */ flowinfo = ip6->ip6_flow; m_adj(m, off + esplen + ivlen); if (m->m_len < sizeof(*ip6)) { #ifndef PULLDOWN_TEST /* * m_pullup is prohibited in KAME IPv6 input processing * but there's no other way! */ #else /* okay to pullup in m_pulldown style */ #endif m = m_pullup(m, sizeof(*ip6)); if (!m) { ipsec6stat.in_inval++; goto bad; } } ip6 = mtod(m, struct ip6_hdr *); /* ECN consideration. */ ip6_ecn_egress(ip6_ipsec_ecn, &flowinfo, &ip6->ip6_flow); if (!key_checktunnelsanity(sav, AF_INET6, (caddr_t)&ip6->ip6_src, (caddr_t)&ip6->ip6_dst)) { ipseclog((LOG_ERR, "ipsec tunnel address mismatch " "in IPv6 ESP input: %s %s\n", ipsec6_logpacketstr(ip6, spi), ipsec_logsastr(sav))); ipsec6stat.in_inval++; goto bad; } key_sa_recordxfer(sav, m); if (ipsec_addhist(m, IPPROTO_ESP, spi) != 0 || ipsec_addhist(m, IPPROTO_IPV6, 0) != 0) { ipsec6stat.in_nomem++; goto bad; } if (! IF_HANDOFF(&ip6intrq, m, NULL)) { ipsec6stat.in_inval++; m = NULL; goto bad; } m = NULL; schednetisr(NETISR_IPV6); /* can be skipped but to make sure */ nxt = IPPROTO_DONE; } else { /* * strip off ESP header and IV. * even in m_pulldown case, we need to strip off ESP so that * we can always compute checksum for AH correctly. */ size_t stripsiz; char *prvnxtp; /* * Set the next header field of the previous header correctly. 
*/ prvnxtp = ip6_get_prevhdr(m, off); /* XXX */ *prvnxtp = nxt; stripsiz = esplen + ivlen; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len >= stripsiz + off) { ovbcopy((caddr_t)ip6, ((caddr_t)ip6) + stripsiz, off); m->m_data += stripsiz; m->m_len -= stripsiz; m->m_pkthdr.len -= stripsiz; } else { /* * this comes with no copy if the boundary is on * cluster */ struct mbuf *n; n = m_split(m, off, M_DONTWAIT); if (n == NULL) { /* m is retained by m_split */ goto bad; } m_adj(n, stripsiz); m_cat(m, n); /* m_cat does not update m_pkthdr.len */ m->m_pkthdr.len += n->m_pkthdr.len; } #ifndef PULLDOWN_TEST /* * KAME requires that the packet to be contiguous on the * mbuf. We need to make that sure. * this kind of code should be avoided. * XXX other conditions to avoid running this part? */ if (m->m_len != m->m_pkthdr.len) { struct mbuf *n = NULL; int maxlen; MGETHDR(n, M_DONTWAIT, MT_HEADER); maxlen = MHLEN; if (n) - M_COPY_PKTHDR(n, m); + M_MOVE_PKTHDR(n, m); if (n && m->m_pkthdr.len > maxlen) { MCLGET(n, M_DONTWAIT); maxlen = MCLBYTES; if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } if (!n) { printf("esp6_input: mbuf allocation failed\n"); goto bad; } if (m->m_pkthdr.len <= maxlen) { m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t)); n->m_len = m->m_pkthdr.len; n->m_pkthdr.len = m->m_pkthdr.len; n->m_next = NULL; m_freem(m); } else { m_copydata(m, 0, maxlen, mtod(n, caddr_t)); n->m_len = maxlen; n->m_pkthdr.len = m->m_pkthdr.len; n->m_next = m; m_adj(m, maxlen); - m->m_flags &= ~M_PKTHDR; } m = n; } #endif ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - stripsiz); key_sa_recordxfer(sav, m); if (ipsec_addhist(m, IPPROTO_ESP, spi) != 0) { ipsec6stat.in_nomem++; goto bad; } } *offp = off; *mp = m; if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp6_input call free SA:%p\n", sav)); key_freesav(sav); } ipsec6stat.in_success++; return nxt; bad: if (sav) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP esp6_input call free SA:%p\n", 
sav)); key_freesav(sav); } if (m) m_freem(m); return IPPROTO_DONE; } void esp6_ctlinput(cmd, sa, d) int cmd; struct sockaddr *sa; void *d; { const struct newesp *espp; struct newesp esp; struct ip6ctlparam *ip6cp = NULL, ip6cp1; struct secasvar *sav; struct ip6_hdr *ip6; struct mbuf *m; int off; struct sockaddr_in6 *sa6_src, *sa6_dst; if (sa->sa_family != AF_INET6 || sa->sa_len != sizeof(struct sockaddr_in6)) return; if ((unsigned)cmd >= PRC_NCMDS) return; /* if the parameter is from icmp6, decode it. */ if (d != NULL) { ip6cp = (struct ip6ctlparam *)d; m = ip6cp->ip6c_m; ip6 = ip6cp->ip6c_ip6; off = ip6cp->ip6c_off; } else { m = NULL; ip6 = NULL; off = 0; /* calm gcc */ } if (ip6) { /* * Notify the error to all possible sockets via pfctlinput2. * Since the upper layer information (such as protocol type, * source and destination ports) is embedded in the encrypted * data and might have been cut, we can't directly call * an upper layer ctlinput function. However, the pcbnotify * function will consider source and destination addresses * as well as the flow info value, and may be able to find * some PCB that should be notified. * Although pfctlinput2 will call esp6_ctlinput(), there is * no possibility of an infinite loop of function calls, * because we don't pass the inner IPv6 header. */ bzero(&ip6cp1, sizeof(ip6cp1)); ip6cp1.ip6c_src = ip6cp->ip6c_src; pfctlinput2(cmd, sa, (void *)&ip6cp1); /* * Then go to special cases that need ESP header information. * XXX: We assume that when ip6 is non NULL, * M and OFF are valid. */ /* check if we can safely examine src and dst ports */ if (m->m_pkthdr.len < off + sizeof(esp)) return; if (m->m_len < off + sizeof(esp)) { /* * this should be rare case, * so we compromise on this copy... 
*/ m_copydata(m, off, sizeof(esp), (caddr_t)&esp); espp = &esp; } else espp = (struct newesp*)(mtod(m, caddr_t) + off); if (cmd == PRC_MSGSIZE) { int valid = 0; /* * Check to see if we have a valid SA corresponding to * the address in the ICMP message payload. */ sa6_src = ip6cp->ip6c_src; sa6_dst = (struct sockaddr_in6 *)sa; sav = key_allocsa(AF_INET6, (caddr_t)&sa6_src->sin6_addr, (caddr_t)&sa6_dst->sin6_addr, IPPROTO_ESP, espp->esp_spi); if (sav) { if (sav->state == SADB_SASTATE_MATURE || sav->state == SADB_SASTATE_DYING) valid++; key_freesav(sav); } /* XXX Further validation? */ /* * Depending on the value of "valid" and routing table * size (mtudisc_{hi,lo}wat), we will: * - recalcurate the new MTU and create the * corresponding routing entry, or * - ignore the MTU change notification. */ icmp6_mtudisc_update((struct ip6ctlparam *)d, valid); } } else { /* we normally notify any pcb here */ } } #endif /* INET6 */ Index: head/sys/netinet6/icmp6.c =================================================================== --- head/sys/netinet6/icmp6.c (revision 108465) +++ head/sys/netinet6/icmp6.c (revision 108466) @@ -1,2874 +1,2883 @@ /* $FreeBSD$ */ /* $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif #ifdef FAST_IPSEC #include #include #define IPSEC #endif #include #ifdef HAVE_NRL_INPCB /* inpcb members */ #define in6pcb inpcb #define in6p_laddr inp_laddr6 #define in6p_faddr inp_faddr6 #define in6p_icmp6filt inp_icmp6filt #define in6p_route inp_route #define in6p_socket inp_socket #define in6p_flags inp_flags #define in6p_moptions inp_moptions6 #define in6p_outputopts inp_outputopts6 #define in6p_ip6 inp_ipv6 #define in6p_flowinfo inp_flowinfo #define in6p_sp inp_sp #define in6p_next inp_next #define in6p_prev inp_prev /* macro names */ #define sotoin6pcb sotoinpcb /* function names */ #define in6_pcbdetach in_pcbdetach #define in6_rtchange in_rtchange /* * for KAME src sync over BSD*'s. XXX: FreeBSD (>=3) are VERY different from * others... 
*/ #define in6p_ip6_nxt inp_ipv6.ip6_nxt #endif extern struct domain inet6domain; struct icmp6stat icmp6stat; extern struct inpcbhead ripcb; extern int icmp6errppslim; static int icmp6errpps_count = 0; static struct timeval icmp6errppslim_last; extern int icmp6_nodeinfo; static void icmp6_errcount __P((struct icmp6errstat *, int, int)); static int icmp6_rip6_input __P((struct mbuf **, int)); static int icmp6_ratelimit __P((const struct in6_addr *, const int, const int)); static const char *icmp6_redirect_diag __P((struct in6_addr *, struct in6_addr *, struct in6_addr *)); #define HAVE_PPSRATECHECK #ifndef HAVE_PPSRATECHECK static int ppsratecheck __P((struct timeval *, int *, int)); #endif static struct mbuf *ni6_input __P((struct mbuf *, int)); static struct mbuf *ni6_nametodns __P((const char *, int, int)); static int ni6_dnsmatch __P((const char *, int, const char *, int)); static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *, struct ifnet **, char *)); static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int)); static int icmp6_notify_error __P((struct mbuf *, int, int, int)); #ifdef COMPAT_RFC1885 static struct route_in6 icmp6_reflect_rt; #endif void icmp6_init() { mld6_init(); } static void icmp6_errcount(stat, type, code) struct icmp6errstat *stat; int type, code; { switch (type) { case ICMP6_DST_UNREACH: switch (code) { case ICMP6_DST_UNREACH_NOROUTE: stat->icp6errs_dst_unreach_noroute++; return; case ICMP6_DST_UNREACH_ADMIN: stat->icp6errs_dst_unreach_admin++; return; case ICMP6_DST_UNREACH_BEYONDSCOPE: stat->icp6errs_dst_unreach_beyondscope++; return; case ICMP6_DST_UNREACH_ADDR: stat->icp6errs_dst_unreach_addr++; return; case ICMP6_DST_UNREACH_NOPORT: stat->icp6errs_dst_unreach_noport++; return; } break; case ICMP6_PACKET_TOO_BIG: stat->icp6errs_packet_too_big++; return; case ICMP6_TIME_EXCEEDED: switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: stat->icp6errs_time_exceed_transit++; return; case 
ICMP6_TIME_EXCEED_REASSEMBLY: stat->icp6errs_time_exceed_reassembly++; return; } break; case ICMP6_PARAM_PROB: switch (code) { case ICMP6_PARAMPROB_HEADER: stat->icp6errs_paramprob_header++; return; case ICMP6_PARAMPROB_NEXTHEADER: stat->icp6errs_paramprob_nextheader++; return; case ICMP6_PARAMPROB_OPTION: stat->icp6errs_paramprob_option++; return; } break; case ND_REDIRECT: stat->icp6errs_redirect++; return; } stat->icp6errs_unknown++; } /* * Generate an error packet of type error in response to bad IP6 packet. */ void icmp6_error(m, type, code, param) struct mbuf *m; int type, code, param; { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; u_int preplen; int off; int nxt; icmp6stat.icp6s_error++; /* count per-type-code statistics */ icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code); #ifdef M_DECRYPTED /*not openbsd*/ if (m->m_flags & M_DECRYPTED) { icmp6stat.icp6s_canterror++; goto freeit; } #endif #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif oip6 = mtod(m, struct ip6_hdr *); /* * Multicast destination check. For unrecognized option errors, * this check has already done in ip6_unknown_opt(), so we can * check only for other errors. */ if ((m->m_flags & (M_BCAST|M_MCAST) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && (type != ICMP6_PACKET_TOO_BIG && (type != ICMP6_PARAM_PROB || code != ICMP6_PARAMPROB_OPTION))) goto freeit; /* Source address check. XXX: the case of anycast source? */ if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) goto freeit; /* * If we are about to send ICMPv6 against ICMPv6 error/redirect, * don't do it. 
*/ nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off >= 0 && nxt == IPPROTO_ICMPV6) { struct icmp6_hdr *icp; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, sizeof(*icp)); if (icp == NULL) { icmp6stat.icp6s_tooshort++; return; } #endif if (icp->icmp6_type < ICMP6_ECHO_REQUEST || icp->icmp6_type == ND_REDIRECT) { /* * ICMPv6 error * Special case: for redirect (which is * informational) we must not send icmp6 error. */ icmp6stat.icp6s_canterror++; goto freeit; } else { /* ICMPv6 informational - send the error */ } } else { /* non-ICMPv6 - send the error */ } oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ /* Finally, do rate limitation check. */ if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { icmp6stat.icp6s_toofreq++; goto freeit; } /* * OK, ICMP6 can be generated. */ if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); M_PREPEND(m, preplen, M_DONTWAIT); if (m && m->m_len < preplen) m = m_pullup(m, preplen); if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); return; } nip6 = mtod(m, struct ip6_hdr *); nip6->ip6_src = oip6->ip6_src; nip6->ip6_dst = oip6->ip6_dst; if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src)) oip6->ip6_src.s6_addr16[1] = 0; if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst)) oip6->ip6_dst.s6_addr16[1] = 0; icmp6 = (struct icmp6_hdr *)(nip6 + 1); icmp6->icmp6_type = type; icmp6->icmp6_code = code; icmp6->icmp6_pptr = htonl((u_int32_t)param); /* * icmp6_reflect() is designed to be in the input path. * icmp6_error() can be called from both input and outut path, * and if we are in output path rcvif could contain bogus value. * clear m->m_pkthdr.rcvif for safety, we should have enough scope * information in ip header (nip6). 
*/ m->m_pkthdr.rcvif = NULL; icmp6stat.icp6s_outhist[type]++; icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ return; freeit: /* * If we can't tell wheter or not we can generate ICMP6, free it. */ m_freem(m); } /* * Process a received ICMP6 message. */ int icmp6_input(mp, offp, proto) struct mbuf **mp; int *offp, proto; { struct mbuf *m = *mp, *n; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; int off = *offp; int icmp6len = m->m_pkthdr.len - *offp; int code, sum, noff; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); /* m might change if M_LOOP. So, call mtod after this */ #endif /* * Locate icmp6 structure in mbuf, and check * that not corrupted and of at least minimum length */ ip6 = mtod(m, struct ip6_hdr *); if (icmp6len < sizeof(struct icmp6_hdr)) { icmp6stat.icp6s_tooshort++; goto freeit; } /* * calculate the checksum */ #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return IPPROTO_DONE; } #endif code = icmp6->icmp6_code; if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { nd6log((LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n", icmp6->icmp6_type, sum, ip6_sprintf(&ip6->ip6_src))); icmp6stat.icp6s_checksum++; goto freeit; } if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { /* * Deliver very specific ICMP6 type only. * This is important to deilver TOOBIG. Otherwise PMTUD * will not work. 
*/ switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: case ICMP6_PACKET_TOO_BIG: case ICMP6_TIME_EXCEEDED: break; default: goto freeit; } } icmp6stat.icp6s_inhist[icmp6->icmp6_type]++; icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ break; case ICMP6_DST_UNREACH_ADDR: code = PRC_HOSTDEAD; break; #ifdef COMPAT_RFC1885 case ICMP6_DST_UNREACH_NOTNEIGHBOR: code = PRC_UNREACH_SRCFAIL; break; #else case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; break; #endif case ICMP6_DST_UNREACH_NOPORT: code = PRC_UNREACH_PORT; break; default: goto badcode; } goto deliver; break; case ICMP6_PACKET_TOO_BIG: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig); if (code != 0) goto badcode; code = PRC_MSGSIZE; /* * Updating the path MTU will be done after examining * intermediate extension headers. 
*/ goto deliver; break; case ICMP6_TIME_EXCEEDED: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed); switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: case ICMP6_TIME_EXCEED_REASSEMBLY: code += PRC_TIMXCEED_INTRANS; break; default: goto badcode; } goto deliver; break; case ICMP6_PARAM_PROB: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob); switch (code) { case ICMP6_PARAMPROB_NEXTHEADER: code = PRC_UNREACH_PROTOCOL; break; case ICMP6_PARAMPROB_HEADER: case ICMP6_PARAMPROB_OPTION: code = PRC_PARAMPROB; break; default: goto badcode; } goto deliver; break; case ICMP6_ECHO_REQUEST: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo); if (code != 0) goto badcode; if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { /* Give up remote */ break; } if ((n->m_flags & M_EXT) != 0 || n->m_len < off + sizeof(struct icmp6_hdr)) { struct mbuf *n0 = n; const int maxlen = sizeof(*nip6) + sizeof(*nicmp6); /* * Prepare an internal mbuf. m_pullup() doesn't * always copy the length we specified. */ if (maxlen >= MCLBYTES) { /* Give up remote */ m_freem(n0); break; } MGETHDR(n, M_DONTWAIT, n0->m_type); if (n && maxlen >= MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } if (n == NULL) { /* Give up remote */ m_freem(n0); break; } - M_COPY_PKTHDR(n, n0); + M_MOVE_PKTHDR(n, n0); /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); noff = sizeof(struct ip6_hdr); n->m_pkthdr.len = n->m_len = noff + sizeof(struct icmp6_hdr); /* * Adjust mbuf. ip6_plen will be adjusted in * ip6_output(). 
*/ m_adj(n0, off + sizeof(struct icmp6_hdr)); n->m_pkthdr.len += n0->m_pkthdr.len; n->m_next = n0; - n0->m_flags &= ~M_PKTHDR; } else { nip6 = mtod(n, struct ip6_hdr *); nicmp6 = (struct icmp6_hdr *)((caddr_t)nip6 + off); noff = off; } nicmp6->icmp6_type = ICMP6_ECHO_REPLY; nicmp6->icmp6_code = 0; if (n) { icmp6stat.icp6s_reflect++; icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++; icmp6_reflect(n, noff); } break; case ICMP6_ECHO_REPLY: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply); if (code != 0) goto badcode; break; case MLD_LISTENER_QUERY: case MLD_LISTENER_REPORT: if (icmp6len < sizeof(struct mld_hdr)) goto badlen; if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */ icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery); else icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport); if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ mld6_input(m, off); m = NULL; goto freeit; } mld6_input(n, off); /* m stays. */ break; case MLD_LISTENER_DONE: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone); if (icmp6len < sizeof(struct mld_hdr)) /* necessary? */ goto badlen; break; /* nothing to be done in kernel */ case MLD_MTRACE_RESP: case MLD_MTRACE: /* XXX: these two are experimental. not officially defind. */ /* XXX: per-interface statistics? 
*/ break; /* just pass it to applications */ case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ { enum { WRU, FQDN } mode; if (!icmp6_nodeinfo) break; if (icmp6len == sizeof(struct icmp6_hdr) + 4) mode = WRU; else if (icmp6len >= sizeof(struct icmp6_nodeinfo)) mode = FQDN; else goto badlen; #define hostnamelen strlen(hostname) if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), IPPROTO_DONE); #endif n = m_copy(m, 0, M_COPYALL); if (n) n = ni6_input(n, off); /* XXX meaningless if n == NULL */ noff = sizeof(struct ip6_hdr); } else { u_char *p; int maxlen, maxhlen; if ((icmp6_nodeinfo & 5) != 5) break; if (code != 0) goto badcode; maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4; if (maxlen >= MCLBYTES) { /* Give up remote */ break; } MGETHDR(n, M_DONTWAIT, m->m_type); if (n && maxlen > MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } + if (!m_dup_pkthdr(n, m, M_DONTWAIT)) { + /* + * Previous code did a blind M_COPY_PKTHDR + * and said "just for rcvif". If true, then + * we could tolerate the dup failing (due to + * the deep copy of the tag chain). For now + * be conservative and just fail. + */ + m_free(n); + n = NULL; + } if (n == NULL) { /* Give up remote */ break; } n->m_pkthdr.rcvif = NULL; n->m_len = 0; maxhlen = M_TRAILINGSPACE(n) - maxlen; if (maxhlen > hostnamelen) maxhlen = hostnamelen; /* * Copy IPv6 and ICMPv6 only. 
*/ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); p = (u_char *)(nicmp6 + 1); bzero(p, 4); bcopy(hostname, p + 4, maxhlen); /* meaningless TTL */ noff = sizeof(struct ip6_hdr); - M_COPY_PKTHDR(n, m); /* just for rcvif */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + 4 + maxhlen; nicmp6->icmp6_type = ICMP6_WRUREPLY; nicmp6->icmp6_code = 0; } #undef hostnamelen if (n) { icmp6stat.icp6s_reflect++; icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++; icmp6_reflect(n, noff); } break; } case ICMP6_WRUREPLY: if (code != 0) goto badcode; break; case ND_ROUTER_SOLICIT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_rs_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_rs_input(n, off, icmp6len); /* m stays. */ break; case ND_ROUTER_ADVERT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_ra_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_ra_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_SOLICIT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_ns_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_ns_input(n, off, icmp6len); /* m stays. 
*/ break; case ND_NEIGHBOR_ADVERT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_na_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_na_input(n, off, icmp6len); /* m stays. */ break; case ND_REDIRECT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ icmp6_redirect_input(m, off); m = NULL; goto freeit; } icmp6_redirect_input(n, off); /* m stays. */ break; case ICMP6_ROUTER_RENUMBERING: if (code != ICMP6_ROUTER_RENUMBERING_COMMAND && code != ICMP6_ROUTER_RENUMBERING_RESULT) goto badcode; if (icmp6len < sizeof(struct icmp6_router_renum)) goto badlen; break; default: nd6log((LOG_DEBUG, "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst), m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0)); if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { /* ICMPv6 error: MUST deliver it by spec... */ code = PRC_NCMDS; /* deliver */ } else { /* ICMPv6 informational: MUST not deliver */ break; } deliver: if (icmp6_notify_error(m, off, icmp6len, code)) { /* In this case, m should've been freed. 
*/ return(IPPROTO_DONE); } break; badcode: icmp6stat.icp6s_badcode++; break; badlen: icmp6stat.icp6s_badlen++; break; } /* deliver the packet to appropriate sockets */ icmp6_rip6_input(&m, *offp); return IPPROTO_DONE; freeit: m_freem(m); return IPPROTO_DONE; } static int icmp6_notify_error(m, off, icmp6len, code) struct mbuf *m; int off, icmp6len; { struct icmp6_hdr *icmp6; struct ip6_hdr *eip6; u_int32_t notifymtu; struct sockaddr_in6 icmp6src, icmp6dst; if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { icmp6stat.icp6s_tooshort++; goto freeit; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); /* Detect the upper level protocol */ { void (*ctlfunc) __P((int, struct sockaddr *, void *)); u_int8_t nxt = eip6->ip6_nxt; int eoff = off + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); struct ip6ctlparam ip6cp; struct in6_addr *finaldst = NULL; int icmp6type = icmp6->icmp6_type; struct ip6_frag *fh; struct ip6_rthdr *rth; struct ip6_rthdr0 *rth0; int rthlen; while (1) { /* XXX: should avoid infinite loop explicitly? */ struct ip6_ext *eh; switch (nxt) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: case IPPROTO_AH: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_ext), -1); eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(eh, struct ip6_ext *, m, eoff, sizeof(*eh)); if (eh == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif if (nxt == IPPROTO_AH) eoff += (eh->ip6e_len + 2) << 2; else eoff += (eh->ip6e_len + 1) << 3; nxt = eh->ip6e_nxt; break; case IPPROTO_ROUTING: /* * When the erroneous packet contains a * routing header, we should examine the * header to determine the final destination. 
* Otherwise, we can't properly update * information that depends on the final * destination (e.g. path MTU). */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); rth = (struct ip6_rthdr *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, eoff, sizeof(*rth)); if (rth == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif rthlen = (rth->ip6r_len + 1) << 3; /* * XXX: currently there is no * officially defined type other * than type-0. * Note that if the segment left field * is 0, all intermediate hops must * have been passed. */ if (rth->ip6r_segleft && rth->ip6r_type == IPV6_RTHDR_TYPE_0) { int hops; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); rth0 = (struct ip6_rthdr0 *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth0, struct ip6_rthdr0 *, m, eoff, rthlen); if (rth0 == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif /* just ignore a bogus header */ if ((rth0->ip6r0_len % 2) == 0 && (hops = rth0->ip6r0_len/2)) finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); } eoff += rthlen; nxt = rth->ip6r_nxt; break; case IPPROTO_FRAGMENT: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_frag), -1); fh = (struct ip6_frag *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(fh, struct ip6_frag *, m, eoff, sizeof(*fh)); if (fh == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif /* * Data after a fragment header is meaningless * unless it is the first fragment, but * we'll go to the notify label for path MTU * discovery. */ if (fh->ip6f_offlg & IP6F_OFF_MASK) goto notify; eoff += sizeof(struct ip6_frag); nxt = fh->ip6f_nxt; break; default: /* * This case includes ESP and the No Next * Header. In such cases going to the notify * label does not have any meaning * (i.e. ctlfunc will be NULL), but we go * anyway since we might have to update * path MTU information. 
*/ goto notify; } } notify: #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return(-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); bzero(&icmp6dst, sizeof(icmp6dst)); icmp6dst.sin6_len = sizeof(struct sockaddr_in6); icmp6dst.sin6_family = AF_INET6; if (finaldst == NULL) icmp6dst.sin6_addr = eip6->ip6_dst; else icmp6dst.sin6_addr = *finaldst; icmp6dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &icmp6dst.sin6_addr); #ifndef SCOPEDROUTING if (in6_embedscope(&icmp6dst.sin6_addr, &icmp6dst, NULL, NULL)) { /* should be impossbile */ nd6log((LOG_DEBUG, "icmp6_notify_error: in6_embedscope failed\n")); goto freeit; } #endif /* * retrieve parameters from the inner IPv6 header, and convert * them into sockaddr structures. */ bzero(&icmp6src, sizeof(icmp6src)); icmp6src.sin6_len = sizeof(struct sockaddr_in6); icmp6src.sin6_family = AF_INET6; icmp6src.sin6_addr = eip6->ip6_src; icmp6src.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &icmp6src.sin6_addr); #ifndef SCOPEDROUTING if (in6_embedscope(&icmp6src.sin6_addr, &icmp6src, NULL, NULL)) { /* should be impossbile */ nd6log((LOG_DEBUG, "icmp6_notify_error: in6_embedscope failed\n")); goto freeit; } #endif icmp6src.sin6_flowinfo = (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); if (finaldst == NULL) finaldst = &eip6->ip6_dst; ip6cp.ip6c_m = m; ip6cp.ip6c_icmp6 = icmp6; ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); ip6cp.ip6c_off = eoff; ip6cp.ip6c_finaldst = finaldst; ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)¬ifymtu; icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ } ctlfunc = (void (*) __P((int, struct sockaddr *, void *))) (inet6sw[ip6_protox[nxt]].pr_ctlinput); if (ctlfunc) { (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, &ip6cp); 
} } return(0); freeit: m_freem(m); return(-1); } void icmp6_mtudisc_update(ip6cp, validated) struct ip6ctlparam *ip6cp; int validated; { struct in6_addr *dst = ip6cp->ip6c_finaldst; struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ u_int mtu = ntohl(icmp6->icmp6_mtu); struct rtentry *rt = NULL; struct sockaddr_in6 sin6; if (!validated) return; bzero(&sin6, sizeof(sin6)); sin6.sin6_family = PF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_addr = *dst; /* XXX normally, this won't happen */ if (IN6_IS_ADDR_LINKLOCAL(dst)) { sin6.sin6_addr.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); } /* sin6.sin6_scope_id = XXX: should be set if DST is a scoped addr */ rt = rtalloc1((struct sockaddr *)&sin6, 0, RTF_CLONING | RTF_PRCLONING); if (rt && (rt->rt_flags & RTF_HOST) && !(rt->rt_rmx.rmx_locks & RTV_MTU)) { if (mtu < IPV6_MMTU) { /* xxx */ rt->rt_rmx.rmx_locks |= RTV_MTU; } else if (mtu < rt->rt_ifp->if_mtu && rt->rt_rmx.rmx_mtu > mtu) { icmp6stat.icp6s_pmtuchg++; rt->rt_rmx.rmx_mtu = mtu; } } if (rt) { /* XXX: need braces to avoid conflict with else in RTFREE. */ RTFREE(rt); } } /* * Process a Node Information Query packet, based on * draft-ietf-ipngwg-icmp-name-lookups-07. 
* * Spec incompatibilities: * - IPv6 Subject address handling * - IPv4 Subject address handling support missing * - Proxy reply (answer even if it's not for me) * - joins NI group address at in6_ifattach() time only, does not cope * with hostname changes by sethostname(3) */ #define hostnamelen strlen(hostname) static struct mbuf * ni6_input(m, off) struct mbuf *m; int off; { struct icmp6_nodeinfo *ni6, *nni6; struct mbuf *n = NULL; u_int16_t qtype; int subjlen; int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); struct ni_reply_fqdn *fqdn; int addrs; /* for NI_QTYPE_NODEADDR */ struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ struct sockaddr_in6 sin6; /* double meaning; ip6_dst and subjectaddr */ struct sockaddr_in6 sin6_d; /* XXX: we should retrieve this from m_aux */ struct ip6_hdr *ip6; int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ char *subj = NULL; struct in6_ifaddr *ia6 = NULL; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); if (ni6 == NULL) { /* m is already reclaimed */ return NULL; } #endif /* * Validate IPv6 destination address. * * The Responder must discard the Query without further processing * unless it is one of the Responder's unicast or anycast addresses, or * a link-local scope multicast address which the Responder has joined. * [icmp-name-lookups-07, Section 4.] 
*/ bzero(&sin6, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); bcopy(&ip6->ip6_dst, &sin6.sin6_addr, sizeof(sin6.sin6_addr)); /* XXX scopeid */ if ((ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)&sin6)) != NULL) { /* unicast/anycast, fine */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (icmp6_nodeinfo & 4) == 0) { nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); goto bad; } } else if (IN6_IS_ADDR_MC_LINKLOCAL(&sin6.sin6_addr)) ; /* link-local multicast, fine */ else goto bad; /* validate query Subject field. */ qtype = ntohs(ni6->ni_qtype); subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); switch (qtype) { case NI_QTYPE_NOOP: case NI_QTYPE_SUPTYPES: /* 07 draft */ if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) break; /* FALLTHROUGH */ case NI_QTYPE_FQDN: case NI_QTYPE_NODEADDR: switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: #if ICMP6_NI_SUBJ_IPV6 != 0 case 0: #endif /* * backward compatibility - try to accept 03 draft * format, where no Subject is present. */ if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && subjlen == 0) { oldfqdn++; break; } #if ICMP6_NI_SUBJ_IPV6 != 0 if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) goto bad; #endif if (subjlen != sizeof(sin6.sin6_addr)) goto bad; /* * Validate Subject address. * * Not sure what exactly "address belongs to the node" * means in the spec, is it just unicast, or what? * * At this moment we consider Subject address as * "belong to the node" if the Subject address equals * to the IPv6 destination address; validation for * IPv6 destination address should have done enough * check for us. * * We do not do proxy at this moment. */ /* m_pulldown instead of copy? 
*/ m_copydata(m, off + sizeof(struct icmp6_nodeinfo), subjlen, (caddr_t)&sin6.sin6_addr); sin6.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &sin6.sin6_addr); #ifndef SCOPEDROUTING in6_embedscope(&sin6.sin6_addr, &sin6, NULL, NULL); #endif bzero(&sin6_d, sizeof(sin6_d)); sin6_d.sin6_family = AF_INET6; /* not used, actually */ sin6_d.sin6_len = sizeof(sin6_d); /* ditto */ sin6_d.sin6_addr = ip6->ip6_dst; sin6_d.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &ip6->ip6_dst); #ifndef SCOPEDROUTING in6_embedscope(&sin6_d.sin6_addr, &sin6_d, NULL, NULL); #endif subj = (char *)&sin6; if (SA6_ARE_ADDR_EQUAL(&sin6, &sin6_d)) break; /* * XXX if we are to allow other cases, we should really * be careful about scope here. * basically, we should disallow queries toward IPv6 * destination X with subject Y, if scope(X) > scope(Y). * if we allow scope(X) > scope(Y), it will result in * information leakage across scope boundary. */ goto bad; case ICMP6_NI_SUBJ_FQDN: /* * Validate Subject name with gethostname(3). * * The behavior may need some debate, since: * - we are not sure if the node has FQDN as * hostname (returned by gethostname(3)). * - the code does wildcard match for truncated names. * however, we are not sure if we want to perform * wildcard match, if gethostname(3) side has * truncated hostname. */ n = ni6_nametodns(hostname, hostnamelen, 0); if (!n || n->m_next || n->m_len == 0) goto bad; IP6_EXTHDR_GET(subj, char *, m, off + sizeof(struct icmp6_nodeinfo), subjlen); if (subj == NULL) goto bad; if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), n->m_len)) { goto bad; } m_freem(n); n = NULL; break; case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ default: goto bad; } break; } /* refuse based on configuration. XXX ICMP6_NI_REFUSED? 
*/ switch (qtype) { case NI_QTYPE_FQDN: if ((icmp6_nodeinfo & 1) == 0) goto bad; break; case NI_QTYPE_NODEADDR: if ((icmp6_nodeinfo & 2) == 0) goto bad; break; } /* guess reply length */ switch (qtype) { case NI_QTYPE_NOOP: break; /* no reply data */ case NI_QTYPE_SUPTYPES: replylen += sizeof(u_int32_t); break; case NI_QTYPE_FQDN: /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); break; case NI_QTYPE_NODEADDR: addrs = ni6_addrs(ni6, m, &ifp, subj); if ((replylen += addrs * (sizeof(struct in6_addr) + sizeof(u_int32_t))) > MCLBYTES) replylen = MCLBYTES; /* XXX: will truncate pkt later */ break; default: /* * XXX: We must return a reply with the ICMP6 code * `unknown Qtype' in this case. However we regard the case * as an FQDN query for backward compatibility. * Older versions set a random value to this field, * so it rarely varies in the defined qtypes. * But the mechanism is not reliable... * maybe we should obsolete older versions. */ qtype = NI_QTYPE_FQDN; /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); oldfqdn++; break; } /* allocate an mbuf to reply. */ MGETHDR(n, M_DONTWAIT, m->m_type); if (n == NULL) { m_freem(m); return(NULL); } - M_COPY_PKTHDR(n, m); /* just for recvif */ + M_MOVE_PKTHDR(n, m); /* just for recvif */ if (replylen > MHLEN) { if (replylen > MCLBYTES) { /* * XXX: should we try to allocate more? But MCLBYTES * is probably much larger than IPV6_MMTU... 
*/ goto bad; } MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { goto bad; } } n->m_pkthdr.len = n->m_len = replylen; /* copy mbuf header and IPv6 + Node Information base headers */ bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); /* qtype dependent procedure */ switch (qtype) { case NI_QTYPE_NOOP: nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = 0; break; case NI_QTYPE_SUPTYPES: { u_int32_t v; nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = htons(0x0000); /* raw bitmap */ /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ v = (u_int32_t)htonl(0x0000000f); bcopy(&v, nni6 + 1, sizeof(u_int32_t)); break; } case NI_QTYPE_FQDN: nni6->ni_code = ICMP6_NI_SUCCESS; fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); nni6->ni_flags = 0; /* XXX: meaningless TTL */ fqdn->ni_fqdn_ttl = 0; /* ditto. */ /* * XXX do we really have FQDN in variable "hostname"? */ n->m_next = ni6_nametodns(hostname, hostnamelen, oldfqdn); if (n->m_next == NULL) goto bad; /* XXX we assume that n->m_next is not a chain */ if (n->m_next->m_next != NULL) goto bad; n->m_pkthdr.len += n->m_next->m_len; break; case NI_QTYPE_NODEADDR: { int lenlim, copied; nni6->ni_code = ICMP6_NI_SUCCESS; n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); lenlim = M_TRAILINGSPACE(n); copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); /* XXX: reset mbuf length */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo) + copied; break; } default: break; /* XXX impossible! */ } nni6->ni_type = ICMP6_NI_REPLY; m_freem(m); return(n); bad: m_freem(m); if (n) m_freem(n); return(NULL); } #undef hostnamelen /* * make a mbuf with DNS-encoded string. no compression support. 
* * XXX names with less than 2 dots (like "foo" or "foo.section") will be * treated as truncated name (two \0 at the end). this is a wild guess. */ static struct mbuf * ni6_nametodns(name, namelen, old) const char *name; int namelen; int old; /* return pascal string if non-zero */ { struct mbuf *m; char *cp, *ep; const char *p, *q; int i, len, nterm; if (old) len = namelen + 1; else len = MCLBYTES; /* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */ MGET(m, M_DONTWAIT, MT_DATA); if (m && len > MLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) goto fail; } if (!m) goto fail; m->m_next = NULL; if (old) { m->m_len = len; *mtod(m, char *) = namelen; bcopy(name, mtod(m, char *) + 1, namelen); return m; } else { m->m_len = 0; cp = mtod(m, char *); ep = mtod(m, char *) + M_TRAILINGSPACE(m); /* if not certain about my name, return empty buffer */ if (namelen == 0) return m; /* * guess if it looks like shortened hostname, or FQDN. * shortened hostname needs two trailing "\0". */ i = 0; for (p = name; p < name + namelen; p++) { if (*p && *p == '.') i++; } if (i < 2) nterm = 2; else nterm = 1; p = name; while (cp < ep && p < name + namelen) { i = 0; for (q = p; q < name + namelen && *q && *q != '.'; q++) i++; /* result does not fit into mbuf */ if (cp + i + 1 >= ep) goto fail; /* * DNS label length restriction, RFC1035 page 8. * "i == 0" case is included here to avoid returning * 0-length label on "foo..bar". */ if (i <= 0 || i >= 64) goto fail; *cp++ = i; bcopy(p, cp, i); cp += i; p = q; if (p < name + namelen && *p == '.') p++; } /* termination */ if (cp + nterm >= ep) goto fail; while (nterm-- > 0) *cp++ = '\0'; m->m_len = cp - mtod(m, char *); return m; } panic("should not reach here"); /* NOTREACHED */ fail: if (m) m_freem(m); return NULL; } /* * check if two DNS-encoded string matches. takes care of truncated * form (with \0\0 at the end). no compression support. 
 * XXX upper/lowercase match (see RFC2065)
 *
 * Returns 1 on match, 0 otherwise.  A name terminated early by "\0"
 * (truncated form) matches any name sharing that prefix.
 */
static int
ni6_dnsmatch(a, alen, b, blen)
	const char *a;
	int alen;
	const char *b;
	int blen;
{
	const char *a0, *b0;
	int l;

	/* simplest case - need validation? */
	if (alen == blen && bcmp(a, b, alen) == 0)
		return 1;

	a0 = a;
	b0 = b;

	/* termination is mandatory */
	if (alen < 2 || blen < 2)
		return 0;
	if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0')
		return 0;
	alen--;
	blen--;

	/* walk both encodings label by label */
	while (a - a0 < alen && b - b0 < blen) {
		if (a - a0 + 1 > alen || b - b0 + 1 > blen)
			return 0;

		/* length bytes with the high bit set would be pointers */
		if ((signed char)a[0] < 0 || (signed char)b[0] < 0)
			return 0;
		/* we don't support compression yet */
		if (a[0] >= 64 || b[0] >= 64)
			return 0;

		/* truncated case */
		if (a[0] == 0 && a - a0 == alen - 1)
			return 1;
		if (b[0] == 0 && b - b0 == blen - 1)
			return 1;
		if (a[0] == 0 || b[0] == 0)
			return 0;

		if (a[0] != b[0])
			return 0;
		l = a[0];
		if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen)
			return 0;
		if (bcmp(a + 1, b + 1, l) != 0)
			return 0;

		a += 1 + l;
		b += 1 + l;
	}

	/* match only if both names were consumed completely */
	if (a - a0 == alen && b - b0 == blen)
		return 1;
	else
		return 0;
}

/*
 * calculate the number of addresses to be returned in the node info reply.
 *
 * Walks every interface (under the IFNET read lock) counting the IPv6
 * addresses that pass the query's scope/anycast/temporary filters.  If
 * the query named a specific subject address and an interface owning it
 * is found, only that interface's count is returned and *ifpp is set so
 * ni6_store_addrs() can restrict itself to it.
 */
static int
ni6_addrs(ni6, m, ifpp, subj)
	struct icmp6_nodeinfo *ni6;
	struct mbuf *m;
	struct ifnet **ifpp;
	char *subj;
{
	struct ifnet *ifp;
	struct in6_ifaddr *ifa6;
	struct ifaddr *ifa;
	struct sockaddr_in6 *subj_ip6 = NULL; /* XXX pedant */
	int addrs = 0, addrsofif, iffound = 0;
	int niflags = ni6->ni_flags;

	if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) {
		switch (ni6->ni_code) {
		case ICMP6_NI_SUBJ_IPV6:
			if (subj == NULL) /* must be impossible... */
				return(0);
			subj_ip6 = (struct sockaddr_in6 *)subj;
			break;
		default:
			/*
			 * XXX: we only support IPv6 subject address for
			 * this Qtype.
			 */
			return(0);
		}
	}

	IFNET_RLOCK();
	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list))
	{
		addrsofif = 0;
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list)
		{
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			ifa6 = (struct in6_ifaddr *)ifa;

			if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 &&
			    IN6_ARE_ADDR_EQUAL(&subj_ip6->sin6_addr,
					       &ifa6->ia_addr.sin6_addr))
				iffound = 1;

			/*
			 * IPv4-mapped addresses can only be returned by a
			 * Node Information proxy, since they represent
			 * addresses of IPv4-only nodes, which perforce do
			 * not implement this protocol.
			 * [icmp-name-lookups-07, Section 5.4]
			 * So we don't support NI_NODEADDR_FLAG_COMPAT in
			 * this function at this moment.
			 */

			/* What do we have to do about ::1? */
			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
			case IPV6_ADDR_SCOPE_LINKLOCAL:
				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
					continue;
				break;
			case IPV6_ADDR_SCOPE_SITELOCAL:
				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
					continue;
				break;
			case IPV6_ADDR_SCOPE_GLOBAL:
				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
					continue;
				break;
			default:
				continue;
			}

			/*
			 * check if anycast is okay.
			 * XXX: just experimental. not in the spec.
			 */
			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
				continue; /* we need only unicast addresses */
			/* temporary addresses only if sysctl bit 2 allows */
			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
			    (icmp6_nodeinfo & 4) == 0) {
				continue;
			}
			addrsofif++; /* count the address */
		}
		if (iffound) {
			/* subject address owned here: report this IF only */
			*ifpp = ifp;
			IFNET_RUNLOCK();
			return(addrsofif);
		}

		addrs += addrsofif;
	}
	IFNET_RUNLOCK();

	return(addrs);
}

/*
 * Copy (address, TTL) pairs into the reply following nni6, honouring the
 * same filters as ni6_addrs().  Preferred addresses are emitted before
 * deprecated ones (second pass via the "again" label).  Returns the byte
 * count copied; sets NI_NODEADDR_FLAG_TRUNCATE when resid runs out.
 */
static int
ni6_store_addrs(ni6, nni6, ifp0, resid)
	struct icmp6_nodeinfo *ni6, *nni6;
	struct ifnet *ifp0;
	int resid;
{
	struct ifnet *ifp = ifp0 ?
	    ifp0 : TAILQ_FIRST(&ifnet);
	struct in6_ifaddr *ifa6;
	struct ifaddr *ifa;
	struct ifnet *ifp_dep = NULL;
	int copied = 0, allow_deprecated = 0;
	u_char *cp = (u_char *)(nni6 + 1);
	int niflags = ni6->ni_flags;
	u_int32_t ltime;

	if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
		return(0);	/* needless to copy */

	IFNET_RLOCK();
  again:

	for (; ifp; ifp = TAILQ_NEXT(ifp, if_list))
	{
		for (ifa = ifp->if_addrlist.tqh_first; ifa;
		     ifa = ifa->ifa_list.tqe_next)
		{
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			ifa6 = (struct in6_ifaddr *)ifa;

			if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
			    allow_deprecated == 0) {
				/*
				 * prefererred address should be put before
				 * deprecated addresses.
				 */

				/* record the interface for later search */
				if (ifp_dep == NULL)
					ifp_dep = ifp;

				continue;
			}
			else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
				 allow_deprecated != 0)
				continue; /* we now collect deprecated addrs */

			/* What do we have to do about ::1? */
			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
			case IPV6_ADDR_SCOPE_LINKLOCAL:
				if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
					continue;
				break;
			case IPV6_ADDR_SCOPE_SITELOCAL:
				if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
					continue;
				break;
			case IPV6_ADDR_SCOPE_GLOBAL:
				if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
					continue;
				break;
			default:
				continue;
			}

			/*
			 * check if anycast is okay.
			 * XXX: just experimental. not in the spec.
			 */
			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
				continue;
			/* temporary addresses only if sysctl bit 2 allows */
			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
			    (icmp6_nodeinfo & 4) == 0) {
				continue;
			}

			/* now we can copy the address */
			if (resid < sizeof(struct in6_addr) +
			    sizeof(u_int32_t)) {
				/*
				 * We give up much more copy.
				 * Set the truncate flag and return.
				 */
				nni6->ni_flags |=
					NI_NODEADDR_FLAG_TRUNCATE;
				IFNET_RUNLOCK();
				return(copied);
			}

			/*
			 * Set the TTL of the address.
			 * The TTL value should be one of the following
			 * according to the specification:
			 *
			 * 1. The remaining lifetime of a DHCP lease on the
			 *    address, or
			 * 2. The remaining Valid Lifetime of a prefix from
			 *    which the address was derived through Stateless
			 *    Autoconfiguration.
			 *
			 * Note that we currently do not support stateful
			 * address configuration by DHCPv6, so the former
			 * case can't happen.
			 */
			if (ifa6->ia6_lifetime.ia6t_expire == 0)
				ltime = ND6_INFINITE_LIFETIME;
			else {
				if (ifa6->ia6_lifetime.ia6t_expire >
				    time_second)
					ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second);
				else
					ltime = 0;
			}

			/*
			 * NOTE(review): the original extraction garbled this
			 * call to "bcopy(<ime, ..." via an "&lt;" HTML entity;
			 * restored to the declared local "ltime".
			 */
			bcopy(&ltime, cp, sizeof(u_int32_t));
			cp += sizeof(u_int32_t);

			/* copy the address itself */
			bcopy(&ifa6->ia_addr.sin6_addr, cp,
			      sizeof(struct in6_addr));
			/* XXX: KAME link-local hack; remove ifindex */
			if (IN6_IS_ADDR_LINKLOCAL(&ifa6->ia_addr.sin6_addr))
				((struct in6_addr *)cp)->s6_addr16[1] = 0;
			cp += sizeof(struct in6_addr);

			resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
			copied += (sizeof(struct in6_addr) + sizeof(u_int32_t));
		}
		if (ifp0)	/* we need search only on the specified IF */
			break;
	}

	if (allow_deprecated == 0 && ifp_dep != NULL) {
		/* second pass: now emit the deprecated addresses */
		ifp = ifp_dep;
		allow_deprecated = 1;

		goto again;
	}

	IFNET_RUNLOCK();

	return(copied);
}

/*
 * XXX almost dup'ed code with rip6_input.
 *
 * Deliver an inbound ICMPv6 packet to every matching raw-IPv6 socket,
 * honouring each socket's ICMP6 filter.  The last matching pcb receives
 * the original mbuf; earlier matches get m_copy()s.  Always consumes m.
 */
static int
icmp6_rip6_input(mp, off)
	struct	mbuf **mp;
	int	off;
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct in6pcb *in6p;
	struct in6pcb *last = NULL;
	struct sockaddr_in6 rip6src;
	struct icmp6_hdr *icmp6;
	struct mbuf *opts = NULL;

#ifndef PULLDOWN_TEST
	/* this is assumed to be safe. */
	icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
#else
	IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
	if (icmp6 == NULL) {
		/* m is already reclaimed */
		return IPPROTO_DONE;
	}
#endif

	bzero(&rip6src, sizeof(rip6src));
	rip6src.sin6_len = sizeof(struct sockaddr_in6);
	rip6src.sin6_family = AF_INET6;
	/* KAME hack: recover scopeid */
	(void)in6_recoverscope(&rip6src, &ip6->ip6_src, m->m_pkthdr.rcvif);

	LIST_FOREACH(in6p, &ripcb, inp_list)
	{
		if ((in6p->inp_vflag & INP_IPV6) == 0)
			continue;
#ifdef HAVE_NRL_INPCB
		if (!(in6p->in6p_flags & INP_IPV6))
			continue;
#endif
		if (in6p->in6p_ip6_nxt != IPPROTO_ICMPV6)
			continue;
		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
			continue;
		if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
		   !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
			continue;
		if (in6p->in6p_icmp6filt
		    && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
				 in6p->in6p_icmp6filt))
			continue;
		if (last) {
			struct	mbuf *n;
			if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
				if (last->in6p_flags & IN6P_CONTROLOPTS)
					ip6_savecontrol(last, &opts, ip6, n);
				/* strip intermediate headers */
				m_adj(n, off);
				if (sbappendaddr(&last->in6p_socket->so_rcv,
						 (struct sockaddr *)&rip6src,
						 n, opts) == 0) {
					/* should notify about lost packet */
					m_freem(n);
					if (opts) {
						m_freem(opts);
					}
				} else
					sorwakeup(last->in6p_socket);
				opts = NULL;
			}
		}
		last = in6p;
	}
	if (last) {
		if (last->in6p_flags & IN6P_CONTROLOPTS)
			ip6_savecontrol(last, &opts, ip6, m);
		/* strip intermediate headers */
		m_adj(m, off);
		if (sbappendaddr(&last->in6p_socket->so_rcv,
				(struct sockaddr *)&rip6src, m, opts) == 0) {
			m_freem(m);
			if (opts)
				m_freem(opts);
		} else
			sorwakeup(last->in6p_socket);
	} else {
		m_freem(m);
		ip6stat.ip6s_delivered--;
	}
	return IPPROTO_DONE;
}

/*
 * Reflect the ip6 packet back to the source.
 * OFF points to the icmp6 header, counted from the top of the mbuf.
 *
 * Strips any extension headers between the IPv6 header and the ICMPv6
 * header, swaps src/dst, selects a source address, recomputes the
 * checksum and transmits.  Consumes m on all paths.
 */
void
icmp6_reflect(m, off)
	struct	mbuf *m;
	size_t off;
{
	struct ip6_hdr *ip6;
	struct icmp6_hdr *icmp6;
	struct in6_ifaddr *ia;
	struct in6_addr t, *src = 0;
	int plen;
	int type, code;
	struct ifnet *outif = NULL;
	struct sockaddr_in6 sa6_src, sa6_dst;
#ifdef COMPAT_RFC1885
	int mtu = IPV6_MMTU;
	struct sockaddr_in6 *sin6 = &icmp6_reflect_rt.ro_dst;
#endif

	/* too short to reflect */
	if (off < sizeof(struct ip6_hdr)) {
		nd6log((LOG_DEBUG,
		    "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
		    (u_long)off, (u_long)sizeof(struct ip6_hdr),
		    __FILE__, __LINE__));
		goto bad;
	}

	/*
	 * If there are extra headers between IPv6 and ICMPv6, strip
	 * off that header first.
	 */
#ifdef DIAGNOSTIC
	if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
		panic("assumption failed in icmp6_reflect");
#endif
	if (off > sizeof(struct ip6_hdr)) {
		size_t l;
		struct ip6_hdr nip6;

		l = off - sizeof(struct ip6_hdr);
		m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
		m_adj(m, l);
		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
		if (m->m_len < l) {
			if ((m = m_pullup(m, l)) == NULL)
				return;
		}
		bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
	} else /* off == sizeof(struct ip6_hdr) */ {
		size_t l;
		l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
		if (m->m_len < l) {
			if ((m = m_pullup(m, l)) == NULL)
				return;
		}
	}
	plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	icmp6 = (struct icmp6_hdr *)(ip6 + 1);
	type = icmp6->icmp6_type; /* keep type for statistics */
	code = icmp6->icmp6_code; /* ditto. */
	t = ip6->ip6_dst;

	/*
	 * ip6_input() drops a packet if its src is multicast.
	 * So, the src is never multicast.
	 */
	ip6->ip6_dst = ip6->ip6_src;

	/*
	 * XXX: make sure to embed scope zone information, using
	 * already embedded IDs or the received interface (if any).
	 * Note that rcvif may be NULL.
	 * TODO: scoped routing case (XXX).
	 */
	bzero(&sa6_src, sizeof(sa6_src));
	sa6_src.sin6_family = AF_INET6;
	sa6_src.sin6_len = sizeof(sa6_src);
	sa6_src.sin6_addr = ip6->ip6_dst;
	in6_recoverscope(&sa6_src, &ip6->ip6_dst, m->m_pkthdr.rcvif);
	in6_embedscope(&ip6->ip6_dst, &sa6_src, NULL, NULL);
	bzero(&sa6_dst, sizeof(sa6_dst));
	sa6_dst.sin6_family = AF_INET6;
	sa6_dst.sin6_len = sizeof(sa6_dst);
	sa6_dst.sin6_addr = t;
	in6_recoverscope(&sa6_dst, &t, m->m_pkthdr.rcvif);
	in6_embedscope(&t, &sa6_dst, NULL, NULL);

#ifdef COMPAT_RFC1885
	/*
	 * xxx guess MTU
	 * RFC 1885 requires that echo reply should be truncated if it
	 * does not fit in with (return) path MTU, but the description was
	 * removed in the new spec.
	 */
	if (icmp6_reflect_rt.ro_rt == 0 ||
	    ! (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, &ip6->ip6_dst))) {
		if (icmp6_reflect_rt.ro_rt) {
			RTFREE(icmp6_reflect_rt.ro_rt);
			icmp6_reflect_rt.ro_rt = 0;
		}
		bzero(sin6, sizeof(*sin6));
		sin6->sin6_family = PF_INET6;
		sin6->sin6_len = sizeof(struct sockaddr_in6);
		sin6->sin6_addr = ip6->ip6_dst;

		rtalloc_ign((struct route *)&icmp6_reflect_rt.ro_rt,
			    RTF_PRCLONING);
	}

	if (icmp6_reflect_rt.ro_rt == 0)
		goto bad;

	if ((icmp6_reflect_rt.ro_rt->rt_flags & RTF_HOST)
	    && mtu < icmp6_reflect_rt.ro_rt->rt_ifp->if_mtu)
		mtu = icmp6_reflect_rt.ro_rt->rt_rmx.rmx_mtu;

	if (mtu < m->m_pkthdr.len) {
		plen -= (m->m_pkthdr.len - mtu);
		m_adj(m, mtu - m->m_pkthdr.len);
	}
#endif
	/*
	 * If the incoming packet was addressed directly to us(i.e. unicast),
	 * use dst as the src for the reply.
	 * The IN6_IFF_NOTREADY case would be VERY rare, but is possible
	 * (for example) when we encounter an error while forwarding procedure
	 * destined to a duplicated address of ours.
	 */
	for (ia = in6_ifaddr; ia; ia = ia->ia_next)
		if (IN6_ARE_ADDR_EQUAL(&t, &ia->ia_addr.sin6_addr) &&
		    (ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)) == 0) {
			src = &t;
			break;
		}
	if (ia == NULL && IN6_IS_ADDR_LINKLOCAL(&t) && (m->m_flags & M_LOOP)) {
		/*
		 * This is the case if the dst is our link-local address
		 * and the sender is also ourselves.
		 */
		src = &t;
	}

	if (src == 0) {
		int e;
		struct route_in6 ro;

		/*
		 * This case matches to multicasts, our anycast, or unicasts
		 * that we do not own. Select a source address based on the
		 * source address of the erroneous packet.
		 */
		bzero(&ro, sizeof(ro));
		src = in6_selectsrc(&sa6_src, NULL, NULL, &ro, NULL, &e);
		if (ro.ro_rt)
			RTFREE(ro.ro_rt); /* XXX: we could use this */
		if (src == NULL) {
			nd6log((LOG_DEBUG,
			    "icmp6_reflect: source can't be determined: "
			    "dst=%s, error=%d\n",
			    ip6_sprintf(&sa6_src.sin6_addr), e));
			goto bad;
		}
	}

	ip6->ip6_src = *src;

	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	if (m->m_pkthdr.rcvif) {
		/* XXX: This may not be the outgoing interface */
		ip6->ip6_hlim = nd_ifinfo[m->m_pkthdr.rcvif->if_index].chlim;
	} else
		ip6->ip6_hlim = ip6_defhlim;

	icmp6->icmp6_cksum = 0;
	icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
					sizeof(struct ip6_hdr), plen);

	/*
	 * XXX option handling
	 */

	m->m_flags &= ~(M_BCAST|M_MCAST);
#ifdef COMPAT_RFC1885
	ip6_output(m, NULL, &icmp6_reflect_rt, 0, NULL, &outif, NULL);
#else
	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
#endif
	if (outif)
		icmp6_ifoutstat_inc(outif, type, code);

	return;

 bad:
	m_freem(m);
	return;
}

/* drive the MLD fast timers */
void
icmp6_fasttimo()
{

	mld6_fasttimeo();
}

/*
 * Format a "(src=... dst=... tgt=...)" triple for redirect diagnostics.
 * Returns a pointer to a static buffer (not re-entrant).
 */
static const char *
icmp6_redirect_diag(src6, dst6, tgt6)
	struct in6_addr *src6;
	struct in6_addr *dst6;
	struct in6_addr *tgt6;
{
	static char buf[1024];
	snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)",
		ip6_sprintf(src6), ip6_sprintf(dst6), ip6_sprintf(tgt6));
	return buf;
}

/*
 * Validate and apply an inbound ND Redirect (RFC 2461 8.1/8.3),
 * updating the neighbor cache and the routing table.  Consumes m.
 */
void
icmp6_redirect_input(m, off)
	struct mbuf *m;
	int off;
{
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct nd_redirect *nd_rd;
	int icmp6len = ntohs(ip6->ip6_plen);
	char *lladdr = NULL;
	int lladdrlen = 0;
	u_char *redirhdr = NULL;
	int redirhdrlen = 0;
	struct rtentry *rt = NULL;
	int is_router;
	int is_onlink;
	struct in6_addr src6 = ip6->ip6_src;
	struct in6_addr redtgt6;
	struct in6_addr
	    reddst6;
	union nd_opts ndopts;

	if (!m || !ifp)
		return;

	/* XXX if we are router, we don't update route by icmp6 redirect */
	if (ip6_forwarding)
		goto freeit;
	if (!icmp6_rediraccept)
		goto freeit;

#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, icmp6len,);
	nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);
#else
	IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
	if (nd_rd == NULL) {
		icmp6stat.icp6s_tooshort++;
		return;
	}
#endif
	redtgt6 = nd_rd->nd_rd_target;
	reddst6 = nd_rd->nd_rd_dst;

	/* KAME scope hack: embed the receiving ifindex in link-locals */
	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
		redtgt6.s6_addr16[1] = htons(ifp->if_index);
	if (IN6_IS_ADDR_LINKLOCAL(&reddst6))
		reddst6.s6_addr16[1] = htons(ifp->if_index);

	/* validation */
	if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
		nd6log((LOG_ERR,
			"ICMP6 redirect sent from %s rejected; "
			"must be from linklocal\n", ip6_sprintf(&src6)));
		goto bad;
	}
	if (ip6->ip6_hlim != 255) {
		nd6log((LOG_ERR,
			"ICMP6 redirect sent from %s rejected; "
			"hlim=%d (must be 255)\n",
			ip6_sprintf(&src6), ip6->ip6_hlim));
		goto bad;
	}
    {
	/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
	struct sockaddr_in6 sin6;
	struct in6_addr *gw6;

	bzero(&sin6, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_len = sizeof(struct sockaddr_in6);
	bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
	rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
	if (rt) {
		if (rt->rt_gateway == NULL ||
		    rt->rt_gateway->sa_family != AF_INET6) {
			nd6log((LOG_ERR,
			    "ICMP6 redirect rejected; no route "
			    "with inet6 gateway found for redirect dst: %s\n",
			    icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
			RTFREE(rt);
			goto bad;
		}

		gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
		if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
			nd6log((LOG_ERR,
				"ICMP6 redirect rejected; "
				"not equal to gw-for-src=%s (must be same): "
				"%s\n",
				ip6_sprintf(gw6),
				icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
			RTFREE(rt);
			goto bad;
		}
	} else {
		nd6log((LOG_ERR,
			"ICMP6 redirect rejected; "
			"no route found for redirect dst: %s\n",
			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}
	RTFREE(rt);
	rt = NULL;
    }
	if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
		nd6log((LOG_ERR,
			"ICMP6 redirect rejected; "
			"redirect dst must be unicast: %s\n",
			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}

	is_router = is_onlink = 0;
	if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
		is_router = 1;	/* router case */
	if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
		is_onlink = 1;	/* on-link destination case */
	if (!is_router && !is_onlink) {
		nd6log((LOG_ERR,
			"ICMP6 redirect rejected; "
			"neither router case nor onlink case: %s\n",
			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}
	/* validation passed */

	icmp6len -= sizeof(*nd_rd);
	nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
	if (nd6_options(&ndopts) < 0) {
		nd6log((LOG_INFO, "icmp6_redirect_input: "
			"invalid ND option, rejected: %s\n",
			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		/* nd6_options have incremented stats */
		goto freeit;
	}

	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
		lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
	}

	if (ndopts.nd_opts_rh) {
		redirhdrlen = ndopts.nd_opts_rh->nd_opt_rh_len;
		redirhdr = (u_char *)(ndopts.nd_opts_rh + 1); /* xxx */
	}

	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
		nd6log((LOG_INFO,
			"icmp6_redirect_input: lladdrlen mismatch for %s "
			"(if %d, icmp6 packet %d): %s\n",
			ip6_sprintf(&redtgt6), ifp->if_addrlen, lladdrlen - 2,
			icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
		goto bad;
	}

	/* RFC 2461 8.3 */
	nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
			 is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);

	if (!is_onlink) {	/* better router case. perform rtredirect. */
		/* perform rtredirect */
		struct sockaddr_in6 sdst;
		struct sockaddr_in6 sgw;
		struct sockaddr_in6 ssrc;

		bzero(&sdst, sizeof(sdst));
		bzero(&sgw, sizeof(sgw));
		bzero(&ssrc, sizeof(ssrc));
		sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
		sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
			sizeof(struct sockaddr_in6);
		bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
		bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
		bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
		rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw,
			   (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST,
			   (struct sockaddr *)&ssrc,
			   (struct rtentry **)NULL);
	}
	/* finally update cached route in each socket via pfctlinput */
    {
	struct sockaddr_in6 sdst;

	bzero(&sdst, sizeof(sdst));
	sdst.sin6_family = AF_INET6;
	sdst.sin6_len = sizeof(struct sockaddr_in6);
	bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
	pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
#ifdef IPSEC
	key_sa_routechange((struct sockaddr *)&sdst);
#endif
    }

 freeit:
	m_freem(m);
	return;

 bad:
	icmp6stat.icp6s_badredirect++;
	m_freem(m);
}

/*
 * Build and transmit an ND Redirect for packet m0 being forwarded via
 * route rt (RFC 2461 8.2).  Attaches target link-layer address and
 * redirected-header options when room permits.  Consumes m0.
 */
void
icmp6_redirect_output(m0, rt)
	struct mbuf *m0;
	struct rtentry *rt;
{
	struct ifnet *ifp;	/* my outgoing interface */
	struct in6_addr *ifp_ll6;
	struct in6_addr *router_ll6;
	struct ip6_hdr *sip6;	/* m0 as struct ip6_hdr */
	struct mbuf *m = NULL;	/* newly allocated one */
	struct ip6_hdr *ip6;	/* m as struct ip6_hdr */
	struct nd_redirect *nd_rd;
	size_t maxlen;
	u_char *p;
	struct ifnet *outif = NULL;
	struct sockaddr_in6 src_sa;

	icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);

	/* if we are not router, we don't send icmp6 redirect */
	if (!ip6_forwarding || ip6_accept_rtadv)
		goto fail;

	/* sanity check */
	if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp))
		goto fail;

	/*
	 * Address check:
	 *  the source address must identify a neighbor, and
	 *  the destination address must not be a multicast address
	 *  [RFC 2461, sec 8.2]
	 */
	sip6 = mtod(m0, struct
	    ip6_hdr *);
	bzero(&src_sa, sizeof(src_sa));
	src_sa.sin6_family = AF_INET6;
	src_sa.sin6_len = sizeof(src_sa);
	src_sa.sin6_addr = sip6->ip6_src;
	/* we don't currently use sin6_scope_id, but eventually use it */
	src_sa.sin6_scope_id = in6_addr2scopeid(ifp, &sip6->ip6_src);
	if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
		goto fail;
	if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
		goto fail;	/* what should we do here? */

	/* rate limit */
	if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
		goto fail;

	/*
	 * Since we are going to append up to 1280 bytes (= IPV6_MMTU),
	 * we almost always ask for an mbuf cluster for simplicity.
	 * (MHLEN < IPV6_MMTU is almost always true)
	 */
#if IPV6_MMTU >= MCLBYTES
# error assumption failed about IPV6_MMTU and MCLBYTES
#endif
	MGETHDR(m, M_DONTWAIT, MT_HEADER);
	if (m && IPV6_MMTU >= MHLEN)
		MCLGET(m, M_DONTWAIT);
	if (!m)
		goto fail;
	m->m_pkthdr.rcvif = NULL;
	m->m_len = 0;
	maxlen = M_TRAILINGSPACE(m);
	maxlen = min(IPV6_MMTU, maxlen);
	/* just for safety */
	if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
	    ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
		goto fail;
	}

	{
		/* get ip6 linklocal address for ifp(my outgoing interface). */
		struct in6_ifaddr *ia;
		if ((ia = in6ifa_ifpforlinklocal(ifp,
						 IN6_IFF_NOTREADY|
						 IN6_IFF_ANYCAST)) == NULL)
			goto fail;
		ifp_ll6 = &ia->ia_addr.sin6_addr;
	}

	/* get ip6 linklocal address for the router. */
	if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
		struct sockaddr_in6 *sin6;
		sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
		router_ll6 = &sin6->sin6_addr;
		if (!IN6_IS_ADDR_LINKLOCAL(router_ll6))
			router_ll6 = (struct in6_addr *)NULL;
	} else
		router_ll6 = (struct in6_addr *)NULL;

	/* ip6 */
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	/* ip6->ip6_plen will be set later */
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_hlim = 255;
	/* ip6->ip6_src must be linklocal addr for my outgoing if. */
	bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
	bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));

	/* ND Redirect */
	nd_rd = (struct nd_redirect *)(ip6 + 1);
	nd_rd->nd_rd_type = ND_REDIRECT;
	nd_rd->nd_rd_code = 0;
	nd_rd->nd_rd_reserved = 0;
	if (rt->rt_flags & RTF_GATEWAY) {
		/*
		 * nd_rd->nd_rd_target must be a link-local address in
		 * better router cases.
		 */
		if (!router_ll6)
			goto fail;
		bcopy(router_ll6, &nd_rd->nd_rd_target,
		      sizeof(nd_rd->nd_rd_target));
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
		      sizeof(nd_rd->nd_rd_dst));
	} else {
		/* make sure redtgt == reddst */
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
		      sizeof(nd_rd->nd_rd_target));
		bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
		      sizeof(nd_rd->nd_rd_dst));
	}

	p = (u_char *)(nd_rd + 1);

	if (!router_ll6)
		goto nolladdropt;

	{
		/* target lladdr option */
		struct rtentry *rt_router = NULL;
		int len;
		struct sockaddr_dl *sdl;
		struct nd_opt_hdr *nd_opt;
		char *lladdr;

		rt_router = nd6_lookup(router_ll6, 0, ifp);
		if (!rt_router)
			goto nolladdropt;
		len = sizeof(*nd_opt) + ifp->if_addrlen;
		len = (len + 7) & ~7;	/* round by 8 */
		/* safety check */
		if (len + (p - (u_char *)ip6) > maxlen)
			goto nolladdropt;
		if (!(rt_router->rt_flags & RTF_GATEWAY) &&
		    (rt_router->rt_flags & RTF_LLINFO) &&
		    (rt_router->rt_gateway->sa_family == AF_LINK) &&
		    (sdl = (struct sockaddr_dl *)rt_router->rt_gateway) &&
		    sdl->sdl_alen) {
			nd_opt = (struct nd_opt_hdr *)p;
			nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
			nd_opt->nd_opt_len = len >> 3;
			lladdr = (char *)(nd_opt + 1);
			bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen);
			p += len;
		}
	}
  nolladdropt:;

	m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;

	/* just to be safe */
#ifdef M_DECRYPTED	/*not openbsd*/
	if (m0->m_flags & M_DECRYPTED)
		goto noredhdropt;
#endif
	if (p - (u_char *)ip6 > maxlen)
		goto noredhdropt;

	{
		/* redirected header option */
		int len;
		struct nd_opt_rd_hdr *nd_opt_rh;

		/*
		 * compute the maximum size for icmp6 redirect header option.
		 * XXX room for auth header?
		 */
		len = maxlen - (p - (u_char *)ip6);
		len &= ~7;

		/* This is just for simplicity. */
		if (m0->m_pkthdr.len != m0->m_len) {
			if (m0->m_next) {
				m_freem(m0->m_next);
				m0->m_next = NULL;
			}
			m0->m_pkthdr.len = m0->m_len;
		}

		/*
		 * Redirected header option spec (RFC2461 4.6.3) talks nothing
		 * about padding/truncate rule for the original IP packet.
		 * From the discussion on IPv6imp in Feb 1999, the consensus was:
		 * - "attach as much as possible" is the goal
		 * - pad if not aligned (original size can be guessed by original
		 *   ip6 header)
		 * Following code adds the padding if it is simple enough,
		 * and truncates if not.
		 */
		if (m0->m_next || m0->m_pkthdr.len != m0->m_len)
			panic("assumption failed in %s:%d\n", __FILE__,
			      __LINE__);

		if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
			/* not enough room, truncate */
			m0->m_pkthdr.len = m0->m_len = len -
				sizeof(*nd_opt_rh);
		} else {
			/* enough room, pad or truncate */
			size_t extra;

			extra = m0->m_pkthdr.len % 8;
			if (extra) {
				/* pad if easy enough, truncate if not */
				if (8 - extra <= M_TRAILINGSPACE(m0)) {
					/* pad */
					m0->m_len += (8 - extra);
					m0->m_pkthdr.len += (8 - extra);
				} else {
					/* truncate */
					m0->m_pkthdr.len -= extra;
					m0->m_len -= extra;
				}
			}
			len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
			m0->m_pkthdr.len = m0->m_len = len -
				sizeof(*nd_opt_rh);
		}

		nd_opt_rh = (struct nd_opt_rd_hdr *)p;
		bzero(nd_opt_rh, sizeof(*nd_opt_rh));
		nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
		nd_opt_rh->nd_opt_rh_len = len >> 3;
		p += sizeof(*nd_opt_rh);
		m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;

		/* connect m0 to m */
		m->m_next = m0;
		m->m_pkthdr.len = m->m_len + m0->m_len;
	}
  noredhdropt:;

	/* XXX: clear embedded link-local scope before transmission */
	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_src))
		sip6->ip6_src.s6_addr16[1] = 0;
	if (IN6_IS_ADDR_LINKLOCAL(&sip6->ip6_dst))
		sip6->ip6_dst.s6_addr16[1] = 0;
#if 0
	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
		ip6->ip6_src.s6_addr16[1] = 0;
	if (IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst))
		ip6->ip6_dst.s6_addr16[1] = 0;
#endif
	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_target))
		nd_rd->nd_rd_target.s6_addr16[1] = 0;
	if (IN6_IS_ADDR_LINKLOCAL(&nd_rd->nd_rd_dst))
		nd_rd->nd_rd_dst.s6_addr16[1] = 0;

	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));

	nd_rd->nd_rd_cksum = 0;
	nd_rd->nd_rd_cksum
		= in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen));

	/* send the packet to outside... */
	ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
	if (outif) {
		icmp6_ifstat_inc(outif, ifs6_out_msg);
		icmp6_ifstat_inc(outif, ifs6_out_redirect);
	}
	icmp6stat.icp6s_outhist[ND_REDIRECT]++;

	return;

fail:
	if (m)
		m_freem(m);
	if (m0)
		m_freem(m0);
}

#ifdef HAVE_NRL_INPCB
#define sotoin6pcb	sotoinpcb
#define in6pcb		inpcb
#define in6p_icmp6filt	inp_icmp6filt
#endif
/*
 * ICMPv6 socket option processing.
 *
 * Handles ICMP6_FILTER get/set on a raw ICMPv6 socket; anything else
 * yields ENOPROTOOPT (or EINVAL for a non-ICMPv6 level).
 */
int
icmp6_ctloutput(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error = 0;
	int optlen;
	struct inpcb *inp = sotoinpcb(so);
	int level, op, optname;

	if (sopt) {
		level = sopt->sopt_level;
		op = sopt->sopt_dir;
		optname = sopt->sopt_name;
		optlen = sopt->sopt_valsize;
	} else
		level = op = optname = optlen = 0;

	if (level != IPPROTO_ICMPV6) {
		return EINVAL;
	}

	switch (op) {
	case PRCO_SETOPT:
		switch (optname) {
		case ICMP6_FILTER:
		    {
			struct icmp6_filter *p;

			if (optlen != sizeof(*p)) {
				error = EMSGSIZE;
				break;
			}
			if (inp->in6p_icmp6filt == NULL) {
				error = EINVAL;
				break;
			}
			error = sooptcopyin(sopt, inp->in6p_icmp6filt, optlen,
				optlen);
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;

	case PRCO_GETOPT:
		switch (optname) {
		case ICMP6_FILTER:
		    {
			if (inp->in6p_icmp6filt == NULL) {
				error = EINVAL;
				break;
			}
			error = sooptcopyout(sopt, inp->in6p_icmp6filt,
				sizeof(struct icmp6_filter));
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		break;
	}

	return(error);
}
#ifdef HAVE_NRL_INPCB
#undef sotoin6pcb
#undef in6pcb
#undef in6p_icmp6filt
#endif

#ifndef HAVE_PPSRATECHECK
#ifndef timersub
#define	timersub(tvp, uvp, vvp)						\
	do {								\
		(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;		\
		(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;	\
		if ((vvp)->tv_usec < 0) {				\
			(vvp)->tv_sec--;				\
			(vvp)->tv_usec += 1000000;			\
		}							\
	} while (0)
#endif

/*
 * ppsratecheck(): packets (or events) per second limitation.
 *
 * Returns non-zero when the event may proceed (under maxpps for the
 * current one-second window, or maxpps < 0 meaning "no limit"), zero
 * when it should be suppressed.  *curpps counts events in the window.
 */
static int
ppsratecheck(lasttime, curpps, maxpps)
	struct timeval *lasttime;
	int *curpps;
	int maxpps;	/* maximum pps allowed */
{
	struct timeval tv, delta;
	int s, rv;

	s = splclock();
	microtime(&tv);
	splx(s);
	timersub(&tv, lasttime, &delta);

	/*
	 * Check for 0,0 so that the message will be seen at least once.
	 * If more than one second has passed since the last update of
	 * lasttime, reset the counter.
	 *
	 * We do increment *curpps even in *curpps < maxpps case, as some may
	 * try to use *curpps for stat purposes as well.
	 */
	if ((lasttime->tv_sec == 0 && lasttime->tv_usec == 0) ||
	    delta.tv_sec >= 1) {
		*lasttime = tv;
		*curpps = 0;
		rv = 1;
	} else if (maxpps < 0)
		rv = 1;
	else if (*curpps < maxpps)
		rv = 1;
	else
		rv = 0;

#if 1 /* DIAGNOSTIC? */
	/* be careful about wrap-around */
	if (*curpps + 1 > *curpps)
		*curpps = *curpps + 1;
#else
	/*
	 * assume that there's not too many calls to this function.
	 * not sure if the assumption holds, as it depends on *caller's*
	 * behavior, not the behavior of this function.
	 * IMHO it is wrong to make assumption on the caller's behavior,
	 * so the above #if is #if 1, not #ifdef DIAGNOSTIC.
	 */
	*curpps = *curpps + 1;
#endif

	return (rv);
}
#endif

/*
 * Perform rate limit check.
 * Returns 0 if it is okay to send the icmp6 packet.
 * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
 * limitation.
 *
 * XXX per-destination/type check necessary?
*/ static int icmp6_ratelimit(dst, type, code) const struct in6_addr *dst; /* not used at this moment */ const int type; /* not used at this moment */ const int code; /* not used at this moment */ { int ret; ret = 0; /* okay to send */ /* PPS limit */ if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count, icmp6errppslim)) { /* The packet is subject to rate limit */ ret++; } return ret; } Index: head/sys/netinet6/ip6_input.c =================================================================== --- head/sys/netinet6/ip6_input.c (revision 108465) +++ head/sys/netinet6/ip6_input.c (revision 108466) @@ -1,1663 +1,1663 @@ /* $FreeBSD$ */ /* $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
 */

#include "opt_ip6fw.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_pfil_hooks.h"

/*
 * NOTE(review): the angle-bracket header names in the #include lines below
 * appear to have been stripped during text extraction; only the bare
 * #include tokens survive.  Restore the header names from the original
 * revision (r108466) before compiling -- TODO confirm against upstream.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef PFIL_HOOKS
#include
#endif
#include
#include
#ifdef INET
#include
#include
#endif /* INET */
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef IPSEC
#include
#ifdef INET6
#include
#endif
#endif
#ifdef FAST_IPSEC
#include
#include
#define IPSEC
#endif /* FAST_IPSEC */
#include
#include
#include

/* protocol switch owned by the INET6 domain (defined in in6_proto.c) */
extern struct domain inet6domain;

/* maps an IPv6 next-header value to an index into inet6sw[] */
u_char ip6_protox[IPPROTO_MAX];
static int ip6qmaxlen = IFQ_MAXLEN;
struct in6_ifaddr *in6_ifaddr;

extern struct callout in6_tmpaddrtimer_ch;

int ip6_forward_srcrt;			/* XXX */
int ip6_sourcecheck;			/* XXX */
int ip6_sourcecheck_interval;		/* XXX */
int ip6_ours_check_algorithm;

/* firewall hooks */
ip6_fw_chk_t *ip6_fw_chk_ptr;
ip6_fw_ctl_t *ip6_fw_ctl_ptr;
int ip6_fw_enable = 1;

struct ip6stat ip6stat;

static void ip6_init2 __P((void *));
static struct ip6aux *ip6_setdstifaddr __P((struct mbuf *, struct in6_ifaddr *));
static int ip6_hopopts_input __P((u_int32_t *, u_int32_t *, struct mbuf **, int *));
#ifdef PULLDOWN_TEST
static struct mbuf *ip6_pullexthdr __P((struct mbuf *, size_t, int));
#endif

/*
 * IP6 initialization: fill in IP6 protocol switch table.
 * All protocols not implemented in kernel go to raw IP6 protocol handler.
 */
void
ip6_init()
{
	struct ip6protosw *pr;
	int i;
	struct timeval tv;

#ifdef DIAGNOSTIC
	if (sizeof(struct protosw) != sizeof(struct ip6protosw))
		panic("sizeof(protosw) != sizeof(ip6protosw)");
#endif
	pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
	if (pr == 0)
		panic("ip6_init");
	/* default every next-header slot to the raw IPv6 handler... */
	for (i = 0; i < IPPROTO_MAX; i++)
		ip6_protox[i] = pr - inet6sw;
	/* ...then point each implemented protocol at its own switch entry. */
	for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
	    pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
		if (pr->pr_domain->dom_family == PF_INET6 &&
		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
			ip6_protox[pr->pr_protocol] = pr - inet6sw;
	ip6intrq.ifq_maxlen = ip6qmaxlen;
	mtx_init(&ip6intrq.ifq_mtx, "ip6_inq", NULL, MTX_DEF);
	ip6intrq_present = 1;
	register_netisr(NETISR_IPV6, ip6intr);
	nd6_init();
	frag6_init();
	/*
	 * in many cases, random() here does NOT return random number
	 * as initialization during bootstrap time occur in fixed order.
	 */
	microtime(&tv);
	ip6_flow_seq = random() ^ tv.tv_usec;
	microtime(&tv);
	ip6_desync_factor = (random() ^ tv.tv_usec) % MAX_TEMP_DESYNC_FACTOR;
}

/*
 * Second-stage initialization: runs after route_init() (see SYSINIT below),
 * attaches the loopback interface and starts the ND6/router-renumbering/
 * temporary-address timers.
 */
static void
ip6_init2(dummy)
	void *dummy;
{
	/*
	 * to route local address of p2p link to loopback,
	 * assign loopback address first.
	 */
	in6_ifattach(&loif[0], NULL);

	/* nd6_timer_init */
	callout_init(&nd6_timer_ch, 0);
	callout_reset(&nd6_timer_ch, hz, nd6_timer, NULL);

	/* router renumbering prefix list maintenance */
	callout_init(&in6_rr_timer_ch, 0);
	callout_reset(&in6_rr_timer_ch, hz, in6_rr_timer, NULL);

	/* timer for regeneranation of temporary addresses randomize ID */
	callout_reset(&in6_tmpaddrtimer_ch,
		      (ip6_temp_preferred_lifetime - ip6_desync_factor -
		       ip6_temp_regen_advance) * hz,
		      in6_tmpaddrtimer, NULL);
}

/* cheat */
/* This must be after route_init(), which is now SI_ORDER_THIRD */
SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);

/*
 * IP6 input interrupt handling. Just pass the packet to ip6_input.
*/ void ip6intr() { int s; struct mbuf *m; for (;;) { s = splimp(); IF_DEQUEUE(&ip6intrq, m); splx(s); if (m == 0) return; ip6_input(m); } } extern struct route_in6 ip6_forward_rt; void ip6_input(m) struct mbuf *m; { struct ip6_hdr *ip6; int off = sizeof(struct ip6_hdr), nest; u_int32_t plen; u_int32_t rtalert = ~0; int nxt, ours = 0; struct ifnet *deliverifp = NULL; #ifdef PFIL_HOOKS struct packet_filter_hook *pfh; struct mbuf *m0; int rv; #endif /* PFIL_HOOKS */ #ifdef IPSEC /* * should the inner packet be considered authentic? * see comment in ah4_input(). */ if (m) { m->m_flags &= ~M_AUTHIPHDR; m->m_flags &= ~M_AUTHIPDGM; } #endif /* * make sure we don't have onion peering information into m_aux. */ ip6_delaux(m); /* * mbuf statistics */ if (m->m_flags & M_EXT) { if (m->m_next) ip6stat.ip6s_mext2m++; else ip6stat.ip6s_mext1++; } else { #define M2MMAX (sizeof(ip6stat.ip6s_m2m)/sizeof(ip6stat.ip6s_m2m[0])) if (m->m_next) { if (m->m_flags & M_LOOP) { ip6stat.ip6s_m2m[loif[0].if_index]++; /* XXX */ } else if (m->m_pkthdr.rcvif->if_index < M2MMAX) ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++; else ip6stat.ip6s_m2m[0]++; } else ip6stat.ip6s_m1++; #undef M2MMAX } in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive); ip6stat.ip6s_total++; #ifndef PULLDOWN_TEST /* * L2 bridge code and some other code can return mbuf chain * that does not conform to KAME requirement. too bad. * XXX: fails to join if interface MTU > MCLBYTES. jumbogram? 
*/ if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) { struct mbuf *n; MGETHDR(n, M_DONTWAIT, MT_HEADER); if (n) - M_COPY_PKTHDR(n, m); + M_MOVE_PKTHDR(n, m); if (n && m->m_pkthdr.len > MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_freem(n); n = NULL; } } if (n == NULL) { m_freem(m); return; /*ENOBUFS*/ } m_copydata(m, 0, m->m_pkthdr.len, mtod(n, caddr_t)); n->m_len = m->m_pkthdr.len; m_freem(m); m = n; } IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /*nothing*/); #endif if (m->m_len < sizeof(struct ip6_hdr)) { struct ifnet *inifp; inifp = m->m_pkthdr.rcvif; if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == 0) { ip6stat.ip6s_toosmall++; in6_ifstat_inc(inifp, ifs6_in_hdrerr); return; } } ip6 = mtod(m, struct ip6_hdr *); if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) { ip6stat.ip6s_badvers++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); goto bad; } #ifdef PFIL_HOOKS /* * Run through list of hooks for input packets. If there are any * filters which require that additional packets in the flow are * not fast-forwarded, they must clear the M_CANFASTFWD flag. * Note that filters must _never_ set this flag, as another filter * in the list may have previously cleared it. */ m0 = m; pfh = pfil_hook_get(PFIL_IN, &inet6sw[ip6_protox[IPPROTO_IPV6]].pr_pfh); for (; pfh; pfh = pfh->pfil_link.tqe_next) if (pfh->pfil_func) { rv = pfh->pfil_func(ip6, sizeof(*ip6), m->m_pkthdr.rcvif, 0, &m0); if (rv) return; m = m0; if (m == NULL) return; ip6 = mtod(m, struct ip6_hdr *); } #endif /* PFIL_HOOKS */ ip6stat.ip6s_nxthist[ip6->ip6_nxt]++; /* * Check with the firewall... */ if (ip6_fw_enable && ip6_fw_chk_ptr) { u_short port = 0; /* If ipfw says divert, we have to just drop packet */ /* use port as a dummy argument */ if ((*ip6_fw_chk_ptr)(&ip6, NULL, &port, &m)) { m_freem(m); m = NULL; } if (!m) return; } /* * Check against address spoofing/corruption. 
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) { /* * XXX: "badscope" is not very suitable for a multicast source. */ ip6stat.ip6s_badscope++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) || IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst)) && (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { ip6stat.ip6s_badscope++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } /* * The following check is not documented in specs. A malicious * party may be able to use IPv4 mapped addr to confuse tcp/udp stack * and bypass security checks (act as if it was from 127.0.0.1 by using * IPv6 src ::ffff:127.0.0.1). Be cautious. * * This check chokes if we are in an SIIT cloud. As none of BSDs * support IPv4-less kernel compilation, we cannot support SIIT * environment at all. So, it makes more sense for us to reject any * malicious packets for non-SIIT environment, than try to do a * partical support for SIIT environment. */ if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { ip6stat.ip6s_badscope++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } #if 0 /* * Reject packets with IPv4 compatible addresses (auto tunnel). * * The code forbids auto tunnel relay case in RFC1933 (the check is * stronger than RFC1933). We may want to re-enable it if mech-xx * is revised to forbid relaying case. 
*/ if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) || IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) { ip6stat.ip6s_badscope++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } #endif /* drop packets if interface ID portion is already filled */ if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) { if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src) && ip6->ip6_src.s6_addr16[1]) { ip6stat.ip6s_badscope++; goto bad; } if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst) && ip6->ip6_dst.s6_addr16[1]) { ip6stat.ip6s_badscope++; goto bad; } } if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) ip6->ip6_src.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) ip6->ip6_dst.s6_addr16[1] = htons(m->m_pkthdr.rcvif->if_index); #if 0 /* this case seems to be unnecessary. (jinmei, 20010401) */ /* * We use rt->rt_ifp to determine if the address is ours or not. * If rt_ifp is lo0, the address is ours. * The problem here is, rt->rt_ifp for fe80::%lo0/64 is set to lo0, * so any address under fe80::%lo0/64 will be mistakenly considered * local. The special case is supplied to handle the case properly * by actually looking at interface addresses * (using in6ifa_ifpwithaddr). */ if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) != 0 && IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_dst)) { if (!in6ifa_ifpwithaddr(m->m_pkthdr.rcvif, &ip6->ip6_dst)) { icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, 0); /* m is already freed */ return; } ours = 1; deliverifp = m->m_pkthdr.rcvif; goto hbhcheck; } #endif /* * Multicast check */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { struct in6_multi *in6m = 0; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast); /* * See if we belong to the destination multicast group on the * arrival interface. 
*/ IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m); if (in6m) ours = 1; else if (!ip6_mrouter) { ip6stat.ip6s_notmember++; ip6stat.ip6s_cantforward++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); goto bad; } deliverifp = m->m_pkthdr.rcvif; goto hbhcheck; } /* * Unicast check */ switch (ip6_ours_check_algorithm) { default: /* * XXX: I intentionally broke our indentation rule here, * since this switch-case is just for measurement and * therefore should soon be removed. */ if (ip6_forward_rt.ro_rt != NULL && (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 && IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &((struct sockaddr_in6 *)(&ip6_forward_rt.ro_dst))->sin6_addr)) ip6stat.ip6s_forward_cachehit++; else { struct sockaddr_in6 *dst6; if (ip6_forward_rt.ro_rt) { /* route is down or destination is different */ ip6stat.ip6s_forward_cachemiss++; RTFREE(ip6_forward_rt.ro_rt); ip6_forward_rt.ro_rt = 0; } bzero(&ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6)); dst6 = (struct sockaddr_in6 *)&ip6_forward_rt.ro_dst; dst6->sin6_len = sizeof(struct sockaddr_in6); dst6->sin6_family = AF_INET6; dst6->sin6_addr = ip6->ip6_dst; #ifdef SCOPEDROUTING ip6_forward_rt.ro_dst.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.rcvif, &ip6->ip6_dst); #endif rtalloc_ign((struct route *)&ip6_forward_rt, RTF_PRCLONING); } #define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key)) /* * Accept the packet if the forwarding interface to the destination * according to the routing table is the loopback interface, * unless the associated route has a gateway. * Note that this approach causes to accept a packet if there is a * route to the loopback interface for the destination of the packet. * But we think it's even useful in some situations, e.g. when using * a special daemon which wants to intercept the packet. * * XXX: some OSes automatically make a cloned route for the destination * of an outgoing packet. 
If the outgoing interface of the packet * is a loopback one, the kernel would consider the packet to be * accepted, even if we have no such address assinged on the interface. * We check the cloned flag of the route entry to reject such cases, * assuming that route entries for our own addresses are not made by * cloning (it should be true because in6_addloop explicitly installs * the host route). However, we might have to do an explicit check * while it would be less efficient. Or, should we rather install a * reject route for such a case? */ if (ip6_forward_rt.ro_rt && (ip6_forward_rt.ro_rt->rt_flags & (RTF_HOST|RTF_GATEWAY)) == RTF_HOST && #ifdef RTF_WASCLONED !(ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) && #endif #ifdef RTF_CLONED !(ip6_forward_rt.ro_rt->rt_flags & RTF_CLONED) && #endif #if 0 /* * The check below is redundant since the comparison of * the destination and the key of the rtentry has * already done through looking up the routing table. */ IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &rt6_key(ip6_forward_rt.ro_rt)->sin6_addr) #endif ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) { struct in6_ifaddr *ia6 = (struct in6_ifaddr *)ip6_forward_rt.ro_rt->rt_ifa; /* * record address information into m_aux. */ (void)ip6_setdstifaddr(m, ia6); /* * packets to a tentative, duplicated, or somehow invalid * address must not be accepted. */ if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) { /* this address is ready */ ours = 1; deliverifp = ia6->ia_ifp; /* correct? */ /* Count the packet in the ip address stats */ ia6->ia_ifa.if_ipackets++; ia6->ia_ifa.if_ibytes += m->m_pkthdr.len; goto hbhcheck; } else { /* address is not ready, so discard the packet. 
*/ nd6log((LOG_INFO, "ip6_input: packet to an unready address %s->%s\n", ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst))); goto bad; } } } /* XXX indentation (see above) */ /* * FAITH(Firewall Aided Internet Translator) */ if (ip6_keepfaith) { if (ip6_forward_rt.ro_rt && ip6_forward_rt.ro_rt->rt_ifp && ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_FAITH) { /* XXX do we need more sanity checks? */ ours = 1; deliverifp = ip6_forward_rt.ro_rt->rt_ifp; /* faith */ goto hbhcheck; } } /* * Now there is no reason to process the packet if it's not our own * and we're not a router. */ if (!ip6_forwarding) { ip6stat.ip6s_cantforward++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); goto bad; } hbhcheck: /* * record address information into m_aux, if we don't have one yet. * note that we are unable to record it, if the address is not listed * as our interface address (e.g. multicast addresses, addresses * within FAITH prefixes and such). */ if (deliverifp && !ip6_getdstifaddr(m)) { struct in6_ifaddr *ia6; ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst); if (ia6) { if (!ip6_setdstifaddr(m, ia6)) { /* * XXX maybe we should drop the packet here, * as we could not provide enough information * to the upper layers. */ } } } /* * Process Hop-by-Hop options header if it's contained. * m may be modified in ip6_hopopts_input(). * If a JumboPayload option is included, plen will also be modified. */ plen = (u_int32_t)ntohs(ip6->ip6_plen); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { struct ip6_hbh *hbh; if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) { #if 0 /*touches NULL pointer*/ in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); #endif return; /* m have already been freed */ } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* * if the payload length field is 0 and the next header field * indicates Hop-by-Hop Options header, then a Jumbo Payload * option MUST be included. 
*/ if (ip6->ip6_plen == 0 && plen == 0) { /* * Note that if a valid jumbo payload option is * contained, ip6_hoptops_input() must set a valid * (non-zero) payload length to the variable plen. */ ip6stat.ip6s_badoptions++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr); icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, (caddr_t)&ip6->ip6_plen - (caddr_t)ip6); return; } #ifndef PULLDOWN_TEST /* ip6_hopopts_input() ensures that mbuf is contiguous */ hbh = (struct ip6_hbh *)(ip6 + 1); #else IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); if (hbh == NULL) { ip6stat.ip6s_tooshort++; return; } #endif nxt = hbh->ip6h_nxt; /* * accept the packet if a router alert option is included * and we act as an IPv6 router. */ if (rtalert != ~0 && ip6_forwarding) ours = 1; } else nxt = ip6->ip6_nxt; /* * Check that the amount of data in the buffers * is as at least much as the IPv6 header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) { ip6stat.ip6s_tooshort++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); goto bad; } if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) { if (m->m_len == m->m_pkthdr.len) { m->m_len = sizeof(struct ip6_hdr) + plen; m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; } else m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len); } /* * Forward if desirable. */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* * If we are acting as a multicast router, all * incoming multicast packets are passed to the * kernel-level multicast forwarding function. * The packet is returned (relatively) intact; if * ip6_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. 
*/ if (ip6_mrouter && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) { ip6stat.ip6s_cantforward++; m_freem(m); return; } if (!ours) { m_freem(m); return; } } else if (!ours) { ip6_forward(m, 0); return; } ip6 = mtod(m, struct ip6_hdr *); /* * Malicious party may be able to use IPv4 mapped addr to confuse * tcp/udp stack and bypass security checks (act as if it was from * 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious. * * For SIIT end node behavior, you may want to disable the check. * However, you will become vulnerable to attacks using IPv4 mapped * source. */ if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) || IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) { ip6stat.ip6s_badscope++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr); goto bad; } /* * Tell launch routine the next header */ ip6stat.ip6s_delivered++; in6_ifstat_inc(deliverifp, ifs6_in_deliver); nest = 0; while (nxt != IPPROTO_DONE) { if (ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) { ip6stat.ip6s_toomanyhdr++; goto bad; } /* * protection against faulty packet - there should be * more sanity checks in header chain processing. */ if (m->m_pkthdr.len < off) { ip6stat.ip6s_tooshort++; in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); goto bad; } #if 0 /* * do we need to do it for every header? yeah, other * functions can play with it (like re-allocate and copy). */ mhist = ip6_addaux(m); if (mhist && M_TRAILINGSPACE(mhist) >= sizeof(nxt)) { hist = mtod(mhist, caddr_t) + mhist->m_len; bcopy(&nxt, hist, sizeof(nxt)); mhist->m_len += sizeof(nxt); } else { ip6stat.ip6s_toomanyhdr++; goto bad; } #endif #ifdef IPSEC /* * enforce IPsec policy checking if we are seeing last header. * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. 
*/ if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 && ipsec6_in_reject(m, NULL)) { ipsec6stat.in_polvio++; goto bad; } #endif nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } return; bad: m_freem(m); } /* * set/grab in6_ifaddr correspond to IPv6 destination address. * XXX backward compatibility wrapper */ static struct ip6aux * ip6_setdstifaddr(m, ia6) struct mbuf *m; struct in6_ifaddr *ia6; { struct ip6aux *n; n = ip6_addaux(m); if (n) n->ip6a_dstia6 = ia6; return n; /* NULL if failed to set */ } struct in6_ifaddr * ip6_getdstifaddr(m) struct mbuf *m; { struct ip6aux *n; n = ip6_findaux(m); if (n) return n->ip6a_dstia6; else return NULL; } /* * Hop-by-Hop options header processing. If a valid jumbo payload option is * included, the real payload length will be stored in plenp. */ static int ip6_hopopts_input(plenp, rtalertp, mp, offp) u_int32_t *plenp; u_int32_t *rtalertp; /* XXX: should be stored more smart way */ struct mbuf **mp; int *offp; { struct mbuf *m = *mp; int off = *offp, hbhlen; struct ip6_hbh *hbh; u_int8_t *opt; /* validation of the length of the header */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); hbhlen = (hbh->ip6h_len + 1) << 3; IP6_EXTHDR_CHECK(m, off, hbhlen, -1); hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), sizeof(struct ip6_hbh)); if (hbh == NULL) { ip6stat.ip6s_tooshort++; return -1; } hbhlen = (hbh->ip6h_len + 1) << 3; IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr), hbhlen); if (hbh == NULL) { ip6stat.ip6s_tooshort++; return -1; } #endif off += hbhlen; hbhlen -= sizeof(struct ip6_hbh); opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh); if (ip6_process_hopopts(m, (u_int8_t *)hbh + sizeof(struct ip6_hbh), hbhlen, rtalertp, plenp) < 0) return(-1); *offp = off; *mp = m; return(0); } /* * Search header for all Hop-by-hop options and process each option. 
 * This function is separate from ip6_hopopts_input() in order to
 * handle a case where the sending node itself process its hop-by-hop
 * options header. In such a case, the function is called from ip6_output().
 *
 * The function assumes that hbh header is located right after the IPv6 header
 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
 * opthead + hbhlen is located in continuous memory region.
 */
int
ip6_process_hopopts(m, opthead, hbhlen, rtalertp, plenp)
	struct mbuf *m;
	u_int8_t *opthead;
	int hbhlen;
	u_int32_t *rtalertp;
	u_int32_t *plenp;
{
	struct ip6_hdr *ip6;
	int optlen = 0;
	u_int8_t *opt = opthead;
	u_int16_t rtalert_val;
	u_int32_t jumboplen;
	const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);

	/* walk the TLV-encoded options; optlen advances opt each iteration */
	for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
		switch (*opt) {
		case IP6OPT_PAD1:
			/* Pad1 is a single zero octet with no length field */
			optlen = 1;
			break;
		case IP6OPT_PADN:
			if (hbhlen < IP6OPT_MINLEN) {
				ip6stat.ip6s_toosmall++;
				goto bad;
			}
			optlen = *(opt + 1) + 2;
			break;
		case IP6OPT_RTALERT:
			/* XXX may need check for alignment */
			if (hbhlen < IP6OPT_RTALERT_LEN) {
				ip6stat.ip6s_toosmall++;
				goto bad;
			}
			if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
				/* XXX stat */
				icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 1 - opthead);
				return(-1);
			}
			optlen = IP6OPT_RTALERT_LEN;
			/* value may be unaligned in the packet; bcopy it out */
			bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
			*rtalertp = ntohs(rtalert_val);
			break;
		case IP6OPT_JUMBO:
			/* XXX may need check for alignment */
			if (hbhlen < IP6OPT_JUMBO_LEN) {
				ip6stat.ip6s_toosmall++;
				goto bad;
			}
			if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
				/* XXX stat */
				icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 1 - opthead);
				return(-1);
			}
			optlen = IP6OPT_JUMBO_LEN;

			/*
			 * IPv6 packets that have non 0 payload length
			 * must not contain a jumbo payload option.
			 */
			ip6 = mtod(m, struct ip6_hdr *);
			if (ip6->ip6_plen) {
				ip6stat.ip6s_badoptions++;
				icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt - opthead);
				return(-1);
			}

			/*
			 * We may see jumbolen in unaligned location, so
			 * we'd need to perform bcopy().
			 */
			bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
			jumboplen = (u_int32_t)htonl(jumboplen);

#if 1
			/*
			 * if there are multiple jumbo payload options,
			 * *plenp will be non-zero and the packet will be
			 * rejected.
			 * the behavior may need some debate in ipngwg -
			 * multiple options does not make sense, however,
			 * there's no explicit mention in specification.
			 */
			if (*plenp != 0) {
				ip6stat.ip6s_badoptions++;
				icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 2 - opthead);
				return(-1);
			}
#endif

			/*
			 * jumbo payload length must be larger than 65535.
			 */
			if (jumboplen <= IPV6_MAXPACKET) {
				ip6stat.ip6s_badoptions++;
				icmp6_error(m, ICMP6_PARAM_PROB,
					    ICMP6_PARAMPROB_HEADER,
					    erroff + opt + 2 - opthead);
				return(-1);
			}
			*plenp = jumboplen;

			break;
		default:		/* unknown option */
			if (hbhlen < IP6OPT_MINLEN) {
				ip6stat.ip6s_toosmall++;
				goto bad;
			}
			optlen = ip6_unknown_opt(opt, m,
			    erroff + opt - opthead);
			if (optlen == -1)
				return(-1);
			optlen += 2;
			break;
		}
	}

	return(0);

  bad:
	m_freem(m);
	return(-1);
}

/*
 * Unknown option processing.
 * The third argument `off' is the offset from the IPv6 header to the option,
 * which is necessary if the IPv6 header the and option header and IPv6 header
 * is not continuous in order to return an ICMPv6 error.
*/ int ip6_unknown_opt(optp, m, off) u_int8_t *optp; struct mbuf *m; int off; { struct ip6_hdr *ip6; switch (IP6OPT_TYPE(*optp)) { case IP6OPT_TYPE_SKIP: /* ignore the option */ return((int)*(optp + 1)); case IP6OPT_TYPE_DISCARD: /* silently discard */ m_freem(m); return(-1); case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */ ip6stat.ip6s_badoptions++; icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); return(-1); case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */ ip6stat.ip6s_badoptions++; ip6 = mtod(m, struct ip6_hdr *); if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || (m->m_flags & (M_BCAST|M_MCAST))) m_freem(m); else icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off); return(-1); } m_freem(m); /* XXX: NOTREACHED */ return(-1); } /* * Create the "control" list for this pcb. * The function will not modify mbuf chain at all. * * with KAME mbuf chain restriction: * The routine will be called from upper layer handlers like tcp6_input(). * Thus the routine assumes that the caller (tcp6_input) have already * called IP6_EXTHDR_CHECK() and all the extension headers are located in the * very first mbuf on the mbuf chain. */ void ip6_savecontrol(in6p, mp, ip6, m) struct inpcb *in6p; struct mbuf **mp; struct ip6_hdr *ip6; struct mbuf *m; { #if __FreeBSD_version >= 500000 struct thread *td = curthread; /* XXX */ #else struct proc *td = curproc; /* XXX */ #endif int privileged = 0; int rthdr_exist = 0; if (td && !suser(td)) privileged++; #ifdef SO_TIMESTAMP if ((in6p->in6p_socket->so_options & SO_TIMESTAMP) != 0) { struct timeval tv; microtime(&tv); *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), SCM_TIMESTAMP, SOL_SOCKET); if (*mp) { mp = &(*mp)->m_next; } } #endif /* RFC 2292 sec. 
5 */ if ((in6p->in6p_flags & IN6P_PKTINFO) != 0) { struct in6_pktinfo pi6; bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr)); if (IN6_IS_SCOPE_LINKLOCAL(&pi6.ipi6_addr)) pi6.ipi6_addr.s6_addr16[1] = 0; pi6.ipi6_ifindex = (m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0; *mp = sbcreatecontrol((caddr_t) &pi6, sizeof(struct in6_pktinfo), IPV6_PKTINFO, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } if ((in6p->in6p_flags & IN6P_HOPLIMIT) != 0) { int hlim = ip6->ip6_hlim & 0xff; *mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int), IPV6_HOPLIMIT, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; } /* * IPV6_HOPOPTS socket option. We require super-user privilege * for the option, but it might be too strict, since there might * be some hop-by-hop options which can be returned to normal user. * See RFC 2292 section 6. */ if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0 && privileged) { /* * Check if a hop-by-hop options header is contatined in the * received packet, and if so, store the options as ancillary * data. Note that a hop-by-hop options header must be * just after the IPv6 header, which fact is assured through * the IPv6 input processing. */ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); if (ip6->ip6_nxt == IPPROTO_HOPOPTS) { struct ip6_hbh *hbh; int hbhlen = 0; #ifdef PULLDOWN_TEST struct mbuf *ext; #endif #ifndef PULLDOWN_TEST hbh = (struct ip6_hbh *)(ip6 + 1); hbhlen = (hbh->ip6h_len + 1) << 3; #else ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr), ip6->ip6_nxt); if (ext == NULL) { ip6stat.ip6s_tooshort++; return; } hbh = mtod(ext, struct ip6_hbh *); hbhlen = (hbh->ip6h_len + 1) << 3; if (hbhlen != ext->m_len) { m_freem(ext); ip6stat.ip6s_tooshort++; return; } #endif /* * XXX: We copy whole the header even if a jumbo * payload option is included, which option is to * be removed before returning in the RFC 2292. * Note: this constraint is removed in 2292bis. 
*/ *mp = sbcreatecontrol((caddr_t)hbh, hbhlen, IPV6_HOPOPTS, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; #ifdef PULLDOWN_TEST m_freem(ext); #endif } } /* IPV6_DSTOPTS and IPV6_RTHDR socket options */ if ((in6p->in6p_flags & (IN6P_DSTOPTS | IN6P_RTHDRDSTOPTS)) != 0) { int proto, off, nxt; /* * go through the header chain to see if a routing header is * contained in the packet. We need this information to store * destination options headers (if any) properly. * XXX: performance issue. We should record this info when * processing extension headers in incoming routine. * (todo) use m_aux? */ proto = IPPROTO_IPV6; off = 0; nxt = -1; while (1) { int newoff; newoff = ip6_nexthdr(m, off, proto, &nxt); if (newoff < 0) break; if (newoff < off) /* invalid, check for safety */ break; if ((proto = nxt) == IPPROTO_ROUTING) { rthdr_exist = 1; break; } off = newoff; } } if ((in6p->in6p_flags & (IN6P_RTHDR | IN6P_DSTOPTS | IN6P_RTHDRDSTOPTS)) != 0) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr); /* * Search for destination options headers or routing * header(s) through the header chain, and stores each * header as ancillary data. * Note that the order of the headers remains in * the chain of ancillary data. */ while (1) { /* is explicit loop prevention necessary? */ struct ip6_ext *ip6e = NULL; int elen; #ifdef PULLDOWN_TEST struct mbuf *ext = NULL; #endif /* * if it is not an extension header, don't try to * pull it from the chain. */ switch (nxt) { case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? 
*/ break; default: goto loopend; } #ifndef PULLDOWN_TEST if (off + sizeof(*ip6e) > m->m_len) goto loopend; ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off); if (nxt == IPPROTO_AH) elen = (ip6e->ip6e_len + 2) << 2; else elen = (ip6e->ip6e_len + 1) << 3; if (off + elen > m->m_len) goto loopend; #else ext = ip6_pullexthdr(m, off, nxt); if (ext == NULL) { ip6stat.ip6s_tooshort++; return; } ip6e = mtod(ext, struct ip6_ext *); if (nxt == IPPROTO_AH) elen = (ip6e->ip6e_len + 2) << 2; else elen = (ip6e->ip6e_len + 1) << 3; if (elen != ext->m_len) { m_freem(ext); ip6stat.ip6s_tooshort++; return; } #endif switch (nxt) { case IPPROTO_DSTOPTS: if ((in6p->in6p_flags & IN6P_DSTOPTS) == 0) break; /* * We also require super-user privilege for * the option. * See the comments on IN6_HOPOPTS. */ if (!privileged) break; *mp = sbcreatecontrol((caddr_t)ip6e, elen, IPV6_DSTOPTS, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; break; case IPPROTO_ROUTING: if (!in6p->in6p_flags & IN6P_RTHDR) break; *mp = sbcreatecontrol((caddr_t)ip6e, elen, IPV6_RTHDR, IPPROTO_IPV6); if (*mp) mp = &(*mp)->m_next; break; case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? */ break; default: /* * other cases have been filtered in the above. * none will visit this case. here we supply * the code just in case (nxt overwritten or * other cases). */ #ifdef PULLDOWN_TEST m_freem(ext); #endif goto loopend; } /* proceed with the next header. */ off += elen; nxt = ip6e->ip6e_nxt; ip6e = NULL; #ifdef PULLDOWN_TEST m_freem(ext); ext = NULL; #endif } loopend: ; } } #ifdef PULLDOWN_TEST /* * pull single extension header from mbuf chain. returns single mbuf that * contains the result, or NULL on error. */ static struct mbuf * ip6_pullexthdr(m, off, nxt) struct mbuf *m; size_t off; int nxt; { struct ip6_ext ip6e; size_t elen; struct mbuf *n; #ifdef DIAGNOSTIC switch (nxt) { case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: case IPPROTO_AH: /* is it possible? 
 */
		break;
	default:
		printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
	}
#endif

	/* Read the generic extension-header prefix to learn its length. */
	m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
	if (nxt == IPPROTO_AH)
		elen = (ip6e.ip6e_len + 2) << 2;	/* AH length unit is 4 bytes */
	else
		elen = (ip6e.ip6e_len + 1) << 3;	/* others: 8-byte units */

	/* Grab an mbuf; upgrade to a cluster if the header won't fit in MLEN. */
	MGET(n, M_DONTWAIT, MT_DATA);
	if (n && elen >= MLEN) {
		MCLGET(n, M_DONTWAIT);
		if ((n->m_flags & M_EXT) == 0) {
			m_free(n);
			n = NULL;
		}
	}
	if (!n)
		return NULL;

	n->m_len = 0;
	/*
	 * NOTE(review): ">=" rejects a header that would fit exactly into
	 * the trailing space; ">" would suffice.  Conservative, not a bug —
	 * confirm before changing.
	 */
	if (elen >= M_TRAILINGSPACE(n)) {
		m_free(n);
		return NULL;
	}

	/* Copy the whole extension header into the single fresh mbuf. */
	m_copydata(m, off, elen, mtod(n, caddr_t));
	n->m_len = elen;
	return n;
}
#endif

/*
 * Get pointer to the previous header followed by the header
 * currently processed.
 * XXX: This function supposes that
 *	M includes all headers,
 *	the next header field and the header length field of each header
 *	are valid, and
 *	the sum of each header length equals to OFF.
 * Because of these assumptions, this function must be called very
 * carefully.  Moreover, it will not be used in the near future when
 * we develop `neater' mechanism to process extension headers.
 */
char *
ip6_get_prevhdr(m, off)
	struct mbuf *m;
	int off;
{
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);

	/*
	 * If OFF is the end of the fixed IPv6 header, the "previous" next
	 * header field is the one inside the IPv6 header itself.
	 * NOTE(review): returns &ip6->ip6_nxt (u_int8_t *) through a char *
	 * return type — relies on an implicit pointer conversion; most
	 * compilers only warn.  Verify callers treat it as a byte pointer.
	 */
	if (off == sizeof(struct ip6_hdr))
		return(&ip6->ip6_nxt);
	else {
		int len, nxt;
		struct ip6_ext *ip6e = NULL;

		/*
		 * Walk the extension-header chain, accumulating each
		 * header's length, until we arrive at OFF; ip6e then
		 * points at the header immediately preceding OFF.
		 */
		nxt = ip6->ip6_nxt;
		len = sizeof(struct ip6_hdr);
		while (len < off) {
			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + len);

			switch (nxt) {
			case IPPROTO_FRAGMENT:
				/* fragment header has a fixed size */
				len += sizeof(struct ip6_frag);
				break;
			case IPPROTO_AH:
				/* AH length field counts 4-byte units */
				len += (ip6e->ip6e_len + 2) << 2;
				break;
			default:
				/* generic extension headers: 8-byte units */
				len += (ip6e->ip6e_len + 1) << 3;
				break;
			}
			nxt = ip6e->ip6e_nxt;
		}
		if (ip6e)
			return(&ip6e->ip6e_nxt);
		else
			return NULL;	/* off <= sizeof(ip6_hdr): nothing walked */
	}
}

/*
 * get next header offset.  m will be retained.
 */
int
ip6_nexthdr(m, off, proto, nxtp)
	struct mbuf *m;
	int off;
	int proto;
	int *nxtp;
{
	struct ip6_hdr ip6;
	struct ip6_ext ip6e;
	struct ip6_frag fh;

	/* just in case */
	if (m == NULL)
		panic("ip6_nexthdr: m == NULL");
	if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off)
		return -1;

	/*
	 * Parse the header identified by PROTO at offset OFF; on success
	 * return the offset just past it and store its next-header value
	 * in *nxtp (if nxtp is non-NULL).  Return -1 when the chain cannot
	 * be parsed further.  Each case bounds-checks against the packet
	 * length before copying header bytes out of the chain.
	 */
	switch (proto) {
	case IPPROTO_IPV6:
		if (m->m_pkthdr.len < off + sizeof(ip6))
			return -1;
		m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
		if (nxtp)
			*nxtp = ip6.ip6_nxt;
		off += sizeof(ip6);
		return off;

	case IPPROTO_FRAGMENT:
		/*
		 * terminate parsing if it is not the first fragment,
		 * it does not make sense to parse through it.
		 */
		if (m->m_pkthdr.len < off + sizeof(fh))
			return -1;
		m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
		/* IP6F_OFF_MASK == 0 means this is the first fragment */
		if ((ntohs(fh.ip6f_offlg) & IP6F_OFF_MASK) != 0)
			return -1;
		if (nxtp)
			*nxtp = fh.ip6f_nxt;
		off += sizeof(struct ip6_frag);
		return off;

	case IPPROTO_AH:
		if (m->m_pkthdr.len < off + sizeof(ip6e))
			return -1;
		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
		if (nxtp)
			*nxtp = ip6e.ip6e_nxt;
		/* AH length field counts 4-byte units, excluding first 2 */
		off += (ip6e.ip6e_len + 2) << 2;
		return off;

	case IPPROTO_HOPOPTS:
	case IPPROTO_ROUTING:
	case IPPROTO_DSTOPTS:
		if (m->m_pkthdr.len < off + sizeof(ip6e))
			return -1;
		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
		if (nxtp)
			*nxtp = ip6e.ip6e_nxt;
		/* generic extension headers: 8-byte units */
		off += (ip6e.ip6e_len + 1) << 3;
		return off;

	case IPPROTO_NONE:
	case IPPROTO_ESP:
	case IPPROTO_IPCOMP:
		/* give up */
		return -1;

	default:
		/* an upper-layer protocol: nothing more to walk */
		return -1;
	}

	return -1;	/* not reached */
}

/*
 * get offset for the last header in the chain.  m will be kept untainted.
 */
int
ip6_lasthdr(m, off, proto, nxtp)
	struct mbuf *m;
	int off;
	int proto;
	int *nxtp;
{
	int newoff;
	int nxt;

	/* caller may not care about the last next-header value */
	if (!nxtp) {
		nxt = -1;
		nxtp = &nxt;
	}
	/*
	 * Repeatedly advance with ip6_nexthdr() until it refuses to go
	 * further; OFF then points at the last (upper-layer) header.
	 * Decreasing or stalled offsets are treated as malformed input.
	 */
	while (1) {
		newoff = ip6_nexthdr(m, off, proto, nxtp);
		if (newoff < 0)
			return off;
		else if (newoff < off)
			return -1;	/* invalid */
		else if (newoff == off)
			return newoff;

		off = newoff;
		proto = *nxtp;
	}
}

/*
 * Attach (or reuse) the IPv6-input aux tag on mbuf M and return a pointer
 * to its zeroed payload, or NULL if tag allocation failed.
 */
struct ip6aux *
ip6_addaux(m)
	struct mbuf *m;
{
	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);

	if (!tag) {
		tag = m_tag_get(PACKET_TAG_IPV6_INPUT,
				sizeof (struct ip6aux),
				M_DONTWAIT);
		if (tag)
			m_tag_prepend(m, tag);
	}
	/* payload lives immediately after the tag header; reset it */
	if (tag)
		bzero(tag+1, sizeof (struct ip6aux));
	return tag ? (struct ip6aux*)(tag+1) : NULL;
}

/* Return M's IPv6-input aux payload if present, else NULL (no allocation). */
struct ip6aux *
ip6_findaux(m)
	struct mbuf *m;
{
	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);

	return tag ? (struct ip6aux*)(tag+1) : NULL;
}

/* Remove M's IPv6-input aux tag, if any. */
void
ip6_delaux(m)
	struct mbuf *m;
{
	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);

	if (tag)
		m_tag_delete(m, tag);
}

/*
 * System control for IP6
 */

u_char	inet6ctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		0,		0,
	ENOPROTOOPT
};
Index: head/sys/netinet6/ip6_output.c
===================================================================
--- head/sys/netinet6/ip6_output.c	(revision 108465)
+++ head/sys/netinet6/ip6_output.c	(revision 108466)
@@ -1,2616 +1,2616 @@
/*	$FreeBSD$	*/
/*	$KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $	*/

/*
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 */ #include "opt_ip6fw.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_pfil_hooks.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PFIL_HOOKS #include #endif #include #include #include #include #include #include #include #include #ifdef IPSEC #include #ifdef INET6 #include #endif #include #endif /* IPSEC */ #ifdef FAST_IPSEC #include #include #include #endif /* FAST_IPSEC */ #include #include #include static MALLOC_DEFINE(M_IPMOPTS, "ip6_moptions", "internet multicast options"); struct ip6_exthdrs { struct mbuf *ip6e_ip6; struct mbuf *ip6e_hbh; struct mbuf *ip6e_dest1; struct mbuf *ip6e_rthdr; struct mbuf *ip6e_dest2; }; static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *, struct socket *, struct sockopt *sopt)); static int ip6_setmoptions __P((int, struct ip6_moptions **, struct mbuf *)); static int ip6_getmoptions __P((int, struct ip6_moptions *, struct mbuf **)); static int ip6_copyexthdr __P((struct mbuf **, caddr_t, int)); static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int, struct ip6_frag **)); static int ip6_insert_jumboopt __P((struct ip6_exthdrs *, u_int32_t)); static int ip6_splithdr __P((struct mbuf *, struct ip6_exthdrs *)); /* * IP6 output. The packet in mbuf chain m contains a skeletal IP6 * header (with pri, len, nxt, hlim, src, dst). * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one, * which is rt_rmx.rmx_mtu. 
*/ int ip6_output(m0, opt, ro, flags, im6o, ifpp, inp) struct mbuf *m0; struct ip6_pktopts *opt; struct route_in6 *ro; int flags; struct ip6_moptions *im6o; struct ifnet **ifpp; /* XXX: just for statistics */ struct inpcb *inp; { struct ip6_hdr *ip6, *mhip6; struct ifnet *ifp, *origifp; struct mbuf *m = m0; int hlen, tlen, len, off; struct route_in6 ip6route; struct sockaddr_in6 *dst; int error = 0; struct in6_ifaddr *ia = NULL; u_long mtu; u_int32_t optlen = 0, plen = 0, unfragpartlen = 0; struct ip6_exthdrs exthdrs; struct in6_addr finaldst; struct route_in6 *ro_pmtu = NULL; int hdrsplit = 0; int needipsec = 0; #ifdef PFIL_HOOKS struct packet_filter_hook *pfh; struct mbuf *m1; int rv; #endif /* PFIL_HOOKS */ #ifdef IPSEC int needipsectun = 0; struct secpolicy *sp = NULL; struct socket *so = inp ? inp->inp_socket : NULL; ip6 = mtod(m, struct ip6_hdr *); #endif /* IPSEC */ #ifdef FAST_IPSEC int needipsectun = 0; struct secpolicy *sp = NULL; ip6 = mtod(m, struct ip6_hdr *); #endif /* FAST_IPSEC */ #define MAKE_EXTHDR(hp, mp) \ do { \ if (hp) { \ struct ip6_ext *eh = (struct ip6_ext *)(hp); \ error = ip6_copyexthdr((mp), (caddr_t)(hp), \ ((eh)->ip6e_len + 1) << 3); \ if (error) \ goto freehdrs; \ } \ } while (0) bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh); /* Destination options header(1st part) */ MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1); /* Routing header */ MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr); /* Destination options header(2nd part) */ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } #ifdef IPSEC /* get a security policy for this packet */ if (so == NULL) sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error); else sp = ipsec6_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error); if (sp == NULL) { ipsec6stat.out_inval++; goto freehdrs; } error = 0; /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: /* * This packet is just discarded. 
*/ ipsec6stat.out_polvio++; goto freehdrs; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ needipsec = 0; break; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* acquire a policy */ error = key_spdacquire(sp); goto freehdrs; } needipsec = 1; break; case IPSEC_POLICY_ENTRUST: default: printf("ip6_output: Invalid policy found. %d\n", sp->policy); } #endif /* IPSEC */ #ifdef FAST_IPSEC /* get a security policy for this packet */ if (inp == NULL) sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, 0, &error); else sp = ipsec_getpolicybysock(m, IPSEC_DIR_OUTBOUND, inp, &error); if (sp == NULL) { newipsecstat.ips_out_inval++; goto freehdrs; } error = 0; /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: /* * This packet is just discarded. */ newipsecstat.ips_out_polvio++; goto freehdrs; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: /* no need to do IPsec. */ needipsec = 0; break; case IPSEC_POLICY_IPSEC: if (sp->req == NULL) { /* acquire a policy */ error = key_spdacquire(sp); goto freehdrs; } needipsec = 1; break; case IPSEC_POLICY_ENTRUST: default: printf("ip6_output: Invalid policy found. %d\n", sp->policy); } #endif /* FAST_IPSEC */ /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ optlen = 0; if (exthdrs.ip6e_hbh) optlen += exthdrs.ip6e_hbh->m_len; if (exthdrs.ip6e_dest1) optlen += exthdrs.ip6e_dest1->m_len; if (exthdrs.ip6e_rthdr) optlen += exthdrs.ip6e_rthdr->m_len; unfragpartlen = optlen + sizeof(struct ip6_hdr); /* NOTE: we don't add AH/ESP length here. do that later. */ if (exthdrs.ip6e_dest2) optlen += exthdrs.ip6e_dest2->m_len; /* * If we need IPsec, or there is at least one extension header, * separate IP6 header from the payload. 
*/ if ((needipsec || optlen) && !hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); /* adjust mbuf packet header length */ m->m_pkthdr.len += optlen; plen = m->m_pkthdr.len - sizeof(*ip6); /* If this is a jumbo payload, insert a jumbo payload option. */ if (plen > IPV6_MAXPACKET) { if (!hdrsplit) { if ((error = ip6_splithdr(m, &exthdrs)) != 0) { m = NULL; goto freehdrs; } m = exthdrs.ip6e_ip6; hdrsplit++; } /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0) goto freehdrs; ip6->ip6_plen = 0; } else ip6->ip6_plen = htons(plen); /* * Concatenate headers and fill in next header fields. * Here we have, on "m" * IPv6 payload * and we insert headers accordingly. Finally, we should be getting: * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload] * * during the header composing process, "m" points to IPv6 header. * "mprev" points to an extension header prior to esp. */ { u_char *nexthdrp = &ip6->ip6_nxt; struct mbuf *mprev = m; /* * we treat dest2 specially. this makes IPsec processing * much easier. the goal here is to make mprev point the * mbuf prior to dest2. * * result: IPv6 dest2 payload * m and mprev will point to IPv6 header. */ if (exthdrs.ip6e_dest2) { if (!hdrsplit) panic("assumption failed: hdr not split"); exthdrs.ip6e_dest2->m_next = m->m_next; m->m_next = exthdrs.ip6e_dest2; *mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_DSTOPTS; } #define MAKE_CHAIN(m, mp, p, i)\ do {\ if (m) {\ if (!hdrsplit) \ panic("assumption failed: hdr not split"); \ *mtod((m), u_char *) = *(p);\ *(p) = (i);\ p = mtod((m), u_char *);\ (m)->m_next = (mp)->m_next;\ (mp)->m_next = (m);\ (mp) = (m);\ }\ } while (0) /* * result: IPv6 hbh dest1 rthdr dest2 payload * m will point to IPv6 header. mprev will point to the * extension header prior to dest2 (rthdr in the above case). 
*/ MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS); MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp, IPPROTO_DSTOPTS); MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp, IPPROTO_ROUTING); #if defined(IPSEC) || defined(FAST_IPSEC) if (!needipsec) goto skip_ipsec2; /* * pointers after IPsec headers are not valid any more. * other pointers need a great care too. * (IPsec routines should not mangle mbufs prior to AH/ESP) */ exthdrs.ip6e_dest2 = NULL; { struct ip6_rthdr *rh = NULL; int segleft_org = 0; struct ipsec_output_state state; if (exthdrs.ip6e_rthdr) { rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *); segleft_org = rh->ip6r_segleft; rh->ip6r_segleft = 0; } bzero(&state, sizeof(state)); state.m = m; error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags, &needipsectun); m = state.m; if (error) { /* mbuf is already reclaimed in ipsec6_output_trans. */ m = NULL; switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip6_output (ipsec): error code %d\n", error); /* fall through */ case ENOENT: /* don't show these error codes to the user */ error = 0; break; } goto bad; } if (exthdrs.ip6e_rthdr) { /* ah6_output doesn't modify mbuf chain */ rh->ip6r_segleft = segleft_org; } } skip_ipsec2:; #endif } /* * If there is a routing header, replace destination address field * with the first hop of the routing header. */ if (exthdrs.ip6e_rthdr) { struct ip6_rthdr *rh = (struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *)); struct ip6_rthdr0 *rh0; finaldst = ip6->ip6_dst; switch (rh->ip6r_type) { case IPV6_RTHDR_TYPE_0: rh0 = (struct ip6_rthdr0 *)rh; ip6->ip6_dst = rh0->ip6r0_addr[0]; bcopy((caddr_t)&rh0->ip6r0_addr[1], (caddr_t)&rh0->ip6r0_addr[0], sizeof(struct in6_addr)*(rh0->ip6r0_segleft - 1) ); rh0->ip6r0_addr[rh0->ip6r0_segleft - 1] = finaldst; break; default: /* is it possible? 
*/ error = EINVAL; goto bad; } } /* Source address validation */ if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && (flags & IPV6_DADOUTPUT) == 0) { error = EOPNOTSUPP; ip6stat.ip6s_badscope++; goto bad; } if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) { error = EOPNOTSUPP; ip6stat.ip6s_badscope++; goto bad; } ip6stat.ip6s_localout++; /* * Route packet. */ if (ro == 0) { ro = &ip6route; bzero((caddr_t)ro, sizeof(*ro)); } ro_pmtu = ro; if (opt && opt->ip6po_rthdr) ro = &opt->ip6po_route; dst = (struct sockaddr_in6 *)&ro->ro_dst; /* * If there is a cached route, * check that it is to the same destination * and is still up. If not, free it and try again. */ if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || dst->sin6_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&dst->sin6_addr, &ip6->ip6_dst))) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == 0) { bzero(dst, sizeof(*dst)); dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_addr = ip6->ip6_dst; #ifdef SCOPEDROUTING /* XXX: sin6_scope_id should already be fixed at this point */ if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) dst->sin6_scope_id = ntohs(dst->sin6_addr.s6_addr16[1]); #endif } #if defined(IPSEC) || defined(FAST_IPSEC) if (needipsec && needipsectun) { struct ipsec_output_state state; /* * All the extension headers will become inaccessible * (since they can be encrypted). * Don't panic, we need no more updates to extension headers * on inner IPv6 packet (since they are now encapsulated). * * IPv6 [ESP|AH] IPv6 [extension headers] payload */ bzero(&exthdrs, sizeof(exthdrs)); exthdrs.ip6e_ip6 = m; bzero(&state, sizeof(state)); state.m = m; state.ro = (struct route *)ro; state.dst = (struct sockaddr *)dst; error = ipsec6_output_tunnel(&state, sp, flags); m = state.m; ro = (struct route_in6 *)state.ro; dst = (struct sockaddr_in6 *)state.dst; if (error) { /* mbuf is already reclaimed in ipsec6_output_tunnel. 
*/ m0 = m = NULL; m = NULL; switch (error) { case EHOSTUNREACH: case ENETUNREACH: case EMSGSIZE: case ENOBUFS: case ENOMEM: break; default: printf("ip6_output (ipsec): error code %d\n", error); /* fall through */ case ENOENT: /* don't show these error codes to the user */ error = 0; break; } goto bad; } exthdrs.ip6e_ip6 = m; } #endif /* IPSEC */ if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { /* Unicast */ #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) /* xxx * interface selection comes here * if an interface is specified from an upper layer, * ifp must point it. */ if (ro->ro_rt == 0) { /* * non-bsdi always clone routes, if parent is * PRF_CLONING. */ rtalloc((struct route *)ro); } if (ro->ro_rt == 0) { ip6stat.ip6s_noroute++; error = EHOSTUNREACH; /* XXX in6_ifstat_inc(ifp, ifs6_out_discard); */ goto bad; } ia = ifatoia6(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in6 *)ro->ro_rt->rt_gateway; m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ in6_ifstat_inc(ifp, ifs6_out_request); /* * Check if the outgoing interface conflicts with * the interface specified by ifi6_ifindex (if specified). * Note that loopback interface is always okay. * (this may happen when we are sending a packet to one of * our own addresses.) 
*/ if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex) { if (!(ifp->if_flags & IFF_LOOPBACK) && ifp->if_index != opt->ip6po_pktinfo->ipi6_ifindex) { ip6stat.ip6s_noroute++; in6_ifstat_inc(ifp, ifs6_out_discard); error = EHOSTUNREACH; goto bad; } } if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; } else { /* Multicast */ struct in6_multi *in6m; m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST; /* * See if the caller provided any multicast options */ ifp = NULL; if (im6o != NULL) { ip6->ip6_hlim = im6o->im6o_multicast_hlim; if (im6o->im6o_multicast_ifp != NULL) ifp = im6o->im6o_multicast_ifp; } else ip6->ip6_hlim = ip6_defmcasthlim; /* * See if the caller provided the outgoing interface * as an ancillary data. * Boundary check for ifindex is assumed to be already done. */ if (opt && opt->ip6po_pktinfo && opt->ip6po_pktinfo->ipi6_ifindex) ifp = ifnet_byindex(opt->ip6po_pktinfo->ipi6_ifindex); /* * If the destination is a node-local scope multicast, * the packet should be loop-backed only. */ if (IN6_IS_ADDR_MC_NODELOCAL(&ip6->ip6_dst)) { /* * If the outgoing interface is already specified, * it should be a loopback interface. */ if (ifp && (ifp->if_flags & IFF_LOOPBACK) == 0) { ip6stat.ip6s_badscope++; error = ENETUNREACH; /* XXX: better error? */ /* XXX correct ifp? */ in6_ifstat_inc(ifp, ifs6_out_discard); goto bad; } else { ifp = &loif[0]; } } if (opt && opt->ip6po_hlim != -1) ip6->ip6_hlim = opt->ip6po_hlim & 0xff; /* * If caller did not provide an interface lookup a * default in the routing table. This is either a * default for the speicfied group (i.e. a host * route), or a multicast default (a route for the * ``net'' ff00::/8). 
*/ if (ifp == NULL) { if (ro->ro_rt == 0) { ro->ro_rt = rtalloc1((struct sockaddr *) &ro->ro_dst, 0, 0UL); } if (ro->ro_rt == 0) { ip6stat.ip6s_noroute++; error = EHOSTUNREACH; /* XXX in6_ifstat_inc(ifp, ifs6_out_discard) */ goto bad; } ia = ifatoia6(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; } if ((flags & IPV6_FORWARDING) == 0) in6_ifstat_inc(ifp, ifs6_out_request); in6_ifstat_inc(ifp, ifs6_out_mcast); /* * Confirm that the outgoing interface supports multicast. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { ip6stat.ip6s_noroute++; in6_ifstat_inc(ifp, ifs6_out_discard); error = ENETUNREACH; goto bad; } IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m); if (in6m != NULL && (im6o == NULL || im6o->im6o_multicast_loop)) { /* * If we belong to the destination multicast group * on the outgoing interface, and the caller did not * forbid loopback, loop back a copy. */ ip6_mloopback(ifp, m, dst); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IPV6_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip6_mloopback(), * above, will be forwarded by the ip6_input() routine, * if necessary. */ if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) { if (ip6_mforward(ip6, ifp, m) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a hoplimit of zero may be looped back, * above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip6_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK)) { m_freem(m); goto done; } } /* * Fill the outgoing inteface to tell the upper layer * to increment per-interface statistics. 
*/ if (ifpp) *ifpp = ifp; /* * Determine path MTU. */ if (ro_pmtu != ro) { /* The first hop and the final destination may differ. */ struct sockaddr_in6 *sin6_fin = (struct sockaddr_in6 *)&ro_pmtu->ro_dst; if (ro_pmtu->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&sin6_fin->sin6_addr, &finaldst))) { RTFREE(ro_pmtu->ro_rt); ro_pmtu->ro_rt = (struct rtentry *)0; } if (ro_pmtu->ro_rt == 0) { bzero(sin6_fin, sizeof(*sin6_fin)); sin6_fin->sin6_family = AF_INET6; sin6_fin->sin6_len = sizeof(struct sockaddr_in6); sin6_fin->sin6_addr = finaldst; rtalloc((struct route *)ro_pmtu); } } if (ro_pmtu->ro_rt != NULL) { u_int32_t ifmtu = nd_ifinfo[ifp->if_index].linkmtu; mtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu; if (mtu > ifmtu || mtu == 0) { /* * The MTU on the route is larger than the MTU on * the interface! This shouldn't happen, unless the * MTU of the interface has been changed after the * interface was brought up. Change the MTU in the * route to match the interface MTU (as long as the * field isn't locked). * * if MTU on the route is 0, we need to fix the MTU. * this case happens with path MTU discovery timeouts. */ mtu = ifmtu; if ((ro_pmtu->ro_rt->rt_rmx.rmx_locks & RTV_MTU) == 0) ro_pmtu->ro_rt->rt_rmx.rmx_mtu = mtu; /* XXX */ } } else { mtu = nd_ifinfo[ifp->if_index].linkmtu; } /* * advanced API (IPV6_USE_MIN_MTU) overrides mtu setting */ if ((flags & IPV6_MINMTU) != 0 && mtu > IPV6_MMTU) mtu = IPV6_MMTU; /* Fake scoped addresses */ if ((ifp->if_flags & IFF_LOOPBACK) != 0) { /* * If source or destination address is a scoped address, and * the packet is going to be sent to a loopback interface, * we should keep the original interface. */ /* * XXX: this is a very experimental and temporary solution. * We eventually have sockaddr_in6 and use the sin6_scope_id * field of the structure here. * We rely on the consistency between two scope zone ids * of source and destination, which should already be assured. 
* Larger scopes than link will be supported in the future. */ origifp = NULL; if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) origifp = ifnet_byindex(ntohs(ip6->ip6_src.s6_addr16[1])); else if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) origifp = ifnet_byindex(ntohs(ip6->ip6_dst.s6_addr16[1])); /* * XXX: origifp can be NULL even in those two cases above. * For example, if we remove the (only) link-local address * from the loopback interface, and try to send a link-local * address without link-id information. Then the source * address is ::1, and the destination address is the * link-local address with its s6_addr16[1] being zero. * What is worse, if the packet goes to the loopback interface * by a default rejected route, the null pointer would be * passed to looutput, and the kernel would hang. * The following last resort would prevent such disaster. */ if (origifp == NULL) origifp = ifp; } else origifp = ifp; #ifndef SCOPEDROUTING /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); #endif /* * Check with the firewall... */ if (ip6_fw_enable && ip6_fw_chk_ptr) { u_short port = 0; m->m_pkthdr.rcvif = NULL; /* XXX */ /* If ipfw says divert, we have to just drop packet */ if ((*ip6_fw_chk_ptr)(&ip6, ifp, &port, &m)) { m_freem(m); goto done; } if (!m) { error = EACCES; goto done; } } /* * If the outgoing packet contains a hop-by-hop options header, * it must be examined and processed even by the source node. * (RFC 2460, section 4.) 
*/ if (exthdrs.ip6e_hbh) { struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *); u_int32_t dummy1; /* XXX unused */ u_int32_t dummy2; /* XXX unused */ #ifdef DIAGNOSTIC if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len) panic("ip6e_hbh is not continuous"); #endif /* * XXX: if we have to send an ICMPv6 error to the sender, * we need the M_LOOP flag since icmp6_error() expects * the IPv6 and the hop-by-hop options header are * continuous unless the flag is set. */ m->m_flags |= M_LOOP; m->m_pkthdr.rcvif = ifp; if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1), ((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh), &dummy1, &dummy2) < 0) { /* m was already freed at this point */ error = EINVAL;/* better error? */ goto done; } m->m_flags &= ~M_LOOP; /* XXX */ m->m_pkthdr.rcvif = NULL; } #ifdef PFIL_HOOKS /* * Run through list of hooks for output packets. */ m1 = m; pfh = pfil_hook_get(PFIL_OUT, &inet6sw[ip6_protox[IPPROTO_IPV6]].pr_pfh); for (; pfh; pfh = pfh->pfil_link.tqe_next) if (pfh->pfil_func) { rv = pfh->pfil_func(ip6, sizeof(*ip6), ifp, 1, &m1); if (rv) { error = EHOSTUNREACH; goto done; } m = m1; if (m == NULL) goto done; ip6 = mtod(m, struct ip6_hdr *); } #endif /* PFIL_HOOKS */ /* * Send the packet to the outgoing interface. * If necessary, do IPv6 fragmentation before sending. */ tlen = m->m_pkthdr.len; if (tlen <= mtu #ifdef notyet /* * On any link that cannot convey a 1280-octet packet in one piece, * link-specific fragmentation and reassembly must be provided at * a layer below IPv6. [RFC 2460, sec.5] * Thus if the interface has ability of link-level fragmentation, * we can just send the packet even if the packet size is * larger than the link's MTU. * XXX: IFF_FRAGMENTABLE (or such) flag has not been defined yet... */ || ifp->if_flags & IFF_FRAGMENTABLE #endif ) { /* Record statistics for this interface address. 
*/ if (ia && !(flags & IPV6_FORWARDING)) { ia->ia_ifa.if_opackets++; ia->ia_ifa.if_obytes += m->m_pkthdr.len; } #ifdef IPSEC /* clean ipsec history once it goes out of the node */ ipsec_delaux(m); #endif error = nd6_output(ifp, origifp, m, dst, ro->ro_rt); goto done; } else if (mtu < IPV6_MMTU) { /* * note that path MTU is never less than IPV6_MMTU * (see icmp6_input). */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else if (ip6->ip6_plen == 0) { /* jumbo payload cannot be fragmented */ error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } else { struct mbuf **mnext, *m_frgpart; struct ip6_frag *ip6f; u_int32_t id = htonl(ip6_id++); u_char nextproto; /* * Too large for the destination or interface; * fragment if possible. * Must be able to put at least 8 bytes per fragment. */ hlen = unfragpartlen; if (mtu > IPV6_MAXPACKET) mtu = IPV6_MAXPACKET; len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7; if (len < 8) { error = EMSGSIZE; in6_ifstat_inc(ifp, ifs6_out_fragfail); goto bad; } mnext = &m->m_nextpkt; /* * Change the next header field of the last header in the * unfragmentable part. */ if (exthdrs.ip6e_rthdr) { nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *); *mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_dest1) { nextproto = *mtod(exthdrs.ip6e_dest1, u_char *); *mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT; } else if (exthdrs.ip6e_hbh) { nextproto = *mtod(exthdrs.ip6e_hbh, u_char *); *mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT; } else { nextproto = ip6->ip6_nxt; ip6->ip6_nxt = IPPROTO_FRAGMENT; } /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto * chain. 
*/ m0 = m; for (off = hlen; off < tlen; off += len) { MGETHDR(m, M_DONTWAIT, MT_HEADER); if (!m) { error = ENOBUFS; ip6stat.ip6s_odropped++; goto sendorfree; } m->m_pkthdr.rcvif = NULL; m->m_flags = m0->m_flags & M_COPYFLAGS; *mnext = m; mnext = &m->m_nextpkt; m->m_data += max_linkhdr; mhip6 = mtod(m, struct ip6_hdr *); *mhip6 = *ip6; m->m_len = sizeof(*mhip6); error = ip6_insertfraghdr(m0, m, hlen, &ip6f); if (error) { ip6stat.ip6s_odropped++; goto sendorfree; } ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7)); if (off + len >= tlen) len = tlen - off; else ip6f->ip6f_offlg |= IP6F_MORE_FRAG; mhip6->ip6_plen = htons((u_short)(len + hlen + sizeof(*ip6f) - sizeof(struct ip6_hdr))); if ((m_frgpart = m_copy(m0, off, len)) == 0) { error = ENOBUFS; ip6stat.ip6s_odropped++; goto sendorfree; } m_cat(m, m_frgpart); m->m_pkthdr.len = len + hlen + sizeof(*ip6f); m->m_pkthdr.rcvif = (struct ifnet *)0; ip6f->ip6f_reserved = 0; ip6f->ip6f_ident = id; ip6f->ip6f_nxt = nextproto; ip6stat.ip6s_ofragments++; in6_ifstat_inc(ifp, ifs6_out_fragcreat); } in6_ifstat_inc(ifp, ifs6_out_fragok); } /* * Remove leading garbages. */ sendorfree: m = m0->m_nextpkt; m0->m_nextpkt = 0; m_freem(m0); for (m0 = m; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) { /* Record statistics for this interface address. 
*/
			/* Account the fragment to the source address, then
			 * hand it to ND6 for link-layer resolution/output. */
			if (ia) {
				ia->ia_ifa.if_opackets++;
				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
			}
#ifdef IPSEC
			/* clean ipsec history once it goes out of the node */
			ipsec_delaux(m);
#endif
			error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
		} else
			m_freem(m);	/* an earlier fragment failed; drop the rest */
	}

	if (error == 0)
		ip6stat.ip6s_fragmented++;

done:
	/* Release any route we allocated locally (never a caller-supplied one). */
	if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
		RTFREE(ro->ro_rt);
	} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
		RTFREE(ro_pmtu->ro_rt);
	}
#ifdef IPSEC
	if (sp != NULL)
		key_freesp(sp);
#endif /* IPSEC */
#ifdef FAST_IPSEC
	if (sp != NULL)
		KEY_FREESP(&sp);
#endif /* FAST_IPSEC */
	return(error);

freehdrs:
	/* Error before the extension headers were linked into the packet:
	 * free each one individually, then fall into the generic drop path. */
	m_freem(exthdrs.ip6e_hbh);	/* m_freem will check if mbuf is 0 */
	m_freem(exthdrs.ip6e_dest1);
	m_freem(exthdrs.ip6e_rthdr);
	m_freem(exthdrs.ip6e_dest2);
	/* fall through */
bad:
	m_freem(m);
	goto done;
}

/*
 * Copy hlen bytes of header at hdr into a freshly allocated mbuf
 * (a cluster if hlen > MLEN) and return it through *mp.
 * A NULL hdr allocates the mbuf without filling it.
 * Returns 0 on success, ENOBUFS on allocation failure or hlen > MCLBYTES.
 */
static int
ip6_copyexthdr(mp, hdr, hlen)
	struct mbuf **mp;
	caddr_t hdr;
	int hlen;
{
	struct mbuf *m;

	if (hlen > MCLBYTES)
		return(ENOBUFS); /* XXX */

	MGET(m, M_DONTWAIT, MT_DATA);
	if (!m)
		return(ENOBUFS);

	if (hlen > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			return(ENOBUFS);
		}
	}
	m->m_len = hlen;
	if (hdr)
		bcopy(hdr, mtod(m, caddr_t), hlen);

	*mp = m;
	return(0);
}

/*
 * Insert jumbo payload option (RFC 2675 style, per the IP6OPT_JUMBO code
 * point) carrying the 32-bit payload length plen into the outgoing
 * hop-by-hop options header in exthdrs, creating or growing that header
 * as needed.  Returns 0 on success, ENOBUFS on allocation failure.
 */
static int
ip6_insert_jumboopt(exthdrs, plen)
	struct ip6_exthdrs *exthdrs;
	u_int32_t plen;
{
	struct mbuf *mopt;
	u_char *optbuf;
	u_int32_t v;

#define JUMBOOPTLEN	8	/* length of jumbo payload option and padding */

	/*
	 * If there is no hop-by-hop options header, allocate new one.
	 * If there is one but it doesn't have enough space to store the
	 * jumbo payload option, allocate a cluster to store the whole options.
	 * Otherwise, use it to store the options.
	 */
	if (exthdrs->ip6e_hbh == 0) {
		MGET(mopt, M_DONTWAIT, MT_DATA);
		if (mopt == 0)
			return(ENOBUFS);
		mopt->m_len = JUMBOOPTLEN;
		optbuf = mtod(mopt, u_char *);
		optbuf[1] = 0;	/* = ((JUMBOOPTLEN) >> 3) - 1 */
		exthdrs->ip6e_hbh = mopt;
	} else {
		struct ip6_hbh *hbh;

		mopt = exthdrs->ip6e_hbh;
		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
			/*
			 * XXX assumption:
			 * - exthdrs->ip6e_hbh is not referenced from places
			 *   other than exthdrs.
			 * - exthdrs->ip6e_hbh is not an mbuf chain.
			 */
			int oldoptlen = mopt->m_len;
			struct mbuf *n;

			/*
			 * XXX: give up if the whole (new) hbh header does
			 * not fit even in an mbuf cluster.
			 */
			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
				return(ENOBUFS);

			/*
			 * As a consequence, we must always prepare a cluster
			 * at this point.
			 */
			MGET(n, M_DONTWAIT, MT_DATA);
			if (n) {
				MCLGET(n, M_DONTWAIT);
				if ((n->m_flags & M_EXT) == 0) {
					m_freem(n);
					n = NULL;
				}
			}
			if (!n)
				return(ENOBUFS);
			/* Copy the old options and append the new ones. */
			n->m_len = oldoptlen + JUMBOOPTLEN;
			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
			    oldoptlen);
			optbuf = mtod(n, caddr_t) + oldoptlen;
			m_freem(mopt);
			mopt = exthdrs->ip6e_hbh = n;
		} else {
			optbuf = mtod(mopt, u_char *) + mopt->m_len;
			mopt->m_len += JUMBOOPTLEN;
		}
		optbuf[0] = IP6OPT_PADN;
		optbuf[1] = 1;

		/*
		 * Adjust the header length according to the pad and
		 * the jumbo payload option.
		 */
		hbh = mtod(mopt, struct ip6_hbh *);
		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
	}

	/* fill in the option. */
	optbuf[2] = IP6OPT_JUMBO;
	optbuf[3] = 4;
	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
	bcopy(&v, &optbuf[4], sizeof(u_int32_t));

	/* finally, adjust the packet header length */
	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;

	return(0);
#undef JUMBOOPTLEN
}

/*
 * Insert fragment header and copy unfragmentable header portions.
*/
/*
 * m0 is the original packet (its chain past the IPv6 header holds the
 * unfragmentable part of length hlen); m is the first mbuf of the new
 * fragment being built.  Copies the unfragmentable extension headers
 * from m0 onto m and appends a fragment header, returned via *frghdrp
 * for the caller to fill in.  Returns 0 on success, ENOBUFS on
 * allocation failure.
 * NOTE(review): only the trailing-space branch bumps m->m_pkthdr.len
 * here; the caller appears to set the final pkthdr length itself —
 * confirm against ip6_output's fragmentation loop.
 */
static int
ip6_insertfraghdr(m0, m, hlen, frghdrp)
	struct mbuf *m0, *m;
	int hlen;
	struct ip6_frag **frghdrp;
{
	struct mbuf *n, *mlast;

	if (hlen > sizeof(struct ip6_hdr)) {
		/* There are extension headers beyond the basic IPv6 header;
		 * copy them (read-only copy is fine) onto the fragment. */
		n = m_copym(m0, sizeof(struct ip6_hdr),
		    hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
		if (n == 0)
			return(ENOBUFS);
		m->m_next = n;
	} else
		n = m;

	/* Search for the last mbuf of unfragmentable part. */
	for (mlast = n; mlast->m_next; mlast = mlast->m_next)
		;

	if ((mlast->m_flags & M_EXT) == 0 &&
	    M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
		/* use the trailing space of the last mbuf
		 * for the fragment hdr */
		*frghdrp =
			(struct ip6_frag *)(mtod(mlast, caddr_t) +
					    mlast->m_len);
		mlast->m_len += sizeof(struct ip6_frag);
		m->m_pkthdr.len += sizeof(struct ip6_frag);
	} else {
		/* allocate a new mbuf for the fragment header */
		struct mbuf *mfrg;

		MGET(mfrg, M_DONTWAIT, MT_DATA);
		if (mfrg == 0)
			return(ENOBUFS);
		mfrg->m_len = sizeof(struct ip6_frag);
		*frghdrp = mtod(mfrg, struct ip6_frag *);
		mlast->m_next = mfrg;
	}

	return(0);
}

/*
 * IP6 socket option processing.
 */
int
ip6_ctloutput(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int privileged;
	struct inpcb *in6p = sotoinpcb(so);
	int error, optval;
	int level, op, optname;
	int optlen;
	struct thread *td;

	if (sopt) {
		level = sopt->sopt_level;
		op = sopt->sopt_dir;
		optname = sopt->sopt_name;
		optlen = sopt->sopt_valsize;
		td = sopt->sopt_td;
	} else {
		panic("ip6_ctloutput: arg soopt is NULL");
	}
	error = optval = 0;
	/* privileged == 1 only when a thread is present and suser() passes */
	privileged = (td == 0 || suser(td)) ? 0 : 1;

	if (level == IPPROTO_IPV6) {
		switch (op) {

		case SOPT_SET:
			switch (optname) {
			case IPV6_PKTOPTIONS:
			{
				struct mbuf *m;

				error = soopt_getm(sopt, &m); /* XXX */
				if (error != 0)
					break;
				error = soopt_mcopyin(sopt, m); /* XXX */
				if (error != 0)
					break;
				error = ip6_pcbopts(&in6p->in6p_outputopts,
						    m, so, sopt);
				m_freem(m); /* XXX */
				break;
			}

			/*
			 * Use of some Hop-by-Hop options or some
			 * Destination options, might require special
			 * privilege.
That is, normal applications * (without special privilege) might be forbidden * from setting certain options in outgoing packets, * and might never see certain options in received * packets. [RFC 2292 Section 6] * KAME specific note: * KAME prevents non-privileged users from sending or * receiving ANY hbh/dst options in order to avoid * overhead of parsing options in the kernel. */ case IPV6_UNICAST_HOPS: case IPV6_CHECKSUM: case IPV6_FAITH: case IPV6_V6ONLY: if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_UNICAST_HOPS: if (optval < -1 || optval >= 256) error = EINVAL; else { /* -1 = kernel default */ in6p->in6p_hops = optval; if ((in6p->in6p_vflag & INP_IPV4) != 0) in6p->inp_ip_ttl = optval; } break; #define OPTSET(bit) \ do { \ if (optval) \ in6p->in6p_flags |= (bit); \ else \ in6p->in6p_flags &= ~(bit); \ } while (0) #define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0) case IPV6_CHECKSUM: in6p->in6p_cksum = optval; break; case IPV6_FAITH: OPTSET(IN6P_FAITH); break; case IPV6_V6ONLY: /* * make setsockopt(IPV6_V6ONLY) * available only prior to bind(2). * see ipng mailing list, Jun 22 2001. */ if (in6p->in6p_lport || !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) { error = EINVAL; break; } OPTSET(IN6P_IPV6_V6ONLY); if (optval) in6p->in6p_vflag &= ~INP_IPV4; else in6p->in6p_vflag |= INP_IPV4; break; } break; case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_HOPOPTS: case IPV6_DSTOPTS: case IPV6_RTHDR: /* RFC 2292 */ if (optlen != sizeof(int)) { error = EINVAL; break; } error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optname) { case IPV6_PKTINFO: OPTSET(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: OPTSET(IN6P_HOPLIMIT); break; case IPV6_HOPOPTS: /* * Check super-user privilege. * See comments for IPV6_RECVHOPOPTS. 
*/ if (!privileged) return(EPERM); OPTSET(IN6P_HOPOPTS); break; case IPV6_DSTOPTS: if (!privileged) return(EPERM); OPTSET(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */ break; case IPV6_RTHDR: OPTSET(IN6P_RTHDR); break; } break; #undef OPTSET case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: { struct mbuf *m; if (sopt->sopt_valsize > MLEN) { error = EMSGSIZE; break; } /* XXX */ MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER); if (m == 0) { error = ENOBUFS; break; } m->m_len = sopt->sopt_valsize; error = sooptcopyin(sopt, mtod(m, char *), m->m_len, m->m_len); error = ip6_setmoptions(sopt->sopt_name, &in6p->in6p_moptions, m); (void)m_free(m); } break; case IPV6_PORTRANGE: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; switch (optval) { case IPV6_PORTRANGE_DEFAULT: in6p->in6p_flags &= ~(IN6P_LOWPORT); in6p->in6p_flags &= ~(IN6P_HIGHPORT); break; case IPV6_PORTRANGE_HIGH: in6p->in6p_flags &= ~(IN6P_LOWPORT); in6p->in6p_flags |= IN6P_HIGHPORT; break; case IPV6_PORTRANGE_LOW: in6p->in6p_flags &= ~(IN6P_HIGHPORT); in6p->in6p_flags |= IN6P_LOWPORT; break; default: error = EINVAL; break; } break; #if defined(IPSEC) || defined(FAST_IPSEC) case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m; if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ break; if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ break; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec6_set_policy(in6p, optname, req, len, privileged); m_freem(m); } break; #endif /* KAME IPSEC */ case IPV6_FW_ADD: case IPV6_FW_DEL: case IPV6_FW_FLUSH: case IPV6_FW_ZERO: { struct mbuf *m; struct mbuf **mp = &m; if (ip6_fw_ctl_ptr == NULL) return EINVAL; /* XXX */ if ((error = soopt_getm(sopt, &m)) != 0) break; /* XXX */ if ((error = soopt_mcopyin(sopt, m)) != 0) break; error = (*ip6_fw_ctl_ptr)(optname, mp); m = *mp; } break; default: error = ENOPROTOOPT; break; } break; 
case SOPT_GET: switch (optname) { case IPV6_PKTOPTIONS: if (in6p->in6p_options) { struct mbuf *m; m = m_copym(in6p->in6p_options, 0, M_COPYALL, M_TRYWAIT); error = soopt_mcopyout(sopt, m); if (error == 0) m_freem(m); } else sopt->sopt_valsize = 0; break; case IPV6_UNICAST_HOPS: case IPV6_CHECKSUM: case IPV6_FAITH: case IPV6_V6ONLY: case IPV6_PORTRANGE: switch (optname) { case IPV6_UNICAST_HOPS: optval = in6p->in6p_hops; break; case IPV6_CHECKSUM: optval = in6p->in6p_cksum; break; case IPV6_FAITH: optval = OPTBIT(IN6P_FAITH); break; case IPV6_V6ONLY: optval = OPTBIT(IN6P_IPV6_V6ONLY); break; case IPV6_PORTRANGE: { int flags; flags = in6p->in6p_flags; if (flags & IN6P_HIGHPORT) optval = IPV6_PORTRANGE_HIGH; else if (flags & IN6P_LOWPORT) optval = IPV6_PORTRANGE_LOW; else optval = 0; break; } } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_PKTINFO: case IPV6_HOPLIMIT: case IPV6_HOPOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: if (optname == IPV6_HOPOPTS || optname == IPV6_DSTOPTS || !privileged) return(EPERM); switch (optname) { case IPV6_PKTINFO: optval = OPTBIT(IN6P_PKTINFO); break; case IPV6_HOPLIMIT: optval = OPTBIT(IN6P_HOPLIMIT); break; case IPV6_HOPOPTS: if (!privileged) return(EPERM); optval = OPTBIT(IN6P_HOPOPTS); break; case IPV6_RTHDR: optval = OPTBIT(IN6P_RTHDR); break; case IPV6_DSTOPTS: if (!privileged) return(EPERM); optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); break; } error = sooptcopyout(sopt, &optval, sizeof optval); break; case IPV6_MULTICAST_IF: case IPV6_MULTICAST_HOPS: case IPV6_MULTICAST_LOOP: case IPV6_JOIN_GROUP: case IPV6_LEAVE_GROUP: { struct mbuf *m; error = ip6_getmoptions(sopt->sopt_name, in6p->in6p_moptions, &m); if (error == 0) error = sooptcopyout(sopt, mtod(m, char *), m->m_len); m_freem(m); } break; #if defined(IPSEC) || defined(FAST_IPSEC) case IPV6_IPSEC_POLICY: { caddr_t req = NULL; size_t len = 0; struct mbuf *m = NULL; struct mbuf **mp = &m; error = soopt_getm(sopt, &m); /* XXX */ if (error != 0) break; 
error = soopt_mcopyin(sopt, m); /* XXX */ if (error != 0) break; if (m) { req = mtod(m, caddr_t); len = m->m_len; } error = ipsec6_get_policy(in6p, req, len, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /*XXX*/ if (error == 0 && m) m_freem(m); break; } #endif /* KAME IPSEC */ case IPV6_FW_GET: { struct mbuf *m; struct mbuf **mp = &m; if (ip6_fw_ctl_ptr == NULL) { return EINVAL; } error = (*ip6_fw_ctl_ptr)(optname, mp); if (error == 0) error = soopt_mcopyout(sopt, m); /* XXX */ if (error == 0 && m) m_freem(m); } break; default: error = ENOPROTOOPT; break; } break; } } else { error = EINVAL; } return(error); } /* * Set up IP6 options in pcb for insertion in output packets or * specifying behavior of outgoing packets. */ static int ip6_pcbopts(pktopt, m, so, sopt) struct ip6_pktopts **pktopt; struct mbuf *m; struct socket *so; struct sockopt *sopt; { struct ip6_pktopts *opt = *pktopt; int error = 0; struct thread *td = sopt->sopt_td; int priv = 0; /* turn off any old options. */ if (opt) { #ifdef DIAGNOSTIC if (opt->ip6po_pktinfo || opt->ip6po_nexthop || opt->ip6po_hbh || opt->ip6po_dest1 || opt->ip6po_dest2 || opt->ip6po_rhinfo.ip6po_rhi_rthdr) printf("ip6_pcbopts: all specified options are cleared.\n"); #endif ip6_clearpktopts(opt, 1, -1); } else opt = malloc(sizeof(*opt), M_IP6OPT, M_WAITOK); *pktopt = NULL; if (!m || m->m_len == 0) { /* * Only turning off any previous options, regardless of * whether the opt is just created or given. */ free(opt, M_IP6OPT); return(0); } /* set options specified by user. */ if (td && !suser(td)) priv = 1; if ((error = ip6_setpktoptions(m, opt, priv, 1)) != 0) { ip6_clearpktopts(opt, 1, -1); /* XXX: discard all options */ free(opt, M_IP6OPT); return(error); } *pktopt = opt; return(0); } /* * initialize ip6_pktopts. beware that there are non-zero default values in * the struct. 
*/
void
init_ip6pktopts(opt)
	struct ip6_pktopts *opt;
{
	bzero(opt, sizeof(*opt));
	opt->ip6po_hlim = -1;	/* -1 means default hop limit */
}

/*
 * Clear packet options.  needfree selects whether the option buffers
 * themselves are freed or merely unlinked.
 * NOTE(review): every branch below tests the same (optname == -1)
 * condition, i.e. only the "clear everything" case currently does any
 * work — presumably per-option clearing was planned; confirm intent
 * before relying on optname != -1 behavior.
 */
void
ip6_clearpktopts(pktopt, needfree, optname)
	struct ip6_pktopts *pktopt;
	int needfree, optname;
{
	if (pktopt == NULL)
		return;

	if (optname == -1) {
		if (needfree && pktopt->ip6po_pktinfo)
			free(pktopt->ip6po_pktinfo, M_IP6OPT);
		pktopt->ip6po_pktinfo = NULL;
	}
	if (optname == -1)
		pktopt->ip6po_hlim = -1;
	if (optname == -1) {
		if (needfree && pktopt->ip6po_nexthop)
			free(pktopt->ip6po_nexthop, M_IP6OPT);
		pktopt->ip6po_nexthop = NULL;
	}
	if (optname == -1) {
		if (needfree && pktopt->ip6po_hbh)
			free(pktopt->ip6po_hbh, M_IP6OPT);
		pktopt->ip6po_hbh = NULL;
	}
	if (optname == -1) {
		if (needfree && pktopt->ip6po_dest1)
			free(pktopt->ip6po_dest1, M_IP6OPT);
		pktopt->ip6po_dest1 = NULL;
	}
	if (optname == -1) {
		if (needfree && pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
		/* the cached route for the routing header goes with it */
		if (pktopt->ip6po_route.ro_rt) {
			RTFREE(pktopt->ip6po_route.ro_rt);
			pktopt->ip6po_route.ro_rt = NULL;
		}
	}
	if (optname == -1) {
		if (needfree && pktopt->ip6po_dest2)
			free(pktopt->ip6po_dest2, M_IP6OPT);
		pktopt->ip6po_dest2 = NULL;
	}
}

/*
 * Deep-copy one extension-header option from src to dst; length is taken
 * from the header's own ip6e_len field.  Jumps to bad on M_NOWAIT
 * allocation failure.
 */
#define PKTOPT_EXTHDRCPY(type) \
do {\
	if (src->type) {\
		int hlen =\
			(((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
		dst->type = malloc(hlen, M_IP6OPT, canwait);\
		if (dst->type == NULL && canwait == M_NOWAIT)\
			goto bad;\
		bcopy(src->type, dst->type, hlen);\
	}\
} while (0)

struct ip6_pktopts *
ip6_copypktopts(src, canwait)
	struct ip6_pktopts *src;
	int canwait;
{
	struct ip6_pktopts *dst;

	if (src == NULL) {
		/* NOTE(review): message names the wrong function; this is
		 * ip6_copypktopts, not ip6_clearpktopts — candidate fix. */
		printf("ip6_clearpktopts: invalid argument\n");
		return(NULL);
	}

	dst = malloc(sizeof(*dst), M_IP6OPT, canwait);
	if (dst == NULL && canwait == M_NOWAIT)
		return (NULL);
	bzero(dst, sizeof(*dst));

	dst->ip6po_hlim = src->ip6po_hlim;
	if (src->ip6po_pktinfo) {
		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
					    M_IP6OPT, canwait);
		if (dst->ip6po_pktinfo == NULL
&& canwait == M_NOWAIT) goto bad; *dst->ip6po_pktinfo = *src->ip6po_pktinfo; } if (src->ip6po_nexthop) { dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len, M_IP6OPT, canwait); if (dst->ip6po_nexthop == NULL && canwait == M_NOWAIT) goto bad; bcopy(src->ip6po_nexthop, dst->ip6po_nexthop, src->ip6po_nexthop->sa_len); } PKTOPT_EXTHDRCPY(ip6po_hbh); PKTOPT_EXTHDRCPY(ip6po_dest1); PKTOPT_EXTHDRCPY(ip6po_dest2); PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */ return(dst); bad: if (dst->ip6po_pktinfo) free(dst->ip6po_pktinfo, M_IP6OPT); if (dst->ip6po_nexthop) free(dst->ip6po_nexthop, M_IP6OPT); if (dst->ip6po_hbh) free(dst->ip6po_hbh, M_IP6OPT); if (dst->ip6po_dest1) free(dst->ip6po_dest1, M_IP6OPT); if (dst->ip6po_dest2) free(dst->ip6po_dest2, M_IP6OPT); if (dst->ip6po_rthdr) free(dst->ip6po_rthdr, M_IP6OPT); free(dst, M_IP6OPT); return(NULL); } #undef PKTOPT_EXTHDRCPY void ip6_freepcbopts(pktopt) struct ip6_pktopts *pktopt; { if (pktopt == NULL) return; ip6_clearpktopts(pktopt, 1, -1); free(pktopt, M_IP6OPT); } /* * Set the IP6 multicast options in response to user setsockopt(). */ static int ip6_setmoptions(optname, im6op, m) int optname; struct ip6_moptions **im6op; struct mbuf *m; { int error = 0; u_int loop, ifindex; struct ipv6_mreq *mreq; struct ifnet *ifp; struct ip6_moptions *im6o = *im6op; struct route_in6 ro; struct sockaddr_in6 *dst; struct in6_multi_mship *imm; struct thread *td = curthread; /* XXX */ if (im6o == NULL) { /* * No multicast option buffer attached to the pcb; * allocate one and initialize to default values. */ im6o = (struct ip6_moptions *) malloc(sizeof(*im6o), M_IPMOPTS, M_WAITOK); if (im6o == NULL) return(ENOBUFS); *im6op = im6o; im6o->im6o_multicast_ifp = NULL; im6o->im6o_multicast_hlim = ip6_defmcasthlim; im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP; LIST_INIT(&im6o->im6o_memberships); } switch (optname) { case IPV6_MULTICAST_IF: /* * Select the interface for outgoing multicast packets. 
*/ if (m == NULL || m->m_len != sizeof(u_int)) { error = EINVAL; break; } bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex)); if (ifindex < 0 || if_index < ifindex) { error = ENXIO; /* XXX EINVAL? */ break; } ifp = ifnet_byindex(ifindex); if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; break; } im6o->im6o_multicast_ifp = ifp; break; case IPV6_MULTICAST_HOPS: { /* * Set the IP6 hoplimit for outgoing multicast packets. */ int optval; if (m == NULL || m->m_len != sizeof(int)) { error = EINVAL; break; } bcopy(mtod(m, u_int *), &optval, sizeof(optval)); if (optval < -1 || optval >= 256) error = EINVAL; else if (optval == -1) im6o->im6o_multicast_hlim = ip6_defmcasthlim; else im6o->im6o_multicast_hlim = optval; break; } case IPV6_MULTICAST_LOOP: /* * Set the loopback flag for outgoing multicast packets. * Must be zero or one. */ if (m == NULL || m->m_len != sizeof(u_int)) { error = EINVAL; break; } bcopy(mtod(m, u_int *), &loop, sizeof(loop)); if (loop > 1) { error = EINVAL; break; } im6o->im6o_multicast_loop = loop; break; case IPV6_JOIN_GROUP: /* * Add a multicast group membership. * Group must be a valid IP6 multicast address. */ if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { error = EINVAL; break; } mreq = mtod(m, struct ipv6_mreq *); if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { /* * We use the unspecified address to specify to accept * all multicast addresses. Only super user is allowed * to do this. */ if (suser(td)) { error = EACCES; break; } } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { error = EINVAL; break; } /* * If the interface is specified, validate it. */ if (mreq->ipv6mr_interface < 0 || if_index < mreq->ipv6mr_interface) { error = ENXIO; /* XXX EINVAL? */ break; } /* * If no interface was explicitly specified, choose an * appropriate one according to the given multicast address. 
*/ if (mreq->ipv6mr_interface == 0) { /* * If the multicast address is in node-local scope, * the interface should be a loopback interface. * Otherwise, look up the routing table for the * address, and choose the outgoing interface. * XXX: is it a good approach? */ if (IN6_IS_ADDR_MC_NODELOCAL(&mreq->ipv6mr_multiaddr)) { ifp = &loif[0]; } else { ro.ro_rt = NULL; dst = (struct sockaddr_in6 *)&ro.ro_dst; bzero(dst, sizeof(*dst)); dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = mreq->ipv6mr_multiaddr; rtalloc((struct route *)&ro); if (ro.ro_rt == NULL) { error = EADDRNOTAVAIL; break; } ifp = ro.ro_rt->rt_ifp; rtfree(ro.ro_rt); } } else ifp = ifnet_byindex(mreq->ipv6mr_interface); /* * See if we found an interface, and confirm that it * supports multicast */ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; break; } /* * Put interface index into the multicast address, * if the address has link-local scope. */ if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) { mreq->ipv6mr_multiaddr.s6_addr16[1] = htons(mreq->ipv6mr_interface); } /* * See if the membership already exists. */ for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = imm->i6mm_chain.le_next) if (imm->i6mm_maddr->in6m_ifp == ifp && IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, &mreq->ipv6mr_multiaddr)) break; if (imm != NULL) { error = EADDRINUSE; break; } /* * Everything looks good; add a new record to the multicast * address list for the given interface. */ imm = malloc(sizeof(*imm), M_IPMADDR, M_WAITOK); if (imm == NULL) { error = ENOBUFS; break; } if ((imm->i6mm_maddr = in6_addmulti(&mreq->ipv6mr_multiaddr, ifp, &error)) == NULL) { free(imm, M_IPMADDR); break; } LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain); break; case IPV6_LEAVE_GROUP: /* * Drop a multicast group membership. * Group must be a valid IP6 multicast address. 
*/ if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) { error = EINVAL; break; } mreq = mtod(m, struct ipv6_mreq *); if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) { if (suser(td)) { error = EACCES; break; } } else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) { error = EINVAL; break; } /* * If an interface address was specified, get a pointer * to its ifnet structure. */ if (mreq->ipv6mr_interface < 0 || if_index < mreq->ipv6mr_interface) { error = ENXIO; /* XXX EINVAL? */ break; } ifp = ifnet_byindex(mreq->ipv6mr_interface); /* * Put interface index into the multicast address, * if the address has link-local scope. */ if (IN6_IS_ADDR_MC_LINKLOCAL(&mreq->ipv6mr_multiaddr)) { mreq->ipv6mr_multiaddr.s6_addr16[1] = htons(mreq->ipv6mr_interface); } /* * Find the membership in the membership list. */ for (imm = im6o->im6o_memberships.lh_first; imm != NULL; imm = imm->i6mm_chain.le_next) { if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) && IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr, &mreq->ipv6mr_multiaddr)) break; } if (imm == NULL) { /* Unable to resolve interface */ error = EADDRNOTAVAIL; break; } /* * Give up the multicast address record to which the * membership points. */ LIST_REMOVE(imm, i6mm_chain); in6_delmulti(imm->i6mm_maddr); free(imm, M_IPMADDR); break; default: error = EOPNOTSUPP; break; } /* * If all options have default values, no need to keep the mbuf. */ if (im6o->im6o_multicast_ifp == NULL && im6o->im6o_multicast_hlim == ip6_defmcasthlim && im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP && im6o->im6o_memberships.lh_first == NULL) { free(*im6op, M_IPMOPTS); *im6op = NULL; } return(error); } /* * Return the IP6 multicast options in response to user getsockopt(). 
*/ static int ip6_getmoptions(optname, im6o, mp) int optname; struct ip6_moptions *im6o; struct mbuf **mp; { u_int *hlim, *loop, *ifindex; *mp = m_get(M_TRYWAIT, MT_HEADER); /* XXX */ switch (optname) { case IPV6_MULTICAST_IF: ifindex = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL || im6o->im6o_multicast_ifp == NULL) *ifindex = 0; else *ifindex = im6o->im6o_multicast_ifp->if_index; return(0); case IPV6_MULTICAST_HOPS: hlim = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL) *hlim = ip6_defmcasthlim; else *hlim = im6o->im6o_multicast_hlim; return(0); case IPV6_MULTICAST_LOOP: loop = mtod(*mp, u_int *); (*mp)->m_len = sizeof(u_int); if (im6o == NULL) *loop = ip6_defmcasthlim; else *loop = im6o->im6o_multicast_loop; return(0); default: return(EOPNOTSUPP); } } /* * Discard the IP6 multicast options. */ void ip6_freemoptions(im6o) struct ip6_moptions *im6o; { struct in6_multi_mship *imm; if (im6o == NULL) return; while ((imm = im6o->im6o_memberships.lh_first) != NULL) { LIST_REMOVE(imm, i6mm_chain); if (imm->i6mm_maddr) in6_delmulti(imm->i6mm_maddr); free(imm, M_IPMADDR); } free(im6o, M_IPMOPTS); } /* * Set IPv6 outgoing packet options based on advanced API. */ int ip6_setpktoptions(control, opt, priv, needcopy) struct mbuf *control; struct ip6_pktopts *opt; int priv, needcopy; { struct cmsghdr *cm = 0; if (control == 0 || opt == 0) return(EINVAL); init_ip6pktopts(opt); /* * XXX: Currently, we assume all the optional information is stored * in a single mbuf. 
*/ if (control->m_next) return(EINVAL); for (; control->m_len; control->m_data += CMSG_ALIGN(cm->cmsg_len), control->m_len -= CMSG_ALIGN(cm->cmsg_len)) { cm = mtod(control, struct cmsghdr *); if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len) return(EINVAL); if (cm->cmsg_level != IPPROTO_IPV6) continue; /* * XXX should check if RFC2292 API is mixed with 2292bis API */ switch (cm->cmsg_type) { case IPV6_PKTINFO: if (cm->cmsg_len != CMSG_LEN(sizeof(struct in6_pktinfo))) return(EINVAL); if (needcopy) { /* XXX: Is it really WAITOK? */ opt->ip6po_pktinfo = malloc(sizeof(struct in6_pktinfo), M_IP6OPT, M_WAITOK); bcopy(CMSG_DATA(cm), opt->ip6po_pktinfo, sizeof(struct in6_pktinfo)); } else opt->ip6po_pktinfo = (struct in6_pktinfo *)CMSG_DATA(cm); if (opt->ip6po_pktinfo->ipi6_ifindex && IN6_IS_ADDR_LINKLOCAL(&opt->ip6po_pktinfo->ipi6_addr)) opt->ip6po_pktinfo->ipi6_addr.s6_addr16[1] = htons(opt->ip6po_pktinfo->ipi6_ifindex); if (opt->ip6po_pktinfo->ipi6_ifindex > if_index || opt->ip6po_pktinfo->ipi6_ifindex < 0) { return(ENXIO); } /* * Check if the requested source address is indeed a * unicast address assigned to the node, and can be * used as the packet's source address. 
*/ if (!IN6_IS_ADDR_UNSPECIFIED(&opt->ip6po_pktinfo->ipi6_addr)) { struct in6_ifaddr *ia6; struct sockaddr_in6 sin6; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(sin6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = opt->ip6po_pktinfo->ipi6_addr; ia6 = (struct in6_ifaddr *)ifa_ifwithaddr(sin6tosa(&sin6)); if (ia6 == NULL || (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY)) != 0) return(EADDRNOTAVAIL); } break; case IPV6_HOPLIMIT: if (cm->cmsg_len != CMSG_LEN(sizeof(int))) return(EINVAL); opt->ip6po_hlim = *(int *)CMSG_DATA(cm); if (opt->ip6po_hlim < -1 || opt->ip6po_hlim > 255) return(EINVAL); break; case IPV6_NEXTHOP: if (!priv) return(EPERM); if (cm->cmsg_len < sizeof(u_char) || /* check if cmsg_len is large enough for sa_len */ cm->cmsg_len < CMSG_LEN(*CMSG_DATA(cm))) return(EINVAL); if (needcopy) { opt->ip6po_nexthop = malloc(*CMSG_DATA(cm), M_IP6OPT, M_WAITOK); bcopy(CMSG_DATA(cm), opt->ip6po_nexthop, *CMSG_DATA(cm)); } else opt->ip6po_nexthop = (struct sockaddr *)CMSG_DATA(cm); break; case IPV6_HOPOPTS: { struct ip6_hbh *hbh; int hbhlen; if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_hbh))) return(EINVAL); hbh = (struct ip6_hbh *)CMSG_DATA(cm); hbhlen = (hbh->ip6h_len + 1) << 3; if (cm->cmsg_len != CMSG_LEN(hbhlen)) return(EINVAL); if (needcopy) { opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_WAITOK); bcopy(hbh, opt->ip6po_hbh, hbhlen); } else opt->ip6po_hbh = hbh; break; } case IPV6_DSTOPTS: { struct ip6_dest *dest, **newdest; int destlen; if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_dest))) return(EINVAL); dest = (struct ip6_dest *)CMSG_DATA(cm); destlen = (dest->ip6d_len + 1) << 3; if (cm->cmsg_len != CMSG_LEN(destlen)) return(EINVAL); /* * The old advacned API is ambiguous on this * point. Our approach is to determine the * position based according to the existence * of a routing header. 
Note, however, that * this depends on the order of the extension * headers in the ancillary data; the 1st part * of the destination options header must * appear before the routing header in the * ancillary data, too. * RFC2292bis solved the ambiguity by * introducing separate cmsg types. */ if (opt->ip6po_rthdr == NULL) newdest = &opt->ip6po_dest1; else newdest = &opt->ip6po_dest2; if (needcopy) { *newdest = malloc(destlen, M_IP6OPT, M_WAITOK); bcopy(dest, *newdest, destlen); } else *newdest = dest; break; } case IPV6_RTHDR: { struct ip6_rthdr *rth; int rthlen; if (cm->cmsg_len < CMSG_LEN(sizeof(struct ip6_rthdr))) return(EINVAL); rth = (struct ip6_rthdr *)CMSG_DATA(cm); rthlen = (rth->ip6r_len + 1) << 3; if (cm->cmsg_len != CMSG_LEN(rthlen)) return(EINVAL); switch (rth->ip6r_type) { case IPV6_RTHDR_TYPE_0: /* must contain one addr */ if (rth->ip6r_len == 0) return(EINVAL); /* length must be even */ if (rth->ip6r_len % 2) return(EINVAL); if (rth->ip6r_len / 2 != rth->ip6r_segleft) return(EINVAL); break; default: return(EINVAL); /* not supported */ } if (needcopy) { opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_WAITOK); bcopy(rth, opt->ip6po_rthdr, rthlen); } else opt->ip6po_rthdr = rth; break; } default: return(ENOPROTOOPT); } } return(0); } /* * Routine called from ip6_output() to loop back a copy of an IP6 multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be &loif -- easier than replicating that code here. */ void ip6_mloopback(ifp, m, dst) struct ifnet *ifp; struct mbuf *m; struct sockaddr_in6 *dst; { struct mbuf *copym; struct ip6_hdr *ip6; copym = m_copy(m, 0, M_COPYALL); if (copym == NULL) return; /* * Make sure to deep-copy IPv6 header portion in case the data * is in an mbuf cluster, so that we can safely override the IPv6 * header portion later. 
*/ if ((copym->m_flags & M_EXT) != 0 || copym->m_len < sizeof(struct ip6_hdr)) { copym = m_pullup(copym, sizeof(struct ip6_hdr)); if (copym == NULL) return; } #ifdef DIAGNOSTIC if (copym->m_len < sizeof(*ip6)) { m_freem(copym); return; } #endif ip6 = mtod(copym, struct ip6_hdr *); #ifndef SCOPEDROUTING /* * clear embedded scope identifiers if necessary. * in6_clearscope will touch the addresses only when necessary. */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); #endif (void)if_simloop(ifp, copym, dst->sin6_family, 0); } /* * Chop IPv6 header off from the payload. */ static int ip6_splithdr(m, exthdrs) struct mbuf *m; struct ip6_exthdrs *exthdrs; { struct mbuf *mh; struct ip6_hdr *ip6; ip6 = mtod(m, struct ip6_hdr *); if (m->m_len > sizeof(*ip6)) { MGETHDR(mh, M_DONTWAIT, MT_HEADER); if (mh == 0) { m_freem(m); return ENOBUFS; } - M_COPY_PKTHDR(mh, m); + M_MOVE_PKTHDR(mh, m); MH_ALIGN(mh, sizeof(*ip6)); m->m_flags &= ~M_PKTHDR; m->m_len -= sizeof(*ip6); m->m_data += sizeof(*ip6); mh->m_next = m; m = mh; m->m_len = sizeof(*ip6); bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6)); } exthdrs->ip6e_ip6 = m; return 0; } /* * Compute IPv6 extension header length. */ int ip6_optlen(in6p) struct in6pcb *in6p; { int len; if (!in6p->in6p_outputopts) return 0; len = 0; #define elen(x) \ (((struct ip6_ext *)(x)) ? 
(((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0) len += elen(in6p->in6p_outputopts->ip6po_hbh); if (in6p->in6p_outputopts->ip6po_rthdr) /* dest1 is valid with rthdr only */ len += elen(in6p->in6p_outputopts->ip6po_dest1); len += elen(in6p->in6p_outputopts->ip6po_rthdr); len += elen(in6p->in6p_outputopts->ip6po_dest2); return len; #undef elen } Index: head/sys/netinet6/ipsec.c =================================================================== --- head/sys/netinet6/ipsec.c (revision 108465) +++ head/sys/netinet6/ipsec.c (revision 108466) @@ -1,3502 +1,3496 @@ /* $FreeBSD$ */ /* $KAME: ipsec.c,v 1.103 2001/05/24 07:14:18 sakane Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPsec controller part. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #ifdef IPSEC_ESP #include #ifdef INET6 #include #endif #endif #include #ifdef INET6 #include #endif #include #include #include #include #include #ifdef IPSEC_DEBUG int ipsec_debug = 1; #else int ipsec_debug = 0; #endif struct ipsecstat ipsecstat; int ip4_ah_cleartos = 1; int ip4_ah_offsetmask = 0; /* maybe IP_DF? */ int ip4_ipsec_dfbit = 0; /* DF bit on encap. 
0: clear 1: set 2: copy */ int ip4_esp_trans_deflev = IPSEC_LEVEL_USE; int ip4_esp_net_deflev = IPSEC_LEVEL_USE; int ip4_ah_trans_deflev = IPSEC_LEVEL_USE; int ip4_ah_net_deflev = IPSEC_LEVEL_USE; struct secpolicy ip4_def_policy; int ip4_ipsec_ecn = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ int ip4_esp_randpad = -1; #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet_ipsec); #ifdef INET6 SYSCTL_DECL(_net_inet6_ipsec6); #endif #endif /* net.inet.ipsec */ SYSCTL_STRUCT(_net_inet_ipsec, IPSECCTL_STATS, stats, CTLFLAG_RD, &ipsecstat, ipsecstat, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, CTLFLAG_RW, &ip4_def_policy.policy, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_RW, &ip4_esp_trans_deflev, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev, CTLFLAG_RW, &ip4_esp_net_deflev, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev, CTLFLAG_RW, &ip4_ah_trans_deflev, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, CTLFLAG_RW, &ip4_ah_net_deflev, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_CLEARTOS, ah_cleartos, CTLFLAG_RW, &ip4_ah_cleartos, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_OFFSETMASK, ah_offsetmask, CTLFLAG_RW, &ip4_ah_offsetmask, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DFBIT, dfbit, CTLFLAG_RW, &ip4_ipsec_dfbit, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ECN, ecn, CTLFLAG_RW, &ip4_ipsec_ecn, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEBUG, debug, CTLFLAG_RW, &ipsec_debug, 0, ""); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ESP_RANDPAD, esp_randpad, CTLFLAG_RW, &ip4_esp_randpad, 0, ""); #ifdef INET6 struct ipsecstat ipsec6stat; int ip6_esp_trans_deflev = IPSEC_LEVEL_USE; int ip6_esp_net_deflev = IPSEC_LEVEL_USE; int ip6_ah_trans_deflev = IPSEC_LEVEL_USE; int ip6_ah_net_deflev = IPSEC_LEVEL_USE; struct secpolicy ip6_def_policy; int ip6_ipsec_ecn = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ int ip6_esp_randpad = 
-1; /* net.inet6.ipsec6 */ SYSCTL_STRUCT(_net_inet6_ipsec6, IPSECCTL_STATS, stats, CTLFLAG_RD, &ipsec6stat, ipsecstat, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY, def_policy, CTLFLAG_RW, &ip6_def_policy.policy, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_RW, &ip6_esp_trans_deflev, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev, CTLFLAG_RW, &ip6_esp_net_deflev, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev, CTLFLAG_RW, &ip6_ah_trans_deflev, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, CTLFLAG_RW, &ip6_ah_net_deflev, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ECN, ecn, CTLFLAG_RW, &ip6_ipsec_ecn, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG, debug, CTLFLAG_RW, &ipsec_debug, 0, ""); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ESP_RANDPAD, esp_randpad, CTLFLAG_RW, &ip6_esp_randpad, 0, ""); #endif /* INET6 */ static int ipsec_setspidx_mbuf __P((struct secpolicyindex *, u_int, u_int, struct mbuf *, int)); static int ipsec4_setspidx_inpcb __P((struct mbuf *, struct inpcb *pcb)); #ifdef INET6 static int ipsec6_setspidx_in6pcb __P((struct mbuf *, struct in6pcb *pcb)); #endif static int ipsec_setspidx __P((struct mbuf *, struct secpolicyindex *, int)); static void ipsec4_get_ulp __P((struct mbuf *m, struct secpolicyindex *, int)); static int ipsec4_setspidx_ipaddr __P((struct mbuf *, struct secpolicyindex *)); #ifdef INET6 static void ipsec6_get_ulp __P((struct mbuf *m, struct secpolicyindex *, int)); static int ipsec6_setspidx_ipaddr __P((struct mbuf *, struct secpolicyindex *)); #endif static struct inpcbpolicy *ipsec_newpcbpolicy __P((void)); static void ipsec_delpcbpolicy __P((struct inpcbpolicy *)); static struct secpolicy *ipsec_deepcopy_policy __P((struct secpolicy *src)); static int ipsec_set_policy __P((struct secpolicy **pcb_sp, int optname, caddr_t request, size_t len, int priv)); static int 
ipsec_get_policy __P((struct secpolicy *pcb_sp, struct mbuf **mp)); static void vshiftl __P((unsigned char *, int, int)); static int ipsec_in_reject __P((struct secpolicy *, struct mbuf *)); static size_t ipsec_hdrsiz __P((struct secpolicy *)); #ifdef INET static struct mbuf *ipsec4_splithdr __P((struct mbuf *)); #endif #ifdef INET6 static struct mbuf *ipsec6_splithdr __P((struct mbuf *)); #endif #ifdef INET static int ipsec4_encapsulate __P((struct mbuf *, struct secasvar *)); #endif #ifdef INET6 static int ipsec6_encapsulate __P((struct mbuf *, struct secasvar *)); #endif /* * For OUTBOUND packet having a socket. Searching SPD for packet, * and return a pointer to SP. * OUT: NULL: no apropreate SP found, the following value is set to error. * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. * others : error occured. * others: a pointer to SP * * NOTE: IPv6 mapped adddress concern is implemented here. */ struct secpolicy * ipsec4_getpolicybysock(m, dir, so, error) struct mbuf *m; u_int dir; struct socket *so; int *error; { struct inpcbpolicy *pcbsp = NULL; struct secpolicy *currsp = NULL; /* policy on socket */ struct secpolicy *kernsp = NULL; /* policy on kernel */ /* sanity check */ if (m == NULL || so == NULL || error == NULL) panic("ipsec4_getpolicybysock: NULL pointer was passed.\n"); switch (so->so_proto->pr_domain->dom_family) { case AF_INET: /* set spidx in pcb */ *error = ipsec4_setspidx_inpcb(m, sotoinpcb(so)); break; #ifdef INET6 case AF_INET6: /* set spidx in pcb */ *error = ipsec6_setspidx_in6pcb(m, sotoin6pcb(so)); break; #endif default: panic("ipsec4_getpolicybysock: unsupported address family\n"); } if (*error) return NULL; switch (so->so_proto->pr_domain->dom_family) { case AF_INET: pcbsp = sotoinpcb(so)->inp_sp; break; #ifdef INET6 case AF_INET6: pcbsp = sotoin6pcb(so)->in6p_sp; break; #endif } /* sanity check */ if (pcbsp == NULL) panic("ipsec4_getpolicybysock: pcbsp is NULL.\n"); switch (dir) { case 
IPSEC_DIR_INBOUND: currsp = pcbsp->sp_in; break; case IPSEC_DIR_OUTBOUND: currsp = pcbsp->sp_out; break; default: panic("ipsec4_getpolicybysock: illegal direction.\n"); } /* sanity check */ if (currsp == NULL) panic("ipsec4_getpolicybysock: currsp is NULL.\n"); /* when privilieged socket */ if (pcbsp->priv) { switch (currsp->policy) { case IPSEC_POLICY_BYPASS: currsp->refcnt++; *error = 0; return currsp; case IPSEC_POLICY_ENTRUST: /* look for a policy in SPD */ kernsp = key_allocsp(&currsp->spidx, dir); /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec4_getpolicybysock called " "to allocate SP:%p\n", kernsp)); *error = 0; return kernsp; } /* no SP found */ if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD && ip4_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", ip4_def_policy.policy, IPSEC_POLICY_NONE)); ip4_def_policy.policy = IPSEC_POLICY_NONE; } ip4_def_policy.refcnt++; *error = 0; return &ip4_def_policy; case IPSEC_POLICY_IPSEC: currsp->refcnt++; *error = 0; return currsp; default: ipseclog((LOG_ERR, "ipsec4_getpolicybysock: " "Invalid policy for PCB %d\n", currsp->policy)); *error = EINVAL; return NULL; } /* NOTREACHED */ } /* when non-privilieged socket */ /* look for a policy in SPD */ kernsp = key_allocsp(&currsp->spidx, dir); /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec4_getpolicybysock called " "to allocate SP:%p\n", kernsp)); *error = 0; return kernsp; } /* no SP found */ switch (currsp->policy) { case IPSEC_POLICY_BYPASS: ipseclog((LOG_ERR, "ipsec4_getpolicybysock: " "Illegal policy for non-priviliged defined %d\n", currsp->policy)); *error = EINVAL; return NULL; case IPSEC_POLICY_ENTRUST: if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD && ip4_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", ip4_def_policy.policy, IPSEC_POLICY_NONE)); ip4_def_policy.policy = 
IPSEC_POLICY_NONE; } ip4_def_policy.refcnt++; *error = 0; return &ip4_def_policy; case IPSEC_POLICY_IPSEC: currsp->refcnt++; *error = 0; return currsp; default: ipseclog((LOG_ERR, "ipsec4_getpolicybysock: " "Invalid policy for PCB %d\n", currsp->policy)); *error = EINVAL; return NULL; } /* NOTREACHED */ } /* * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet, * and return a pointer to SP. * OUT: positive: a pointer to the entry for security policy leaf matched. * NULL: no apropreate SP found, the following value is set to error. * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. * others : error occured. */ struct secpolicy * ipsec4_getpolicybyaddr(m, dir, flag, error) struct mbuf *m; u_int dir; int flag; int *error; { struct secpolicy *sp = NULL; /* sanity check */ if (m == NULL || error == NULL) panic("ipsec4_getpolicybyaddr: NULL pointer was passed.\n"); { struct secpolicyindex spidx; bzero(&spidx, sizeof(spidx)); /* make a index to look for a policy */ *error = ipsec_setspidx_mbuf(&spidx, dir, AF_INET, m, (flag & IP_FORWARDING) ? 0 : 1); if (*error != 0) return NULL; sp = key_allocsp(&spidx, dir); } /* SP found */ if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec4_getpolicybyaddr called " "to allocate SP:%p\n", sp)); *error = 0; return sp; } /* no SP found */ if (ip4_def_policy.policy != IPSEC_POLICY_DISCARD && ip4_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy:%d->%d\n", ip4_def_policy.policy, IPSEC_POLICY_NONE)); ip4_def_policy.policy = IPSEC_POLICY_NONE; } ip4_def_policy.refcnt++; *error = 0; return &ip4_def_policy; } #ifdef INET6 /* * For OUTBOUND packet having a socket. Searching SPD for packet, * and return a pointer to SP. * OUT: NULL: no apropreate SP found, the following value is set to error. * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. * others : error occured. 
* others: a pointer to SP */ struct secpolicy * ipsec6_getpolicybysock(m, dir, so, error) struct mbuf *m; u_int dir; struct socket *so; int *error; { struct inpcbpolicy *pcbsp = NULL; struct secpolicy *currsp = NULL; /* policy on socket */ struct secpolicy *kernsp = NULL; /* policy on kernel */ /* sanity check */ if (m == NULL || so == NULL || error == NULL) panic("ipsec6_getpolicybysock: NULL pointer was passed.\n"); #ifdef DIAGNOSTIC if (so->so_proto->pr_domain->dom_family != AF_INET6) panic("ipsec6_getpolicybysock: socket domain != inet6\n"); #endif /* set spidx in pcb */ ipsec6_setspidx_in6pcb(m, sotoin6pcb(so)); pcbsp = sotoin6pcb(so)->in6p_sp; /* sanity check */ if (pcbsp == NULL) panic("ipsec6_getpolicybysock: pcbsp is NULL.\n"); switch (dir) { case IPSEC_DIR_INBOUND: currsp = pcbsp->sp_in; break; case IPSEC_DIR_OUTBOUND: currsp = pcbsp->sp_out; break; default: panic("ipsec6_getpolicybysock: illegal direction.\n"); } /* sanity check */ if (currsp == NULL) panic("ipsec6_getpolicybysock: currsp is NULL.\n"); /* when privilieged socket */ if (pcbsp->priv) { switch (currsp->policy) { case IPSEC_POLICY_BYPASS: currsp->refcnt++; *error = 0; return currsp; case IPSEC_POLICY_ENTRUST: /* look for a policy in SPD */ kernsp = key_allocsp(&currsp->spidx, dir); /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec6_getpolicybysock called " "to allocate SP:%p\n", kernsp)); *error = 0; return kernsp; } /* no SP found */ if (ip6_def_policy.policy != IPSEC_POLICY_DISCARD && ip6_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", ip6_def_policy.policy, IPSEC_POLICY_NONE)); ip6_def_policy.policy = IPSEC_POLICY_NONE; } ip6_def_policy.refcnt++; *error = 0; return &ip6_def_policy; case IPSEC_POLICY_IPSEC: currsp->refcnt++; *error = 0; return currsp; default: ipseclog((LOG_ERR, "ipsec6_getpolicybysock: " "Invalid policy for PCB %d\n", currsp->policy)); *error = EINVAL; return NULL; } /* NOTREACHED */ 
} /* when non-privilieged socket */ /* look for a policy in SPD */ kernsp = key_allocsp(&currsp->spidx, dir); /* SP found */ if (kernsp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec6_getpolicybysock called " "to allocate SP:%p\n", kernsp)); *error = 0; return kernsp; } /* no SP found */ switch (currsp->policy) { case IPSEC_POLICY_BYPASS: ipseclog((LOG_ERR, "ipsec6_getpolicybysock: " "Illegal policy for non-priviliged defined %d\n", currsp->policy)); *error = EINVAL; return NULL; case IPSEC_POLICY_ENTRUST: if (ip6_def_policy.policy != IPSEC_POLICY_DISCARD && ip6_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", ip6_def_policy.policy, IPSEC_POLICY_NONE)); ip6_def_policy.policy = IPSEC_POLICY_NONE; } ip6_def_policy.refcnt++; *error = 0; return &ip6_def_policy; case IPSEC_POLICY_IPSEC: currsp->refcnt++; *error = 0; return currsp; default: ipseclog((LOG_ERR, "ipsec6_policybysock: Invalid policy for PCB %d\n", currsp->policy)); *error = EINVAL; return NULL; } /* NOTREACHED */ } /* * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet, * and return a pointer to SP. * `flag' means that packet is to be forwarded whether or not. * flag = 1: forwad * OUT: positive: a pointer to the entry for security policy leaf matched. * NULL: no apropreate SP found, the following value is set to error. * 0 : bypass * EACCES : discard packet. * ENOENT : ipsec_acquire() in progress, maybe. * others : error occured. */ #ifndef IP_FORWARDING #define IP_FORWARDING 1 #endif struct secpolicy * ipsec6_getpolicybyaddr(m, dir, flag, error) struct mbuf *m; u_int dir; int flag; int *error; { struct secpolicy *sp = NULL; /* sanity check */ if (m == NULL || error == NULL) panic("ipsec6_getpolicybyaddr: NULL pointer was passed.\n"); { struct secpolicyindex spidx; bzero(&spidx, sizeof(spidx)); /* make a index to look for a policy */ *error = ipsec_setspidx_mbuf(&spidx, dir, AF_INET6, m, (flag & IP_FORWARDING) ? 
0 : 1); if (*error != 0) return NULL; sp = key_allocsp(&spidx, dir); } /* SP found */ if (sp != NULL) { KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec6_getpolicybyaddr called " "to allocate SP:%p\n", sp)); *error = 0; return sp; } /* no SP found */ if (ip6_def_policy.policy != IPSEC_POLICY_DISCARD && ip6_def_policy.policy != IPSEC_POLICY_NONE) { ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", ip6_def_policy.policy, IPSEC_POLICY_NONE)); ip6_def_policy.policy = IPSEC_POLICY_NONE; } ip6_def_policy.refcnt++; *error = 0; return &ip6_def_policy; } #endif /* INET6 */ /* * set IP address into spidx from mbuf. * When Forwarding packet and ICMP echo reply, this function is used. * * IN: get the followings from mbuf. * protocol family, src, dst, next protocol * OUT: * 0: success. * other: failure, and set errno. */ int ipsec_setspidx_mbuf(spidx, dir, family, m, needport) struct secpolicyindex *spidx; u_int dir, family; struct mbuf *m; int needport; { int error; /* sanity check */ if (spidx == NULL || m == NULL) panic("ipsec_setspidx_mbuf: NULL pointer was passed.\n"); bzero(spidx, sizeof(*spidx)); error = ipsec_setspidx(m, spidx, needport); if (error) goto bad; spidx->dir = dir; return 0; bad: /* XXX initialize */ bzero(spidx, sizeof(*spidx)); return EINVAL; } static int ipsec4_setspidx_inpcb(m, pcb) struct mbuf *m; struct inpcb *pcb; { struct secpolicyindex *spidx; int error; /* sanity check */ if (pcb == NULL) panic("ipsec4_setspidx_inpcb: no PCB found.\n"); if (pcb->inp_sp == NULL) panic("ipsec4_setspidx_inpcb: no inp_sp found.\n"); if (pcb->inp_sp->sp_out == NULL || pcb->inp_sp->sp_in == NULL) panic("ipsec4_setspidx_inpcb: no sp_in/out found.\n"); bzero(&pcb->inp_sp->sp_in->spidx, sizeof(*spidx)); bzero(&pcb->inp_sp->sp_out->spidx, sizeof(*spidx)); spidx = &pcb->inp_sp->sp_in->spidx; error = ipsec_setspidx(m, spidx, 1); if (error) goto bad; spidx->dir = IPSEC_DIR_INBOUND; spidx = &pcb->inp_sp->sp_out->spidx; error = ipsec_setspidx(m, spidx, 1); if (error) 
goto bad; spidx->dir = IPSEC_DIR_OUTBOUND; return 0; bad: bzero(&pcb->inp_sp->sp_in->spidx, sizeof(*spidx)); bzero(&pcb->inp_sp->sp_out->spidx, sizeof(*spidx)); return error; } #ifdef INET6 static int ipsec6_setspidx_in6pcb(m, pcb) struct mbuf *m; struct in6pcb *pcb; { struct secpolicyindex *spidx; int error; /* sanity check */ if (pcb == NULL) panic("ipsec6_setspidx_in6pcb: no PCB found.\n"); if (pcb->in6p_sp == NULL) panic("ipsec6_setspidx_in6pcb: no in6p_sp found.\n"); if (pcb->in6p_sp->sp_out == NULL || pcb->in6p_sp->sp_in == NULL) panic("ipsec6_setspidx_in6pcb: no sp_in/out found.\n"); bzero(&pcb->in6p_sp->sp_in->spidx, sizeof(*spidx)); bzero(&pcb->in6p_sp->sp_out->spidx, sizeof(*spidx)); spidx = &pcb->in6p_sp->sp_in->spidx; error = ipsec_setspidx(m, spidx, 1); if (error) goto bad; spidx->dir = IPSEC_DIR_INBOUND; spidx = &pcb->in6p_sp->sp_out->spidx; error = ipsec_setspidx(m, spidx, 1); if (error) goto bad; spidx->dir = IPSEC_DIR_OUTBOUND; return 0; bad: bzero(&pcb->in6p_sp->sp_in->spidx, sizeof(*spidx)); bzero(&pcb->in6p_sp->sp_out->spidx, sizeof(*spidx)); return error; } #endif /* * configure security policy index (src/dst/proto/sport/dport) * by looking at the content of mbuf. * the caller is responsible for error recovery (like clearing up spidx). */ static int ipsec_setspidx(m, spidx, needport) struct mbuf *m; struct secpolicyindex *spidx; int needport; { struct ip *ip = NULL; struct ip ipbuf; u_int v; struct mbuf *n; int len; int error; if (m == NULL) panic("ipsec_setspidx: m == 0 passed.\n"); /* * validate m->m_pkthdr.len. we see incorrect length if we * mistakenly call this function with inconsistent mbuf chain * (like 4.4BSD tcp/udp processing). XXX should we panic here? 
*/ len = 0; for (n = m; n; n = n->m_next) len += n->m_len; if (m->m_pkthdr.len != len) { KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_setspidx: " "total of m_len(%d) != pkthdr.len(%d), " "ignored.\n", len, m->m_pkthdr.len)); return EINVAL; } if (m->m_pkthdr.len < sizeof(struct ip)) { KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_setspidx: " "pkthdr.len(%d) < sizeof(struct ip), ignored.\n", m->m_pkthdr.len)); return EINVAL; } if (m->m_len >= sizeof(*ip)) ip = mtod(m, struct ip *); else { m_copydata(m, 0, sizeof(ipbuf), (caddr_t)&ipbuf); ip = &ipbuf; } #ifdef _IP_VHL v = _IP_VHL_V(ip->ip_vhl); #else v = ip->ip_v; #endif switch (v) { case 4: error = ipsec4_setspidx_ipaddr(m, spidx); if (error) return error; ipsec4_get_ulp(m, spidx, needport); return 0; #ifdef INET6 case 6: if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) { KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_setspidx: " "pkthdr.len(%d) < sizeof(struct ip6_hdr), " "ignored.\n", m->m_pkthdr.len)); return EINVAL; } error = ipsec6_setspidx_ipaddr(m, spidx); if (error) return error; ipsec6_get_ulp(m, spidx, needport); return 0; #endif default: KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_setspidx: " "unknown IP version %u, ignored.\n", v)); return EINVAL; } } static void ipsec4_get_ulp(m, spidx, needport) struct mbuf *m; struct secpolicyindex *spidx; int needport; { struct ip ip; struct ip6_ext ip6e; u_int8_t nxt; int off; struct tcphdr th; struct udphdr uh; /* sanity check */ if (m == NULL) panic("ipsec4_get_ulp: NULL pointer was passed.\n"); if (m->m_pkthdr.len < sizeof(ip)) panic("ipsec4_get_ulp: too short\n"); /* set default */ spidx->ul_proto = IPSEC_ULPROTO_ANY; ((struct sockaddr_in *)&spidx->src)->sin_port = IPSEC_PORT_ANY; ((struct sockaddr_in *)&spidx->dst)->sin_port = IPSEC_PORT_ANY; m_copydata(m, 0, sizeof(ip), (caddr_t)&ip); /* ip_input() flips it into host endian XXX need more checking */ if (ip.ip_off & (IP_MF | IP_OFFMASK)) return; nxt = ip.ip_p; #ifdef _IP_VHL off = _IP_VHL_HL(ip->ip_vhl) << 2; #else off = 
ip.ip_hl << 2; #endif while (off < m->m_pkthdr.len) { switch (nxt) { case IPPROTO_TCP: spidx->ul_proto = nxt; if (!needport) return; if (off + sizeof(struct tcphdr) > m->m_pkthdr.len) return; m_copydata(m, off, sizeof(th), (caddr_t)&th); ((struct sockaddr_in *)&spidx->src)->sin_port = th.th_sport; ((struct sockaddr_in *)&spidx->dst)->sin_port = th.th_dport; return; case IPPROTO_UDP: spidx->ul_proto = nxt; if (!needport) return; if (off + sizeof(struct udphdr) > m->m_pkthdr.len) return; m_copydata(m, off, sizeof(uh), (caddr_t)&uh); ((struct sockaddr_in *)&spidx->src)->sin_port = uh.uh_sport; ((struct sockaddr_in *)&spidx->dst)->sin_port = uh.uh_dport; return; case IPPROTO_AH: if (m->m_pkthdr.len > off + sizeof(ip6e)) return; m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e); off += (ip6e.ip6e_len + 2) << 2; nxt = ip6e.ip6e_nxt; break; case IPPROTO_ICMP: default: /* XXX intermediate headers??? */ spidx->ul_proto = nxt; return; } } } /* assumes that m is sane */ static int ipsec4_setspidx_ipaddr(m, spidx) struct mbuf *m; struct secpolicyindex *spidx; { struct ip *ip = NULL; struct ip ipbuf; struct sockaddr_in *sin; if (m->m_len >= sizeof(*ip)) ip = mtod(m, struct ip *); else { m_copydata(m, 0, sizeof(ipbuf), (caddr_t)&ipbuf); ip = &ipbuf; } sin = (struct sockaddr_in *)&spidx->src; bzero(sin, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(struct sockaddr_in); bcopy(&ip->ip_src, &sin->sin_addr, sizeof(ip->ip_src)); spidx->prefs = sizeof(struct in_addr) << 3; sin = (struct sockaddr_in *)&spidx->dst; bzero(sin, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(struct sockaddr_in); bcopy(&ip->ip_dst, &sin->sin_addr, sizeof(ip->ip_dst)); spidx->prefd = sizeof(struct in_addr) << 3; return 0; } #ifdef INET6 static void ipsec6_get_ulp(m, spidx, needport) struct mbuf *m; struct secpolicyindex *spidx; int needport; { int off, nxt; struct tcphdr th; struct udphdr uh; /* sanity check */ if (m == NULL) panic("ipsec6_get_ulp: NULL pointer was 
passed.\n"); KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec6_get_ulp:\n"); kdebug_mbuf(m)); /* set default */ spidx->ul_proto = IPSEC_ULPROTO_ANY; ((struct sockaddr_in6 *)&spidx->src)->sin6_port = IPSEC_PORT_ANY; ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = IPSEC_PORT_ANY; nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off < 0 || m->m_pkthdr.len < off) return; switch (nxt) { case IPPROTO_TCP: spidx->ul_proto = nxt; if (!needport) break; if (off + sizeof(struct tcphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(th), (caddr_t)&th); ((struct sockaddr_in6 *)&spidx->src)->sin6_port = th.th_sport; ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = th.th_dport; break; case IPPROTO_UDP: spidx->ul_proto = nxt; if (!needport) break; if (off + sizeof(struct udphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(uh), (caddr_t)&uh); ((struct sockaddr_in6 *)&spidx->src)->sin6_port = uh.uh_sport; ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = uh.uh_dport; break; case IPPROTO_ICMPV6: default: /* XXX intermediate headers??? 
*/ spidx->ul_proto = nxt; break; } } /* assumes that m is sane */ static int ipsec6_setspidx_ipaddr(m, spidx) struct mbuf *m; struct secpolicyindex *spidx; { struct ip6_hdr *ip6 = NULL; struct ip6_hdr ip6buf; struct sockaddr_in6 *sin6; if (m->m_len >= sizeof(*ip6)) ip6 = mtod(m, struct ip6_hdr *); else { m_copydata(m, 0, sizeof(ip6buf), (caddr_t)&ip6buf); ip6 = &ip6buf; } sin6 = (struct sockaddr_in6 *)&spidx->src; bzero(sin6, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(struct sockaddr_in6); bcopy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(ip6->ip6_src)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); } spidx->prefs = sizeof(struct in6_addr) << 3; sin6 = (struct sockaddr_in6 *)&spidx->dst; bzero(sin6, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(struct sockaddr_in6); bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(ip6->ip6_dst)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); } spidx->prefd = sizeof(struct in6_addr) << 3; return 0; } #endif static struct inpcbpolicy * ipsec_newpcbpolicy() { struct inpcbpolicy *p; p = (struct inpcbpolicy *)malloc(sizeof(*p), M_SECA, M_NOWAIT); return p; } static void ipsec_delpcbpolicy(p) struct inpcbpolicy *p; { free(p, M_SECA); } /* initialize policy in PCB */ int ipsec_init_policy(so, pcb_sp) struct socket *so; struct inpcbpolicy **pcb_sp; { struct inpcbpolicy *new; /* sanity check. 
*/ if (so == NULL || pcb_sp == NULL) panic("ipsec_init_policy: NULL pointer was passed.\n"); new = ipsec_newpcbpolicy(); if (new == NULL) { ipseclog((LOG_DEBUG, "ipsec_init_policy: No more memory.\n")); return ENOBUFS; } bzero(new, sizeof(*new)); if (so->so_cred != 0 && so->so_cred->cr_uid == 0) new->priv = 1; else new->priv = 0; if ((new->sp_in = key_newsp()) == NULL) { ipsec_delpcbpolicy(new); return ENOBUFS; } new->sp_in->state = IPSEC_SPSTATE_ALIVE; new->sp_in->policy = IPSEC_POLICY_ENTRUST; if ((new->sp_out = key_newsp()) == NULL) { key_freesp(new->sp_in); ipsec_delpcbpolicy(new); return ENOBUFS; } new->sp_out->state = IPSEC_SPSTATE_ALIVE; new->sp_out->policy = IPSEC_POLICY_ENTRUST; *pcb_sp = new; return 0; } /* copy old ipsec policy into new */ int ipsec_copy_policy(old, new) struct inpcbpolicy *old, *new; { struct secpolicy *sp; sp = ipsec_deepcopy_policy(old->sp_in); if (sp) { key_freesp(new->sp_in); new->sp_in = sp; } else return ENOBUFS; sp = ipsec_deepcopy_policy(old->sp_out); if (sp) { key_freesp(new->sp_out); new->sp_out = sp; } else return ENOBUFS; new->priv = old->priv; return 0; } /* deep-copy a policy in PCB */ static struct secpolicy * ipsec_deepcopy_policy(src) struct secpolicy *src; { struct ipsecrequest *newchain = NULL; struct ipsecrequest *p; struct ipsecrequest **q; struct ipsecrequest *r; struct secpolicy *dst; dst = key_newsp(); if (src == NULL || dst == NULL) return NULL; /* * deep-copy IPsec request chain. This is required since struct * ipsecrequest is not reference counted. 
*/ q = &newchain; for (p = src->req; p; p = p->next) { *q = (struct ipsecrequest *)malloc(sizeof(struct ipsecrequest), M_SECA, M_NOWAIT); if (*q == NULL) goto fail; bzero(*q, sizeof(**q)); (*q)->next = NULL; (*q)->saidx.proto = p->saidx.proto; (*q)->saidx.mode = p->saidx.mode; (*q)->level = p->level; (*q)->saidx.reqid = p->saidx.reqid; bcopy(&p->saidx.src, &(*q)->saidx.src, sizeof((*q)->saidx.src)); bcopy(&p->saidx.dst, &(*q)->saidx.dst, sizeof((*q)->saidx.dst)); (*q)->sav = NULL; (*q)->sp = dst; q = &((*q)->next); } dst->req = newchain; dst->state = src->state; dst->policy = src->policy; /* do not touch the refcnt fields */ return dst; fail: for (p = newchain; p; p = r) { r = p->next; free(p, M_SECA); p = NULL; } return NULL; } /* set policy and ipsec request if present. */ static int ipsec_set_policy(pcb_sp, optname, request, len, priv) struct secpolicy **pcb_sp; int optname; caddr_t request; size_t len; int priv; { struct sadb_x_policy *xpl; struct secpolicy *newsp = NULL; int error; /* sanity check. */ if (pcb_sp == NULL || *pcb_sp == NULL || request == NULL) return EINVAL; if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)request; KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_set_policy: passed policy\n"); kdebug_sadb_x_policy((struct sadb_ext *)xpl)); /* check policy type */ /* ipsec_set_policy() accepts IPSEC, ENTRUST and BYPASS. 
*/ if (xpl->sadb_x_policy_type == IPSEC_POLICY_DISCARD || xpl->sadb_x_policy_type == IPSEC_POLICY_NONE) return EINVAL; /* check privileged socket */ if (priv == 0 && xpl->sadb_x_policy_type == IPSEC_POLICY_BYPASS) return EACCES; /* allocation new SP entry */ if ((newsp = key_msg2sp(xpl, len, &error)) == NULL) return error; newsp->state = IPSEC_SPSTATE_ALIVE; /* clear old SP and set new SP */ key_freesp(*pcb_sp); *pcb_sp = newsp; KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_set_policy: new policy\n"); kdebug_secpolicy(newsp)); return 0; } static int ipsec_get_policy(pcb_sp, mp) struct secpolicy *pcb_sp; struct mbuf **mp; { /* sanity check. */ if (pcb_sp == NULL || mp == NULL) return EINVAL; *mp = key_sp2msg(pcb_sp); if (!*mp) { ipseclog((LOG_DEBUG, "ipsec_get_policy: No more memory.\n")); return ENOBUFS; } (*mp)->m_type = MT_DATA; KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_get_policy:\n"); kdebug_mbuf(*mp)); return 0; } int ipsec4_set_policy(inp, optname, request, len, priv) struct inpcb *inp; int optname; caddr_t request; size_t len; int priv; { struct sadb_x_policy *xpl; struct secpolicy **pcb_sp; /* sanity check. */ if (inp == NULL || request == NULL) return EINVAL; if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)request; /* select direction */ switch (xpl->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: pcb_sp = &inp->inp_sp->sp_in; break; case IPSEC_DIR_OUTBOUND: pcb_sp = &inp->inp_sp->sp_out; break; default: ipseclog((LOG_ERR, "ipsec4_set_policy: invalid direction=%u\n", xpl->sadb_x_policy_dir)); return EINVAL; } return ipsec_set_policy(pcb_sp, optname, request, len, priv); } int ipsec4_get_policy(inp, request, len, mp) struct inpcb *inp; caddr_t request; size_t len; struct mbuf **mp; { struct sadb_x_policy *xpl; struct secpolicy *pcb_sp; /* sanity check. 
*/ if (inp == NULL || request == NULL || mp == NULL) return EINVAL; if (inp->inp_sp == NULL) panic("policy in PCB is NULL\n"); if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)request; /* select direction */ switch (xpl->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: pcb_sp = inp->inp_sp->sp_in; break; case IPSEC_DIR_OUTBOUND: pcb_sp = inp->inp_sp->sp_out; break; default: ipseclog((LOG_ERR, "ipsec4_set_policy: invalid direction=%u\n", xpl->sadb_x_policy_dir)); return EINVAL; } return ipsec_get_policy(pcb_sp, mp); } /* delete policy in PCB */ int ipsec4_delete_pcbpolicy(inp) struct inpcb *inp; { /* sanity check. */ if (inp == NULL) panic("ipsec4_delete_pcbpolicy: NULL pointer was passed.\n"); if (inp->inp_sp == NULL) return 0; if (inp->inp_sp->sp_in != NULL) { key_freesp(inp->inp_sp->sp_in); inp->inp_sp->sp_in = NULL; } if (inp->inp_sp->sp_out != NULL) { key_freesp(inp->inp_sp->sp_out); inp->inp_sp->sp_out = NULL; } ipsec_delpcbpolicy(inp->inp_sp); inp->inp_sp = NULL; return 0; } #ifdef INET6 int ipsec6_set_policy(in6p, optname, request, len, priv) struct in6pcb *in6p; int optname; caddr_t request; size_t len; int priv; { struct sadb_x_policy *xpl; struct secpolicy **pcb_sp; /* sanity check. */ if (in6p == NULL || request == NULL) return EINVAL; if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)request; /* select direction */ switch (xpl->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: pcb_sp = &in6p->in6p_sp->sp_in; break; case IPSEC_DIR_OUTBOUND: pcb_sp = &in6p->in6p_sp->sp_out; break; default: ipseclog((LOG_ERR, "ipsec6_set_policy: invalid direction=%u\n", xpl->sadb_x_policy_dir)); return EINVAL; } return ipsec_set_policy(pcb_sp, optname, request, len, priv); } int ipsec6_get_policy(in6p, request, len, mp) struct in6pcb *in6p; caddr_t request; size_t len; struct mbuf **mp; { struct sadb_x_policy *xpl; struct secpolicy *pcb_sp; /* sanity check. 
*/ if (in6p == NULL || request == NULL || mp == NULL) return EINVAL; if (in6p->in6p_sp == NULL) panic("policy in PCB is NULL\n"); if (len < sizeof(*xpl)) return EINVAL; xpl = (struct sadb_x_policy *)request; /* select direction */ switch (xpl->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: pcb_sp = in6p->in6p_sp->sp_in; break; case IPSEC_DIR_OUTBOUND: pcb_sp = in6p->in6p_sp->sp_out; break; default: ipseclog((LOG_ERR, "ipsec6_set_policy: invalid direction=%u\n", xpl->sadb_x_policy_dir)); return EINVAL; } return ipsec_get_policy(pcb_sp, mp); } int ipsec6_delete_pcbpolicy(in6p) struct in6pcb *in6p; { /* sanity check. */ if (in6p == NULL) panic("ipsec6_delete_pcbpolicy: NULL pointer was passed.\n"); if (in6p->in6p_sp == NULL) return 0; if (in6p->in6p_sp->sp_in != NULL) { key_freesp(in6p->in6p_sp->sp_in); in6p->in6p_sp->sp_in = NULL; } if (in6p->in6p_sp->sp_out != NULL) { key_freesp(in6p->in6p_sp->sp_out); in6p->in6p_sp->sp_out = NULL; } ipsec_delpcbpolicy(in6p->in6p_sp); in6p->in6p_sp = NULL; return 0; } #endif /* * return current level. * Either IPSEC_LEVEL_USE or IPSEC_LEVEL_REQUIRE are always returned. */ u_int ipsec_get_reqlevel(isr) struct ipsecrequest *isr; { u_int level = 0; u_int esp_trans_deflev, esp_net_deflev, ah_trans_deflev, ah_net_deflev; /* sanity check */ if (isr == NULL || isr->sp == NULL) panic("ipsec_get_reqlevel: NULL pointer is passed.\n"); if (((struct sockaddr *)&isr->sp->spidx.src)->sa_family != ((struct sockaddr *)&isr->sp->spidx.dst)->sa_family) panic("ipsec_get_reqlevel: family mismatched.\n"); /* XXX note that we have ipseclog() expanded here - code sync issue */ #define IPSEC_CHECK_DEFAULT(lev) \ (((lev) != IPSEC_LEVEL_USE && (lev) != IPSEC_LEVEL_REQUIRE \ && (lev) != IPSEC_LEVEL_UNIQUE) \ ? (ipsec_debug \ ? 
log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\ (lev), IPSEC_LEVEL_REQUIRE) \ : 0), \ (lev) = IPSEC_LEVEL_REQUIRE, \ (lev) \ : (lev)) /* set default level */ switch (((struct sockaddr *)&isr->sp->spidx.src)->sa_family) { #ifdef INET case AF_INET: esp_trans_deflev = IPSEC_CHECK_DEFAULT(ip4_esp_trans_deflev); esp_net_deflev = IPSEC_CHECK_DEFAULT(ip4_esp_net_deflev); ah_trans_deflev = IPSEC_CHECK_DEFAULT(ip4_ah_trans_deflev); ah_net_deflev = IPSEC_CHECK_DEFAULT(ip4_ah_net_deflev); break; #endif #ifdef INET6 case AF_INET6: esp_trans_deflev = IPSEC_CHECK_DEFAULT(ip6_esp_trans_deflev); esp_net_deflev = IPSEC_CHECK_DEFAULT(ip6_esp_net_deflev); ah_trans_deflev = IPSEC_CHECK_DEFAULT(ip6_ah_trans_deflev); ah_net_deflev = IPSEC_CHECK_DEFAULT(ip6_ah_net_deflev); break; #endif /* INET6 */ default: panic("key_get_reqlevel: Unknown family. %d\n", ((struct sockaddr *)&isr->sp->spidx.src)->sa_family); } #undef IPSEC_CHECK_DEFAULT /* set level */ switch (isr->level) { case IPSEC_LEVEL_DEFAULT: switch (isr->saidx.proto) { case IPPROTO_ESP: if (isr->saidx.mode == IPSEC_MODE_TUNNEL) level = esp_net_deflev; else level = esp_trans_deflev; break; case IPPROTO_AH: if (isr->saidx.mode == IPSEC_MODE_TUNNEL) level = ah_net_deflev; else level = ah_trans_deflev; case IPPROTO_IPCOMP: /* * we don't really care, as IPcomp document says that * we shouldn't compress small packets */ level = IPSEC_LEVEL_USE; break; default: panic("ipsec_get_reqlevel: " "Illegal protocol defined %u\n", isr->saidx.proto); } break; case IPSEC_LEVEL_USE: case IPSEC_LEVEL_REQUIRE: level = isr->level; break; case IPSEC_LEVEL_UNIQUE: level = IPSEC_LEVEL_REQUIRE; break; default: panic("ipsec_get_reqlevel: Illegal IPsec level %u\n", isr->level); } return level; } /* * Check AH/ESP integrity. 
* OUT: * 0: valid * 1: invalid */ static int ipsec_in_reject(sp, m) struct secpolicy *sp; struct mbuf *m; { struct ipsecrequest *isr; u_int level; int need_auth, need_conf, need_icv; KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec_in_reject: using SP\n"); kdebug_secpolicy(sp)); /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: return 1; case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: return 0; case IPSEC_POLICY_IPSEC: break; case IPSEC_POLICY_ENTRUST: default: panic("ipsec_hdrsiz: Invalid policy found. %d\n", sp->policy); } need_auth = 0; need_conf = 0; need_icv = 0; /* XXX should compare policy against ipsec header history */ for (isr = sp->req; isr != NULL; isr = isr->next) { /* get current level */ level = ipsec_get_reqlevel(isr); switch (isr->saidx.proto) { case IPPROTO_ESP: if (level == IPSEC_LEVEL_REQUIRE) { need_conf++; if (isr->sav != NULL && isr->sav->flags == SADB_X_EXT_NONE && isr->sav->alg_auth != SADB_AALG_NONE) need_icv++; } break; case IPPROTO_AH: if (level == IPSEC_LEVEL_REQUIRE) { need_auth++; need_icv++; } break; case IPPROTO_IPCOMP: /* * we don't really care, as IPcomp document says that * we shouldn't compress small packets, IPComp policy * should always be treated as being in "use" level. */ break; } } KEYDEBUG(KEYDEBUG_IPSEC_DUMP, printf("ipsec_in_reject: auth:%d conf:%d icv:%d m_flags:%x\n", need_auth, need_conf, need_icv, m->m_flags)); if ((need_conf && !(m->m_flags & M_DECRYPTED)) || (!need_auth && need_icv && !(m->m_flags & M_AUTHIPDGM)) || (need_auth && !(m->m_flags & M_AUTHIPHDR))) return 1; return 0; } /* * Check AH/ESP integrity. * This function is called from tcp_input(), udp_input(), * and {ah,esp}4_input for tunnel mode */ int ipsec4_in_reject_so(m, so) struct mbuf *m; struct socket *so; { struct secpolicy *sp = NULL; int error; int result; /* sanity check */ if (m == NULL) return 0; /* XXX should be panic ? */ /* get SP for this packet. 
* When we are called from ip_forward(), we call * ipsec4_getpolicybyaddr() with IP_FORWARDING flag. */ if (so == NULL) sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error); else sp = ipsec4_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error); if (sp == NULL) return 0; /* XXX should be panic ? * -> No, there may be error. */ result = ipsec_in_reject(sp, m); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec4_in_reject_so call free SP:%p\n", sp)); key_freesp(sp); return result; } int ipsec4_in_reject(m, inp) struct mbuf *m; struct inpcb *inp; { if (inp == NULL) return ipsec4_in_reject_so(m, NULL); if (inp->inp_socket) return ipsec4_in_reject_so(m, inp->inp_socket); else panic("ipsec4_in_reject: invalid inpcb/socket"); } #ifdef INET6 /* * Check AH/ESP integrity. * This function is called from tcp6_input(), udp6_input(), * and {ah,esp}6_input for tunnel mode */ int ipsec6_in_reject_so(m, so) struct mbuf *m; struct socket *so; { struct secpolicy *sp = NULL; int error; int result; /* sanity check */ if (m == NULL) return 0; /* XXX should be panic ? */ /* get SP for this packet. * When we are called from ip_forward(), we call * ipsec6_getpolicybyaddr() with IP_FORWARDING flag. */ if (so == NULL) sp = ipsec6_getpolicybyaddr(m, IPSEC_DIR_INBOUND, IP_FORWARDING, &error); else sp = ipsec6_getpolicybysock(m, IPSEC_DIR_INBOUND, so, &error); if (sp == NULL) return 0; /* XXX should be panic ? */ result = ipsec_in_reject(sp, m); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec6_in_reject_so call free SP:%p\n", sp)); key_freesp(sp); return result; } int ipsec6_in_reject(m, in6p) struct mbuf *m; struct in6pcb *in6p; { if (in6p == NULL) return ipsec6_in_reject_so(m, NULL); if (in6p->in6p_socket) return ipsec6_in_reject_so(m, in6p->in6p_socket); else panic("ipsec6_in_reject: invalid in6p/socket"); } #endif /* * compute the byte size to be occupied by IPsec header. * in case it is tunneled, it includes the size of outer IP header. 
* NOTE: SP passed is free in this function. */ static size_t ipsec_hdrsiz(sp) struct secpolicy *sp; { struct ipsecrequest *isr; size_t siz, clen; KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec_hdrsiz: using SP\n"); kdebug_secpolicy(sp)); /* check policy */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: return 0; case IPSEC_POLICY_IPSEC: break; case IPSEC_POLICY_ENTRUST: default: panic("ipsec_hdrsiz: Invalid policy found. %d\n", sp->policy); } siz = 0; for (isr = sp->req; isr != NULL; isr = isr->next) { clen = 0; switch (isr->saidx.proto) { case IPPROTO_ESP: #ifdef IPSEC_ESP clen = esp_hdrsiz(isr); #else clen = 0; /* XXX */ #endif break; case IPPROTO_AH: clen = ah_hdrsiz(isr); break; case IPPROTO_IPCOMP: clen = sizeof(struct ipcomp); break; } if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { switch (((struct sockaddr *)&isr->saidx.dst)->sa_family) { case AF_INET: clen += sizeof(struct ip); break; #ifdef INET6 case AF_INET6: clen += sizeof(struct ip6_hdr); break; #endif default: ipseclog((LOG_ERR, "ipsec_hdrsiz: " "unknown AF %d in IPsec tunnel SA\n", ((struct sockaddr *)&isr->saidx.dst)->sa_family)); break; } } siz += clen; } return siz; } /* This function is called from ip_forward() and ipsec4_hdrsize_tcp(). */ size_t ipsec4_hdrsiz(m, dir, inp) struct mbuf *m; u_int dir; struct inpcb *inp; { struct secpolicy *sp = NULL; int error; size_t size; /* sanity check */ if (m == NULL) return 0; /* XXX should be panic ? */ if (inp != NULL && inp->inp_socket == NULL) panic("ipsec4_hdrsize: why is socket NULL but there is PCB."); /* get SP for this packet. * When we are called from ip_forward(), we call * ipsec4_getpolicybyaddr() with IP_FORWARDING flag. */ if (inp == NULL) sp = ipsec4_getpolicybyaddr(m, dir, IP_FORWARDING, &error); else sp = ipsec4_getpolicybysock(m, dir, inp->inp_socket, &error); if (sp == NULL) return 0; /* XXX should be panic ? 
*/ size = ipsec_hdrsiz(sp); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec4_hdrsiz call free SP:%p\n", sp)); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec4_hdrsiz: size:%lu.\n", (unsigned long)size)); key_freesp(sp); return size; } #ifdef INET6 /* This function is called from ipsec6_hdrsize_tcp(), * and maybe from ip6_forward.() */ size_t ipsec6_hdrsiz(m, dir, in6p) struct mbuf *m; u_int dir; struct in6pcb *in6p; { struct secpolicy *sp = NULL; int error; size_t size; /* sanity check */ if (m == NULL) return 0; /* XXX shoud be panic ? */ if (in6p != NULL && in6p->in6p_socket == NULL) panic("ipsec6_hdrsize: why is socket NULL but there is PCB."); /* get SP for this packet */ /* XXX Is it right to call with IP_FORWARDING. */ if (in6p == NULL) sp = ipsec6_getpolicybyaddr(m, dir, IP_FORWARDING, &error); else sp = ipsec6_getpolicybysock(m, dir, in6p->in6p_socket, &error); if (sp == NULL) return 0; size = ipsec_hdrsiz(sp); KEYDEBUG(KEYDEBUG_IPSEC_STAMP, printf("DP ipsec6_hdrsiz call free SP:%p\n", sp)); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_hdrsiz: size:%lu.\n", (unsigned long)size)); key_freesp(sp); return size; } #endif /* INET6 */ #ifdef INET /* * encapsulate for ipsec tunnel. * ip->ip_src must be fixed later on. */ static int ipsec4_encapsulate(m, sav) struct mbuf *m; struct secasvar *sav; { struct ip *oip; struct ip *ip; size_t hlen; size_t plen; /* can't tunnel between different AFs */ if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family || ((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET) { m_freem(m); return EINVAL; } #if 0 /* XXX if the dst is myself, perform nothing. 
*/ if (key_ismyaddr((struct sockaddr *)&sav->sah->saidx.dst)) { m_freem(m); return EINVAL; } #endif if (m->m_len < sizeof(*ip)) panic("ipsec4_encapsulate: assumption failed (first mbuf length)"); ip = mtod(m, struct ip *); #ifdef _IP_VHL hlen = _IP_VHL_HL(ip->ip_vhl) << 2; #else hlen = ip->ip_hl << 2; #endif if (m->m_len != hlen) panic("ipsec4_encapsulate: assumption failed (first mbuf length)"); /* generate header checksum */ ip->ip_sum = 0; #ifdef _IP_VHL if (ip->ip_vhl == IP_VHL_BORING) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(m, hlen); #else ip->ip_sum = in_cksum(m, hlen); #endif plen = m->m_pkthdr.len; /* * grow the mbuf to accomodate the new IPv4 header. * NOTE: IPv4 options will never be copied. */ if (M_LEADINGSPACE(m->m_next) < hlen) { struct mbuf *n; MGET(n, M_DONTWAIT, MT_DATA); if (!n) { m_freem(m); return ENOBUFS; } n->m_len = hlen; n->m_next = m->m_next; m->m_next = n; m->m_pkthdr.len += hlen; oip = mtod(n, struct ip *); } else { m->m_next->m_len += hlen; m->m_next->m_data -= hlen; m->m_pkthdr.len += hlen; oip = mtod(m->m_next, struct ip *); } ip = mtod(m, struct ip *); ovbcopy((caddr_t)ip, (caddr_t)oip, hlen); m->m_len = sizeof(struct ip); m->m_pkthdr.len -= (hlen - sizeof(struct ip)); /* construct new IPv4 header. see RFC 2401 5.1.2.1 */ /* ECN consideration. 
*/ ip_ecn_ingress(ip4_ipsec_ecn, &ip->ip_tos, &oip->ip_tos); #ifdef _IP_VHL ip->ip_vhl = IP_MAKE_VHL(IPVERSION, sizeof(struct ip) >> 2); #else ip->ip_hl = sizeof(struct ip) >> 2; #endif ip->ip_off &= htons(~IP_OFFMASK); ip->ip_off &= htons(~IP_MF); switch (ip4_ipsec_dfbit) { case 0: /* clear DF bit */ ip->ip_off &= htons(~IP_DF); break; case 1: /* set DF bit */ ip->ip_off |= htons(IP_DF); break; default: /* copy DF bit */ break; } ip->ip_p = IPPROTO_IPIP; if (plen + sizeof(struct ip) < IP_MAXPACKET) ip->ip_len = htons(plen + sizeof(struct ip)); else { ipseclog((LOG_ERR, "IPv4 ipsec: size exceeds limit: " "leave ip_len as is (invalid packet)\n")); } #ifdef RANDOM_IP_ID ip->ip_id = ip_randomid(); #else ip->ip_id = htons(ip_id++); #endif bcopy(&((struct sockaddr_in *)&sav->sah->saidx.src)->sin_addr, &ip->ip_src, sizeof(ip->ip_src)); bcopy(&((struct sockaddr_in *)&sav->sah->saidx.dst)->sin_addr, &ip->ip_dst, sizeof(ip->ip_dst)); ip->ip_ttl = IPDEFTTL; /* XXX Should ip_src be updated later ? */ return 0; } #endif /* INET */ #ifdef INET6 static int ipsec6_encapsulate(m, sav) struct mbuf *m; struct secasvar *sav; { struct ip6_hdr *oip6; struct ip6_hdr *ip6; size_t plen; /* can't tunnel between different AFs */ if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family || ((struct sockaddr *)&sav->sah->saidx.src)->sa_family != AF_INET6) { m_freem(m); return EINVAL; } #if 0 /* XXX if the dst is myself, perform nothing. */ if (key_ismyaddr((struct sockaddr *)&sav->sah->saidx.dst)) { m_freem(m); return EINVAL; } #endif plen = m->m_pkthdr.len; /* * grow the mbuf to accomodate the new IPv6 header. 
*/ if (m->m_len != sizeof(struct ip6_hdr)) panic("ipsec6_encapsulate: assumption failed (first mbuf length)"); if (M_LEADINGSPACE(m->m_next) < sizeof(struct ip6_hdr)) { struct mbuf *n; MGET(n, M_DONTWAIT, MT_DATA); if (!n) { m_freem(m); return ENOBUFS; } n->m_len = sizeof(struct ip6_hdr); n->m_next = m->m_next; m->m_next = n; m->m_pkthdr.len += sizeof(struct ip6_hdr); oip6 = mtod(n, struct ip6_hdr *); } else { m->m_next->m_len += sizeof(struct ip6_hdr); m->m_next->m_data -= sizeof(struct ip6_hdr); m->m_pkthdr.len += sizeof(struct ip6_hdr); oip6 = mtod(m->m_next, struct ip6_hdr *); } ip6 = mtod(m, struct ip6_hdr *); ovbcopy((caddr_t)ip6, (caddr_t)oip6, sizeof(struct ip6_hdr)); /* Fake link-local scope-class addresses */ if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src)) oip6->ip6_src.s6_addr16[1] = 0; if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst)) oip6->ip6_dst.s6_addr16[1] = 0; /* construct new IPv6 header. see RFC 2401 5.1.2.2 */ /* ECN consideration. */ ip6_ecn_ingress(ip6_ipsec_ecn, &ip6->ip6_flow, &oip6->ip6_flow); if (plen < IPV6_MAXPACKET - sizeof(struct ip6_hdr)) ip6->ip6_plen = htons(plen); else { /* ip6->ip6_plen will be updated in ip6_output() */ } ip6->ip6_nxt = IPPROTO_IPV6; bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.src)->sin6_addr, &ip6->ip6_src, sizeof(ip6->ip6_src)); bcopy(&((struct sockaddr_in6 *)&sav->sah->saidx.dst)->sin6_addr, &ip6->ip6_dst, sizeof(ip6->ip6_dst)); ip6->ip6_hlim = IPV6_DEFHLIM; /* XXX Should ip6_src be updated later ? */ return 0; } #endif /* INET6 */ /* * Check the variable replay window. * ipsec_chkreplay() performs replay check before ICV verification. * ipsec_updatereplay() updates replay bitmap. This must be called after * ICV verification (it also performs replay check, which is usually done * beforehand). * 0 (zero) is returned if packet disallowed, 1 if packet permitted. * * based on RFC 2401. 
*/ int ipsec_chkreplay(seq, sav) u_int32_t seq; struct secasvar *sav; { const struct secreplay *replay; u_int32_t diff; int fr; u_int32_t wsizeb; /* constant: bits of window size */ int frlast; /* constant: last frame */ /* sanity check */ if (sav == NULL) panic("ipsec_chkreplay: NULL pointer was passed.\n"); replay = sav->replay; if (replay->wsize == 0) return 1; /* no need to check replay. */ /* constant */ frlast = replay->wsize - 1; wsizeb = replay->wsize << 3; /* sequence number of 0 is invalid */ if (seq == 0) return 0; /* first time is always okay */ if (replay->count == 0) return 1; if (seq > replay->lastseq) { /* larger sequences are okay */ return 1; } else { /* seq is equal or less than lastseq. */ diff = replay->lastseq - seq; /* over range to check, i.e. too old or wrapped */ if (diff >= wsizeb) return 0; fr = frlast - diff / 8; /* this packet already seen ? */ if ((replay->bitmap)[fr] & (1 << (diff % 8))) return 0; /* out of order but good */ return 1; } } /* * check replay counter whether to update or not. * OUT: 0: OK * 1: NG */ int ipsec_updatereplay(seq, sav) u_int32_t seq; struct secasvar *sav; { struct secreplay *replay; u_int32_t diff; int fr; u_int32_t wsizeb; /* constant: bits of window size */ int frlast; /* constant: last frame */ /* sanity check */ if (sav == NULL) panic("ipsec_chkreplay: NULL pointer was passed.\n"); replay = sav->replay; if (replay->wsize == 0) goto ok; /* no need to check replay. */ /* constant */ frlast = replay->wsize - 1; wsizeb = replay->wsize << 3; /* sequence number of 0 is invalid */ if (seq == 0) return 1; /* first time */ if (replay->count == 0) { replay->lastseq = seq; bzero(replay->bitmap, replay->wsize); (replay->bitmap)[frlast] = 1; goto ok; } if (seq > replay->lastseq) { /* seq is larger than lastseq. 
*/ diff = seq - replay->lastseq; /* new larger sequence number */ if (diff < wsizeb) { /* In window */ /* set bit for this packet */ vshiftl(replay->bitmap, diff, replay->wsize); (replay->bitmap)[frlast] |= 1; } else { /* this packet has a "way larger" */ bzero(replay->bitmap, replay->wsize); (replay->bitmap)[frlast] = 1; } replay->lastseq = seq; /* larger is good */ } else { /* seq is equal or less than lastseq. */ diff = replay->lastseq - seq; /* over range to check, i.e. too old or wrapped */ if (diff >= wsizeb) return 1; fr = frlast - diff / 8; /* this packet already seen ? */ if ((replay->bitmap)[fr] & (1 << (diff % 8))) return 1; /* mark as seen */ (replay->bitmap)[fr] |= (1 << (diff % 8)); /* out of order but good */ } ok: if (replay->count == ~0) { /* set overflow flag */ replay->overflow++; /* don't increment, no more packets accepted */ if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0) return 1; ipseclog((LOG_WARNING, "replay counter made %d cycle. %s\n", replay->overflow, ipsec_logsastr(sav))); } replay->count++; return 0; } /* * shift variable length buffer to left. * IN: bitmap: pointer to the buffer * nbit: the number of to shift. * wsize: buffer size (bytes). */ static void vshiftl(bitmap, nbit, wsize) unsigned char *bitmap; int nbit, wsize; { int s, j, i; unsigned char over; for (j = 0; j < nbit; j += 8) { s = (nbit - j < 8) ? 
(nbit - j): 8; bitmap[0] <<= s; for (i = 1; i < wsize; i++) { over = (bitmap[i] >> (8 - s)); bitmap[i] <<= s; bitmap[i-1] |= over; } } return; } const char * ipsec4_logpacketstr(ip, spi) struct ip *ip; u_int32_t spi; { static char buf[256]; char *p; u_int8_t *s, *d; s = (u_int8_t *)(&ip->ip_src); d = (u_int8_t *)(&ip->ip_dst); p = buf; snprintf(buf, sizeof(buf), "packet(SPI=%u ", (u_int32_t)ntohl(spi)); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), "src=%u.%u.%u.%u", s[0], s[1], s[2], s[3]); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), " dst=%u.%u.%u.%u", d[0], d[1], d[2], d[3]); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), ")"); return buf; } #ifdef INET6 const char * ipsec6_logpacketstr(ip6, spi) struct ip6_hdr *ip6; u_int32_t spi; { static char buf[256]; char *p; p = buf; snprintf(buf, sizeof(buf), "packet(SPI=%u ", (u_int32_t)ntohl(spi)); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), "src=%s", ip6_sprintf(&ip6->ip6_src)); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), " dst=%s", ip6_sprintf(&ip6->ip6_dst)); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), ")"); return buf; } #endif /* INET6 */ const char * ipsec_logsastr(sav) struct secasvar *sav; { static char buf[256]; char *p; struct secasindex *saidx = &sav->sah->saidx; /* validity check */ if (((struct sockaddr *)&sav->sah->saidx.src)->sa_family != ((struct sockaddr *)&sav->sah->saidx.dst)->sa_family) panic("ipsec_logsastr: family mismatched.\n"); p = buf; snprintf(buf, sizeof(buf), "SA(SPI=%u ", (u_int32_t)ntohl(sav->spi)); while (p && *p) p++; if (((struct sockaddr *)&saidx->src)->sa_family == AF_INET) { u_int8_t *s, *d; s = (u_int8_t *)&((struct sockaddr_in *)&saidx->src)->sin_addr; d = (u_int8_t *)&((struct sockaddr_in *)&saidx->dst)->sin_addr; snprintf(p, sizeof(buf) - (p - buf), "src=%d.%d.%d.%d dst=%d.%d.%d.%d", s[0], s[1], s[2], s[3], d[0], d[1], d[2], d[3]); } #ifdef INET6 else if (((struct sockaddr *)&saidx->src)->sa_family == 
AF_INET6) { snprintf(p, sizeof(buf) - (p - buf), "src=%s", ip6_sprintf(&((struct sockaddr_in6 *)&saidx->src)->sin6_addr)); while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), " dst=%s", ip6_sprintf(&((struct sockaddr_in6 *)&saidx->dst)->sin6_addr)); } #endif while (p && *p) p++; snprintf(p, sizeof(buf) - (p - buf), ")"); return buf; } void ipsec_dumpmbuf(m) struct mbuf *m; { int totlen; int i; u_char *p; totlen = 0; printf("---\n"); while (m) { p = mtod(m, u_char *); for (i = 0; i < m->m_len; i++) { printf("%02x ", p[i]); totlen++; if (totlen % 16 == 0) printf("\n"); } m = m->m_next; } if (totlen % 16 != 0) printf("\n"); printf("---\n"); } #ifdef INET /* * IPsec output logic for IPv4. */ int ipsec4_output(state, sp, flags) struct ipsec_output_state *state; struct secpolicy *sp; int flags; { struct ip *ip = NULL; struct ipsecrequest *isr = NULL; struct secasindex saidx; int s; int error; struct sockaddr_in *dst4; struct sockaddr_in *sin; if (!state) panic("state == NULL in ipsec4_output"); if (!state->m) panic("state->m == NULL in ipsec4_output"); if (!state->ro) panic("state->ro == NULL in ipsec4_output"); if (!state->dst) panic("state->dst == NULL in ipsec4_output"); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec4_output: applyed SP\n"); kdebug_secpolicy(sp)); for (isr = sp->req; isr != NULL; isr = isr->next) { #if 0 /* give up to check restriction of transport mode */ /* XXX but should be checked somewhere */ /* * some of the IPsec operation must be performed only in * originating case. 
*/ if (isr->saidx.mode == IPSEC_MODE_TRANSPORT && (flags & IP_FORWARDING)) continue; #endif /* make SA index for search proper SA */ ip = mtod(state->m, struct ip *); bcopy(&isr->saidx, &saidx, sizeof(saidx)); saidx.mode = isr->saidx.mode; saidx.reqid = isr->saidx.reqid; sin = (struct sockaddr_in *)&saidx.src; if (sin->sin_len == 0) { sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; bcopy(&ip->ip_src, &sin->sin_addr, sizeof(sin->sin_addr)); } sin = (struct sockaddr_in *)&saidx.dst; if (sin->sin_len == 0) { sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; bcopy(&ip->ip_dst, &sin->sin_addr, sizeof(sin->sin_addr)); } if ((error = key_checkrequest(isr, &saidx)) != 0) { /* * IPsec processing is required, but no SA found. * I assume that key_acquire() had been called * to get/establish the SA. Here I discard * this packet because it is responsibility for * upper layer to retransmit the packet. */ ipsecstat.out_nosa++; goto bad; } /* validity check */ if (isr->sav == NULL) { switch (ipsec_get_reqlevel(isr)) { case IPSEC_LEVEL_USE: continue; case IPSEC_LEVEL_REQUIRE: /* must be not reached here. */ panic("ipsec4_output: no SA found, but required."); } } /* * If there is no valid SA, we give up to process any * more. In such a case, the SA's status is changed * from DYING to DEAD after allocating. If a packet * send to the receiver by dead SA, the receiver can * not decode a packet because SA has been dead. */ if (isr->sav->state != SADB_SASTATE_MATURE && isr->sav->state != SADB_SASTATE_DYING) { ipsecstat.out_nosa++; error = EINVAL; goto bad; } /* * There may be the case that SA status will be changed when * we are refering to one. So calling splsoftnet(). */ s = splnet(); if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* * build IPsec tunnel. 
*/ /* XXX should be processed with other familiy */ if (((struct sockaddr *)&isr->sav->sah->saidx.src)->sa_family != AF_INET) { ipseclog((LOG_ERR, "ipsec4_output: " "family mismatched between inner and outer spi=%u\n", (u_int32_t)ntohl(isr->sav->spi))); splx(s); error = EAFNOSUPPORT; goto bad; } state->m = ipsec4_splithdr(state->m); if (!state->m) { splx(s); error = ENOMEM; goto bad; } error = ipsec4_encapsulate(state->m, isr->sav); splx(s); if (error) { state->m = NULL; goto bad; } ip = mtod(state->m, struct ip *); state->ro = &isr->sav->sah->sa_route; state->dst = (struct sockaddr *)&state->ro->ro_dst; dst4 = (struct sockaddr_in *)state->dst; if (state->ro->ro_rt && ((state->ro->ro_rt->rt_flags & RTF_UP) == 0 || dst4->sin_addr.s_addr != ip->ip_dst.s_addr)) { RTFREE(state->ro->ro_rt); state->ro->ro_rt = NULL; } if (state->ro->ro_rt == 0) { dst4->sin_family = AF_INET; dst4->sin_len = sizeof(*dst4); dst4->sin_addr = ip->ip_dst; rtalloc(state->ro); } if (state->ro->ro_rt == 0) { ipstat.ips_noroute++; error = EHOSTUNREACH; goto bad; } /* adjust state->dst if tunnel endpoint is offlink */ if (state->ro->ro_rt->rt_flags & RTF_GATEWAY) { state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway; dst4 = (struct sockaddr_in *)state->dst; } } else splx(s); state->m = ipsec4_splithdr(state->m); if (!state->m) { error = ENOMEM; goto bad; } switch (isr->saidx.proto) { case IPPROTO_ESP: #ifdef IPSEC_ESP if ((error = esp4_output(state->m, isr)) != 0) { state->m = NULL; goto bad; } break; #else m_freem(state->m); state->m = NULL; error = EINVAL; goto bad; #endif case IPPROTO_AH: if ((error = ah4_output(state->m, isr)) != 0) { state->m = NULL; goto bad; } break; case IPPROTO_IPCOMP: if ((error = ipcomp4_output(state->m, isr)) != 0) { state->m = NULL; goto bad; } break; default: ipseclog((LOG_ERR, "ipsec4_output: unknown ipsec protocol %d\n", isr->saidx.proto)); m_freem(state->m); state->m = NULL; error = EINVAL; goto bad; } if (state->m == 0) { error = ENOMEM; goto bad; } ip = 
mtod(state->m, struct ip *); } return 0; bad: m_freem(state->m); state->m = NULL; return error; } #endif #ifdef INET6 /* * IPsec output logic for IPv6, transport mode. */ int ipsec6_output_trans(state, nexthdrp, mprev, sp, flags, tun) struct ipsec_output_state *state; u_char *nexthdrp; struct mbuf *mprev; struct secpolicy *sp; int flags; int *tun; { struct ip6_hdr *ip6; struct ipsecrequest *isr = NULL; struct secasindex saidx; int error = 0; int plen; struct sockaddr_in6 *sin6; if (!state) panic("state == NULL in ipsec6_output_trans"); if (!state->m) panic("state->m == NULL in ipsec6_output_trans"); if (!nexthdrp) panic("nexthdrp == NULL in ipsec6_output_trans"); if (!mprev) panic("mprev == NULL in ipsec6_output_trans"); if (!sp) panic("sp == NULL in ipsec6_output_trans"); if (!tun) panic("tun == NULL in ipsec6_output_trans"); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_output_trans: applyed SP\n"); kdebug_secpolicy(sp)); *tun = 0; for (isr = sp->req; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* the rest will be handled by ipsec6_output_tunnel() */ break; } /* make SA index for search proper SA */ ip6 = mtod(state->m, struct ip6_hdr *); bcopy(&isr->saidx, &saidx, sizeof(saidx)); saidx.mode = isr->saidx.mode; saidx.reqid = isr->saidx.reqid; sin6 = (struct sockaddr_in6 *)&saidx.src; if (sin6->sin6_len == 0) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; bcopy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(ip6->ip6_src)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); } } sin6 = (struct sockaddr_in6 *)&saidx.dst; if (sin6->sin6_len == 0) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(ip6->ip6_dst)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { /* fix scope id for comparing SPD */ 
sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); } } if (key_checkrequest(isr, &saidx) == ENOENT) { /* * IPsec processing is required, but no SA found. * I assume that key_acquire() had been called * to get/establish the SA. Here I discard * this packet because it is responsibility for * upper layer to retransmit the packet. */ ipsec6stat.out_nosa++; error = ENOENT; /* * Notify the fact that the packet is discarded * to ourselves. I believe this is better than * just silently discarding. (jinmei@kame.net) * XXX: should we restrict the error to TCP packets? * XXX: should we directly notify sockets via * pfctlinputs? */ icmp6_error(state->m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN, 0); state->m = NULL; /* icmp6_error freed the mbuf */ goto bad; } /* validity check */ if (isr->sav == NULL) { switch (ipsec_get_reqlevel(isr)) { case IPSEC_LEVEL_USE: continue; case IPSEC_LEVEL_REQUIRE: /* must be not reached here. */ panic("ipsec6_output_trans: no SA found, but required."); } } /* * If there is no valid SA, we give up to process. * see same place at ipsec4_output(). 
*/ if (isr->sav->state != SADB_SASTATE_MATURE && isr->sav->state != SADB_SASTATE_DYING) { ipsec6stat.out_nosa++; error = EINVAL; goto bad; } switch (isr->saidx.proto) { case IPPROTO_ESP: #ifdef IPSEC_ESP error = esp6_output(state->m, nexthdrp, mprev->m_next, isr); #else m_freem(state->m); error = EINVAL; #endif break; case IPPROTO_AH: error = ah6_output(state->m, nexthdrp, mprev->m_next, isr); break; case IPPROTO_IPCOMP: error = ipcomp6_output(state->m, nexthdrp, mprev->m_next, isr); break; default: ipseclog((LOG_ERR, "ipsec6_output_trans: " "unknown ipsec protocol %d\n", isr->saidx.proto)); m_freem(state->m); ipsec6stat.out_inval++; error = EINVAL; break; } if (error) { state->m = NULL; goto bad; } plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr); if (plen > IPV6_MAXPACKET) { ipseclog((LOG_ERR, "ipsec6_output_trans: " "IPsec with IPv6 jumbogram is not supported\n")); ipsec6stat.out_inval++; error = EINVAL; /* XXX */ goto bad; } ip6 = mtod(state->m, struct ip6_hdr *); ip6->ip6_plen = htons(plen); } /* if we have more to go, we need a tunnel mode processing */ if (isr != NULL) *tun = 1; return 0; bad: m_freem(state->m); state->m = NULL; return error; } /* * IPsec output logic for IPv6, tunnel mode. */ int ipsec6_output_tunnel(state, sp, flags) struct ipsec_output_state *state; struct secpolicy *sp; int flags; { struct ip6_hdr *ip6; struct ipsecrequest *isr = NULL; struct secasindex saidx; int error = 0; int plen; struct sockaddr_in6* dst6; int s; if (!state) panic("state == NULL in ipsec6_output_tunnel"); if (!state->m) panic("state->m == NULL in ipsec6_output_tunnel"); if (!sp) panic("sp == NULL in ipsec6_output_tunnel"); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_output_tunnel: applyed SP\n"); kdebug_secpolicy(sp)); /* * transport mode ipsec (before the 1st tunnel mode) is already * processed by ipsec6_output_trans(). 
*/ for (isr = sp->req; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_TUNNEL) break; } for (/* already initialized */; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* When tunnel mode, SA peers must be specified. */ bcopy(&isr->saidx, &saidx, sizeof(saidx)); } else { /* make SA index to look for a proper SA */ struct sockaddr_in6 *sin6; bzero(&saidx, sizeof(saidx)); saidx.proto = isr->saidx.proto; saidx.mode = isr->saidx.mode; saidx.reqid = isr->saidx.reqid; ip6 = mtod(state->m, struct ip6_hdr *); sin6 = (struct sockaddr_in6 *)&saidx.src; if (sin6->sin6_len == 0) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; bcopy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(ip6->ip6_src)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); } } sin6 = (struct sockaddr_in6 *)&saidx.dst; if (sin6->sin6_len == 0) { sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(ip6->ip6_dst)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); } } } if (key_checkrequest(isr, &saidx) == ENOENT) { /* * IPsec processing is required, but no SA found. * I assume that key_acquire() had been called * to get/establish the SA. Here I discard * this packet because it is responsibility for * upper layer to retransmit the packet. */ ipsec6stat.out_nosa++; error = ENOENT; goto bad; } /* validity check */ if (isr->sav == NULL) { switch (ipsec_get_reqlevel(isr)) { case IPSEC_LEVEL_USE: continue; case IPSEC_LEVEL_REQUIRE: /* must be not reached here. */ panic("ipsec6_output_tunnel: no SA found, but required."); } } /* * If there is no valid SA, we give up to process. * see same place at ipsec4_output(). 
*/ if (isr->sav->state != SADB_SASTATE_MATURE && isr->sav->state != SADB_SASTATE_DYING) { ipsec6stat.out_nosa++; error = EINVAL; goto bad; } /* * There may be the case that SA status will be changed when * we are refering to one. So calling splsoftnet(). */ s = splnet(); if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* * build IPsec tunnel. */ /* XXX should be processed with other familiy */ if (((struct sockaddr *)&isr->sav->sah->saidx.src)->sa_family != AF_INET6) { ipseclog((LOG_ERR, "ipsec6_output_tunnel: " "family mismatched between inner and outer, spi=%u\n", (u_int32_t)ntohl(isr->sav->spi))); splx(s); ipsec6stat.out_inval++; error = EAFNOSUPPORT; goto bad; } state->m = ipsec6_splithdr(state->m); if (!state->m) { splx(s); ipsec6stat.out_nomem++; error = ENOMEM; goto bad; } error = ipsec6_encapsulate(state->m, isr->sav); splx(s); if (error) { state->m = 0; goto bad; } ip6 = mtod(state->m, struct ip6_hdr *); state->ro = &isr->sav->sah->sa_route; state->dst = (struct sockaddr *)&state->ro->ro_dst; dst6 = (struct sockaddr_in6 *)state->dst; if (state->ro->ro_rt && ((state->ro->ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) { RTFREE(state->ro->ro_rt); state->ro->ro_rt = NULL; } if (state->ro->ro_rt == 0) { bzero(dst6, sizeof(*dst6)); dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = ip6->ip6_dst; rtalloc(state->ro); } if (state->ro->ro_rt == 0) { ip6stat.ip6s_noroute++; ipsec6stat.out_noroute++; error = EHOSTUNREACH; goto bad; } /* adjust state->dst if tunnel endpoint is offlink */ if (state->ro->ro_rt->rt_flags & RTF_GATEWAY) { state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway; dst6 = (struct sockaddr_in6 *)state->dst; } } else splx(s); state->m = ipsec6_splithdr(state->m); if (!state->m) { ipsec6stat.out_nomem++; error = ENOMEM; goto bad; } ip6 = mtod(state->m, struct ip6_hdr *); switch (isr->saidx.proto) { case IPPROTO_ESP: #ifdef IPSEC_ESP error = esp6_output(state->m, 
&ip6->ip6_nxt, state->m->m_next, isr); #else m_freem(state->m); error = EINVAL; #endif break; case IPPROTO_AH: error = ah6_output(state->m, &ip6->ip6_nxt, state->m->m_next, isr); break; case IPPROTO_IPCOMP: /* XXX code should be here */ /* FALLTHROUGH */ default: ipseclog((LOG_ERR, "ipsec6_output_tunnel: " "unknown ipsec protocol %d\n", isr->saidx.proto)); m_freem(state->m); ipsec6stat.out_inval++; error = EINVAL; break; } if (error) { state->m = NULL; goto bad; } plen = state->m->m_pkthdr.len - sizeof(struct ip6_hdr); if (plen > IPV6_MAXPACKET) { ipseclog((LOG_ERR, "ipsec6_output_tunnel: " "IPsec with IPv6 jumbogram is not supported\n")); ipsec6stat.out_inval++; error = EINVAL; /* XXX */ goto bad; } ip6 = mtod(state->m, struct ip6_hdr *); ip6->ip6_plen = htons(plen); } return 0; bad: m_freem(state->m); state->m = NULL; return error; } #endif /* INET6 */ #ifdef INET /* * Chop IP header and option off from the payload. */ static struct mbuf * ipsec4_splithdr(m) struct mbuf *m; { struct mbuf *mh; struct ip *ip; int hlen; if (m->m_len < sizeof(struct ip)) panic("ipsec4_splithdr: first mbuf too short"); ip = mtod(m, struct ip *); #ifdef _IP_VHL hlen = _IP_VHL_HL(ip->ip_vhl) << 2; #else hlen = ip->ip_hl << 2; #endif if (m->m_len > hlen) { MGETHDR(mh, M_DONTWAIT, MT_HEADER); if (!mh) { m_freem(m); return NULL; } - M_COPY_PKTHDR(mh, m); + M_MOVE_PKTHDR(mh, m); MH_ALIGN(mh, hlen); m->m_flags &= ~M_PKTHDR; m->m_len -= hlen; m->m_data += hlen; mh->m_next = m; m = mh; m->m_len = hlen; bcopy((caddr_t)ip, mtod(m, caddr_t), hlen); } else if (m->m_len < hlen) { m = m_pullup(m, hlen); if (!m) return NULL; } return m; } #endif #ifdef INET6 static struct mbuf * ipsec6_splithdr(m) struct mbuf *m; { struct mbuf *mh; struct ip6_hdr *ip6; int hlen; if (m->m_len < sizeof(struct ip6_hdr)) panic("ipsec6_splithdr: first mbuf too short"); ip6 = mtod(m, struct ip6_hdr *); hlen = sizeof(struct ip6_hdr); if (m->m_len > hlen) { MGETHDR(mh, M_DONTWAIT, MT_HEADER); if (!mh) { m_freem(m); return 
NULL; } - M_COPY_PKTHDR(mh, m); + M_MOVE_PKTHDR(mh, m); MH_ALIGN(mh, hlen); m->m_flags &= ~M_PKTHDR; m->m_len -= hlen; m->m_data += hlen; mh->m_next = m; m = mh; m->m_len = hlen; bcopy((caddr_t)ip6, mtod(m, caddr_t), hlen); } else if (m->m_len < hlen) { m = m_pullup(m, hlen); if (!m) return NULL; } return m; } #endif /* validate inbound IPsec tunnel packet. */ int ipsec4_tunnel_validate(m, off, nxt0, sav) struct mbuf *m; /* no pullup permitted, m->m_len >= ip */ int off; u_int nxt0; struct secasvar *sav; { u_int8_t nxt = nxt0 & 0xff; struct sockaddr_in *sin; struct sockaddr_in osrc, odst, isrc, idst; int hlen; struct secpolicy *sp; struct ip *oip; #ifdef DIAGNOSTIC if (m->m_len < sizeof(struct ip)) panic("too short mbuf on ipsec4_tunnel_validate"); #endif if (nxt != IPPROTO_IPV4) return 0; if (m->m_pkthdr.len < off + sizeof(struct ip)) return 0; /* do not decapsulate if the SA is for transport mode only */ if (sav->sah->saidx.mode == IPSEC_MODE_TRANSPORT) return 0; oip = mtod(m, struct ip *); #ifdef _IP_VHL hlen = _IP_VHL_HL(oip->ip_vhl) << 2; #else hlen = oip->ip_hl << 2; #endif if (hlen != sizeof(struct ip)) return 0; /* AF_INET6 should be supported, but at this moment we don't. 
*/ sin = (struct sockaddr_in *)&sav->sah->saidx.dst; if (sin->sin_family != AF_INET) return 0; if (bcmp(&oip->ip_dst, &sin->sin_addr, sizeof(oip->ip_dst)) != 0) return 0; /* XXX slow */ bzero(&osrc, sizeof(osrc)); bzero(&odst, sizeof(odst)); bzero(&isrc, sizeof(isrc)); bzero(&idst, sizeof(idst)); osrc.sin_family = odst.sin_family = isrc.sin_family = idst.sin_family = AF_INET; osrc.sin_len = odst.sin_len = isrc.sin_len = idst.sin_len = sizeof(struct sockaddr_in); osrc.sin_addr = oip->ip_src; odst.sin_addr = oip->ip_dst; m_copydata(m, off + offsetof(struct ip, ip_src), sizeof(isrc.sin_addr), (caddr_t)&isrc.sin_addr); m_copydata(m, off + offsetof(struct ip, ip_dst), sizeof(idst.sin_addr), (caddr_t)&idst.sin_addr); /* * RFC2401 5.2.1 (b): (assume that we are using tunnel mode) * - if the inner destination is multicast address, there can be * multiple permissible inner source address. implementation * may want to skip verification of inner source address against * SPD selector. * - if the inner protocol is ICMP, the packet may be an error report * from routers on the other side of the VPN cloud (R in the * following diagram). in this case, we cannot verify inner source * address against SPD selector. * me -- gw === gw -- R -- you * * we consider the first bullet to be users responsibility on SPD entry * configuration (if you need to encrypt multicast traffic, set * the source range of SPD selector to 0.0.0.0/0, or have explicit * address ranges for possible senders). * the second bullet is not taken care of (yet). * * therefore, we do not do anything special about inner source. */ sp = key_gettunnel((struct sockaddr *)&osrc, (struct sockaddr *)&odst, (struct sockaddr *)&isrc, (struct sockaddr *)&idst); if (!sp) return 0; key_freesp(sp); return 1; } #ifdef INET6 /* validate inbound IPsec tunnel packet. 
*/ int ipsec6_tunnel_validate(m, off, nxt0, sav) struct mbuf *m; /* no pullup permitted, m->m_len >= ip */ int off; u_int nxt0; struct secasvar *sav; { u_int8_t nxt = nxt0 & 0xff; struct sockaddr_in6 *sin6; struct sockaddr_in6 osrc, odst, isrc, idst; struct secpolicy *sp; struct ip6_hdr *oip6; #ifdef DIAGNOSTIC if (m->m_len < sizeof(struct ip6_hdr)) panic("too short mbuf on ipsec6_tunnel_validate"); #endif if (nxt != IPPROTO_IPV6) return 0; if (m->m_pkthdr.len < off + sizeof(struct ip6_hdr)) return 0; /* do not decapsulate if the SA is for transport mode only */ if (sav->sah->saidx.mode == IPSEC_MODE_TRANSPORT) return 0; oip6 = mtod(m, struct ip6_hdr *); /* AF_INET should be supported, but at this moment we don't. */ sin6 = (struct sockaddr_in6 *)&sav->sah->saidx.dst; if (sin6->sin6_family != AF_INET6) return 0; if (!IN6_ARE_ADDR_EQUAL(&oip6->ip6_dst, &sin6->sin6_addr)) return 0; /* XXX slow */ bzero(&osrc, sizeof(osrc)); bzero(&odst, sizeof(odst)); bzero(&isrc, sizeof(isrc)); bzero(&idst, sizeof(idst)); osrc.sin6_family = odst.sin6_family = isrc.sin6_family = idst.sin6_family = AF_INET6; osrc.sin6_len = odst.sin6_len = isrc.sin6_len = idst.sin6_len = sizeof(struct sockaddr_in6); osrc.sin6_addr = oip6->ip6_src; odst.sin6_addr = oip6->ip6_dst; m_copydata(m, off + offsetof(struct ip6_hdr, ip6_src), sizeof(isrc.sin6_addr), (caddr_t)&isrc.sin6_addr); m_copydata(m, off + offsetof(struct ip6_hdr, ip6_dst), sizeof(idst.sin6_addr), (caddr_t)&idst.sin6_addr); /* * regarding to inner source address validation, see a long comment * in ipsec4_tunnel_validate. */ sp = key_gettunnel((struct sockaddr *)&osrc, (struct sockaddr *)&odst, (struct sockaddr *)&isrc, (struct sockaddr *)&idst); /* * when there is no suitable inbound policy for the packet of the ipsec * tunnel mode, the kernel never decapsulate the tunneled packet * as the ipsec tunnel mode even when the system wide policy is "none". * then the kernel leaves the generic tunnel module to process this * packet. 
if there is no rule of the generic tunnel, the packet * is rejected and the statistics will be counted up. */ if (!sp) return 0; key_freesp(sp); return 1; } #endif /* * Make a mbuf chain for encryption. * If the original mbuf chain contains a mbuf with a cluster, * allocate a new cluster and copy the data to the new cluster. * XXX: this hack is inefficient, but is necessary to handle cases * of TCP retransmission... */ struct mbuf * ipsec_copypkt(m) struct mbuf *m; { struct mbuf *n, **mpp, *mnew; for (n = m, mpp = &m; n; n = n->m_next) { if (n->m_flags & M_EXT) { /* * Make a copy only if there are more than one * references to the cluster. * XXX: is this approach effective? */ if (n->m_ext.ext_type != EXT_CLUSTER || MEXT_IS_REF(n)) { int remain, copied; struct mbuf *mm; if (n->m_flags & M_PKTHDR) { MGETHDR(mnew, M_DONTWAIT, MT_HEADER); if (mnew == NULL) goto fail; - mnew->m_pkthdr = n->m_pkthdr; -#if 0 - if (n->m_pkthdr.aux) { - mnew->m_pkthdr.aux = - m_copym(n->m_pkthdr.aux, - 0, M_COPYALL, M_DONTWAIT); + if (!m_dup_pkthdr(mnew, n, M_DONTWAIT)) { + m_free(mnew); + goto fail; } -#endif - M_COPY_PKTHDR(mnew, n); - mnew->m_flags = n->m_flags & M_COPYFLAGS; } else { MGET(mnew, M_DONTWAIT, MT_DATA); if (mnew == NULL) goto fail; } mnew->m_len = 0; mm = mnew; /* * Copy data. If we don't have enough space to * store the whole data, allocate a cluster * or additional mbufs. * XXX: we don't use m_copyback(), since the * function does not use clusters and thus is * inefficient. */ remain = n->m_len; copied = 0; while (1) { int len; struct mbuf *mn; if (remain <= (mm->m_flags & M_PKTHDR ? MHLEN : MLEN)) len = remain; else { /* allocate a cluster */ MCLGET(mm, M_DONTWAIT); if (!(mm->m_flags & M_EXT)) { m_free(mm); goto fail; } len = remain < MCLBYTES ? remain : MCLBYTES; } bcopy(n->m_data + copied, mm->m_data, len); copied += len; remain -= len; mm->m_len = len; if (remain <= 0) /* completed? 
*/ break; /* need another mbuf */ MGETHDR(mn, M_DONTWAIT, MT_HEADER); if (mn == NULL) goto fail; mn->m_pkthdr.rcvif = NULL; mm->m_next = mn; mm = mn; } /* adjust chain */ mm->m_next = m_free(n); n = mm; *mpp = mnew; mpp = &n->m_next; continue; } } *mpp = n; mpp = &n->m_next; } return(m); fail: m_freem(m); return(NULL); } void ipsec_delaux(m) struct mbuf *m; { struct m_tag *tag; while ((tag = m_tag_find(m, PACKET_TAG_IPSEC_HISTORY, NULL)) != NULL) m_tag_delete(m, tag); } int ipsec_addhist(m, proto, spi) struct mbuf *m; int proto; u_int32_t spi; { struct m_tag *tag; struct ipsec_history *p; tag = m_tag_get(PACKET_TAG_IPSEC_HISTORY, sizeof (struct ipsec_history), M_NOWAIT); if (tag == NULL) return ENOBUFS; p = (struct ipsec_history *)(tag+1); bzero(p, sizeof(*p)); p->ih_proto = proto; p->ih_spi = spi; m_tag_prepend(m, tag); return 0; } struct ipsec_history * ipsec_gethist(m, lenp) struct mbuf *m; int *lenp; { struct m_tag *tag; tag = m_tag_find(m, PACKET_TAG_IPSEC_HISTORY, NULL); if (tag == NULL) return NULL; /* XXX NB: noone uses this so fake it */ if (lenp) *lenp = sizeof (struct ipsec_history); return ((struct ipsec_history *)(tag+1)); } Index: head/sys/netipsec/ipsec_mbuf.c =================================================================== --- head/sys/netipsec/ipsec_mbuf.c (revision 108465) +++ head/sys/netipsec/ipsec_mbuf.c (revision 108466) @@ -1,422 +1,422 @@ /* $FreeBSD$ */ /* * IPsec-specific mbuf routines. */ #include "opt_param.h" #include #include #include #include #include #include #include extern struct mbuf *m_getptr(struct mbuf *, int, int *); /* * Create a writable copy of the mbuf chain. While doing this * we compact the chain with a goal of producing a chain with * at most two mbufs. The second mbuf in this chain is likely * to be a cluster. The primary purpose of this work is to create * a writable packet for encryption, compression, etc. 
The * secondary goal is to linearize the data so the data can be * passed to crypto hardware in the most efficient manner possible. */ struct mbuf * m_clone(struct mbuf *m0) { struct mbuf *m, *mprev; KASSERT(m0 != NULL, ("m_clone: null mbuf")); mprev = NULL; for (m = m0; m != NULL; m = mprev->m_next) { /* * Regular mbufs are ignored unless there's a cluster * in front of it that we can use to coalesce. We do * the latter mainly so later clusters can be coalesced * also w/o having to handle them specially (i.e. convert * mbuf+cluster -> cluster). This optimization is heavily * influenced by the assumption that we're running over * Ethernet where MCBYTES is large enough that the max * packet size will permit lots of coalescing into a * single cluster. This in turn permits efficient * crypto operations, especially when using hardware. */ if ((m->m_flags & M_EXT) == 0) { if (mprev && (mprev->m_flags & M_EXT) && m->m_len <= M_TRAILINGSPACE(mprev)) { /* XXX: this ignores mbuf types */ memcpy(mtod(mprev, caddr_t) + mprev->m_len, mtod(m, caddr_t), m->m_len); mprev->m_len += m->m_len; mprev->m_next = m->m_next; /* unlink from chain */ m_free(m); /* reclaim mbuf */ newipsecstat.ips_mbcoalesced++; } else { mprev = m; } continue; } /* * Cluster'd mbufs are left alone (for now). */ if (!MEXT_IS_REF(m)) { mprev = m; continue; } /* * Not writable, replace with a copy or coalesce with * the previous mbuf if possible (since we have to copy * it anyway, we try to reduce the number of mbufs and * clusters so that future work is easier). */ /* XXX why can M_PKTHDR be set past the first mbuf? */ KASSERT(m->m_flags & M_EXT, ("m_clone: m_flags 0x%x", m->m_flags)); /* NB: we only coalesce into a cluster */ if (mprev == NULL || (mprev->m_flags & M_EXT) == 0 || m->m_len > M_TRAILINGSPACE(mprev)) { struct mbuf *n; /* * Allocate a new page, copy the data to the front * and release the reference to the old page. 
*/ if (mprev == NULL && (m->m_flags & M_PKTHDR)) { /* * NB: if a packet header is present we * must allocate the mbuf separately from * the cluster 'cuz M_COPY_PKTHDR will * smash the data pointer and drop the * M_EXT marker. */ MGETHDR(n, M_DONTWAIT, m->m_type); if (n == NULL) { m_freem(m0); return (NULL); } - M_COPY_PKTHDR(n, m); + M_MOVE_PKTHDR(n, m); MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_free(n); m_freem(m0); return (NULL); } } else { n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); if (n == NULL) { m_freem(m0); return (NULL); } } memcpy(mtod(n, caddr_t), mtod(m, caddr_t), m->m_len); n->m_len = m->m_len; n->m_next = m->m_next; if (mprev == NULL) m0 = n; /* new head of chain */ else mprev->m_next = n; /* replace old mbuf */ m_free(m); /* release old mbuf */ mprev = n; newipsecstat.ips_clcopied++; } else { /* XXX: this ignores mbuf types */ memcpy(mtod(mprev, caddr_t) + mprev->m_len, mtod(m, caddr_t), m->m_len); mprev->m_len += m->m_len; mprev->m_next = m->m_next; /* unlink from chain */ m_free(m); /* reclaim mbuf */ newipsecstat.ips_clcoalesced++; } } return (m0); } /* * Make space for a new header of length hlen at offset off * in the packet. When doing this we allocate new mbufs only * when absolutely necessary. The mbuf where the new header * is to go is returned together with an offset into the mbuf. * If NULL is returned then the mbuf chain may have been modified; * the caller is assumed to always free the chain. */ struct mbuf * m_makespace(struct mbuf *m0, int skip, int hlen, int *off) { struct mbuf *m; unsigned remain; KASSERT(m0 != NULL, ("m_dmakespace: null mbuf")); KASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen)); for (m = m0; m && skip > m->m_len; m = m->m_next) skip -= m->m_len; if (m == NULL) return (NULL); /* * At this point skip is the offset into the mbuf m * where the new header should be placed. Figure out * if there's space to insert the new header. If so, * and copying the remainder makese sense then do so. 
* Otherwise insert a new mbuf in the chain, splitting * the contents of m as needed. */ remain = m->m_len - skip; /* data to move */ if (hlen > M_TRAILINGSPACE(m)) { struct mbuf *n; /* XXX code doesn't handle clusters XXX */ KASSERT(remain < MLEN, ("m_makespace: remainder too big: %u", remain)); /* * Not enough space in m, split the contents * of m, inserting new mbufs as required. * * NB: this ignores mbuf types. */ MGET(n, M_DONTWAIT, MT_DATA); if (n == NULL) return (NULL); n->m_next = m->m_next; /* splice new mbuf */ m->m_next = n; newipsecstat.ips_mbinserted++; if (hlen <= M_TRAILINGSPACE(m) + remain) { /* * New header fits in the old mbuf if we copy * the remainder; just do the copy to the new * mbuf and we're good to go. */ memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + skip, remain); n->m_len = remain; m->m_len = skip + hlen; *off = skip; } else { /* * No space in the old mbuf for the new header. * Make space in the new mbuf and check the * remainder'd data fits too. If not then we * must allocate an additional mbuf (yech). */ n->m_len = 0; if (remain + hlen > M_TRAILINGSPACE(n)) { struct mbuf *n2; MGET(n2, M_DONTWAIT, MT_DATA); /* NB: new mbuf is on chain, let caller free */ if (n2 == NULL) return (NULL); n2->m_len = 0; memcpy(mtod(n2, caddr_t), mtod(m, caddr_t) + skip, remain); n2->m_len = remain; /* splice in second mbuf */ n2->m_next = n->m_next; n->m_next = n2; newipsecstat.ips_mbinserted++; } else { memcpy(mtod(n, caddr_t) + hlen, mtod(m, caddr_t) + skip, remain); n->m_len += remain; } m->m_len -= remain; n->m_len += hlen; m = n; /* header is at front ... */ *off = 0; /* ... of new mbuf */ } } else { /* * Copy the remainder to the back of the mbuf * so there's space to write the new header. */ /* XXX can this be memcpy? does it handle overlap? 
*/ ovbcopy(mtod(m, caddr_t) + skip, mtod(m, caddr_t) + skip + hlen, remain); m->m_len += hlen; *off = skip; } m0->m_pkthdr.len += hlen; /* adjust packet length */ return m; } /* * m_pad(m, n) pads with bytes at the end. The packet header * length is updated, and a pointer to the first byte of the padding * (which is guaranteed to be all in one mbuf) is returned. */ caddr_t m_pad(struct mbuf *m, int n) { register struct mbuf *m0, *m1; register int len, pad; caddr_t retval; if (n <= 0) { /* No stupid arguments. */ DPRINTF(("m_pad: pad length invalid (%d)\n", n)); m_freem(m); return NULL; } len = m->m_pkthdr.len; pad = n; m0 = m; while (m0->m_len < len) { KASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/ len -= m0->m_len; m0 = m0->m_next; } if (m0->m_len != len) { DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n", m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len)); m_freem(m); return NULL; } /* Check for zero-length trailing mbufs, and find the last one. */ for (m1 = m0; m1->m_next; m1 = m1->m_next) { if (m1->m_next->m_len != 0) { DPRINTF(("m_pad: length mismatch (should be %d " "instead of %d)\n", m->m_pkthdr.len, m->m_pkthdr.len + m1->m_next->m_len)); m_freem(m); return NULL; } m0 = m1->m_next; } if (pad > M_TRAILINGSPACE(m0)) { /* Add an mbuf to the chain. */ MGET(m1, M_DONTWAIT, MT_DATA); if (m1 == 0) { m_freem(m0); DPRINTF(("m_pad: unable to get extra mbuf\n")); return NULL; } m0->m_next = m1; m0 = m1; m0->m_len = 0; } retval = m0->m_data + m0->m_len; m0->m_len += pad; m->m_pkthdr.len += pad; return retval; } /* * Remove hlen data at offset skip in the packet. This is used by * the protocols strip protocol headers and associated data (e.g. IV, * authenticator) on input. */ int m_striphdr(struct mbuf *m, int skip, int hlen) { struct mbuf *m1; int roff; /* Find beginning of header */ m1 = m_getptr(m, skip, &roff); if (m1 == NULL) return (EINVAL); /* Remove the header and associated data from the mbuf. 
*/ if (roff == 0) { /* The header was at the beginning of the mbuf */ newipsecstat.ips_input_front++; m_adj(m1, hlen); if ((m1->m_flags & M_PKTHDR) == 0) m->m_pkthdr.len -= hlen; } else if (roff + hlen >= m1->m_len) { struct mbuf *mo; /* * Part or all of the header is at the end of this mbuf, * so first let's remove the remainder of the header from * the beginning of the remainder of the mbuf chain, if any. */ newipsecstat.ips_input_end++; if (roff + hlen > m1->m_len) { /* Adjust the next mbuf by the remainder */ m_adj(m1->m_next, roff + hlen - m1->m_len); /* The second mbuf is guaranteed not to have a pkthdr... */ m->m_pkthdr.len -= (roff + hlen - m1->m_len); } /* Now, let's unlink the mbuf chain for a second...*/ mo = m1->m_next; m1->m_next = NULL; /* ...and trim the end of the first part of the chain...sick */ m_adj(m1, -(m1->m_len - roff)); if ((m1->m_flags & M_PKTHDR) == 0) m->m_pkthdr.len -= (m1->m_len - roff); /* Finally, let's relink */ m1->m_next = mo; } else { /* * The header lies in the "middle" of the mbuf; copy * the remainder of the mbuf down over the header. */ newipsecstat.ips_input_middle++; bcopy(mtod(m1, u_char *) + roff + hlen, mtod(m1, u_char *) + roff, m1->m_len - (roff + hlen)); m1->m_len -= hlen; m->m_pkthdr.len -= hlen; } return (0); } /* * Diagnostic routine to check mbuf alignment as required by the * crypto device drivers (that use DMA). */ void m_checkalignment(const char* where, struct mbuf *m0, int off, int len) { int roff; struct mbuf *m = m_getptr(m0, off, &roff); caddr_t addr; if (m == NULL) return; printf("%s (off %u len %u): ", where, off, len); addr = mtod(m, caddr_t) + roff; do { int mlen; if (((uintptr_t) addr) & 3) { printf("addr misaligned %p,", addr); break; } mlen = m->m_len; if (mlen > len) mlen = len; len -= mlen; if (len && (mlen & 3)) { printf("len mismatch %u,", mlen); break; } m = m->m_next; addr = m ? 
mtod(m, caddr_t) : NULL; } while (m && len > 0); for (m = m0; m; m = m->m_next) printf(" [%p:%u]", mtod(m, caddr_t), m->m_len); printf("\n"); } Index: head/sys/netipsec/ipsec_output.c =================================================================== --- head/sys/netipsec/ipsec_output.c (revision 108465) +++ head/sys/netipsec/ipsec_output.c (revision 108466) @@ -1,737 +1,737 @@ /* $FreeBSD$ */ /* $KAME: ipsec.c,v 1.103 2001/05/24 07:14:18 sakane Exp $ */ /* * IPsec output processing. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include int ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) { struct tdb_ident *tdbi; struct m_tag *mtag; struct secasvar *sav; struct secasindex *saidx; int error; #if 0 SPLASSERT(net, "ipsec_process_done"); #endif KASSERT(m != NULL, ("ipsec_process_done: null mbuf")); KASSERT(isr != NULL, ("ipsec_process_done: null ISR")); sav = isr->sav; KASSERT(sav != NULL, ("ipsec_process_done: null SA")); KASSERT(sav->sah != NULL, ("ipsec_process_done: null SAH")); saidx = &sav->sah->saidx; switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: /* Fix the header length, for AH processing. */ mtod(m, struct ip *)->ip_len = htons(m->m_pkthdr.len); break; #endif /* INET */ #ifdef INET6 case AF_INET6: /* Fix the header length, for AH processing. */ if (m->m_pkthdr.len < sizeof (struct ip6_hdr)) { error = ENXIO; goto bad; } if (m->m_pkthdr.len - sizeof (struct ip6_hdr) > IPV6_MAXPACKET) { /* No jumbogram support. 
*/ error = ENXIO; /*?*/ goto bad; } mtod(m, struct ip6_hdr *)->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); break; #endif /* INET6 */ default: DPRINTF(("ipsec_process_done: unknown protocol family %u\n", saidx->dst.sa.sa_family)); error = ENXIO; goto bad; } /* * Add a record of what we've done or what needs to be done to the * packet. */ mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, sizeof(struct tdb_ident), M_NOWAIT); if (mtag == NULL) { DPRINTF(("ipsec_process_done: could not get packet tag\n")); error = ENOMEM; goto bad; } tdbi = (struct tdb_ident *)(mtag + 1); tdbi->dst = saidx->dst; tdbi->proto = saidx->proto; tdbi->spi = sav->spi; m_tag_prepend(m, mtag); /* * If there's another (bundled) SA to apply, do so. * Note that this puts a burden on the kernel stack size. * If this is a problem we'll need to introduce a queue * to set the packet on so we can unwind the stack before * doing further processing. */ if (isr->next) { newipsecstat.ips_out_bundlesa++; return ipsec4_process_packet(m, isr->next, 0, 0); } /* * We're done with IPsec processing, transmit the packet using the * appropriate network protocol (IP or IPv6). SPD lookup will be * performed again there. */ switch (saidx->dst.sa.sa_family) { #ifdef INET struct ip *ip; case AF_INET: ip = mtod(m, struct ip *); ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); return ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL); #endif /* INET */ #ifdef INET6 case AF_INET6: /* * We don't need massage, IPv6 header fields are always in * net endian. */ return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); #endif /* INET6 */ } panic("ipsec_process_done"); bad: m_freem(m); KEY_FREESAV(&sav); return (error); } static struct ipsecrequest * ipsec_nextisr( struct mbuf *m, struct ipsecrequest *isr, int af, struct secasindex *saidx, int *error ) { #define IPSEC_OSTAT(x,y,z) (isr->saidx.proto == IPPROTO_ESP ? (x)++ : \ isr->saidx.proto == IPPROTO_AH ? 
(y)++ : (z)++) struct secasvar *sav; #if 0 SPLASSERT(net, "ipsec_nextisr"); #endif KASSERT(af == AF_INET || af == AF_INET6, ("ipsec_nextisr: invalid address family %u", af)); again: /* * Craft SA index to search for proper SA. Note that * we only fillin unspecified SA peers for transport * mode; for tunnel mode they must already be filled in. */ *saidx = isr->saidx; if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { /* Fillin unspecified SA peers only for transport mode */ if (af == AF_INET) { struct sockaddr_in *sin; struct ip *ip = mtod(m, struct ip *); if (saidx->src.sa.sa_len == 0) { sin = &saidx->src.sin; sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; sin->sin_addr = ip->ip_src; } if (saidx->dst.sa.sa_len == 0) { sin = &saidx->dst.sin; sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; sin->sin_addr = ip->ip_dst; } } else { struct sockaddr_in6 *sin6; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); if (saidx->src.sin6.sin6_len == 0) { sin6 = (struct sockaddr_in6 *)&saidx->src; sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; sin6->sin6_addr = ip6->ip6_src; if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); } } if (saidx->dst.sin6.sin6_len == 0) { sin6 = (struct sockaddr_in6 *)&saidx->dst; sin6->sin6_len = sizeof(*sin6); sin6->sin6_family = AF_INET6; sin6->sin6_port = IPSEC_PORT_ANY; sin6->sin6_addr = ip6->ip6_dst; if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { /* fix scope id for comparing SPD */ sin6->sin6_addr.s6_addr16[1] = 0; sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); } } } } /* * Lookup SA and validate it. */ *error = key_checkrequest(isr, saidx); if (*error != 0) { /* * IPsec processing is required, but no SA found. * I assume that key_acquire() had been called * to get/establish the SA. 
Here I discard * this packet because it is responsibility for * upper layer to retransmit the packet. */ newipsecstat.ips_out_nosa++; goto bad; } sav = isr->sav; if (sav == NULL) { /* XXX valid return */ KASSERT(ipsec_get_reqlevel(isr) == IPSEC_LEVEL_USE, ("ipsec_nextisr: no SA found, but required; level %u", ipsec_get_reqlevel(isr))); isr = isr->next; if (isr == NULL) { /*XXXstatistic??*/ *error = EINVAL; /*XXX*/ return isr; } goto again; } /* * Check system global policy controls. */ if ((isr->saidx.proto == IPPROTO_ESP && !esp_enable) || (isr->saidx.proto == IPPROTO_AH && !ah_enable) || (isr->saidx.proto == IPPROTO_IPCOMP && !ipcomp_enable)) { DPRINTF(("ipsec_nextisr: IPsec outbound packet dropped due" " to policy (check your sysctls)\n")); IPSEC_OSTAT(espstat.esps_pdrops, ahstat.ahs_pdrops, ipcompstat.ipcomps_pdrops); *error = EHOSTUNREACH; goto bad; } /* * Sanity check the SA contents for the caller * before they invoke the xform output method. */ if (sav->tdb_xform == NULL) { DPRINTF(("ipsec_nextisr: no transform for SA\n")); IPSEC_OSTAT(espstat.esps_noxform, ahstat.ahs_noxform, ipcompstat.ipcomps_noxform); *error = EHOSTUNREACH; goto bad; } return isr; bad: KASSERT(*error != 0, ("ipsec_nextisr: error return w/ no error code")); return NULL; #undef IPSEC_OSTAT } #ifdef INET /* * IPsec output logic for IPv4. */ int ipsec4_process_packet( struct mbuf *m, struct ipsecrequest *isr, int flags, int tunalready) { struct secasindex saidx; struct secasvar *sav; struct ip *ip; int s, error, i, off; KASSERT(m != NULL, ("ipsec4_process_packet: null mbuf")); KASSERT(isr != NULL, ("ipsec4_process_packet: null isr")); s = splnet(); /* insure SA contents don't change */ isr = ipsec_nextisr(m, isr, AF_INET, &saidx, &error); if (isr == NULL) goto bad; sav = isr->sav; if (!tunalready) { union sockaddr_union *dst = &sav->sah->saidx.dst; int setdf; /* * Collect IP_DF state from the outer header. 
*/ if (dst->sa.sa_family == AF_INET) { if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { error = ENOBUFS; goto bad; } ip = mtod(m, struct ip *); /* Honor system-wide control of how to handle IP_DF */ switch (ip4_ipsec_dfbit) { case 0: /* clear in outer header */ case 1: /* set in outer header */ setdf = ip4_ipsec_dfbit; break; default: /* propagate to outer header */ setdf = ntohs(ip->ip_off & IP_DF); break; } } else { ip = NULL; /* keep compiler happy */ setdf = 0; } /* Do the appropriate encapsulation, if necessary */ if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ dst->sa.sa_family != AF_INET || /* PF mismatch */ #if 0 (sav->flags & SADB_X_SAFLAGS_TUNNEL) || /* Tunnel requ'd */ sav->tdb_xform->xf_type == XF_IP4 || /* ditto */ #endif (dst->sa.sa_family == AF_INET && /* Proxy */ dst->sin.sin_addr.s_addr != INADDR_ANY && dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) { struct mbuf *mp; /* Fix IPv4 header checksum and length */ if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { error = ENOBUFS; goto bad; } ip = mtod(m, struct ip *); ip->ip_len = htons(m->m_pkthdr.len); ip->ip_sum = 0; #ifdef _IP_VHL if (ip->ip_vhl == IP_VHL_BORING) ip->ip_sum = in_cksum_hdr(ip); else ip->ip_sum = in_cksum(m, _IP_VHL_HL(ip->ip_vhl) << 2); #else ip->ip_sum = in_cksum(m, ip->ip_hl << 2); #endif /* Encapsulate the packet */ error = ipip_output(m, isr, &mp, 0, 0); if (mp == NULL && !error) { /* Should never happen. */ DPRINTF(("ipsec4_process_packet: ipip_output " "returns no mbuf and no error!")); error = EFAULT; } if (error) { if (mp) m_freem(mp); goto bad; } m = mp, mp = NULL; /* * ipip_output clears IP_DF in the new header. If * we need to propagate IP_DF from the outer header, * then we have to do it here. * * XXX shouldn't assume what ipip_output does. 
*/ if (dst->sa.sa_family == AF_INET && setdf) { if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { error = ENOBUFS; goto bad; } ip = mtod(m, struct ip *); ip->ip_off = ntohs(ip->ip_off); ip->ip_off |= IP_DF; ip->ip_off = htons(ip->ip_off); } } } /* * Dispatch to the appropriate IPsec transform logic. The * packet will be returned for transmission after crypto * processing, etc. are completed. For encapsulation we * bypass this call because of the explicit call done above * (necessary to deal with IP_DF handling for IPv4). * * NB: m & sav are ``passed to caller'' who's reponsible for * for reclaiming their resources. */ if (sav->tdb_xform->xf_type != XF_IP4) { ip = mtod(m, struct ip *); i = ip->ip_hl << 2; off = offsetof(struct ip, ip_p); error = (*sav->tdb_xform->xf_output)(m, isr, NULL, i, off); } else { error = ipsec_process_done(m, isr); } splx(s); return error; bad: splx(s); if (m) m_freem(m); return error; } #endif #ifdef INET6 /* * Chop IP6 header from the payload. */ static struct mbuf * ipsec6_splithdr(struct mbuf *m) { struct mbuf *mh; struct ip6_hdr *ip6; int hlen; KASSERT(m->m_len >= sizeof (struct ip6_hdr), ("ipsec6_splithdr: first mbuf too short, len %u", m->m_len)); ip6 = mtod(m, struct ip6_hdr *); hlen = sizeof(struct ip6_hdr); if (m->m_len > hlen) { MGETHDR(mh, M_DONTWAIT, MT_HEADER); if (!mh) { m_freem(m); return NULL; } - M_COPY_PKTHDR(mh, m); + M_MOVE_PKTHDR(mh, m); MH_ALIGN(mh, hlen); m->m_len -= hlen; m->m_data += hlen; mh->m_next = m; m = mh; m->m_len = hlen; bcopy((caddr_t)ip6, mtod(m, caddr_t), hlen); } else if (m->m_len < hlen) { m = m_pullup(m, hlen); if (!m) return NULL; } return m; } /* * IPsec output logic for IPv6, transport mode. 
*/ int ipsec6_output_trans( struct ipsec_output_state *state, u_char *nexthdrp, struct mbuf *mprev, struct secpolicy *sp, int flags, int *tun) { struct ipsecrequest *isr; struct secasindex saidx; int error = 0; struct mbuf *m; KASSERT(state != NULL, ("ipsec6_output: null state")); KASSERT(state->m != NULL, ("ipsec6_output: null m")); KASSERT(nexthdrp != NULL, ("ipsec6_output: null nexthdrp")); KASSERT(mprev != NULL, ("ipsec6_output: null mprev")); KASSERT(sp != NULL, ("ipsec6_output: null sp")); KASSERT(tun != NULL, ("ipsec6_output: null tun")); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_output_trans: applyed SP\n"); kdebug_secpolicy(sp)); isr = sp->req; if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* the rest will be handled by ipsec6_output_tunnel() */ *tun = 1; /* need tunnel-mode processing */ return 0; } *tun = 0; m = state->m; isr = ipsec_nextisr(m, isr, AF_INET6, &saidx, &error); if (isr == NULL) { #ifdef notdef /* XXX should notification be done for all errors ? */ /* * Notify the fact that the packet is discarded * to ourselves. I believe this is better than * just silently discarding. (jinmei@kame.net) * XXX: should we restrict the error to TCP packets? * XXX: should we directly notify sockets via * pfctlinputs? */ icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN, 0); m = NULL; /* NB: icmp6_error frees mbuf */ #endif goto bad; } return (*isr->sav->tdb_xform->xf_output)(m, isr, NULL, sizeof (struct ip6_hdr), offsetof(struct ip6_hdr, ip6_nxt)); bad: if (m) m_freem(m); state->m = NULL; return error; } static int ipsec6_encapsulate(struct mbuf *m, struct secasvar *sav) { struct ip6_hdr *oip6; struct ip6_hdr *ip6; size_t plen; /* can't tunnel between different AFs */ if (sav->sah->saidx.src.sa.sa_family != AF_INET6 || sav->sah->saidx.dst.sa.sa_family != AF_INET6) { m_freem(m); return EINVAL; } KASSERT(m->m_len != sizeof (struct ip6_hdr), ("ipsec6_encapsulate: mbuf wrong size; len %u", m->m_len)); /* * grow the mbuf to accomodate the new IPv6 header. 
*/ plen = m->m_pkthdr.len; if (M_LEADINGSPACE(m->m_next) < sizeof(struct ip6_hdr)) { struct mbuf *n; MGET(n, M_DONTWAIT, MT_DATA); if (!n) { m_freem(m); return ENOBUFS; } n->m_len = sizeof(struct ip6_hdr); n->m_next = m->m_next; m->m_next = n; m->m_pkthdr.len += sizeof(struct ip6_hdr); oip6 = mtod(n, struct ip6_hdr *); } else { m->m_next->m_len += sizeof(struct ip6_hdr); m->m_next->m_data -= sizeof(struct ip6_hdr); m->m_pkthdr.len += sizeof(struct ip6_hdr); oip6 = mtod(m->m_next, struct ip6_hdr *); } ip6 = mtod(m, struct ip6_hdr *); ovbcopy((caddr_t)ip6, (caddr_t)oip6, sizeof(struct ip6_hdr)); /* Fake link-local scope-class addresses */ if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_src)) oip6->ip6_src.s6_addr16[1] = 0; if (IN6_IS_SCOPE_LINKLOCAL(&oip6->ip6_dst)) oip6->ip6_dst.s6_addr16[1] = 0; /* construct new IPv6 header. see RFC 2401 5.1.2.2 */ /* ECN consideration. */ ip6_ecn_ingress(ip6_ipsec_ecn, &ip6->ip6_flow, &oip6->ip6_flow); if (plen < IPV6_MAXPACKET - sizeof(struct ip6_hdr)) ip6->ip6_plen = htons(plen); else { /* ip6->ip6_plen will be updated in ip6_output() */ } ip6->ip6_nxt = IPPROTO_IPV6; sav->sah->saidx.src.sin6.sin6_addr = ip6->ip6_src; sav->sah->saidx.dst.sin6.sin6_addr = ip6->ip6_dst; ip6->ip6_hlim = IPV6_DEFHLIM; /* XXX Should ip6_src be updated later ? */ return 0; } /* * IPsec output logic for IPv6, tunnel mode. */ int ipsec6_output_tunnel(struct ipsec_output_state *state, struct secpolicy *sp, int flags) { struct ip6_hdr *ip6; struct ipsecrequest *isr; struct secasindex saidx; int error; struct sockaddr_in6* dst6; struct mbuf *m; KASSERT(state != NULL, ("ipsec6_output: null state")); KASSERT(state->m != NULL, ("ipsec6_output: null m")); KASSERT(sp != NULL, ("ipsec6_output: null sp")); KEYDEBUG(KEYDEBUG_IPSEC_DATA, printf("ipsec6_output_tunnel: applyed SP\n"); kdebug_secpolicy(sp)); m = state->m; /* * transport mode ipsec (before the 1st tunnel mode) is already * processed by ipsec6_output_trans(). 
*/ for (isr = sp->req; isr; isr = isr->next) { if (isr->saidx.mode == IPSEC_MODE_TUNNEL) break; } isr = ipsec_nextisr(m, isr, AF_INET6, &saidx, &error); if (isr == NULL) goto bad; /* * There may be the case that SA status will be changed when * we are refering to one. So calling splsoftnet(). */ if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { /* * build IPsec tunnel. */ /* XXX should be processed with other familiy */ if (isr->sav->sah->saidx.src.sa.sa_family != AF_INET6) { ipseclog((LOG_ERR, "ipsec6_output_tunnel: " "family mismatched between inner and outer, spi=%u\n", ntohl(isr->sav->spi))); newipsecstat.ips_out_inval++; error = EAFNOSUPPORT; goto bad; } m = ipsec6_splithdr(m); if (!m) { newipsecstat.ips_out_nomem++; error = ENOMEM; goto bad; } error = ipsec6_encapsulate(m, isr->sav); if (error) { m = NULL; goto bad; } ip6 = mtod(m, struct ip6_hdr *); state->ro = &isr->sav->sah->sa_route; state->dst = (struct sockaddr *)&state->ro->ro_dst; dst6 = (struct sockaddr_in6 *)state->dst; if (state->ro->ro_rt && ((state->ro->ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&dst6->sin6_addr, &ip6->ip6_dst))) { RTFREE(state->ro->ro_rt); state->ro->ro_rt = NULL; } if (state->ro->ro_rt == 0) { bzero(dst6, sizeof(*dst6)); dst6->sin6_family = AF_INET6; dst6->sin6_len = sizeof(*dst6); dst6->sin6_addr = ip6->ip6_dst; rtalloc(state->ro); } if (state->ro->ro_rt == 0) { ip6stat.ip6s_noroute++; newipsecstat.ips_out_noroute++; error = EHOSTUNREACH; goto bad; } /* adjust state->dst if tunnel endpoint is offlink */ if (state->ro->ro_rt->rt_flags & RTF_GATEWAY) { state->dst = (struct sockaddr *)state->ro->ro_rt->rt_gateway; dst6 = (struct sockaddr_in6 *)state->dst; } } m = ipsec6_splithdr(m); if (!m) { newipsecstat.ips_out_nomem++; error = ENOMEM; goto bad; } ip6 = mtod(m, struct ip6_hdr *); return (*isr->sav->tdb_xform->xf_output)(m, isr, NULL, sizeof (struct ip6_hdr), offsetof(struct ip6_hdr, ip6_nxt)); bad: if (m) m_freem(m); state->m = NULL; return error; } #endif /*INET6*/ Index: 
head/sys/sys/mbuf.h =================================================================== --- head/sys/sys/mbuf.h (revision 108465) +++ head/sys/sys/mbuf.h (revision 108466) @@ -1,553 +1,557 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)mbuf.h 8.5 (Berkeley) 2/19/95 * $FreeBSD$ */ #ifndef _SYS_MBUF_H_ #define _SYS_MBUF_H_ #include #include /* * Mbufs are of a single size, MSIZE (machine/param.h), which * includes overhead. An mbuf may add a single "mbuf cluster" of size * MCLBYTES (also in machine/param.h), which has no additional overhead * and is used instead of the internal data area; this is done when * at least MINCLSIZE of data must be stored. Additionally, it is possible * to allocate a separate buffer externally and attach it to the mbuf in * a way similar to that of mbuf clusters. */ #define MLEN (MSIZE - sizeof(struct m_hdr)) /* normal data len */ #define MHLEN (MLEN - sizeof(struct pkthdr)) /* data len w/pkthdr */ #define MINCLSIZE (MHLEN + 1) /* smallest amount to put in cluster */ #define M_MAXCOMPRESS (MHLEN / 2) /* max amount to copy for compression */ #ifdef _KERNEL /*- * Macros for type conversion: * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. * dtom(x) -- Convert data pointer within mbuf to mbuf pointer (XXX). */ #define mtod(m, t) ((t)((m)->m_data)) #define dtom(x) ((struct mbuf *)((intptr_t)(x) & ~(MSIZE-1))) #endif /* _KERNEL */ /* * Header present at the beginning of every mbuf. */ struct m_hdr { struct mbuf *mh_next; /* next buffer in chain */ struct mbuf *mh_nextpkt; /* next chain in queue/record */ caddr_t mh_data; /* location of data */ int mh_len; /* amount of data in this mbuf */ int mh_flags; /* flags; see below */ short mh_type; /* type of data in this mbuf */ }; /* * Packet tag structure (see below for details). */ struct m_tag { SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ u_int16_t m_tag_id; /* Tag ID */ u_int16_t m_tag_len; /* Length of data */ u_int32_t m_tag_cookie; /* ABI/Module ID */ }; /* * Record/packet header in first mbuf of chain; valid only if M_PKTHDR is set. 
*/ struct pkthdr { struct ifnet *rcvif; /* rcv interface */ int len; /* total packet length */ /* variables for ip and tcp reassembly */ void *header; /* pointer to packet header */ /* variables for hardware checksum */ int csum_flags; /* flags regarding checksum */ int csum_data; /* data field used by csum routines */ SLIST_HEAD(packet_tags, m_tag) tags; /* list of packet tags */ struct label label; /* MAC label of data in packet */ }; /* * Description of external storage mapped into mbuf; valid only if M_EXT is set. */ struct m_ext { caddr_t ext_buf; /* start of buffer */ void (*ext_free) /* free routine if not the usual */ (void *, void *); void *ext_args; /* optional argument pointer */ u_int ext_size; /* size of buffer, for ext_free */ u_int *ref_cnt; /* pointer to ref count info */ int ext_type; /* type of external storage */ }; /* * The core of the mbuf object along with some shortcut defines for * practical purposes. */ struct mbuf { struct m_hdr m_hdr; union { struct { struct pkthdr MH_pkthdr; /* M_PKTHDR set */ union { struct m_ext MH_ext; /* M_EXT set */ char MH_databuf[MHLEN]; } MH_dat; } MH; char M_databuf[MLEN]; /* !M_PKTHDR, !M_EXT */ } M_dat; }; #define m_next m_hdr.mh_next #define m_len m_hdr.mh_len #define m_data m_hdr.mh_data #define m_type m_hdr.mh_type #define m_flags m_hdr.mh_flags #define m_nextpkt m_hdr.mh_nextpkt #define m_act m_nextpkt #define m_pkthdr M_dat.MH.MH_pkthdr #define m_ext M_dat.MH.MH_dat.MH_ext #define m_pktdat M_dat.MH.MH_dat.MH_databuf #define m_dat M_dat.M_databuf /* * mbuf flags. 
*/ #define M_EXT 0x0001 /* has associated external storage */ #define M_PKTHDR 0x0002 /* start of record */ #define M_EOR 0x0004 /* end of record */ #define M_RDONLY 0x0008 /* associated data is marked read-only */ #define M_PROTO1 0x0010 /* protocol-specific */ #define M_PROTO2 0x0020 /* protocol-specific */ #define M_PROTO3 0x0040 /* protocol-specific */ #define M_PROTO4 0x0080 /* protocol-specific */ #define M_PROTO5 0x0100 /* protocol-specific */ /* * mbuf pkthdr flags (also stored in m_flags). */ #define M_BCAST 0x0200 /* send/received as link-level broadcast */ #define M_MCAST 0x0400 /* send/received as link-level multicast */ #define M_FRAG 0x0800 /* packet is a fragment of a larger packet */ #define M_FIRSTFRAG 0x1000 /* packet is first fragment */ #define M_LASTFRAG 0x2000 /* packet is last fragment */ /* * External buffer types: identify ext_buf type. */ #define EXT_CLUSTER 1 /* mbuf cluster */ #define EXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ #define EXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ #define EXT_MOD_TYPE 200 /* custom module's ext_buf type */ #define EXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */ /* * Flags copied when copying m_pkthdr. */ -#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_PROTO1|M_PROTO1|M_PROTO2|M_PROTO3 | \ - M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|M_FRAG|M_RDONLY) +#define M_COPYFLAGS (M_PKTHDR|M_EOR|M_RDONLY|M_PROTO1|M_PROTO1|M_PROTO2|\ + M_PROTO3|M_PROTO4|M_PROTO5|M_BCAST|M_MCAST|\ + M_FRAG|M_FIRSTFRAG|M_LASTFRAG) /* * Flags indicating hw checksum support and sw checksum requirements. */ #define CSUM_IP 0x0001 /* will csum IP */ #define CSUM_TCP 0x0002 /* will csum TCP */ #define CSUM_UDP 0x0004 /* will csum UDP */ #define CSUM_IP_FRAGS 0x0008 /* will csum IP fragments */ #define CSUM_FRAGMENT 0x0010 /* will do IP fragmentation */ #define CSUM_IP_CHECKED 0x0100 /* did csum IP */ #define CSUM_IP_VALID 0x0200 /* ... 
the csum is valid */ #define CSUM_DATA_VALID 0x0400 /* csum_data field is valid */ #define CSUM_PSEUDO_HDR 0x0800 /* csum_data has pseudo hdr */ #define CSUM_DELAY_DATA (CSUM_TCP | CSUM_UDP) #define CSUM_DELAY_IP (CSUM_IP) /* XXX add ipv6 here too? */ /* * mbuf types. */ #define MT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */ #define MT_DATA 1 /* dynamic (data) allocation */ #define MT_HEADER 2 /* packet header */ #if 0 #define MT_SOCKET 3 /* socket structure */ #define MT_PCB 4 /* protocol control block */ #define MT_RTABLE 5 /* routing tables */ #define MT_HTABLE 6 /* IMP host tables */ #define MT_ATABLE 7 /* address resolution tables */ #endif #define MT_SONAME 8 /* socket name */ #if 0 #define MT_SOOPTS 10 /* socket options */ #endif #define MT_FTABLE 11 /* fragment reassembly header */ #if 0 #define MT_RIGHTS 12 /* access rights */ #define MT_IFADDR 13 /* interface address */ #endif #define MT_TAG 13 /* volatile metadata associated to pkts */ #define MT_CONTROL 14 /* extra-data protocol message */ #define MT_OOBDATA 15 /* expedited data */ #define MT_NTYPES 16 /* number of mbuf types for mbtypes[] */ /* * Mbuf and cluster allocation statistics PCPU structure. */ struct mbpstat { u_long mb_mbfree; u_long mb_mbpgs; u_long mb_clfree; u_long mb_clpgs; long mb_mbtypes[MT_NTYPES]; short mb_active; }; /* * General mbuf allocator statistics structure. * XXX: Modifications of these are not protected by any mutex locks nor by * any atomic() manipulations. As a result, we may occasionally lose * a count or two. Luckily, not all of these fields are modified at all * and remain static, and those that are manipulated are only manipulated * in failure situations, which do not occur (hopefully) very often. 
*/ struct mbstat { u_long m_drops; /* times failed to allocate */ u_long m_wait; /* times succesfully returned from wait */ u_long m_drain; /* times drained protocols for space */ u_long m_mcfail; /* XXX: times m_copym failed */ u_long m_mpfail; /* XXX: times m_pullup failed */ u_long m_msize; /* length of an mbuf */ u_long m_mclbytes; /* length of an mbuf cluster */ u_long m_minclsize; /* min length of data to allocate a cluster */ u_long m_mlen; /* length of data in an mbuf */ u_long m_mhlen; /* length of data in a header mbuf */ /* Number of mbtypes (gives # elems in mbpstat's mb_mbtypes[] array: */ short m_numtypes; }; /* * Flags specifying how an allocation should be made. * M_DONTWAIT means "don't block if nothing is available" whereas * M_TRYWAIT means "block for mbuf_wait ticks at most if nothing is * available." */ #define M_DONTWAIT 1 #define M_TRYWAIT 0 #define M_WAIT M_TRYWAIT /* XXX: Deprecated. */ #ifdef _KERNEL /*- * mbuf external reference count management macros. * * MEXT_IS_REF(m): true if (m) is not the only mbuf referencing * the external buffer ext_buf. * * MEXT_REM_REF(m): remove reference to m_ext object. * * MEXT_ADD_REF(m): add reference to m_ext object already * referred to by (m). */ #define MEXT_IS_REF(m) (*((m)->m_ext.ref_cnt) > 1) #define MEXT_REM_REF(m) do { \ KASSERT(*((m)->m_ext.ref_cnt) > 0, ("m_ext refcnt < 0")); \ atomic_subtract_int((m)->m_ext.ref_cnt, 1); \ } while(0) #define MEXT_ADD_REF(m) atomic_add_int((m)->m_ext.ref_cnt, 1) /* * mbuf, cluster, and external object allocation macros * (for compatibility purposes). 
*/ -#define M_COPY_PKTHDR(to, from) m_copy_pkthdr((to), (from)) +/* NB: M_COPY_PKTHDR is deprecated, use M_MOVE_PKTHDR or m_dup_pkthdr */ +#define M_MOVE_PKTHDR(to, from) m_move_pkthdr((to), (from)) #define m_getclr(how, type) m_get_clrd((how), (type)) #define MGET(m, how, type) ((m) = m_get((how), (type))) #define MGETHDR(m, how, type) ((m) = m_gethdr((how), (type))) #define MCLGET(m, how) m_clget((m), (how)) #define MEXTADD(m, buf, size, free, args, flags, type) \ m_extadd((m), (caddr_t)(buf), (size), (free), (args), (flags), (type)) /* * MEXTFREE(m): disassociate (and possibly free) an external object from (m). * * If the atomic_cmpset_int() returns 0, then we effectively do nothing * in terms of "cleaning up" (freeing the ext buf and ref. counter) as * this means that either there are still references, or another thread * is taking care of the clean-up. */ #define MEXTFREE(m) do { \ struct mbuf *_mb = (m); \ \ MEXT_REM_REF(_mb); \ if (atomic_cmpset_int(_mb->m_ext.ref_cnt, 0, 1)) \ _mext_free(_mb); \ _mb->m_flags &= ~M_EXT; \ } while (0) /* * Evaluate TRUE if it's safe to write to the mbuf m's data region (this * can be both the local data payload, or an external buffer area, * depending on whether M_EXT is set). */ #define M_WRITABLE(m) (!((m)->m_flags & M_RDONLY) && (!((m)->m_flags \ & M_EXT) || !MEXT_IS_REF(m))) /* * Set the m_data pointer of a newly-allocated mbuf (m_get/MGET) to place * an object of the specified size at the end of the mbuf, longword aligned. */ #define M_ALIGN(m, len) do { \ (m)->m_data += (MLEN - (len)) & ~(sizeof(long) - 1); \ } while (0) /* * As above, for mbufs allocated with m_gethdr/MGETHDR * or initialized by M_COPY_PKTHDR. */ #define MH_ALIGN(m, len) do { \ (m)->m_data += (MHLEN - (len)) & ~(sizeof(long) - 1); \ } while (0) /* * Compute the amount of space available * before the current start of data in an mbuf.
* * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. */ #define M_LEADINGSPACE(m) \ ((m)->m_flags & M_EXT ? \ (M_WRITABLE(m) ? (m)->m_data - (m)->m_ext.ext_buf : 0): \ (m)->m_flags & M_PKTHDR ? (m)->m_data - (m)->m_pktdat : \ (m)->m_data - (m)->m_dat) /* * Compute the amount of space available * after the end of data in an mbuf. * * The M_WRITABLE() is a temporary, conservative safety measure: the burden * of checking writability of the mbuf data area rests solely with the caller. */ #define M_TRAILINGSPACE(m) \ ((m)->m_flags & M_EXT ? \ (M_WRITABLE(m) ? (m)->m_ext.ext_buf + (m)->m_ext.ext_size \ - ((m)->m_data + (m)->m_len) : 0) : \ &(m)->m_dat[MLEN] - ((m)->m_data + (m)->m_len)) /* * Arrange to prepend space of size plen to mbuf m. * If a new mbuf must be allocated, how specifies whether to wait. * If the allocation fails, the original mbuf chain is freed and m is * set to NULL. */ #define M_PREPEND(m, plen, how) do { \ struct mbuf **_mmp = &(m); \ struct mbuf *_mm = *_mmp; \ int _mplen = (plen); \ int __mhow = (how); \ \ if (M_LEADINGSPACE(_mm) >= _mplen) { \ _mm->m_data -= _mplen; \ _mm->m_len += _mplen; \ } else \ _mm = m_prepend(_mm, _mplen, __mhow); \ if (_mm != NULL && _mm->m_flags & M_PKTHDR) \ _mm->m_pkthdr.len += _mplen; \ *_mmp = _mm; \ } while (0) /* * Change mbuf to new type. * This is a relatively expensive operation and should be avoided. */ #define MCHTYPE(m, t) m_chtype((m), (t)) /* Length to m_copy to copy all. */ #define M_COPYALL 1000000000 /* Compatibility with 4.3. 
*/ #define m_copy(m, o, l) m_copym((m), (o), (l), M_DONTWAIT) extern int max_datalen; /* MHLEN - max_hdr */ extern int max_hdr; /* Largest link + protocol header */ extern int max_linkhdr; /* Largest link-level header */ extern int max_protohdr; /* Largest protocol header */ extern struct mbstat mbstat; /* General mbuf stats/infos */ extern int nmbclusters; /* Maximum number of clusters */ extern int nmbcnt; /* Scale kmem_map for counter space */ extern int nmbufs; /* Maximum number of mbufs */ extern int nsfbufs; /* Number of sendfile(2) bufs */ void _mext_free(struct mbuf *); void m_adj(struct mbuf *, int); void m_cat(struct mbuf *, struct mbuf *); void m_chtype(struct mbuf *, short); void m_clget(struct mbuf *, int); void m_extadd(struct mbuf *, caddr_t, u_int, void (*)(void *, void *), void *, int, int); void m_copyback(struct mbuf *, int, int, caddr_t); void m_copydata(const struct mbuf *, int, int, caddr_t); struct mbuf *m_copym(struct mbuf *, int, int, int); struct mbuf *m_copypacket(struct mbuf *, int); void m_copy_pkthdr(struct mbuf *, struct mbuf *); struct mbuf *m_devget(char *, int, int, struct ifnet *, void (*)(char *, caddr_t, u_int)); struct mbuf *m_dup(struct mbuf *, int); +int m_dup_pkthdr(struct mbuf *, struct mbuf *, int); u_int m_fixhdr(struct mbuf *); struct mbuf *m_free(struct mbuf *); void m_freem(struct mbuf *); struct mbuf *m_get(int, short); struct mbuf *m_get_clrd(int, short); struct mbuf *m_getcl(int, short, int); struct mbuf *m_gethdr(int, short); struct mbuf *m_gethdr_clrd(int, short); struct mbuf *m_getm(struct mbuf *, int, int, short); u_int m_length(struct mbuf *, struct mbuf **); +void m_move_pkthdr(struct mbuf *, struct mbuf *); struct mbuf *m_prepend(struct mbuf *, int, int); void m_print(const struct mbuf *); struct mbuf *m_pulldown(struct mbuf *, int, int, int *); struct mbuf *m_pullup(struct mbuf *, int); struct mbuf *m_split(struct mbuf *, int, int); /* * Packets may have annotations attached by affixing a list * of "packet 
tags" to the pkthdr structure. Packet tags are * dynamically allocated semi-opaque data structures that have * a fixed header (struct m_tag) that specifies the size of the * memory block and a pair that identifies it. * The cookie is a 32-bit unique unsigned value used to identify * a module or ABI. By convention this value is chose as the * date+time that the module is created, expressed as the number of * seconds since the epoch (e.g. using date -u +'%s'). The type value * is an ABI/module-specific value that identifies a particular annotation * and is private to the module. For compatibility with systems * like openbsd that define packet tags w/o an ABI/module cookie, * the value PACKET_ABI_COMPAT is used to implement m_tag_get and * m_tag_find compatibility shim functions and several tag types are * defined below. Users that do not require compatibility should use * a private cookie value so that packet tag-related definitions * can be maintained privately. * * Note that the packet tag returned by m_tag_allocate has the default * memory alignment implemented by malloc. To reference private data * one can use a construct like: * * struct m_tag *mtag = m_tag_allocate(...); * struct foo *p = (struct foo *)(mtag+1); * * if the alignment of struct m_tag is sufficient for referencing members * of struct foo. Otherwise it is necessary to embed struct m_tag within * the private data structure to insure proper alignment; e.g. * * struct foo { * struct m_tag tag; * ... 
* }; * struct foo *p = (struct foo *) m_tag_allocate(...); * struct m_tag *mtag = &p->tag; */ #define PACKET_TAG_NONE 0 /* Nadda */ /* Packet tag for use with PACKET_ABI_COMPAT */ #define PACKET_TAG_IPSEC_IN_DONE 1 /* IPsec applied, in */ #define PACKET_TAG_IPSEC_OUT_DONE 2 /* IPsec applied, out */ #define PACKET_TAG_IPSEC_IN_CRYPTO_DONE 3 /* NIC IPsec crypto done */ #define PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED 4 /* NIC IPsec crypto req'ed */ #define PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO 5 /* NIC notifies IPsec */ #define PACKET_TAG_IPSEC_PENDING_TDB 6 /* Reminder to do IPsec */ #define PACKET_TAG_BRIDGE 7 /* Bridge processing done */ #define PACKET_TAG_GIF 8 /* GIF processing done */ #define PACKET_TAG_GRE 9 /* GRE processing done */ #define PACKET_TAG_IN_PACKET_CHECKSUM 10 /* NIC checksumming done */ #define PACKET_TAG_ENCAP 11 /* Encap. processing */ #define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */ #define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */ #define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */ /* * As a temporary and low impact solution to replace the even uglier * approach used so far in some parts of the network stack (which relies * on global variables), packet tag-like annotations are stored in MT_TAG * mbufs (or lookalikes) prepended to the actual mbuf chain. * * m_type = MT_TAG * m_flags = m_tag_id * m_next = next buffer in chain. * * BE VERY CAREFUL not to pass these blocks to the mbuf handling routines. 
*/ #define _m_tag_id m_hdr.mh_flags /* Packet tags used in the FreeBSD network stack */ #define PACKET_TAG_DUMMYNET 15 /* dummynet info */ #define PACKET_TAG_IPFW 16 /* ipfw classification */ #define PACKET_TAG_DIVERT 17 /* divert info */ #define PACKET_TAG_IPFORWARD 18 /* ipforward info */ /* Packet tag routines */ struct m_tag *m_tag_alloc(u_int32_t, int, int, int); void m_tag_free(struct m_tag *); void m_tag_prepend(struct mbuf *, struct m_tag *); void m_tag_unlink(struct mbuf *, struct m_tag *); void m_tag_delete(struct mbuf *, struct m_tag *); void m_tag_delete_chain(struct mbuf *, struct m_tag *); struct m_tag *m_tag_locate(struct mbuf *, u_int32_t, int, struct m_tag *); -struct m_tag *m_tag_copy(struct m_tag *); -int m_tag_copy_chain(struct mbuf *, struct mbuf *); +struct m_tag *m_tag_copy(struct m_tag *, int); +int m_tag_copy_chain(struct mbuf *, struct mbuf *, int); void m_tag_init(struct mbuf *); struct m_tag *m_tag_first(struct mbuf *); struct m_tag *m_tag_next(struct mbuf *, struct m_tag *); /* these are for openbsd compatibility */ #define MTAG_ABI_COMPAT 0 /* compatibility ABI */ static __inline struct m_tag * m_tag_get(int type, int length, int wait) { return m_tag_alloc(MTAG_ABI_COMPAT, type, length, wait); } static __inline struct m_tag * m_tag_find(struct mbuf *m, int type, struct m_tag *start) { return m_tag_locate(m, MTAG_ABI_COMPAT, type, start); } #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */