Index: head/sys/net/if_arcsubr.c =================================================================== --- head/sys/net/if_arcsubr.c (revision 290382) +++ head/sys/net/if_arcsubr.c (revision 290383) @@ -1,837 +1,833 @@ /* $NetBSD: if_arcsubr.c,v 1.36 2001/06/14 05:44:23 itojun Exp $ */ /* $FreeBSD$ */ /*- * Copyright (c) 1994, 1995 Ignatios Souvatzis * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: NetBSD: if_ethersubr.c,v 1.9 1994/06/29 06:36:11 cgd Exp * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 * */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #endif #ifdef INET6 #include #endif #define ARCNET_ALLOW_BROKEN_ARP static struct mbuf *arc_defrag(struct ifnet *, struct mbuf *); static int arc_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); u_int8_t arcbroadcastaddr = 0; #define ARC_LLADDR(ifp) (*(u_int8_t *)IF_LLADDR(ifp)) #define senderr(e) { error = (e); goto bad;} #define SIN(s) ((const struct sockaddr_in *)(s)) /* * ARCnet output routine. * Encapsulate a packet of type family for the local net. * Assumes that ifp is actually pointer to arccom structure. */ int arc_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct arc_header *ah; int error; u_int8_t atype, adst; int loop_copy = 0; int isphds; #if defined(INET) || defined(INET6) int is_gw = 0; #endif if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) return(ENETDOWN); /* m, m1 aren't initialized yet */ error = 0; #if defined(INET) || defined(INET6) if (ro != NULL && ro->ro_rt != NULL && (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) is_gw = 1; #endif switch (dst->sa_family) { #ifdef INET case AF_INET: /* * For now, use the simple IP addr -> ARCnet addr mapping */ if (m->m_flags & (M_BCAST|M_MCAST)) adst = arcbroadcastaddr; /* ARCnet broadcast address */ else if (ifp->if_flags & IFF_NOARP) adst = ntohl(SIN(dst)->sin_addr.s_addr) & 0xFF; else { error = arpresolve(ifp, is_gw, m, dst, &adst, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); } atype = (ifp->if_flags & IFF_LINK0) ? ARCTYPE_IP_OLD : ARCTYPE_IP; break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ARCNET); loop_copy = -1; /* if this is for us, don't do it */ switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: atype = ARCTYPE_REVARP; break; case ARPOP_REQUEST: case ARPOP_REPLY: default: atype = ARCTYPE_ARP; break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, &adst, ARC_ADDR_LEN); else bcopy(ar_tha(ah), &adst, ARC_ADDR_LEN); } break; #endif #ifdef INET6 case AF_INET6: if ((m->m_flags & M_MCAST) != 0) adst = arcbroadcastaddr; /* ARCnet broadcast address */ else { error = nd6_resolve(ifp, is_gw, m, dst, &adst, NULL); if (error != 0) return (error == EWOULDBLOCK ? 0 : error); } atype = ARCTYPE_INET6; break; #endif case AF_UNSPEC: { const struct arc_header *ah; loop_copy = -1; ah = (const struct arc_header *)dst->sa_data; adst = ah->arc_dhost; atype = ah->arc_type; if (atype == ARCTYPE_ARP) { atype = (ifp->if_flags & IFF_LINK0) ? ARCTYPE_ARP_OLD: ARCTYPE_ARP; #ifdef ARCNET_ALLOW_BROKEN_ARP /* * XXX It's not clear per RFC826 if this is needed, but * "assigned numbers" say this is wrong. * However, e.g., AmiTCP 3.0Beta used it... we make this * switchable for emergency cases. Not perfect, but... */ if (ifp->if_flags & IFF_LINK2) mtod(m, struct arphdr *)->ar_pro = atype - 1; #endif } break; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); } isphds = arc_isphds(atype); M_PREPEND(m, isphds ? ARC_HDRNEWLEN : ARC_HDRLEN, M_NOWAIT); if (m == 0) senderr(ENOBUFS); ah = mtod(m, struct arc_header *); ah->arc_type = atype; ah->arc_dhost = adst; ah->arc_shost = ARC_LLADDR(ifp); if (isphds) { ah->arc_flag = 0; ah->arc_seqid = 0; } if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) { if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); (void) if_simloop(ifp, n, dst->sa_family, ARC_HDRLEN); } else if (ah->arc_dhost == ah->arc_shost) { (void) if_simloop(ifp, m, dst->sa_family, ARC_HDRLEN); return (0); /* XXX */ } } BPF_MTAP(ifp, m); error = ifp->if_transmit(ifp, m); return (error); bad: if (m) m_freem(m); return (error); } void arc_frag_init(struct ifnet *ifp) { struct arccom *ac; ac = (struct arccom *)ifp->if_l2com; ac->curr_frag = 0; } struct mbuf * arc_frag_next(struct ifnet *ifp) { struct arccom *ac; struct mbuf *m; struct arc_header *ah; ac = (struct arccom *)ifp->if_l2com; if ((m = ac->curr_frag) == 0) { int tfrags; /* dequeue new packet */ IF_DEQUEUE(&ifp->if_snd, m); if (m == 0) return 0; ah = mtod(m, struct arc_header *); if (!arc_isphds(ah->arc_type)) return m; ++ac->ac_seqid; /* make the seqid unique */ tfrags = (m->m_pkthdr.len + ARC_MAX_DATA - 1) / ARC_MAX_DATA; ac->fsflag = 2 * tfrags - 3; ac->sflag = 0; ac->rsflag = ac->fsflag; ac->arc_dhost = ah->arc_dhost; ac->arc_shost = ah->arc_shost; ac->arc_type = ah->arc_type; m_adj(m, ARC_HDRNEWLEN); ac->curr_frag = m; } /* split out next fragment and return it */ if (ac->sflag < ac->fsflag) { /* we CAN'T have short packets here */ ac->curr_frag = m_split(m, ARC_MAX_DATA, M_NOWAIT); if (ac->curr_frag == 0) { m_freem(m); return 0; } M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT); if (m == 0) { m_freem(ac->curr_frag); ac->curr_frag = 0; return 0; } ah = mtod(m, struct arc_header *); ah->arc_flag = ac->rsflag; ah->arc_seqid = ac->ac_seqid; ac->sflag += 2; ac->rsflag = ac->sflag; } else if ((m->m_pkthdr.len >= ARC_MIN_FORBID_LEN - ARC_HDRNEWLEN + 2) && (m->m_pkthdr.len <= ARC_MAX_FORBID_LEN - ARC_HDRNEWLEN + 2)) { ac->curr_frag = 0; M_PREPEND(m, ARC_HDRNEWLEN_EXC, M_NOWAIT); if (m == 0) return 0; ah = mtod(m, struct arc_header *); ah->arc_flag = 0xFF; ah->arc_seqid = 0xFFFF; ah->arc_type2 = ac->arc_type; ah->arc_flag2 = ac->sflag; ah->arc_seqid2 = ac->ac_seqid; } else { ac->curr_frag = 0; M_PREPEND(m, ARC_HDRNEWLEN, M_NOWAIT); if (m == 0) return 0; ah = mtod(m, struct arc_header *); ah->arc_flag = ac->sflag; ah->arc_seqid = ac->ac_seqid; } ah->arc_dhost = ac->arc_dhost; ah->arc_shost = ac->arc_shost; ah->arc_type = ac->arc_type; return m; } /* * Defragmenter. Returns mbuf if last packet found, else * NULL. frees imcoming mbuf as necessary. */ static __inline struct mbuf * arc_defrag(struct ifnet *ifp, struct mbuf *m) { struct arc_header *ah, *ah1; struct arccom *ac; struct ac_frag *af; struct mbuf *m1; char *s; int newflen; u_char src,dst,typ; ac = (struct arccom *)ifp->if_l2com; if (m->m_len < ARC_HDRNEWLEN) { m = m_pullup(m, ARC_HDRNEWLEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return NULL; } } ah = mtod(m, struct arc_header *); typ = ah->arc_type; if (!arc_isphds(typ)) return m; src = ah->arc_shost; dst = ah->arc_dhost; if (ah->arc_flag == 0xff) { m_adj(m, 4); if (m->m_len < ARC_HDRNEWLEN) { m = m_pullup(m, ARC_HDRNEWLEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return NULL; } } ah = mtod(m, struct arc_header *); } af = &ac->ac_fragtab[src]; m1 = af->af_packet; s = "debug code error"; if (ah->arc_flag & 1) { /* * first fragment. We always initialize, which is * about the right thing to do, as we only want to * accept one fragmented packet per src at a time. */ if (m1 != NULL) m_freem(m1); af->af_packet = m; m1 = m; af->af_maxflag = ah->arc_flag; af->af_lastseen = 0; af->af_seqid = ah->arc_seqid; return NULL; /* notreached */ } else { /* check for unfragmented packet */ if (ah->arc_flag == 0) return m; /* do we have a first packet from that src? */ if (m1 == NULL) { s = "no first frag"; goto outofseq; } ah1 = mtod(m1, struct arc_header *); if (ah->arc_seqid != ah1->arc_seqid) { s = "seqid differs"; goto outofseq; } if (typ != ah1->arc_type) { s = "type differs"; goto outofseq; } if (dst != ah1->arc_dhost) { s = "dest host differs"; goto outofseq; } /* typ, seqid and dst are ok here. */ if (ah->arc_flag == af->af_lastseen) { m_freem(m); return NULL; } if (ah->arc_flag == af->af_lastseen + 2) { /* ok, this is next fragment */ af->af_lastseen = ah->arc_flag; m_adj(m,ARC_HDRNEWLEN); /* * m_cat might free the first mbuf (with pkthdr) * in 2nd chain; therefore: */ newflen = m->m_pkthdr.len; m_cat(m1,m); m1->m_pkthdr.len += newflen; /* is it the last one? */ if (af->af_lastseen > af->af_maxflag) { af->af_packet = NULL; return(m1); } else return NULL; } s = "other reason"; /* if all else fails, it is out of sequence, too */ } outofseq: if (m1) { m_freem(m1); af->af_packet = NULL; } if (m) m_freem(m); log(LOG_INFO,"%s: got out of seq. packet: %s\n", ifp->if_xname, s); return NULL; } /* * return 1 if Packet Header Definition Standard, else 0. * For now: old IP, old ARP aren't obviously. Lacking correct information, * we guess that besides new IP and new ARP also IPX and APPLETALK are PHDS. * (Apple and Novell corporations were involved, among others, in PHDS work). * Easiest is to assume that everybody else uses that, too. */ int arc_isphds(u_int8_t type) { return (type != ARCTYPE_IP_OLD && type != ARCTYPE_ARP_OLD && type != ARCTYPE_DIAGNOSE); } /* * Process a received Arcnet packet; * the packet is in the mbuf chain m with * the ARCnet header. */ void arc_input(struct ifnet *ifp, struct mbuf *m) { struct arc_header *ah; int isr; u_int8_t atype; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } /* possibly defragment: */ m = arc_defrag(ifp, m); if (m == NULL) return; BPF_MTAP(ifp, m); ah = mtod(m, struct arc_header *); /* does this belong to us? */ if ((ifp->if_flags & IFF_PROMISC) == 0 && ah->arc_dhost != arcbroadcastaddr && ah->arc_dhost != ARC_LLADDR(ifp)) { m_freem(m); return; } if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if (ah->arc_dhost == arcbroadcastaddr) { m->m_flags |= M_BCAST|M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } atype = ah->arc_type; switch (atype) { #ifdef INET case ARCTYPE_IP: m_adj(m, ARC_HDRNEWLEN); - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; case ARCTYPE_IP_OLD: m_adj(m, ARC_HDRLEN); - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; case ARCTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } m_adj(m, ARC_HDRNEWLEN); isr = NETISR_ARP; #ifdef ARCNET_ALLOW_BROKEN_ARP mtod(m, struct arphdr *)->ar_pro = htons(ETHERTYPE_IP); #endif break; case ARCTYPE_ARP_OLD: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } m_adj(m, ARC_HDRLEN); isr = NETISR_ARP; #ifdef ARCNET_ALLOW_BROKEN_ARP mtod(m, struct arphdr *)->ar_pro = htons(ETHERTYPE_IP); #endif break; #endif #ifdef INET6 case ARCTYPE_INET6: m_adj(m, ARC_HDRNEWLEN); isr = NETISR_IPV6; break; #endif default: m_freem(m); return; } M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); } /* * Register (new) link level address. */ void arc_storelladdr(struct ifnet *ifp, u_int8_t lla) { ARC_LLADDR(ifp) = lla; } /* * Perform common duties while attaching to interface list */ void arc_ifattach(struct ifnet *ifp, u_int8_t lla) { struct ifaddr *ifa; struct sockaddr_dl *sdl; struct arccom *ac; if_attach(ifp); ifp->if_addrlen = 1; ifp->if_hdrlen = ARC_HDRLEN; ifp->if_mtu = 1500; ifp->if_resolvemulti = arc_resolvemulti; if (ifp->if_baudrate == 0) ifp->if_baudrate = 2500000; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ARCNET; sdl->sdl_alen = ifp->if_addrlen; if (ifp->if_flags & IFF_BROADCAST) ifp->if_flags |= IFF_MULTICAST|IFF_ALLMULTI; ac = (struct arccom *)ifp->if_l2com; ac->ac_seqid = (time_second) & 0xFFFF; /* try to make seqid unique */ if (lla == 0) { /* XXX this message isn't entirely clear, to me -- cgd */ log(LOG_ERR,"%s: link address 0 reserved for broadcasts. Please change it and ifconfig %s down up\n", ifp->if_xname, ifp->if_xname); } arc_storelladdr(ifp, lla); ifp->if_broadcastaddr = &arcbroadcastaddr; bpfattach(ifp, DLT_ARCNET, ARC_HDRLEN); } void arc_ifdetach(struct ifnet *ifp) { bpfdetach(ifp); if_detach(ifp); } int arc_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) &ifr->ifr_data; *(u_int8_t *)sa->sa_data = ARC_LLADDR(ifp); } break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifr == NULL) error = EAFNOSUPPORT; else { switch (ifr->ifr_addr.sa_family) { case AF_INET: case AF_INET6: error = 0; break; default: error = EAFNOSUPPORT; break; } } break; case SIOCSIFMTU: /* * Set the interface MTU. * mtu can't be larger than ARCMTU for RFC1051 * and can't be larger than ARC_PHDS_MTU */ if (((ifp->if_flags & IFF_LINK0) && ifr->ifr_mtu > ARCMTU) || ifr->ifr_mtu > ARC_PHDS_MAXMTU) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; } return (error); } /* based on ether_resolvemulti() */ int arc_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; if (*LLADDR(sdl) != arcbroadcastaddr) return EADDRNOTAVAIL; *llsa = 0; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ARC_ADDR_LEN; *LLADDR(sdl) = 0; *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = 0; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ARC_ADDR_LEN; *LLADDR(sdl) = 0; *llsa = (struct sockaddr *)sdl; return 0; #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return EAFNOSUPPORT; } } static MALLOC_DEFINE(M_ARCCOM, "arccom", "ARCNET interface internals"); static void* arc_alloc(u_char type, struct ifnet *ifp) { struct arccom *ac; ac = malloc(sizeof(struct arccom), M_ARCCOM, M_WAITOK | M_ZERO); ac->ac_ifp = ifp; return (ac); } static void arc_free(void *com, u_char type) { free(com, M_ARCCOM); } static int arc_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: if_register_com_alloc(IFT_ARCNET, arc_alloc, arc_free); break; case MOD_UNLOAD: if_deregister_com_alloc(IFT_ARCNET); break; default: return EOPNOTSUPP; } return (0); } static moduledata_t arc_mod = { "arcnet", arc_modevent, 0 }; DECLARE_MODULE(arcnet, arc_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(arcnet, 1); Index: head/sys/net/if_ethersubr.c =================================================================== --- head/sys/net/if_ethersubr.c (revision 290382) +++ head/sys/net/if_ethersubr.c (revision 290383) @@ -1,1141 +1,1139 @@ /*- * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if_ethersubr.c 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_netgraph.h" #include "opt_mbuf_profiling.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #endif #ifdef INET6 #include #endif #include #ifdef CTASSERT CTASSERT(sizeof (struct ether_header) == ETHER_ADDR_LEN * 2 + 2); CTASSERT(sizeof (struct ether_addr) == ETHER_ADDR_LEN); #endif VNET_DEFINE(struct pfil_head, link_pfil_hook); /* Packet filter hooks */ /* netgraph node hooks for ng_ether(4) */ void (*ng_ether_input_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_input_orphan_p)(struct ifnet *ifp, struct mbuf *m); int (*ng_ether_output_p)(struct ifnet *ifp, struct mbuf **mp); void (*ng_ether_attach_p)(struct ifnet *ifp); void (*ng_ether_detach_p)(struct ifnet *ifp); void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* if_bridge(4) support */ struct mbuf *(*bridge_input_p)(struct ifnet *, struct mbuf *); int (*bridge_output_p)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); void (*bridge_dn_p)(struct mbuf *, struct ifnet *); /* if_lagg(4) support */ struct mbuf *(*lagg_input_p)(struct ifnet *, struct mbuf *); static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int ether_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); #ifdef VIMAGE static void ether_reassign(struct ifnet *, struct vnet *, char *); #endif #define ETHER_IS_BROADCAST(addr) \ (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0) #define senderr(e) do { error = (e); goto bad;} while (0) static void update_mbuf_csumflags(struct mbuf *src, struct mbuf *dst) { int csum_flags = 0; if (src->m_pkthdr.csum_flags & CSUM_IP) csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); if (src->m_pkthdr.csum_flags & CSUM_DELAY_DATA) csum_flags |= (CSUM_DATA_VALID|CSUM_PSEUDO_HDR); if (src->m_pkthdr.csum_flags & CSUM_SCTP) csum_flags |= CSUM_SCTP_VALID; dst->m_pkthdr.csum_flags |= csum_flags; if (csum_flags & CSUM_DATA_VALID) dst->m_pkthdr.csum_data = 0xffff; } /* * Ethernet output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. */ int ether_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { short type; int error = 0, hdrcmplt = 0; u_char edst[ETHER_ADDR_LEN]; struct llentry *lle = NULL; struct rtentry *rt0 = NULL; struct ether_header *eh; struct pf_mtag *t; int loop_copy = 1; int hlen; /* link layer header length */ int is_gw = 0; uint32_t pflags = 0; if (ro != NULL) { if (!(m->m_flags & (M_BCAST | M_MCAST))) { lle = ro->ro_lle; if (lle != NULL) pflags = lle->la_flags; } rt0 = ro->ro_rt; if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0) is_gw = 1; } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); hlen = ETHER_HDR_LEN; switch (dst->sa_family) { #ifdef INET case AF_INET: if (lle != NULL && (pflags & LLE_VALID) != 0) memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); else error = arpresolve(ifp, is_gw, m, dst, edst, &pflags); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ETHER); loop_copy = 0; /* if this is for us, don't do it */ switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: type = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: type = htons(ETHERTYPE_ARP); break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN); else bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN); } break; #endif #ifdef INET6 case AF_INET6: if (lle != NULL && (pflags & LLE_VALID)) memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); else error = nd6_resolve(ifp, is_gw, m, dst, (u_char *)edst, &pflags); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IPV6); break; #endif case pseudo_AF_HDRCMPLT: { const struct ether_header *eh; hdrcmplt = 1; /* FALLTHROUGH */ case AF_UNSPEC: loop_copy = 0; /* if this is for us, don't do it */ eh = (const struct ether_header *)dst->sa_data; (void)memcpy(edst, eh->ether_dhost, sizeof (edst)); type = eh->ether_type; break; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); } if ((pflags & LLE_IFADDR) != 0) { update_mbuf_csumflags(m, m); return (if_simloop(ifp, m, dst->sa_family, 0)); } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); eh = mtod(m, struct ether_header *); if (hdrcmplt == 0) { memcpy(&eh->ether_type, &type, sizeof(eh->ether_type)); memcpy(eh->ether_dhost, edst, sizeof (edst)); memcpy(eh->ether_shost, IF_LLADDR(ifp),sizeof(eh->ether_shost)); } /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy && ((t = pf_find_mtag(m)) == NULL || !t->routed)) { if (m->m_flags & M_BCAST) { struct mbuf *n; /* * Because if_simloop() modifies the packet, we need a * writable copy through m_dup() instead of a readonly * one as m_copy[m] would give us. The alternative would * be to modify if_simloop() to handle the readonly mbuf, * but performancewise it is mostly equivalent (trading * extra data copying vs. extra locking). * * XXX This is a local workaround. A number of less * often used kernel parts suffer from the same bug. * See PR kern/105943 for a proposed general solution. */ if ((n = m_dup(m, M_NOWAIT)) != NULL) { update_mbuf_csumflags(m, n); (void)if_simloop(ifp, n, dst->sa_family, hlen); } else if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } else if (bcmp(eh->ether_dhost, eh->ether_shost, ETHER_ADDR_LEN) == 0) { update_mbuf_csumflags(m, m); (void) if_simloop(ifp, m, dst->sa_family, hlen); return (0); /* XXX */ } } /* * Bridges require special output handling. */ if (ifp->if_bridge) { BRIDGE_OUTPUT(ifp, m, error); return (error); } #if defined(INET) || defined(INET6) if (ifp->if_carp && (error = (*carp_output_p)(ifp, m, dst))) goto bad; #endif /* Handle ng_ether(4) processing, if any */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_output_p != NULL, ("ng_ether_output_p is NULL")); if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) { bad: if (m != NULL) m_freem(m); return (error); } if (m == NULL) return (0); } /* Continue with link-layer output */ return ether_output_frame(ifp, m); } /* * Ethernet link layer output routine to send a raw frame to the device. * * This assumes that the 14 byte Ethernet header is present and contiguous * in the first mbuf (if BRIDGE'ing). */ int ether_output_frame(struct ifnet *ifp, struct mbuf *m) { int i; if (PFIL_HOOKED(&V_link_pfil_hook)) { i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_OUT, NULL); if (i != 0) return (EACCES); if (m == NULL) return (0); } /* * Queue message on interface, update output statistics if * successful, and start output if interface not yet active. */ return ((ifp->if_transmit)(ifp, m)); } #if defined(INET) || defined(INET6) #endif /* * Process a received Ethernet packet; the packet is in the * mbuf chain m with the ethernet header at the front. */ static void ether_input_internal(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; u_short etype; if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } #ifdef DIAGNOSTIC if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { if_printf(ifp, "discard frame at !IFF_DRV_RUNNING\n"); m_freem(m); return; } #endif if (m->m_len < ETHER_HDR_LEN) { /* XXX maybe should pullup? */ if_printf(ifp, "discard frame w/o leading ethernet " "header (len %u pkt len %u)\n", m->m_len, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } eh = mtod(m, struct ether_header *); etype = ntohs(eh->ether_type); random_harvest_queue(m, sizeof(*m), 2, RANDOM_NET_ETHER); CURVNET_SET_QUIET(ifp->if_vnet); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (ETHER_IS_BROADCAST(eh->ether_dhost)) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. */ mac_ifnet_create_mbuf(ifp, m); #endif /* * Give bpf a chance at the packet. */ ETHER_BPF_MTAP(ifp, m); /* * If the CRC is still on the packet, trim it off. We do this once * and once only in case we are re-entered. Nothing else on the * Ethernet receive path expects to see the FCS. */ if (m->m_flags & M_HASFCS) { m_adj(m, -ETHER_CRC_LEN); m->m_flags &= ~M_HASFCS; } if (!(ifp->if_capenable & IFCAP_HWSTATS)) if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Allow monitor mode to claim this frame, after stats are updated. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); CURVNET_RESTORE(); return; } /* Handle input from a lagg(4) port */ if (ifp->if_type == IFT_IEEE8023ADLAG) { KASSERT(lagg_input_p != NULL, ("%s: if_lagg not loaded!", __func__)); m = (*lagg_input_p)(ifp, m); if (m != NULL) ifp = m->m_pkthdr.rcvif; else { CURVNET_RESTORE(); return; } } /* * If the hardware did not process an 802.1Q tag, do this now, * to allow 802.1P priority frames to be passed to the main input * path correctly. * TODO: Deal with Q-in-Q frames, but not arbitrary nesting levels. */ if ((m->m_flags & M_VLANTAG) == 0 && etype == ETHERTYPE_VLAN) { struct ether_vlan_header *evl; if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { #ifdef DIAGNOSTIC if_printf(ifp, "cannot pullup VLAN header\n"); #endif if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); CURVNET_RESTORE(); return; } evl = mtod(m, struct ether_vlan_header *); m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); m->m_flags |= M_VLANTAG; bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); eh = mtod(m, struct ether_header *); } M_SETFIB(m, ifp->if_fib); /* Allow ng_ether(4) to claim this frame. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_p != NULL, ("%s: ng_ether_input_p is NULL", __func__)); m->m_flags &= ~M_PROMISC; (*ng_ether_input_p)(ifp, &m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } /* * Allow if_bridge(4) to claim this frame. * The BRIDGE_INPUT() macro will update ifp if the bridge changed it * and the frame should be delivered locally. */ if (ifp->if_bridge != NULL) { m->m_flags &= ~M_PROMISC; BRIDGE_INPUT(ifp, m); if (m == NULL) { CURVNET_RESTORE(); return; } eh = mtod(m, struct ether_header *); } #if defined(INET) || defined(INET6) /* * Clear M_PROMISC on frame so that carp(4) will see it when the * mbuf flows up to Layer 3. * FreeBSD's implementation of carp(4) uses the inprotosw * to dispatch IPPROTO_CARP. carp(4) also allocates its own * Ethernet addresses of the form 00:00:5e:00:01:xx, which * is outside the scope of the M_PROMISC test below. * TODO: Maintain a hash table of ethernet addresses other than * ether_dhost which may be active on this ifp. */ if (ifp->if_carp && (*carp_forus_p)(ifp, eh->ether_dhost)) { m->m_flags &= ~M_PROMISC; } else #endif { /* * If the frame received was not for our MAC address, set the * M_PROMISC flag on the mbuf chain. The frame may need to * be seen by the rest of the Ethernet input path in case of * re-entry (e.g. bridge, vlan, netgraph) but should not be * seen by upper protocol layers. */ if (!ETHER_IS_MULTICAST(eh->ether_dhost) && bcmp(IF_LLADDR(ifp), eh->ether_dhost, ETHER_ADDR_LEN) != 0) m->m_flags |= M_PROMISC; } ether_demux(ifp, m); CURVNET_RESTORE(); } /* * Ethernet input dispatch; by default, direct dispatch here regardless of * global configuration. However, if RSS is enabled, hook up RSS affinity * so that when deferred or hybrid dispatch is enabled, we can redistribute * load based on RSS. * * XXXRW: Would be nice if the ifnet passed up a flag indicating whether or * not it had already done work distribution via multi-queue. Then we could * direct dispatch in the event load balancing was already complete and * handle the case of interfaces with different capabilities better. * * XXXRW: Sort of want an M_DISTRIBUTED flag to avoid multiple distributions * at multiple layers? * * XXXRW: For now, enable all this only if RSS is compiled in, although it * works fine without RSS. Need to characterise the performance overhead * of the detour through the netisr code in the event the result is always * direct dispatch. */ static void ether_nh_input(struct mbuf *m) { M_ASSERTPKTHDR(m); KASSERT(m->m_pkthdr.rcvif != NULL, ("%s: NULL interface pointer", __func__)); ether_input_internal(m->m_pkthdr.rcvif, m); } static struct netisr_handler ether_nh = { .nh_name = "ether", .nh_handler = ether_nh_input, .nh_proto = NETISR_ETHER, #ifdef RSS .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_DIRECT, .nh_m2cpuid = rss_m2cpuid, #else .nh_policy = NETISR_POLICY_SOURCE, .nh_dispatch = NETISR_DISPATCH_DIRECT, #endif }; static void ether_init(__unused void *arg) { netisr_register(ðer_nh); } SYSINIT(ether, SI_SUB_INIT_IF, SI_ORDER_ANY, ether_init, NULL); static void vnet_ether_init(__unused void *arg) { int i; /* Initialize packet filter hooks. */ V_link_pfil_hook.ph_type = PFIL_TYPE_AF; V_link_pfil_hook.ph_af = AF_LINK; if ((i = pfil_head_register(&V_link_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil link hook, " "error %d\n", __func__, i); } VNET_SYSINIT(vnet_ether_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_init, NULL); static void vnet_ether_destroy(__unused void *arg) { int i; if ((i = pfil_head_unregister(&V_link_pfil_hook)) != 0) printf("%s: WARNING: unable to unregister pfil link hook, " "error %d\n", __func__, i); } VNET_SYSUNINIT(vnet_ether_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_ether_destroy, NULL); static void ether_input(struct ifnet *ifp, struct mbuf *m) { struct mbuf *mn; /* * The drivers are allowed to pass in a chain of packets linked with * m_nextpkt. We split them up into separate packets here and pass * them up. This allows the drivers to amortize the receive lock. */ while (m) { mn = m->m_nextpkt; m->m_nextpkt = NULL; /* * We will rely on rcvif being set properly in the deferred context, * so assert it is correct here. */ KASSERT(m->m_pkthdr.rcvif == ifp, ("%s: ifnet mismatch", __func__)); netisr_dispatch(NETISR_ETHER, m); m = mn; } } /* * Upper layer processing for a received Ethernet packet. */ void ether_demux(struct ifnet *ifp, struct mbuf *m) { struct ether_header *eh; int i, isr; u_short ether_type; KASSERT(ifp != NULL, ("%s: NULL interface pointer", __func__)); /* Do not grab PROMISC frames in case we are re-entered. */ if (PFIL_HOOKED(&V_link_pfil_hook) && !(m->m_flags & M_PROMISC)) { i = pfil_run_hooks(&V_link_pfil_hook, &m, ifp, PFIL_IN, NULL); if (i != 0 || m == NULL) return; } eh = mtod(m, struct ether_header *); ether_type = ntohs(eh->ether_type); /* * If this frame has a VLAN tag other than 0, call vlan_input() * if its module is loaded. Otherwise, drop. */ if ((m->m_flags & M_VLANTAG) && EVL_VLANOFTAG(m->m_pkthdr.ether_vtag) != 0) { if (ifp->if_vlantrunk == NULL) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } KASSERT(vlan_input_p != NULL,("%s: VLAN not loaded!", __func__)); /* Clear before possibly re-entering ether_input(). */ m->m_flags &= ~M_PROMISC; (*vlan_input_p)(ifp, m); return; } /* * Pass promiscuously received frames to the upper layer if the user * requested this by setting IFF_PPROMISC. Otherwise, drop them. */ if ((ifp->if_flags & IFF_PPROMISC) == 0 && (m->m_flags & M_PROMISC)) { m_freem(m); return; } /* * Reset layer specific mbuf flags to avoid confusing upper layers. * Strip off Ethernet header. */ m->m_flags &= ~M_VLANTAG; m_clrprotoflags(m); m_adj(m, ETHER_HDR_LEN); /* * Dispatch frame to upper layer. */ switch (ether_type) { #ifdef INET case ETHERTYPE_IP: - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) { /* Discard packet if ARP is disabled on interface */ m_freem(m); return; } isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: goto discard; } netisr_dispatch(isr, m); return; discard: /* * Packet is to be discarded. If netgraph is present, * hand the packet to it for last chance processing; * otherwise dispose of it. */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_input_orphan_p != NULL, ("ng_ether_input_orphan_p is NULL")); /* * Put back the ethernet header so netgraph has a * consistent view of inbound packets. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); (*ng_ether_input_orphan_p)(ifp, m); return; } m_freem(m); } /* * Convert Ethernet address to printable (loggable) representation. * This routine is for compatibility; it's better to just use * * printf("%6D", , ":"); * * since there's no static buffer involved. */ char * ether_sprintf(const u_char *ap) { static char etherbuf[18]; snprintf(etherbuf, sizeof (etherbuf), "%6D", ap, ":"); return (etherbuf); } /* * Perform common duties while attaching to interface list */ void ether_ifattach(struct ifnet *ifp, const u_int8_t *lla) { int i; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifp->if_addrlen = ETHER_ADDR_LEN; ifp->if_hdrlen = ETHER_HDR_LEN; if_attach(ifp); ifp->if_mtu = ETHERMTU; ifp->if_output = ether_output; ifp->if_input = ether_input; ifp->if_resolvemulti = ether_resolvemulti; #ifdef VIMAGE ifp->if_reassign = ether_reassign; #endif if (ifp->if_baudrate == 0) ifp->if_baudrate = IF_Mbps(10); /* just a default */ ifp->if_broadcastaddr = etherbroadcastaddr; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ETHER; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); bpfattach(ifp, DLT_EN10MB, ETHER_HDR_LEN); if (ng_ether_attach_p != NULL) (*ng_ether_attach_p)(ifp); /* Announce Ethernet MAC address if non-zero. */ for (i = 0; i < ifp->if_addrlen; i++) if (lla[i] != 0) break; if (i != ifp->if_addrlen) if_printf(ifp, "Ethernet address: %6D\n", lla, ":"); uuid_ether_add(LLADDR(sdl)); } /* * Perform common duties while detaching an Ethernet interface */ void ether_ifdetach(struct ifnet *ifp) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)(ifp->if_addr->ifa_addr); uuid_ether_del(LLADDR(sdl)); if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } bpfdetach(ifp); if_detach(ifp); } #ifdef VIMAGE void ether_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused __unused) { if (ifp->if_l2com != NULL) { KASSERT(ng_ether_detach_p != NULL, ("ng_ether_detach_p is NULL")); (*ng_ether_detach_p)(ifp); } if (ng_ether_attach_p != NULL) { CURVNET_SET_QUIET(new_vnet); (*ng_ether_attach_p)(ifp); CURVNET_RESTORE(); } } #endif SYSCTL_DECL(_net_link); SYSCTL_NODE(_net_link, IFT_ETHER, ether, CTLFLAG_RW, 0, "Ethernet"); #if 0 /* * This is for reference. We have a table-driven version * of the little-endian crc32 generator, which is faster * than the double-loop. */ uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { size_t i; uint32_t crc; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = (crc ^ data) & 1; crc >>= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_LE); } } return (crc); } #else uint32_t ether_crc32_le(const uint8_t *buf, size_t len) { static const uint32_t crctab[] = { 0x00000000, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c }; size_t i; uint32_t crc; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { crc ^= buf[i]; crc = (crc >> 4) ^ crctab[crc & 0xf]; crc = (crc >> 4) ^ crctab[crc & 0xf]; } return (crc); } #endif uint32_t ether_crc32_be(const uint8_t *buf, size_t len) { size_t i; uint32_t crc, carry; int bit; uint8_t data; crc = 0xffffffff; /* initial value */ for (i = 0; i < len; i++) { for (data = *buf++, bit = 0; bit < 8; bit++, data >>= 1) { carry = ((crc & 0x80000000) ? 1 : 0) ^ (data & 0x01); crc <<= 1; if (carry) crc = (crc ^ ETHER_CRC_POLY_BE) | carry; } } return (crc); } int ether_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(IF_LLADDR(ifp), (caddr_t) sa->sa_data, ETHER_ADDR_LEN); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > ETHERMTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; default: error = EINVAL; /* XXX netbsd has ENOTTY??? */ break; } return (error); } static int ether_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if (!ETHER_IS_MULTICAST(e_addr)) return EADDRNOTAVAIL; *llsa = 0; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = 0; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; sdl = link_init_sdl(ifp, *llsa, IFT_ETHER); sdl->sdl_alen = ETHER_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return 0; #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return EAFNOSUPPORT; } } static moduledata_t ether_mod = { .name = "ether", }; void ether_vlan_mtap(struct bpf_if *bp, struct mbuf *m, void *data, u_int dlen) { struct ether_vlan_header vlan; struct mbuf mv, mb; KASSERT((m->m_flags & M_VLANTAG) != 0, ("%s: vlan information not present", __func__)); KASSERT(m->m_len >= sizeof(struct ether_header), ("%s: mbuf not large enough for header", __func__)); bcopy(mtod(m, char *), &vlan, sizeof(struct ether_header)); vlan.evl_proto = vlan.evl_encap_proto; vlan.evl_encap_proto = htons(ETHERTYPE_VLAN); vlan.evl_tag = htons(m->m_pkthdr.ether_vtag); m->m_len -= sizeof(struct ether_header); m->m_data += sizeof(struct ether_header); /* * If a data link has been supplied by the caller, then we will need to * re-create a stack allocated mbuf chain with the following structure: * * (1) mbuf #1 will contain the supplied data link * (2) mbuf #2 will contain the vlan header * (3) mbuf #3 will contain the original mbuf's packet data * * Otherwise, submit the packet and vlan header via bpf_mtap2(). */ if (data != NULL) { mv.m_next = m; mv.m_data = (caddr_t)&vlan; mv.m_len = sizeof(vlan); mb.m_next = &mv; mb.m_data = data; mb.m_len = dlen; bpf_mtap(bp, &mb); } else bpf_mtap2(bp, &vlan, sizeof(vlan), m); m->m_len += sizeof(struct ether_header); m->m_data -= sizeof(struct ether_header); } struct mbuf * ether_vlanencap(struct mbuf *m, uint16_t tag) { struct ether_vlan_header *evl; M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); if (m == NULL) return (NULL); /* M_PREPEND takes care of m_len, m_pkthdr.len for us */ if (m->m_len < sizeof(*evl)) { m = m_pullup(m, sizeof(*evl)); if (m == NULL) return (NULL); } /* * Transform the Ethernet header into an Ethernet header * with 802.1Q encapsulation. */ evl = mtod(m, struct ether_vlan_header *); bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); evl->evl_encap_proto = htons(ETHERTYPE_VLAN); evl->evl_tag = htons(tag); return (m); } DECLARE_MODULE(ether, ether_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(ether, 1); Index: head/sys/net/if_fddisubr.c =================================================================== --- head/sys/net/if_fddisubr.c (revision 290382) +++ head/sys/net/if_fddisubr.c (revision 290383) @@ -1,673 +1,671 @@ /*- * Copyright (c) 1995, 1996 * Matt Thomas . All rights reserved. * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: if_ethersubr.c,v 1.5 1994/12/13 22:31:45 wollman Exp * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #endif #ifdef INET6 #include #endif #ifdef DECNET #include #endif #include static const u_char fddibroadcastaddr[FDDI_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int fddi_resolvemulti(struct ifnet *, struct sockaddr **, struct sockaddr *); static int fddi_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void fddi_input(struct ifnet *ifp, struct mbuf *m); #define senderr(e) do { error = (e); goto bad; } while (0) /* * FDDI output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. */ static int fddi_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int16_t type; int loop_copy = 0, error = 0, hdrcmplt = 0; u_char esrc[FDDI_ADDR_LEN], edst[FDDI_ADDR_LEN]; struct fddi_header *fh; #if defined(INET) || defined(INET6) int is_gw = 0; #endif #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); getmicrotime(&ifp->if_lastchange); #if defined(INET) || defined(INET6) if (ro != NULL && ro->ro_rt != NULL && (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) is_gw = 1; #endif switch (dst->sa_family) { #ifdef INET case AF_INET: { error = arpresolve(ifp, is_gw, m, dst, edst, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); break; } case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ETHER); loop_copy = -1; /* if this is for us, don't do it */ switch (ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: type = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: type = htons(ETHERTYPE_ARP); break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, FDDI_ADDR_LEN); else bcopy(ar_tha(ah), edst, FDDI_ADDR_LEN); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IPV6); break; #endif /* INET6 */ case pseudo_AF_HDRCMPLT: { const struct ether_header *eh; hdrcmplt = 1; eh = (const struct ether_header *)dst->sa_data; bcopy(eh->ether_shost, esrc, FDDI_ADDR_LEN); /* FALLTHROUGH */ } case AF_UNSPEC: { const struct ether_header *eh; loop_copy = -1; eh = (const struct ether_header *)dst->sa_data; bcopy(eh->ether_dhost, edst, FDDI_ADDR_LEN); if (*edst & 1) m->m_flags |= (M_BCAST|M_MCAST); type = eh->ether_type; break; } case AF_IMPLINK: { fh = mtod(m, struct fddi_header *); error = EPROTONOSUPPORT; switch (fh->fddi_fc & (FDDIFC_C|FDDIFC_L|FDDIFC_F)) { case FDDIFC_LLC_ASYNC: { /* legal priorities are 0 through 7 */ if ((fh->fddi_fc & FDDIFC_Z) > 7) goto bad; break; } case FDDIFC_LLC_SYNC: { /* FDDIFC_Z bits reserved, must be zero */ if (fh->fddi_fc & FDDIFC_Z) goto bad; break; } case FDDIFC_SMT: { /* FDDIFC_Z bits must be non zero */ if ((fh->fddi_fc & FDDIFC_Z) == 0) goto bad; break; } default: { /* anything else is too dangerous */ goto bad; } } error = 0; if (fh->fddi_dhost[0] & 1) m->m_flags |= (M_BCAST|M_MCAST); goto queue_it; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); } /* * Add LLC header. */ if (type != 0) { struct llc *l; M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT); if (m == 0) senderr(ENOBUFS); l = mtod(m, struct llc *); l->llc_control = LLC_UI; l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP; l->llc_snap.org_code[0] = l->llc_snap.org_code[1] = l->llc_snap.org_code[2] = 0; l->llc_snap.ether_type = htons(type); } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, FDDI_HDR_LEN, M_NOWAIT); if (m == 0) senderr(ENOBUFS); fh = mtod(m, struct fddi_header *); fh->fddi_fc = FDDIFC_LLC_ASYNC|FDDIFC_LLC_PRIO4; bcopy((caddr_t)edst, (caddr_t)fh->fddi_dhost, FDDI_ADDR_LEN); queue_it: if (hdrcmplt) bcopy((caddr_t)esrc, (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN); else bcopy(IF_LLADDR(ifp), (caddr_t)fh->fddi_shost, FDDI_ADDR_LEN); /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) { if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { struct mbuf *n; n = m_copy(m, 0, (int)M_COPYALL); (void) if_simloop(ifp, n, dst->sa_family, FDDI_HDR_LEN); } else if (bcmp(fh->fddi_dhost, fh->fddi_shost, FDDI_ADDR_LEN) == 0) { (void) if_simloop(ifp, m, dst->sa_family, FDDI_HDR_LEN); return (0); /* XXX */ } } error = (ifp->if_transmit)(ifp, m); if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); bad: if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (m) m_freem(m); return (error); } /* * Process a received FDDI packet. */ static void fddi_input(ifp, m) struct ifnet *ifp; struct mbuf *m; { int isr; struct llc *l; struct fddi_header *fh; /* * Do consistency checks to verify assumptions * made by code past this point. */ if ((m->m_flags & M_PKTHDR) == 0) { if_printf(ifp, "discard frame w/o packet header\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } if (m->m_pkthdr.rcvif == NULL) { if_printf(ifp, "discard frame w/o interface pointer\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } m = m_pullup(m, FDDI_HDR_LEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } fh = mtod(m, struct fddi_header *); /* * Discard packet if interface is not up. */ if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) goto dropanyway; /* * Give bpf a chance at the packet. */ BPF_MTAP(ifp, m); /* * Interface marked for monitoring; discard packet. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); return; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif /* * Update interface statistics. */ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); getmicrotime(&ifp->if_lastchange); /* * Discard non local unicast packets when interface * is in promiscuous mode. */ if ((ifp->if_flags & IFF_PROMISC) && ((fh->fddi_dhost[0] & 1) == 0) && (bcmp(IF_LLADDR(ifp), (caddr_t)fh->fddi_dhost, FDDI_ADDR_LEN) != 0)) goto dropanyway; /* * Set mbuf flags for bcast/mcast. */ if (fh->fddi_dhost[0] & 1) { if (bcmp(ifp->if_broadcastaddr, fh->fddi_dhost, FDDI_ADDR_LEN) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } #ifdef M_LINK0 /* * If this has a LLC priority of 0, then mark it so upper * layers have a hint that it really came via a FDDI/Ethernet * bridge. */ if ((fh->fddi_fc & FDDIFC_LLC_PRIO7) == FDDIFC_LLC_PRIO0) m->m_flags |= M_LINK0; #endif /* Strip off FDDI header. */ m_adj(m, FDDI_HDR_LEN); m = m_pullup(m, LLC_SNAPFRAMELEN); if (m == 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } l = mtod(m, struct llc *); switch (l->llc_dsap) { case LLC_SNAP_LSAP: { u_int16_t type; if ((l->llc_control != LLC_UI) || (l->llc_ssap != LLC_SNAP_LSAP)) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } if (l->llc_snap.org_code[0] != 0 || l->llc_snap.org_code[1] != 0 || l->llc_snap.org_code[2] != 0) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } type = ntohs(l->llc_snap.ether_type); m_adj(m, LLC_SNAPFRAMELEN); switch (type) { #ifdef INET case ETHERTYPE_IP: - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) goto dropanyway; isr = NETISR_ARP; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif #ifdef DECNET case ETHERTYPE_DECNET: isr = NETISR_DECNET; break; #endif default: /* printf("fddi_input: unknown protocol 0x%x\n", type); */ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } break; } default: /* printf("fddi_input: unknown dsap 0x%x\n", l->llc_dsap); */ if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; dropanyway: if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (m) m_freem(m); return; } /* * Perform common duties while attaching to interface list */ void fddi_ifattach(ifp, lla, bpf) struct ifnet *ifp; const u_int8_t *lla; int bpf; { struct ifaddr *ifa; struct sockaddr_dl *sdl; ifp->if_type = IFT_FDDI; ifp->if_addrlen = FDDI_ADDR_LEN; ifp->if_hdrlen = 21; if_attach(ifp); /* Must be called before additional assignments */ ifp->if_mtu = FDDIMTU; ifp->if_output = fddi_output; ifp->if_input = fddi_input; ifp->if_resolvemulti = fddi_resolvemulti; ifp->if_broadcastaddr = fddibroadcastaddr; ifp->if_baudrate = 100000000; #ifdef IFF_NOTRAILERS ifp->if_flags |= IFF_NOTRAILERS; #endif ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_FDDI; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); if (bpf) bpfattach(ifp, DLT_FDDI, FDDI_HDR_LEN); return; } void fddi_ifdetach(ifp, bpf) struct ifnet *ifp; int bpf; { if (bpf) bpfdetach(ifp); if_detach(ifp); return; } int fddi_ioctl (ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct ifaddr *ifa; struct ifreq *ifr; int error; ifa = (struct ifaddr *) data; ifr = (struct ifreq *) data; error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: /* before arpwhohas */ ifp->if_init(ifp->if_softc); arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(IF_LLADDR(ifp), (caddr_t) sa->sa_data, FDDI_ADDR_LEN); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > FDDIMTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; default: error = EINVAL; break; } return (error); } static int fddi_resolvemulti(ifp, llsa, sa) struct ifnet *ifp; struct sockaddr **llsa; struct sockaddr *sa; { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if ((e_addr[0] & 1) != 1) return (EADDRNOTAVAIL); *llsa = 0; return (0); #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return (EADDRNOTAVAIL); sdl = link_init_sdl(ifp, *llsa, IFT_FDDI); sdl->sdl_nlen = 0; sdl->sdl_alen = FDDI_ADDR_LEN; sdl->sdl_slen = 0; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = 0; return (0); } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return (EADDRNOTAVAIL); sdl = link_init_sdl(ifp, *llsa, IFT_FDDI); sdl->sdl_nlen = 0; sdl->sdl_alen = FDDI_ADDR_LEN; sdl->sdl_slen = 0; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return (EAFNOSUPPORT); } return (0); } static moduledata_t fddi_mod = { "fddi", /* module name */ NULL, /* event handler */ 0 /* extra data */ }; DECLARE_MODULE(fddi, fddi_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(fddi, 1); Index: head/sys/net/if_fwsubr.c =================================================================== --- head/sys/net/if_fwsubr.c (revision 290382) +++ head/sys/net/if_fwsubr.c (revision 290383) @@ -1,862 +1,860 @@ /*- * Copyright (c) 2004 Doug Rabson * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #endif #ifdef INET6 #include #endif #include static MALLOC_DEFINE(M_FWCOM, "fw_com", "firewire interface internals"); struct fw_hwaddr firewire_broadcastaddr = { 0xffffffff, 0xffffffff, 0xff, 0xff, 0xffff, 0xffffffff }; static int firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct fw_com *fc = IFP2FWC(ifp); int error, type; struct m_tag *mtag; union fw_encap *enc; struct fw_hwaddr *destfw; uint8_t speed; uint16_t psize, fsize, dsize; struct mbuf *mtail; int unicast, dgl, foff; static int next_dgl; #if defined(INET) || defined(INET6) int is_gw = 0; #endif #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) goto bad; #endif if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) { error = ENETDOWN; goto bad; } #if defined(INET) || defined(INET6) if (ro != NULL && ro->ro_rt != NULL && (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) is_gw = 1; #endif /* * For unicast, we make a tag to store the lladdr of the * destination. This might not be the first time we have seen * the packet (for instance, the arp code might be trying to * re-send it after receiving an arp reply) so we only * allocate a tag if there isn't one there already. For * multicast, we will eventually use a different tag to store * the channel number. */ unicast = !(m->m_flags & (M_BCAST | M_MCAST)); if (unicast) { mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR, NULL); if (!mtag) { mtag = m_tag_alloc(MTAG_FIREWIRE, MTAG_FIREWIRE_HWADDR, sizeof (struct fw_hwaddr), M_NOWAIT); if (!mtag) { error = ENOMEM; goto bad; } m_tag_prepend(m, mtag); } destfw = (struct fw_hwaddr *)(mtag + 1); } else { destfw = 0; } switch (dst->sa_family) { #ifdef INET case AF_INET: /* * Only bother with arp for unicast. Allocation of * channels etc. for firewire is quite different and * doesn't fit into the arp model. */ if (unicast) { is_gw = 0; if (ro != NULL && ro->ro_rt != NULL && (ro->ro_rt->rt_flags & RTF_GATEWAY) != 0) is_gw = 1; error = arpresolve(ifp, is_gw, m, dst, (u_char *) destfw, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); } type = ETHERTYPE_IP; break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_IEEE1394); type = ETHERTYPE_ARP; if (unicast) *destfw = *(struct fw_hwaddr *) ar_tha(ah); /* * The standard arp code leaves a hole for the target * hardware address which we need to close up. */ bcopy(ar_tpa(ah), ar_tha(ah), ah->ar_pln); m_adj(m, -ah->ar_hln); break; } #endif #ifdef INET6 case AF_INET6: if (unicast) { error = nd6_resolve(fc->fc_ifp, is_gw, m, dst, (u_char *) destfw, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); } type = ETHERTYPE_IPV6; break; #endif default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); error = EAFNOSUPPORT; goto bad; } /* * Let BPF tap off a copy before we encapsulate. */ if (bpf_peers_present(ifp->if_bpf)) { struct fw_bpfhdr h; if (unicast) bcopy(destfw, h.firewire_dhost, 8); else bcopy(&firewire_broadcastaddr, h.firewire_dhost, 8); bcopy(&fc->fc_hwaddr, h.firewire_shost, 8); h.firewire_type = htons(type); bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m); } /* * Punt on MCAP for now and send all multicast packets on the * broadcast channel. */ if (m->m_flags & M_MCAST) m->m_flags |= M_BCAST; /* * Figure out what speed to use and what the largest supported * packet size is. For unicast, this is the minimum of what we * can speak and what they can hear. For broadcast, lets be * conservative and use S100. We could possibly improve that * by examining the bus manager's speed map or similar. We * also reduce the packet size for broadcast to account for * the GASP header. */ if (unicast) { speed = min(fc->fc_speed, destfw->sspd); psize = min(512 << speed, 2 << destfw->sender_max_rec); } else { speed = 0; psize = 512 - 2*sizeof(uint32_t); } /* * Next, we encapsulate, possibly fragmenting the original * datagram if it won't fit into a single packet. */ if (m->m_pkthdr.len <= psize - sizeof(uint32_t)) { /* * No fragmentation is necessary. */ M_PREPEND(m, sizeof(uint32_t), M_NOWAIT); if (!m) { error = ENOBUFS; goto bad; } enc = mtod(m, union fw_encap *); enc->unfrag.ether_type = type; enc->unfrag.lf = FW_ENCAP_UNFRAG; enc->unfrag.reserved = 0; /* * Byte swap the encapsulation header manually. */ enc->ul[0] = htonl(enc->ul[0]); error = (ifp->if_transmit)(ifp, m); return (error); } else { /* * Fragment the datagram, making sure to leave enough * space for the encapsulation header in each packet. */ fsize = psize - 2*sizeof(uint32_t); dgl = next_dgl++; dsize = m->m_pkthdr.len; foff = 0; while (m) { if (m->m_pkthdr.len > fsize) { /* * Split off the tail segment from the * datagram, copying our tags over. */ mtail = m_split(m, fsize, M_NOWAIT); m_tag_copy_chain(mtail, m, M_NOWAIT); } else { mtail = 0; } /* * Add our encapsulation header to this * fragment and hand it off to the link. */ M_PREPEND(m, 2*sizeof(uint32_t), M_NOWAIT); if (!m) { error = ENOBUFS; goto bad; } enc = mtod(m, union fw_encap *); if (foff == 0) { enc->firstfrag.lf = FW_ENCAP_FIRST; enc->firstfrag.reserved1 = 0; enc->firstfrag.reserved2 = 0; enc->firstfrag.datagram_size = dsize - 1; enc->firstfrag.ether_type = type; enc->firstfrag.dgl = dgl; } else { if (mtail) enc->nextfrag.lf = FW_ENCAP_NEXT; else enc->nextfrag.lf = FW_ENCAP_LAST; enc->nextfrag.reserved1 = 0; enc->nextfrag.reserved2 = 0; enc->nextfrag.reserved3 = 0; enc->nextfrag.datagram_size = dsize - 1; enc->nextfrag.fragment_offset = foff; enc->nextfrag.dgl = dgl; } foff += m->m_pkthdr.len - 2*sizeof(uint32_t); /* * Byte swap the encapsulation header manually. */ enc->ul[0] = htonl(enc->ul[0]); enc->ul[1] = htonl(enc->ul[1]); error = (ifp->if_transmit)(ifp, m); if (error) { if (mtail) m_freem(mtail); return (ENOBUFS); } m = mtail; } return (0); } bad: if (m) m_freem(m); return (error); } static struct mbuf * firewire_input_fragment(struct fw_com *fc, struct mbuf *m, int src) { union fw_encap *enc; struct fw_reass *r; struct mbuf *mf, *mprev; int dsize; int fstart, fend, start, end, islast; uint32_t id; /* * Find an existing reassembly buffer or create a new one. */ enc = mtod(m, union fw_encap *); id = enc->firstfrag.dgl | (src << 16); STAILQ_FOREACH(r, &fc->fc_frags, fr_link) if (r->fr_id == id) break; if (!r) { r = malloc(sizeof(struct fw_reass), M_TEMP, M_NOWAIT); if (!r) { m_freem(m); return 0; } r->fr_id = id; r->fr_frags = 0; STAILQ_INSERT_HEAD(&fc->fc_frags, r, fr_link); } /* * If this fragment overlaps any other fragment, we must discard * the partial reassembly and start again. */ if (enc->firstfrag.lf == FW_ENCAP_FIRST) fstart = 0; else fstart = enc->nextfrag.fragment_offset; fend = fstart + m->m_pkthdr.len - 2*sizeof(uint32_t); dsize = enc->nextfrag.datagram_size; islast = (enc->nextfrag.lf == FW_ENCAP_LAST); for (mf = r->fr_frags; mf; mf = mf->m_nextpkt) { enc = mtod(mf, union fw_encap *); if (enc->nextfrag.datagram_size != dsize) { /* * This fragment must be from a different * packet. */ goto bad; } if (enc->firstfrag.lf == FW_ENCAP_FIRST) start = 0; else start = enc->nextfrag.fragment_offset; end = start + mf->m_pkthdr.len - 2*sizeof(uint32_t); if ((fstart < end && fend > start) || (islast && enc->nextfrag.lf == FW_ENCAP_LAST)) { /* * Overlap - discard reassembly buffer and start * again with this fragment. */ goto bad; } } /* * Find where to put this fragment in the list. */ for (mf = r->fr_frags, mprev = NULL; mf; mprev = mf, mf = mf->m_nextpkt) { enc = mtod(mf, union fw_encap *); if (enc->firstfrag.lf == FW_ENCAP_FIRST) start = 0; else start = enc->nextfrag.fragment_offset; if (start >= fend) break; } /* * If this is a last fragment and we are not adding at the end * of the list, discard the buffer. */ if (islast && mprev && mprev->m_nextpkt) goto bad; if (mprev) { m->m_nextpkt = mprev->m_nextpkt; mprev->m_nextpkt = m; /* * Coalesce forwards and see if we can make a whole * datagram. */ enc = mtod(mprev, union fw_encap *); if (enc->firstfrag.lf == FW_ENCAP_FIRST) start = 0; else start = enc->nextfrag.fragment_offset; end = start + mprev->m_pkthdr.len - 2*sizeof(uint32_t); while (end == fstart) { /* * Strip off the encap header from m and * append it to mprev, freeing m. */ m_adj(m, 2*sizeof(uint32_t)); mprev->m_nextpkt = m->m_nextpkt; mprev->m_pkthdr.len += m->m_pkthdr.len; m_cat(mprev, m); if (mprev->m_pkthdr.len == dsize + 1 + 2*sizeof(uint32_t)) { /* * We have assembled a complete packet * we must be finished. Make sure we have * merged the whole chain. */ STAILQ_REMOVE(&fc->fc_frags, r, fw_reass, fr_link); free(r, M_TEMP); m = mprev->m_nextpkt; while (m) { mf = m->m_nextpkt; m_freem(m); m = mf; } mprev->m_nextpkt = NULL; return (mprev); } /* * See if we can continue merging forwards. */ end = fend; m = mprev->m_nextpkt; if (m) { enc = mtod(m, union fw_encap *); if (enc->firstfrag.lf == FW_ENCAP_FIRST) fstart = 0; else fstart = enc->nextfrag.fragment_offset; fend = fstart + m->m_pkthdr.len - 2*sizeof(uint32_t); } else { break; } } } else { m->m_nextpkt = 0; r->fr_frags = m; } return (0); bad: while (r->fr_frags) { mf = r->fr_frags; r->fr_frags = mf->m_nextpkt; m_freem(mf); } m->m_nextpkt = 0; r->fr_frags = m; return (0); } void firewire_input(struct ifnet *ifp, struct mbuf *m, uint16_t src) { struct fw_com *fc = IFP2FWC(ifp); union fw_encap *enc; int type, isr; /* * The caller has already stripped off the packet header * (stream or wreqb) and marked the mbuf's M_BCAST flag * appropriately. We de-encapsulate the IP packet and pass it * up the line after handling link-level fragmentation. */ if (m->m_pkthdr.len < sizeof(uint32_t)) { if_printf(ifp, "discarding frame without " "encapsulation header (len %u pkt len %u)\n", m->m_len, m->m_pkthdr.len); } m = m_pullup(m, sizeof(uint32_t)); if (m == NULL) return; enc = mtod(m, union fw_encap *); /* * Byte swap the encapsulation header manually. */ enc->ul[0] = ntohl(enc->ul[0]); if (enc->unfrag.lf != 0) { m = m_pullup(m, 2*sizeof(uint32_t)); if (!m) return; enc = mtod(m, union fw_encap *); enc->ul[1] = ntohl(enc->ul[1]); m = firewire_input_fragment(fc, m, src); if (!m) return; enc = mtod(m, union fw_encap *); type = enc->firstfrag.ether_type; m_adj(m, 2*sizeof(uint32_t)); } else { type = enc->unfrag.ether_type; m_adj(m, sizeof(uint32_t)); } if (m->m_pkthdr.rcvif == NULL) { if_printf(ifp, "discard frame w/o interface pointer\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } #ifdef DIAGNOSTIC if (m->m_pkthdr.rcvif != ifp) { if_printf(ifp, "Warning, frame marked as received on %s\n", m->m_pkthdr.rcvif->if_xname); } #endif #ifdef MAC /* * Tag the mbuf with an appropriate MAC label before any other * consumers can get to it. */ mac_ifnet_create_mbuf(ifp, m); #endif /* * Give bpf a chance at the packet. The link-level driver * should have left us a tag with the EUID of the sender. */ if (bpf_peers_present(ifp->if_bpf)) { struct fw_bpfhdr h; struct m_tag *mtag; mtag = m_tag_locate(m, MTAG_FIREWIRE, MTAG_FIREWIRE_SENDER_EUID, 0); if (mtag) bcopy(mtag + 1, h.firewire_shost, 8); else bcopy(&firewire_broadcastaddr, h.firewire_dhost, 8); bcopy(&fc->fc_hwaddr, h.firewire_dhost, 8); h.firewire_type = htons(type); bpf_mtap2(ifp->if_bpf, &h, sizeof(h), m); } if (ifp->if_flags & IFF_MONITOR) { /* * Interface marked for monitoring; discard packet. */ m_freem(m); return; } if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* Discard packet if interface is not up */ if ((ifp->if_flags & IFF_UP) == 0) { m_freem(m); return; } if (m->m_flags & (M_BCAST|M_MCAST)) if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); switch (type) { #ifdef INET case ETHERTYPE_IP: - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; case ETHERTYPE_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); /* * Adjust the arp packet to insert an empty tha slot. */ m->m_len += ah->ar_hln; m->m_pkthdr.len += ah->ar_hln; bcopy(ar_tha(ah), ar_tpa(ah), ah->ar_pln); isr = NETISR_ARP; break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; break; #endif default: m_freem(m); return; } M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); } int firewire_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa = (struct ifaddr *) data; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(&IFP2FWC(ifp)->fc_hwaddr, (caddr_t) sa->sa_data, sizeof(struct fw_hwaddr)); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > 1500) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; default: error = EINVAL; /* XXX netbsd has ENOTTY??? */ break; } return (error); } static int firewire_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa, struct sockaddr *sa) { #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. */ *llsa = 0; return 0; #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) return EADDRNOTAVAIL; *llsa = 0; return 0; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = 0; return 0; } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) return EADDRNOTAVAIL; *llsa = 0; return 0; #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return EAFNOSUPPORT; } } void firewire_ifattach(struct ifnet *ifp, struct fw_hwaddr *llc) { struct fw_com *fc = IFP2FWC(ifp); struct ifaddr *ifa; struct sockaddr_dl *sdl; static const char* speeds[] = { "S100", "S200", "S400", "S800", "S1600", "S3200" }; fc->fc_speed = llc->sspd; STAILQ_INIT(&fc->fc_frags); ifp->if_addrlen = sizeof(struct fw_hwaddr); ifp->if_hdrlen = 0; if_attach(ifp); ifp->if_mtu = 1500; /* XXX */ ifp->if_output = firewire_output; ifp->if_resolvemulti = firewire_resolvemulti; ifp->if_broadcastaddr = (u_char *) &firewire_broadcastaddr; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_IEEE1394; sdl->sdl_alen = ifp->if_addrlen; bcopy(llc, LLADDR(sdl), ifp->if_addrlen); bpfattach(ifp, DLT_APPLE_IP_OVER_IEEE1394, sizeof(struct fw_hwaddr)); if_printf(ifp, "Firewire address: %8D @ 0x%04x%08x, %s, maxrec %d\n", (uint8_t *) &llc->sender_unique_ID_hi, ":", ntohs(llc->sender_unicast_FIFO_hi), ntohl(llc->sender_unicast_FIFO_lo), speeds[llc->sspd], (2 << llc->sender_max_rec)); } void firewire_ifdetach(struct ifnet *ifp) { bpfdetach(ifp); if_detach(ifp); } void firewire_busreset(struct ifnet *ifp) { struct fw_com *fc = IFP2FWC(ifp); struct fw_reass *r; struct mbuf *m; /* * Discard any partial datagrams since the host ids may have changed. */ while ((r = STAILQ_FIRST(&fc->fc_frags))) { STAILQ_REMOVE_HEAD(&fc->fc_frags, fr_link); while (r->fr_frags) { m = r->fr_frags; r->fr_frags = m->m_nextpkt; m_freem(m); } free(r, M_TEMP); } } static void * firewire_alloc(u_char type, struct ifnet *ifp) { struct fw_com *fc; fc = malloc(sizeof(struct fw_com), M_FWCOM, M_WAITOK | M_ZERO); fc->fc_ifp = ifp; return (fc); } static void firewire_free(void *com, u_char type) { free(com, M_FWCOM); } static int firewire_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: if_register_com_alloc(IFT_IEEE1394, firewire_alloc, firewire_free); break; case MOD_UNLOAD: if_deregister_com_alloc(IFT_IEEE1394); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t firewire_mod = { "if_firewire", firewire_modevent, 0 }; DECLARE_MODULE(if_firewire, firewire_mod, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(if_firewire, 1); Index: head/sys/net/if_iso88025subr.c =================================================================== --- head/sys/net/if_iso88025subr.c (revision 290382) +++ head/sys/net/if_iso88025subr.c (revision 290383) @@ -1,703 +1,701 @@ /*- * Copyright (c) 1998, Larry Lile * All rights reserved. * * For latest sources and information on this driver, please * go to http://anarchy.stdio.com. * * Questions, comments or suggestions should be directed to * Larry Lile . * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ /* * * General ISO 802.5 (Token Ring) support routines * */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #endif #ifdef INET6 #include #endif #include static const u_char iso88025_broadcastaddr[ISO88025_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; static int iso88025_resolvemulti (struct ifnet *, struct sockaddr **, struct sockaddr *); #define senderr(e) do { error = (e); goto bad; } while (0) /* * Perform common duties while attaching to interface list */ void iso88025_ifattach(struct ifnet *ifp, const u_int8_t *lla, int bpf) { struct ifaddr *ifa; struct sockaddr_dl *sdl; ifa = NULL; ifp->if_type = IFT_ISO88025; ifp->if_addrlen = ISO88025_ADDR_LEN; ifp->if_hdrlen = ISO88025_HDR_LEN; if_attach(ifp); /* Must be called before additional assignments */ ifp->if_output = iso88025_output; ifp->if_input = iso88025_input; ifp->if_resolvemulti = iso88025_resolvemulti; ifp->if_broadcastaddr = iso88025_broadcastaddr; if (ifp->if_baudrate == 0) ifp->if_baudrate = TR_16MBPS; /* 16Mbit should be a safe default */ if (ifp->if_mtu == 0) ifp->if_mtu = ISO88025_DEFAULT_MTU; ifa = ifp->if_addr; KASSERT(ifa != NULL, ("%s: no lladdr!\n", __func__)); sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_ISO88025; sdl->sdl_alen = ifp->if_addrlen; bcopy(lla, LLADDR(sdl), ifp->if_addrlen); if (bpf) bpfattach(ifp, DLT_IEEE802, ISO88025_HDR_LEN); return; } /* * Perform common duties while detaching a Token Ring interface */ void iso88025_ifdetach(ifp, bpf) struct ifnet *ifp; int bpf; { if (bpf) bpfdetach(ifp); if_detach(ifp); return; } int iso88025_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifaddr *ifa; struct ifreq *ifr; int error; ifa = (struct ifaddr *) data; ifr = (struct ifreq *) data; error = 0; switch (command) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifp->if_init(ifp->if_softc); /* before arpwhohas */ arp_ifinit(ifp, ifa); break; #endif /* INET */ default: ifp->if_init(ifp->if_softc); break; } break; case SIOCGIFADDR: { struct sockaddr *sa; sa = (struct sockaddr *) & ifr->ifr_data; bcopy(IF_LLADDR(ifp), (caddr_t) sa->sa_data, ISO88025_ADDR_LEN); } break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > ISO88025_MAX_MTU) { error = EINVAL; } else { ifp->if_mtu = ifr->ifr_mtu; } break; default: error = EINVAL; /* XXX netbsd has ENOTTY??? */ break; } return (error); } /* * ISO88025 encapsulation */ int iso88025_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { u_int16_t snap_type = 0; int loop_copy = 0, error = 0, rif_len = 0; u_char edst[ISO88025_ADDR_LEN]; struct iso88025_header *th; struct iso88025_header gen_th; struct sockaddr_dl *sdl = NULL; struct rtentry *rt0 = NULL; int is_gw = 0; if (ro != NULL) { rt0 = ro->ro_rt; if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0) is_gw = 1; } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); getmicrotime(&ifp->if_lastchange); /* Calculate routing info length based on arp table entry */ /* XXX any better way to do this ? */ if (rt0 && (sdl = (struct sockaddr_dl *)rt0->rt_gateway)) if (SDL_ISO88025(sdl)->trld_rcf != 0) rif_len = TR_RCF_RIFLEN(SDL_ISO88025(sdl)->trld_rcf); /* Generate a generic 802.5 header for the packet */ gen_th.ac = TR_AC; gen_th.fc = TR_LLC_FRAME; (void)memcpy((caddr_t)gen_th.iso88025_shost, IF_LLADDR(ifp), ISO88025_ADDR_LEN); if (rif_len) { gen_th.iso88025_shost[0] |= TR_RII; if (rif_len > 2) { gen_th.rcf = SDL_ISO88025(sdl)->trld_rcf; (void)memcpy((caddr_t)gen_th.rd, (caddr_t)SDL_ISO88025(sdl)->trld_route, rif_len - 2); } } switch (dst->sa_family) { #ifdef INET case AF_INET: error = arpresolve(ifp, is_gw, m, dst, edst, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); snap_type = ETHERTYPE_IP; break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_IEEE802); loop_copy = -1; /* if this is for us, don't do it */ switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: snap_type = ETHERTYPE_REVARP; break; case ARPOP_REQUEST: case ARPOP_REPLY: default: snap_type = ETHERTYPE_ARP; break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, ISO88025_ADDR_LEN); else bcopy(ar_tha(ah), edst, ISO88025_ADDR_LEN); } break; #endif /* INET */ #ifdef INET6 case AF_INET6: error = nd6_resolve(ifp, is_gw, m, dst, edst, NULL); if (error) return (error == EWOULDBLOCK ? 0 : error); snap_type = ETHERTYPE_IPV6; break; #endif /* INET6 */ case AF_UNSPEC: { const struct iso88025_sockaddr_data *sd; /* * For AF_UNSPEC sockaddr.sa_data must contain all of the * mac information needed to send the packet. This allows * full mac, llc, and source routing function to be controlled. * llc and source routing information must already be in the * mbuf provided, ac/fc are set in sa_data. sockaddr.sa_data * should be an iso88025_sockaddr_data structure see iso88025.h */ loop_copy = -1; sd = (const struct iso88025_sockaddr_data *)dst->sa_data; gen_th.ac = sd->ac; gen_th.fc = sd->fc; (void)memcpy(edst, sd->ether_dhost, ISO88025_ADDR_LEN); (void)memcpy(gen_th.iso88025_shost, sd->ether_shost, ISO88025_ADDR_LEN); rif_len = 0; break; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); break; } /* * Add LLC header. */ if (snap_type != 0) { struct llc *l; M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT); if (m == 0) senderr(ENOBUFS); l = mtod(m, struct llc *); l->llc_control = LLC_UI; l->llc_dsap = l->llc_ssap = LLC_SNAP_LSAP; l->llc_snap.org_code[0] = l->llc_snap.org_code[1] = l->llc_snap.org_code[2] = 0; l->llc_snap.ether_type = htons(snap_type); } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, ISO88025_HDR_LEN + rif_len, M_NOWAIT); if (m == 0) senderr(ENOBUFS); th = mtod(m, struct iso88025_header *); bcopy((caddr_t)edst, (caddr_t)&gen_th.iso88025_dhost, ISO88025_ADDR_LEN); /* Copy as much of the generic header as is needed into the mbuf */ memcpy(th, &gen_th, ISO88025_HDR_LEN + rif_len); /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((ifp->if_flags & IFF_SIMPLEX) && (loop_copy != -1)) { if ((m->m_flags & M_BCAST) || (loop_copy > 0)) { struct mbuf *n; n = m_copy(m, 0, (int)M_COPYALL); (void) if_simloop(ifp, n, dst->sa_family, ISO88025_HDR_LEN); } else if (bcmp(th->iso88025_dhost, th->iso88025_shost, ETHER_ADDR_LEN) == 0) { (void) if_simloop(ifp, m, dst->sa_family, ISO88025_HDR_LEN); return(0); /* XXX */ } } IFQ_HANDOFF_ADJ(ifp, m, ISO88025_HDR_LEN + LLC_SNAPFRAMELEN, error); if (error) { printf("iso88025_output: packet dropped QFULL.\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } return (error); bad: if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); if (m) m_freem(m); return (error); } /* * ISO 88025 de-encapsulation */ void iso88025_input(ifp, m) struct ifnet *ifp; struct mbuf *m; { struct iso88025_header *th; struct llc *l; int isr; int mac_hdr_len; /* * Do consistency checks to verify assumptions * made by code past this point. */ if ((m->m_flags & M_PKTHDR) == 0) { if_printf(ifp, "discard frame w/o packet header\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } if (m->m_pkthdr.rcvif == NULL) { if_printf(ifp, "discard frame w/o interface pointer\n"); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } m = m_pullup(m, ISO88025_HDR_LEN); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } th = mtod(m, struct iso88025_header *); /* * Discard packet if interface is not up. */ if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) goto dropanyway; /* * Give bpf a chance at the packet. */ BPF_MTAP(ifp, m); /* * Interface marked for monitoring; discard packet. */ if (ifp->if_flags & IFF_MONITOR) { m_freem(m); return; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif /* * Update interface statistics. */ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); getmicrotime(&ifp->if_lastchange); /* * Discard non local unicast packets when interface * is in promiscuous mode. */ if ((ifp->if_flags & IFF_PROMISC) && ((th->iso88025_dhost[0] & 1) == 0) && (bcmp(IF_LLADDR(ifp), (caddr_t) th->iso88025_dhost, ISO88025_ADDR_LEN) != 0)) goto dropanyway; /* * Set mbuf flags for bcast/mcast. */ if (th->iso88025_dhost[0] & 1) { if (bcmp(iso88025_broadcastaddr, th->iso88025_dhost, ISO88025_ADDR_LEN) == 0) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } mac_hdr_len = ISO88025_HDR_LEN; /* Check for source routing info */ if (th->iso88025_shost[0] & TR_RII) mac_hdr_len += TR_RCF_RIFLEN(th->rcf); /* Strip off ISO88025 header. */ m_adj(m, mac_hdr_len); m = m_pullup(m, LLC_SNAPFRAMELEN); if (m == 0) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto dropanyway; } l = mtod(m, struct llc *); switch (l->llc_dsap) { case LLC_SNAP_LSAP: { u_int16_t type; if ((l->llc_control != LLC_UI) || (l->llc_ssap != LLC_SNAP_LSAP)) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } if (l->llc_snap.org_code[0] != 0 || l->llc_snap.org_code[1] != 0 || l->llc_snap.org_code[2] != 0) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } type = ntohs(l->llc_snap.ether_type); m_adj(m, LLC_SNAPFRAMELEN); switch (type) { #ifdef INET case ETHERTYPE_IP: th->iso88025_shost[0] &= ~(TR_RII); - if ((m = ip_fastforward(m)) == NULL) - return; isr = NETISR_IP; break; case ETHERTYPE_ARP: if (ifp->if_flags & IFF_NOARP) goto dropanyway; isr = NETISR_ARP; break; #endif /* INET */ #ifdef INET6 case ETHERTYPE_IPV6: th->iso88025_shost[0] &= ~(TR_RII); isr = NETISR_IPV6; break; #endif /* INET6 */ default: printf("iso88025_input: unexpected llc_snap ether_type 0x%02x\n", type); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; } break; } #ifdef ISO case LLC_ISO_LSAP: switch (l->llc_control) { case LLC_UI: if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; case LLC_XID: case LLC_XID_P: if(m->m_len < ISO88025_ADDR_LEN) goto dropanyway; l->llc_window = 0; l->llc_fid = 9; l->llc_class = 1; l->llc_dsap = l->llc_ssap = 0; /* Fall through to */ case LLC_TEST: case LLC_TEST_P: { struct sockaddr sa; struct iso88025_sockaddr_data *th2; int i; u_char c; c = l->llc_dsap; if (th->iso88025_shost[0] & TR_RII) { /* XXX */ printf("iso88025_input: dropping source routed LLC_TEST\n"); goto dropanyway; } l->llc_dsap = l->llc_ssap; l->llc_ssap = c; if (m->m_flags & (M_BCAST | M_MCAST)) bcopy((caddr_t)IF_LLADDR(ifp), (caddr_t)th->iso88025_dhost, ISO88025_ADDR_LEN); sa.sa_family = AF_UNSPEC; sa.sa_len = sizeof(sa); th2 = (struct iso88025_sockaddr_data *)sa.sa_data; for (i = 0; i < ISO88025_ADDR_LEN; i++) { th2->ether_shost[i] = c = th->iso88025_dhost[i]; th2->ether_dhost[i] = th->iso88025_dhost[i] = th->iso88025_shost[i]; th->iso88025_shost[i] = c; } th2->ac = TR_AC; th2->fc = TR_LLC_FRAME; ifp->if_output(ifp, m, &sa, NULL); return; } default: printf("iso88025_input: unexpected llc control 0x%02x\n", l->llc_control); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; } break; #endif /* ISO */ default: printf("iso88025_input: unknown dsap 0x%x\n", l->llc_dsap); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto dropanyway; break; } M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; dropanyway: if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if (m) m_freem(m); return; } static int iso88025_resolvemulti (ifp, llsa, sa) struct ifnet *ifp; struct sockaddr **llsa; struct sockaddr *sa; { struct sockaddr_dl *sdl; #ifdef INET struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *sin6; #endif u_char *e_addr; switch(sa->sa_family) { case AF_LINK: /* * No mapping needed. Just check that it's a valid MC address. */ sdl = (struct sockaddr_dl *)sa; e_addr = LLADDR(sdl); if ((e_addr[0] & 1) != 1) { return (EADDRNOTAVAIL); } *llsa = 0; return (0); #ifdef INET case AF_INET: sin = (struct sockaddr_in *)sa; if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { return (EADDRNOTAVAIL); } sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025); sdl->sdl_alen = ISO88025_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IP_MULTICAST(&sin->sin_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* * An IP6 address of 0 means listen to all * of the Ethernet multicast address used for IP6. * (This is used for multicast routers.) */ ifp->if_flags |= IFF_ALLMULTI; *llsa = 0; return (0); } if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { return (EADDRNOTAVAIL); } sdl = link_init_sdl(ifp, *llsa, IFT_ISO88025); sdl->sdl_alen = ISO88025_ADDR_LEN; e_addr = LLADDR(sdl); ETHER_MAP_IPV6_MULTICAST(&sin6->sin6_addr, e_addr); *llsa = (struct sockaddr *)sdl; return (0); #endif default: /* * Well, the text isn't quite right, but it's the name * that counts... */ return (EAFNOSUPPORT); } return (0); } static moduledata_t iso88025_mod = { .name = "iso88025", }; DECLARE_MODULE(iso88025, iso88025_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(iso88025, 1); Index: head/sys/netinet/in_var.h =================================================================== --- head/sys/netinet/in_var.h (revision 290382) +++ head/sys/netinet/in_var.h (revision 290383) @@ -1,399 +1,399 @@ /*- * Copyright (c) 1985, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_var.h 8.2 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _NETINET_IN_VAR_H_ #define _NETINET_IN_VAR_H_ /* * Argument structure for SIOCAIFADDR. */ struct in_aliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ struct sockaddr_in ifra_addr; struct sockaddr_in ifra_broadaddr; #define ifra_dstaddr ifra_broadaddr struct sockaddr_in ifra_mask; int ifra_vhid; }; #ifdef _KERNEL #include #include #include struct igmp_ifsoftc; struct in_multi; struct lltable; /* * IPv4 per-interface state. */ struct in_ifinfo { struct lltable *ii_llt; /* ARP state */ struct igmp_ifsoftc *ii_igmp; /* IGMP state */ struct in_multi *ii_allhosts; /* 224.0.0.1 membership */ }; /* * Interface address, Internet version. One of these structures * is allocated for each Internet address on an interface. * The ifaddr structure contains the protocol-independent part * of the structure and is assumed to be first. */ struct in_ifaddr { struct ifaddr ia_ifa; /* protocol-independent info */ #define ia_ifp ia_ifa.ifa_ifp #define ia_flags ia_ifa.ifa_flags /* ia_subnet{,mask} in host order */ u_long ia_subnet; /* subnet address */ u_long ia_subnetmask; /* mask of subnet */ LIST_ENTRY(in_ifaddr) ia_hash; /* entry in bucket of inet addresses */ TAILQ_ENTRY(in_ifaddr) ia_link; /* list of internet addresses */ struct sockaddr_in ia_addr; /* reserve space for interface name */ struct sockaddr_in ia_dstaddr; /* reserve space for broadcast addr */ #define ia_broadaddr ia_dstaddr struct sockaddr_in ia_sockmask; /* reserve space for general netmask */ }; /* * Given a pointer to an in_ifaddr (ifaddr), * return a pointer to the addr as a sockaddr_in. */ #define IA_SIN(ia) (&(((struct in_ifaddr *)(ia))->ia_addr)) #define IA_DSTSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_dstaddr)) #define IA_MASKSIN(ia) (&(((struct in_ifaddr *)(ia))->ia_sockmask)) #define IN_LNAOF(in, ifa) \ ((ntohl((in).s_addr) & ~((struct in_ifaddr *)(ifa)->ia_subnetmask)) extern u_char inetctlerrmap[]; #define LLTABLE(ifp) \ ((struct in_ifinfo *)(ifp)->if_afdata[AF_INET])->ii_llt /* * Hash table for IP addresses. */ TAILQ_HEAD(in_ifaddrhead, in_ifaddr); LIST_HEAD(in_ifaddrhashhead, in_ifaddr); VNET_DECLARE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); VNET_DECLARE(struct in_ifaddrhead, in_ifaddrhead); VNET_DECLARE(u_long, in_ifaddrhmask); /* mask for hash table */ #define V_in_ifaddrhashtbl VNET(in_ifaddrhashtbl) #define V_in_ifaddrhead VNET(in_ifaddrhead) #define V_in_ifaddrhmask VNET(in_ifaddrhmask) #define INADDR_NHASH_LOG2 9 #define INADDR_NHASH (1 << INADDR_NHASH_LOG2) #define INADDR_HASHVAL(x) fnv_32_buf((&(x)), sizeof(x), FNV1_32_INIT) #define INADDR_HASH(x) \ (&V_in_ifaddrhashtbl[INADDR_HASHVAL(x) & V_in_ifaddrhmask]) extern struct rmlock in_ifaddr_lock; #define IN_IFADDR_LOCK_ASSERT() rm_assert(&in_ifaddr_lock, RA_LOCKED) #define IN_IFADDR_RLOCK(t) rm_rlock(&in_ifaddr_lock, (t)) #define IN_IFADDR_RLOCK_ASSERT() rm_assert(&in_ifaddr_lock, RA_RLOCKED) #define IN_IFADDR_RUNLOCK(t) rm_runlock(&in_ifaddr_lock, (t)) #define IN_IFADDR_WLOCK() rm_wlock(&in_ifaddr_lock) #define IN_IFADDR_WLOCK_ASSERT() rm_assert(&in_ifaddr_lock, RA_WLOCKED) #define IN_IFADDR_WUNLOCK() rm_wunlock(&in_ifaddr_lock) /* * Macro for finding the internet address structure (in_ifaddr) * corresponding to one of our IP addresses (in_addr). */ #define INADDR_TO_IFADDR(addr, ia) \ /* struct in_addr addr; */ \ /* struct in_ifaddr *ia; */ \ do { \ \ LIST_FOREACH(ia, INADDR_HASH((addr).s_addr), ia_hash) \ if (IA_SIN(ia)->sin_addr.s_addr == (addr).s_addr) \ break; \ } while (0) /* * Macro for finding the interface (ifnet structure) corresponding to one * of our IP addresses. */ #define INADDR_TO_IFP(addr, ifp) \ /* struct in_addr addr; */ \ /* struct ifnet *ifp; */ \ { \ struct in_ifaddr *ia; \ \ INADDR_TO_IFADDR(addr, ia); \ (ifp) = (ia == NULL) ? NULL : ia->ia_ifp; \ } /* * Macro for finding the internet address structure (in_ifaddr) corresponding * to a given interface (ifnet structure). */ #define IFP_TO_IA(ifp, ia, t) \ /* struct ifnet *ifp; */ \ /* struct in_ifaddr *ia; */ \ /* struct rm_priotracker *t; */ \ do { \ IN_IFADDR_RLOCK((t)); \ for ((ia) = TAILQ_FIRST(&V_in_ifaddrhead); \ (ia) != NULL && (ia)->ia_ifp != (ifp); \ (ia) = TAILQ_NEXT((ia), ia_link)) \ continue; \ if ((ia) != NULL) \ ifa_ref(&(ia)->ia_ifa); \ IN_IFADDR_RUNLOCK((t)); \ } while (0) /* * Legacy IPv4 IGMP per-link structure. */ struct router_info { struct ifnet *rti_ifp; int rti_type; /* type of router which is querier on this interface */ int rti_time; /* # of slow timeouts since last old query */ SLIST_ENTRY(router_info) rti_list; }; /* * IPv4 multicast IGMP-layer source entry. */ struct ip_msource { RB_ENTRY(ip_msource) ims_link; /* RB tree links */ in_addr_t ims_haddr; /* host byte order */ struct ims_st { uint16_t ex; /* # of exclusive members */ uint16_t in; /* # of inclusive members */ } ims_st[2]; /* state at t0, t1 */ uint8_t ims_stp; /* pending query */ }; /* * IPv4 multicast PCB-layer source entry. */ struct in_msource { RB_ENTRY(ip_msource) ims_link; /* RB tree links */ in_addr_t ims_haddr; /* host byte order */ uint8_t imsl_st[2]; /* state before/at commit */ }; RB_HEAD(ip_msource_tree, ip_msource); /* define struct ip_msource_tree */ static __inline int ip_msource_cmp(const struct ip_msource *a, const struct ip_msource *b) { if (a->ims_haddr < b->ims_haddr) return (-1); if (a->ims_haddr == b->ims_haddr) return (0); return (1); } RB_PROTOTYPE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); /* * IPv4 multicast PCB-layer group filter descriptor. */ struct in_mfilter { struct ip_msource_tree imf_sources; /* source list for (S,G) */ u_long imf_nsrc; /* # of source entries */ uint8_t imf_st[2]; /* state before/at commit */ }; /* * IPv4 group descriptor. * * For every entry on an ifnet's if_multiaddrs list which represents * an IP multicast group, there is one of these structures. * * If any source filters are present, then a node will exist in the RB-tree * to permit fast lookup by source whenever an operation takes place. * This permits pre-order traversal when we issue reports. * Source filter trees are kept separately from the socket layer to * greatly simplify locking. * * When IGMPv3 is active, inm_timer is the response to group query timer. * The state-change timer inm_sctimer is separate; whenever state changes * for the group the state change record is generated and transmitted, * and kept if retransmissions are necessary. * * FUTURE: inm_link is now only used when groups are being purged * on a detaching ifnet. It could be demoted to a SLIST_ENTRY, but * because it is at the very start of the struct, we can't do this * w/o breaking the ABI for ifmcstat. */ struct in_multi { LIST_ENTRY(in_multi) inm_link; /* to-be-released by in_ifdetach */ struct in_addr inm_addr; /* IP multicast address, convenience */ struct ifnet *inm_ifp; /* back pointer to ifnet */ struct ifmultiaddr *inm_ifma; /* back pointer to ifmultiaddr */ u_int inm_timer; /* IGMPv1/v2 group / v3 query timer */ u_int inm_state; /* state of the membership */ void *inm_rti; /* unused, legacy field */ u_int inm_refcount; /* reference count */ /* New fields for IGMPv3 follow. */ struct igmp_ifsoftc *inm_igi; /* IGMP info */ SLIST_ENTRY(in_multi) inm_nrele; /* to-be-released by IGMP */ struct ip_msource_tree inm_srcs; /* tree of sources */ u_long inm_nsrc; /* # of tree entries */ struct mbufq inm_scq; /* queue of pending * state-change packets */ struct timeval inm_lastgsrtv; /* Time of last G-S-R query */ uint16_t inm_sctimer; /* state-change timer */ uint16_t inm_scrv; /* state-change rexmit count */ /* * SSM state counters which track state at T0 (the time the last * state-change report's RV timer went to zero) and T1 * (time of pending report, i.e. now). * Used for computing IGMPv3 state-change reports. Several refcounts * are maintained here to optimize for common use-cases. */ struct inm_st { uint16_t iss_fmode; /* IGMP filter mode */ uint16_t iss_asm; /* # of ASM listeners */ uint16_t iss_ex; /* # of exclusive members */ uint16_t iss_in; /* # of inclusive members */ uint16_t iss_rec; /* # of recorded sources */ } inm_st[2]; /* state at t0, t1 */ }; /* * Helper function to derive the filter mode on a source entry * from its internal counters. Predicates are: * A source is only excluded if all listeners exclude it. * A source is only included if no listeners exclude it, * and at least one listener includes it. * May be used by ifmcstat(8). */ static __inline uint8_t ims_get_mode(const struct in_multi *inm, const struct ip_msource *ims, uint8_t t) { t = !!t; if (inm->inm_st[t].iss_ex > 0 && inm->inm_st[t].iss_ex == ims->ims_st[t].ex) return (MCAST_EXCLUDE); else if (ims->ims_st[t].in > 0 && ims->ims_st[t].ex == 0) return (MCAST_INCLUDE); return (MCAST_UNDEFINED); } #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet); SYSCTL_DECL(_net_inet_ip); SYSCTL_DECL(_net_inet_raw); #endif /* * Lock macros for IPv4 layer multicast address lists. IPv4 lock goes * before link layer multicast locks in the lock order. In most cases, * consumers of IN_*_MULTI() macros should acquire the locks before * calling them; users of the in_{add,del}multi() functions should not. */ extern struct mtx in_multi_mtx; #define IN_MULTI_LOCK() mtx_lock(&in_multi_mtx) #define IN_MULTI_UNLOCK() mtx_unlock(&in_multi_mtx) #define IN_MULTI_LOCK_ASSERT() mtx_assert(&in_multi_mtx, MA_OWNED) #define IN_MULTI_UNLOCK_ASSERT() mtx_assert(&in_multi_mtx, MA_NOTOWNED) /* Acquire an in_multi record. */ static __inline void inm_acquire_locked(struct in_multi *inm) { IN_MULTI_LOCK_ASSERT(); ++inm->inm_refcount; } /* * Return values for imo_multi_filter(). */ #define MCAST_PASS 0 /* Pass */ #define MCAST_NOTGMEMBER 1 /* This host not a member of group */ #define MCAST_NOTSMEMBER 2 /* This host excluded source */ #define MCAST_MUTED 3 /* [deprecated] */ struct rtentry; struct route; struct ip_moptions; struct radix_node_head; struct in_multi *inm_lookup_locked(struct ifnet *, const struct in_addr); struct in_multi *inm_lookup(struct ifnet *, const struct in_addr); int imo_multi_filter(const struct ip_moptions *, const struct ifnet *, const struct sockaddr *, const struct sockaddr *); void inm_commit(struct in_multi *); void inm_clear_recorded(struct in_multi *); void inm_print(const struct in_multi *); int inm_record_source(struct in_multi *inm, const in_addr_t); void inm_release(struct in_multi *); void inm_release_locked(struct in_multi *); struct in_multi * in_addmulti(struct in_addr *, struct ifnet *); void in_delmulti(struct in_multi *); int in_joingroup(struct ifnet *, const struct in_addr *, /*const*/ struct in_mfilter *, struct in_multi **); int in_joingroup_locked(struct ifnet *, const struct in_addr *, /*const*/ struct in_mfilter *, struct in_multi **); int in_leavegroup(struct in_multi *, /*const*/ struct in_mfilter *); int in_leavegroup_locked(struct in_multi *, /*const*/ struct in_mfilter *); int in_control(struct socket *, u_long, caddr_t, struct ifnet *, struct thread *); int in_addprefix(struct in_ifaddr *, int); int in_scrubprefix(struct in_ifaddr *, u_int); void ip_input(struct mbuf *); void ip_direct_input(struct mbuf *); void in_ifadown(struct ifaddr *ifa, int); -struct mbuf *ip_fastforward(struct mbuf *); +struct mbuf *ip_tryforward(struct mbuf *); void *in_domifattach(struct ifnet *); void in_domifdetach(struct ifnet *, void *); /* XXX */ void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum); void in_rtalloc(struct route *ro, u_int fibnum); struct rtentry *in_rtalloc1(struct sockaddr *, int, u_long, u_int); void in_rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct sockaddr *, u_int); #endif /* _KERNEL */ /* INET6 stuff */ #include #endif /* _NETINET_IN_VAR_H_ */ Index: head/sys/netinet/ip_fastfwd.c =================================================================== --- head/sys/netinet/ip_fastfwd.c (revision 290382) +++ head/sys/netinet/ip_fastfwd.c (revision 290383) @@ -1,587 +1,480 @@ /*- * Copyright (c) 2003 Andre Oppermann, Internet Business Solutions AG * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * ip_fastforward gets its speed from processing the forwarded packet to * completion (if_output on the other side) without any queues or netisr's. * The receiving interface DMAs the packet into memory, the upper half of * driver calls ip_fastforward, we do our routing table lookup and directly * send it off to the outgoing interface, which DMAs the packet to the * network card. The only part of the packet we touch with the CPU is the * IP header (unless there are complex firewall rules touching other parts * of the packet, but that is up to you). We are essentially limited by bus * bandwidth and how fast the network card/driver can set up receives and * transmits. * * We handle basic errors, IP header errors, checksum errors, * destination unreachable, fragmentation and fragmentation needed and * report them via ICMP to the sender. * * Else if something is not pure IPv4 unicast forwarding we fall back to * the normal ip_input processing path. We should only be called from * interfaces connected to the outside world. * * Firewalling is fully supported including divert, ipfw fwd and ipfilter * ipnat and address rewrite. * * IPSEC is not supported if this host is a tunnel broker. IPSEC is * supported for connections to/from local host. * * We try to do the least expensive (in CPU ops) checks and operations * first to catch junk with as little overhead as possible. * * We take full advantage of hardware support for IP checksum and * fragmentation offloading. * * We don't do ICMP redirect in the fast forwarding path. I have had my own * cases where two core routers with Zebra routing suite would send millions * ICMP redirects to connected hosts if the destination router was not the * default gateway. In one case it was filling the routing table of a host * with approximately 300.000 cloned redirect entries until it ran out of * kernel memory. However the networking code proved very robust and it didn't * crash or fail in other ways. */ /* * Many thanks to Matt Thomas of NetBSD for basic structure of ip_flow.c which * is being followed here. */ #include __FBSDID("$FreeBSD$"); #include "opt_ipfw.h" #include "opt_ipstealth.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -static VNET_DEFINE(int, ipfastforward_active); -#define V_ipfastforward_active VNET(ipfastforward_active) - -SYSCTL_INT(_net_inet_ip, OID_AUTO, fastforwarding, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(ipfastforward_active), 0, "Enable fast IP forwarding"); - static struct sockaddr_in * ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m) { struct sockaddr_in *dst; struct rtentry *rt; /* * Find route to destination. */ bzero(ro, sizeof(*ro)); dst = (struct sockaddr_in *)&ro->ro_dst; dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr.s_addr = dest.s_addr; in_rtalloc_ign(ro, 0, M_GETFIB(m)); /* * Route there and interface still up? */ rt = ro->ro_rt; if (rt && (rt->rt_flags & RTF_UP) && (rt->rt_ifp->if_flags & IFF_UP) && (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { if (rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in *)rt->rt_gateway; } else { IPSTAT_INC(ips_noroute); IPSTAT_INC(ips_cantforward); if (rt) RTFREE(rt); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); return NULL; } return dst; } /* * Try to forward a packet based on the destination address. * This is a fast path optimized for the plain forwarding case. * If the packet is handled (and consumed) here then we return NULL; * otherwise mbuf is returned and the packet should be delivered * to ip_input for full processing. */ struct mbuf * -ip_fastforward(struct mbuf *m) +ip_tryforward(struct mbuf *m) { struct ip *ip; struct mbuf *m0 = NULL; struct route ro; struct sockaddr_in *dst = NULL; struct ifnet *ifp; struct in_addr odest, dest; - uint16_t sum, ip_len, ip_off; + uint16_t ip_len, ip_off; int error = 0; - int hlen, mtu; + int mtu; struct m_tag *fwd_tag = NULL; /* * Are we active and forwarding packets? */ - if (!V_ipfastforward_active || !V_ipforwarding) - return m; M_ASSERTVALID(m); M_ASSERTPKTHDR(m); bzero(&ro, sizeof(ro)); - /* - * Step 1: check for packet drop conditions (and sanity checks) - */ - /* - * Is entire packet big enough? - */ - if (m->m_pkthdr.len < sizeof(struct ip)) { - IPSTAT_INC(ips_tooshort); - goto drop; - } - - /* - * Is first mbuf large enough for ip header and is header present? - */ - if (m->m_len < sizeof (struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == NULL) { - IPSTAT_INC(ips_toosmall); - return NULL; /* mbuf already free'd */ - } - - ip = mtod(m, struct ip *); - - /* - * Is it IPv4? - */ - if (ip->ip_v != IPVERSION) { - IPSTAT_INC(ips_badvers); - goto drop; - } - - /* - * Is IP header length correct and is it in first mbuf? - */ - hlen = ip->ip_hl << 2; - if (hlen < sizeof(struct ip)) { /* minimum header length */ - IPSTAT_INC(ips_badhlen); - goto drop; - } - if (hlen > m->m_len) { - if ((m = m_pullup(m, hlen)) == NULL) { - IPSTAT_INC(ips_badhlen); - return NULL; /* mbuf already free'd */ - } - ip = mtod(m, struct ip *); - } - - /* - * Checksum correct? - */ - if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) - sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); - else { - if (hlen == sizeof(struct ip)) - sum = in_cksum_hdr(ip); - else - sum = in_cksum(m, hlen); - } - if (sum) { - IPSTAT_INC(ips_badsum); - goto drop; - } - - /* - * Remember that we have checked the IP header and found it valid. - */ - m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); - - ip_len = ntohs(ip->ip_len); - - /* - * Is IP length longer than packet we have got? - */ - if (m->m_pkthdr.len < ip_len) { - IPSTAT_INC(ips_tooshort); - goto drop; - } - - /* - * Is packet longer than IP header tells us? If yes, truncate packet. - */ - if (m->m_pkthdr.len > ip_len) { - if (m->m_len == m->m_pkthdr.len) { - m->m_len = ip_len; - m->m_pkthdr.len = ip_len; - } else - m_adj(m, ip_len - m->m_pkthdr.len); - } - - /* - * Is packet from or to 127/8? - */ - if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || - (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { - IPSTAT_INC(ips_badaddr); - goto drop; - } - #ifdef ALTQ /* * Is packet dropped by traffic conditioner? */ if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) goto drop; #endif /* - * Step 2: fallback conditions to normal ip_input path processing - */ - - /* * Only IP packets without options */ + ip = mtod(m, struct ip *); + if (ip->ip_hl != (sizeof(struct ip) >> 2)) { if (V_ip_doopts == 1) return m; else if (V_ip_doopts == 2) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_FILTER_PROHIB, 0, 0); return NULL; /* mbuf already free'd */ } /* else ignore IP options and continue */ } /* * Only unicast IP, not from loopback, no L2 or IP broadcast, * no multicast, no INADDR_ANY * * XXX: Probably some of these checks could be direct drop * conditions. However it is not clear whether there are some * hacks or obscure behaviours which make it neccessary to * let ip_input handle it. We play safe here and let ip_input * deal with it until it is proven that we can directly drop it. */ if ((m->m_flags & (M_BCAST|M_MCAST)) || (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) || ntohl(ip->ip_src.s_addr) == (u_long)INADDR_BROADCAST || ntohl(ip->ip_dst.s_addr) == (u_long)INADDR_BROADCAST || IN_MULTICAST(ntohl(ip->ip_src.s_addr)) || IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_src.s_addr)) || IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || ip->ip_src.s_addr == INADDR_ANY || ip->ip_dst.s_addr == INADDR_ANY ) return m; /* * Is it for a local address on this host? */ if (in_localip(ip->ip_dst)) return m; IPSTAT_INC(ips_total); /* * Step 3: incoming packet firewall processing */ odest.s_addr = dest.s_addr = ip->ip_dst.s_addr; /* * Run through list of ipfilter hooks for input packets */ if (!PFIL_HOOKED(&V_inet_pfil_hook)) goto passin; if (pfil_run_hooks( &V_inet_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL) || m == NULL) goto drop; M_ASSERTVALID(m); M_ASSERTPKTHDR(m); ip = mtod(m, struct ip *); /* m may have changed by pfil hook */ dest.s_addr = ip->ip_dst.s_addr; /* * Destination address changed? */ if (odest.s_addr != dest.s_addr) { /* * Is it now for a local address on this host? */ if (in_localip(dest)) goto forwardlocal; /* * Go on with new destination address */ } if (m->m_flags & M_FASTFWD_OURS) { /* * ipfw changed it for a local address on this host. */ goto forwardlocal; } passin: /* * Step 4: decrement TTL and look up route */ /* * Check TTL */ #ifdef IPSTEALTH if (!V_ipstealth) { #endif if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); return NULL; /* mbuf already free'd */ } /* * Decrement the TTL and incrementally change the IP header checksum. * Don't bother doing this with hw checksum offloading, it's faster * doing it right here. */ ip->ip_ttl -= IPTTLDEC; if (ip->ip_sum >= (u_int16_t) ~htons(IPTTLDEC << 8)) ip->ip_sum -= ~htons(IPTTLDEC << 8); else ip->ip_sum += htons(IPTTLDEC << 8); #ifdef IPSTEALTH } #endif /* * Find route to destination. */ if ((dst = ip_findroute(&ro, dest, m)) == NULL) return NULL; /* icmp unreach already sent */ ifp = ro.ro_rt->rt_ifp; /* * Immediately drop blackholed traffic, and directed broadcasts * for either the all-ones or all-zero subnet addresses on * locally attached networks. */ if ((ro.ro_rt->rt_flags & (RTF_BLACKHOLE|RTF_BROADCAST)) != 0) goto drop; /* * Step 5: outgoing firewall packet processing */ /* * Run through list of hooks for output packets. */ if (!PFIL_HOOKED(&V_inet_pfil_hook)) goto passout; if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, NULL) || m == NULL) { goto drop; } M_ASSERTVALID(m); M_ASSERTPKTHDR(m); ip = mtod(m, struct ip *); dest.s_addr = ip->ip_dst.s_addr; /* * Destination address changed? */ if (m->m_flags & M_IP_NEXTHOP) fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL); if (odest.s_addr != dest.s_addr || fwd_tag != NULL) { /* * Is it now for a local address on this host? */ if (m->m_flags & M_FASTFWD_OURS || in_localip(dest)) { forwardlocal: /* * Return packet for processing by ip_input(). */ m->m_flags |= M_FASTFWD_OURS; if (ro.ro_rt) RTFREE(ro.ro_rt); return m; } /* * Redo route lookup with new destination address */ if (fwd_tag) { dest.s_addr = ((struct sockaddr_in *) (fwd_tag + 1))->sin_addr.s_addr; m_tag_delete(m, fwd_tag); m->m_flags &= ~M_IP_NEXTHOP; } RTFREE(ro.ro_rt); if ((dst = ip_findroute(&ro, dest, m)) == NULL) return NULL; /* icmp unreach already sent */ ifp = ro.ro_rt->rt_ifp; } passout: /* * Step 6: send off the packet */ ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); /* * Check if route is dampned (when ARP is unable to resolve) */ if ((ro.ro_rt->rt_flags & RTF_REJECT) && (ro.ro_rt->rt_expire == 0 || time_uptime < ro.ro_rt->rt_expire)) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); goto consumed; } /* * Check if media link state of interface is not down */ if (ifp->if_link_state == LINK_STATE_DOWN) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); goto consumed; } /* * Check if packet fits MTU or if hardware will fragment for us */ if (ro.ro_rt->rt_mtu) mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu); else mtu = ifp->if_mtu; if (ip_len <= mtu) { /* * Avoid confusing lower layers. */ m_clrprotoflags(m); /* * Send off the packet via outgoing interface */ IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, &ro); } else { /* * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery */ if (ip_off & IP_DF) { IPSTAT_INC(ips_cantfrag); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, mtu); goto consumed; } else { /* * We have to fragment the packet */ m->m_pkthdr.csum_flags |= CSUM_IP; if (ip_fragment(ip, &m, mtu, ifp->if_hwassist)) goto drop; KASSERT(m != NULL, ("null mbuf and no error")); /* * Send off the fragments via outgoing interface */ error = 0; do { m0 = m->m_nextpkt; m->m_nextpkt = NULL; /* * Avoid confusing lower layers. */ m_clrprotoflags(m); IP_PROBE(send, NULL, NULL, ip, ifp, ip, NULL); error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, &ro); if (error) break; } while ((m = m0) != NULL); if (error) { /* Reclaim remaining fragments */ for (m = m0; m; m = m0) { m0 = m->m_nextpkt; m_freem(m); } } else IPSTAT_INC(ips_fragmented); } } if (error != 0) IPSTAT_INC(ips_odropped); else { counter_u64_add(ro.ro_rt->rt_pksent, 1); IPSTAT_INC(ips_forward); IPSTAT_INC(ips_fastforward); } consumed: RTFREE(ro.ro_rt); return NULL; drop: if (m) m_freem(m); if (ro.ro_rt) RTFREE(ro.ro_rt); return NULL; } Index: head/sys/netinet/ip_input.c =================================================================== --- head/sys/netinet/ip_input.c (revision 290382) +++ head/sys/netinet/ip_input.c (revision 290383) @@ -1,1335 +1,1347 @@ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_ipfw.h" #include "opt_ipstealth.h" #include "opt_ipsec.h" #include "opt_route.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include +#include +#include #endif /* IPSEC */ #include #include #include #ifdef CTASSERT CTASSERT(sizeof(struct ip) == 20); #endif /* IP reassembly functions are defined in ip_reass.c. */ extern void ipreass_init(void); extern void ipreass_drain(void); extern void ipreass_slowtimo(void); #ifdef VIMAGE extern void ipreass_destroy(void); #endif struct rmlock in_ifaddr_lock; RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); VNET_DEFINE(int, rsvp_on); VNET_DEFINE(int, ipforwarding); SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipforwarding), 0, "Enable IP forwarding between interfaces"); static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ #define V_ipsendredirects VNET(ipsendredirects) SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsendredirects), 0, "Enable sending IP redirects"); /* * XXX - Setting ip_checkinterface mostly implements the receive side of * the Strong ES model described in RFC 1122, but since the routing table * and transmit implementation do not implement the Strong ES model, * setting this to 1 results in an odd hybrid. * * XXX - ip_checkinterface currently must be disabled if you use ipnat * to translate the destination address to another local interface. * * XXX - ip_checkinterface must be disabled if you add IP aliases * to the loopback interface instead of the interface where the * packets for those addresses are received. */ static VNET_DEFINE(int, ip_checkinterface); #define V_ip_checkinterface VNET(ip_checkinterface) SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_checkinterface), 0, "Verify packet arrives on correct interface"); VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ static struct netisr_handler ip_nh = { .nh_name = "ip", .nh_handler = ip_input, .nh_proto = NETISR_IP, #ifdef RSS .nh_m2cpuid = rss_soft_m2cpuid_v4, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, #else .nh_policy = NETISR_POLICY_FLOW, #endif }; #ifdef RSS /* * Directly dispatched frames are currently assumed * to have a flowid already calculated. * * It should likely have something that assert it * actually has valid flow details. */ static struct netisr_handler ip_direct_nh = { .nh_name = "ip_direct", .nh_handler = ip_direct_input, .nh_proto = NETISR_IP_DIRECT, .nh_m2cpuid = rss_soft_m2cpuid_v4, .nh_policy = NETISR_POLICY_CPU, .nh_dispatch = NETISR_DISPATCH_HYBRID, }; #endif extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ #ifdef IPCTL_DEFMTU SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, &ip_mtu, 0, "Default MTU"); #endif #ifdef IPSTEALTH VNET_DEFINE(int, ipstealth); SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipstealth), 0, "IP stealth mode, no TTL decrementation on forwarding"); #endif /* * IP statistics are stored in the "array" of counter(9)s. */ VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); VNET_PCPUSTAT_SYSINIT(ipstat); SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, "IP statistics (struct ipstat, netinet/ip_var.h)"); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ipstat); #endif /* VIMAGE */ /* * Kernel module interface for updating ipstat. The argument is an index * into ipstat treated as an array. */ void kmod_ipstat_inc(int statnum) { counter_u64_add(VNET(ipstat)[statnum], 1); } void kmod_ipstat_dec(int statnum) { counter_u64_add(VNET(ipstat)[statnum], -1); } static int sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip_nh, qlimit)); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", "Maximum size of the IP input queue"); static int sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) { u_int64_t qdrops_long; int error, qdrops; netisr_getqdrops(&ip_nh, &qdrops_long); qdrops = qdrops_long; error = sysctl_handle_int(oidp, &qdrops, 0, req); if (error || !req->newptr) return (error); if (qdrops != 0) return (EINVAL); netisr_clearqdrops(&ip_nh); return (0); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", "Number of packets dropped from the IP input queue"); #ifdef RSS static int sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) { int error, qlimit; netisr_getqlimit(&ip_direct_nh, &qlimit); error = sysctl_handle_int(oidp, &qlimit, 0, req); if (error || !req->newptr) return (error); if (qlimit < 1) return (EINVAL); return (netisr_setqlimit(&ip_direct_nh, qlimit)); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I", "Maximum size of the IP direct input queue"); static int sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) { u_int64_t qdrops_long; int error, qdrops; netisr_getqdrops(&ip_direct_nh, &qdrops_long); qdrops = qdrops_long; error = sysctl_handle_int(oidp, &qdrops, 0, req); if (error || !req->newptr) return (error); if (qdrops != 0) return (EINVAL); netisr_clearqdrops(&ip_direct_nh); return (0); } SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I", "Number of packets dropped from the IP direct input queue"); #endif /* RSS */ /* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. */ void ip_init(void) { struct protosw *pr; int i; TAILQ_INIT(&V_in_ifaddrhead); V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); /* Initialize IP reassembly queue. */ ipreass_init(); /* Initialize packet filter hooks. */ V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; V_inet_pfil_hook.ph_af = AF_INET; if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) printf("%s: WARNING: unable to register pfil hook, " "error %d\n", __func__, i); /* Skip initialization of globals for non-default instances. */ if (!IS_DEFAULT_VNET(curvnet)) return; pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) panic("ip_init: PF_INET not found"); /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ for (i = 0; i < IPPROTO_MAX; i++) ip_protox[i] = pr - inetsw; /* * Cycle through IP protocols and put them into the appropriate place * in ip_protox[]. */ for (pr = inetdomain.dom_protosw; pr < inetdomain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { /* Be careful to only index valid IP protocols. */ if (pr->pr_protocol < IPPROTO_MAX) ip_protox[pr->pr_protocol] = pr - inetsw; } netisr_register(&ip_nh); #ifdef RSS netisr_register(&ip_direct_nh); #endif } #ifdef VIMAGE void ip_destroy(void) { int i; if ((i = pfil_head_unregister(&V_inet_pfil_hook)) != 0) printf("%s: WARNING: unable to unregister pfil hook, " "error %d\n", __func__, i); /* Cleanup in_ifaddr hash table; should be empty. */ hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); /* Destroy IP reassembly queue. */ ipreass_destroy(); } #endif #ifdef RSS /* * IP direct input routine. * * This is called when reinjecting completed fragments where * all of the previous checking and book-keeping has been done. */ void ip_direct_input(struct mbuf *m) { struct ip *ip; int hlen; ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; IPSTAT_INC(ips_delivered); (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); return; } #endif /* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. */ void ip_input(struct mbuf *m) { struct ip *ip = NULL; struct in_ifaddr *ia = NULL; struct ifaddr *ifa; struct ifnet *ifp; int checkif, hlen = 0; uint16_t sum, ip_len; int dchg = 0; /* dest changed after fw */ struct in_addr odst; /* original dst address */ M_ASSERTPKTHDR(m); if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; /* Set up some basics that will be used later. */ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; ip_len = ntohs(ip->ip_len); goto ours; } IPSTAT_INC(ips_total); if (m->m_pkthdr.len < sizeof(struct ip)) goto tooshort; if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { IPSTAT_INC(ips_toosmall); return; } ip = mtod(m, struct ip *); if (ip->ip_v != IPVERSION) { IPSTAT_INC(ips_badvers); goto bad; } hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { /* minimum header length */ IPSTAT_INC(ips_badhlen); goto bad; } if (hlen > m->m_len) { if ((m = m_pullup(m, hlen)) == NULL) { IPSTAT_INC(ips_badhlen); return; } ip = mtod(m, struct ip *); } IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); /* 127/8 must not appear on wire - RFC1122 */ ifp = m->m_pkthdr.rcvif; if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { if ((ifp->if_flags & IFF_LOOPBACK) == 0) { IPSTAT_INC(ips_badaddr); goto bad; } } if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); } else { if (hlen == sizeof(struct ip)) { sum = in_cksum_hdr(ip); } else { sum = in_cksum(m, hlen); } } if (sum) { IPSTAT_INC(ips_badsum); goto bad; } #ifdef ALTQ if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) /* packet is dropped by traffic conditioner */ return; #endif ip_len = ntohs(ip->ip_len); if (ip_len < hlen) { IPSTAT_INC(ips_badlen); goto bad; } /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len < ip_len) { tooshort: IPSTAT_INC(ips_tooshort); goto bad; } if (m->m_pkthdr.len > ip_len) { if (m->m_len == m->m_pkthdr.len) { m->m_len = ip_len; m->m_pkthdr.len = ip_len; } else m_adj(m, ip_len - m->m_pkthdr.len); } + /* Try to forward the packet, but if we fail continue */ #ifdef IPSEC + /* For now we do not handle IPSEC in tryforward. */ + if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) && + (V_ipforwarding == 1)) + if (ip_tryforward(m) == NULL) + return; /* * Bypass packet filtering for packets previously handled by IPsec. */ if (ip_ipsec_filtertunnel(m)) goto passin; +#else + if (V_ipforwarding == 1) + if (ip_tryforward(m) == NULL) + return; #endif /* IPSEC */ /* * Run through list of hooks for input packets. * * NB: Beware of the destination address changing (e.g. * by NAT rewriting). When this happens, tell * ip_forward to do the right thing. */ /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet_pfil_hook)) goto passin; odst = ip->ip_dst; if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0) return; if (m == NULL) /* consumed by filter */ return; ip = mtod(m, struct ip *); dchg = (odst.s_addr != ip->ip_dst.s_addr); ifp = m->m_pkthdr.rcvif; if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; goto ours; } if (m->m_flags & M_IP_NEXTHOP) { dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL); if (dchg != 0) { /* * Directly ship the packet on. This allows * forwarding packets originally destined to us * to some other directly connected host. */ ip_forward(m, 1); return; } } passin: /* * Process options and, if not destined for us, * ship it on. ip_dooptions returns 1 when an * error was detected (causing an icmp message * to be sent and the original packet to be freed). */ if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) return; /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no * matter if it is destined to another node, or whether it is * a multicast one, RSVP wants it! and prevents it from being forwarded * anywhere else. Also checks if the rsvp daemon is running before * grabbing the packet. */ if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) goto ours; /* * Check our list of addresses, to see if the packet is for us. * If we don't have any addresses, assume any unicast packet * we receive might be for us (and let the upper layers deal * with it). */ if (TAILQ_EMPTY(&V_in_ifaddrhead) && (m->m_flags & (M_MCAST|M_BCAST)) == 0) goto ours; /* * Enable a consistency check between the destination address * and the arrival interface for a unicast packet (the RFC 1122 * strong ES model) if IP forwarding is disabled and the packet * is not locally generated and the packet is not subject to * 'ipfw fwd'. * * XXX - Checking also should be disabled if the destination * address is ipnat'ed to a different interface. * * XXX - Checking is incompatible with IP aliases added * to the loopback interface instead of the interface where * the packets are received. * * XXX - This is the case for carp vhost IPs as well so we * insert a workaround. If the packet got here, we already * checked with carp_iamatch() and carp_forus(). */ checkif = V_ip_checkinterface && (V_ipforwarding == 0) && ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && ifp->if_carp == NULL && (dchg == 0); /* * Check for exact addresses in the hash bucket. */ /* IN_IFADDR_RLOCK(); */ LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { /* * If the address matches, verify that the packet * arrived via the correct interface if checking is * enabled. */ if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && (!checkif || ia->ia_ifp == ifp)) { counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); /* IN_IFADDR_RUNLOCK(); */ goto ours; } } /* IN_IFADDR_RUNLOCK(); */ /* * Check for broadcast addresses. * * Only accept broadcast packets that arrive via the matching * interface. Reception of forwarded directed broadcasts would * be handled via ip_forward() and ether_output() with the loopback * into the stack for SIMPLEX interfaces handled by ether_output(). */ if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; ia = ifatoia(ifa); if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == ip->ip_dst.s_addr) { counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); IF_ADDR_RUNLOCK(ifp); goto ours; } #ifdef BOOTP_COMPAT if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); IF_ADDR_RUNLOCK(ifp); goto ours; } #endif } IF_ADDR_RUNLOCK(ifp); ia = NULL; } /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { if (V_ip_mrouter) { /* * If we are acting as a multicast router, all * incoming multicast packets are passed to the * kernel-level multicast forwarding function. * The packet is returned (relatively) intact; if * ip_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. */ if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } /* * The process-level routing daemon needs to receive * all multicast IGMP packets, whether or not this * host belongs to their destination groups. */ if (ip->ip_p == IPPROTO_IGMP) goto ours; IPSTAT_INC(ips_forward); } /* * Assume the packet is for us, to avoid prematurely taking * a lock on the in_multi hash. Protocols must perform * their own filtering and update statistics accordingly. */ goto ours; } if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) goto ours; if (ip->ip_dst.s_addr == INADDR_ANY) goto ours; /* * Not for us; forward if possible and desirable. */ if (V_ipforwarding == 0) { IPSTAT_INC(ips_cantforward); m_freem(m); } else { ip_forward(m, dchg); } return; ours: #ifdef IPSTEALTH /* * IPSTEALTH: Process non-routing options only * if the packet is destined for us. */ if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) return; #endif /* IPSTEALTH */ /* * Attempt reassembly; if it succeeds, proceed. * ip_reass() will return a different mbuf. */ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { /* XXXGL: shouldn't we save & set m_flags? */ m = ip_reass(m); if (m == NULL) return; ip = mtod(m, struct ip *); /* Get the header length of the reassembled packet */ hlen = ip->ip_hl << 2; } #ifdef IPSEC /* * enforce IPsec policy checking if we are seeing last header. * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. */ if (ip_ipsec_input(m, ip->ip_p) != 0) goto bad; #endif /* IPSEC */ /* * Switch out to protocol's input routine. */ IPSTAT_INC(ips_delivered); (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); return; bad: m_freem(m); } /* * IP timer processing; * if a timer expires on a reassembly * queue, discard it. */ void ip_slowtimo(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); ipreass_slowtimo(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } void ip_drain(void) { VNET_ITERATOR_DECL(vnet_iter); VNET_LIST_RLOCK_NOSLEEP(); VNET_FOREACH(vnet_iter) { CURVNET_SET(vnet_iter); ipreass_drain(); CURVNET_RESTORE(); } VNET_LIST_RUNLOCK_NOSLEEP(); } /* * The protocol to be inserted into ip_protox[] must be already registered * in inetsw[], either statically or through pf_proto_register(). */ int ipproto_register(short ipproto) { struct protosw *pr; /* Sanity checks. */ if (ipproto <= 0 || ipproto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* * The protocol slot must not be occupied by another protocol * already. An index pointing to IPPROTO_RAW is unused. */ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ return (EEXIST); /* Find the protocol position in inetsw[] and set the index. */ for (pr = inetdomain.dom_protosw; pr < inetdomain.dom_protoswNPROTOSW; pr++) { if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol == ipproto) { ip_protox[pr->pr_protocol] = pr - inetsw; return (0); } } return (EPROTONOSUPPORT); } int ipproto_unregister(short ipproto) { struct protosw *pr; /* Sanity checks. */ if (ipproto <= 0 || ipproto >= IPPROTO_MAX) return (EPROTONOSUPPORT); /* Check if the protocol was indeed registered. */ pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == NULL) return (EPFNOSUPPORT); if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ return (ENOENT); /* Reset the protocol slot to IPPROTO_RAW. */ ip_protox[ipproto] = pr - inetsw; return (0); } /* * Given address of next destination (final or next hop), return (referenced) * internet address info of interface to be used to get there. */ struct in_ifaddr * ip_rtaddr(struct in_addr dst, u_int fibnum) { struct route sro; struct sockaddr_in *sin; struct in_ifaddr *ia; bzero(&sro, sizeof(sro)); sin = (struct sockaddr_in *)&sro.ro_dst; sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = dst; in_rtalloc_ign(&sro, 0, fibnum); if (sro.ro_rt == NULL) return (NULL); ia = ifatoia(sro.ro_rt->rt_ifa); ifa_ref(&ia->ia_ifa); RTFREE(sro.ro_rt); return (ia); } u_char inetctlerrmap[PRC_NCMDS] = { 0, 0, 0, 0, 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, EMSGSIZE, EHOSTUNREACH, 0, 0, 0, 0, EHOSTUNREACH, 0, ENOPROTOOPT, ECONNREFUSED }; /* * Forward a packet. If some error occurs return the sender * an icmp packet. Note we can't always generate a meaningful * icmp message because icmp doesn't have a large enough repertoire * of codes and types. * * If not forwarding, just drop the packet. This could be confusing * if ipforwarding was zero but some routing protocol was advancing * us as a gateway to somewhere. However, we must let the routing * protocol deal with that. * * The srcrt parameter indicates whether the packet is being forwarded * via a source route. */ void ip_forward(struct mbuf *m, int srcrt) { struct ip *ip = mtod(m, struct ip *); struct in_ifaddr *ia; struct mbuf *mcopy; struct sockaddr_in *sin; struct in_addr dest; struct route ro; int error, type = 0, code = 0, mtu = 0; if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } #ifdef IPSEC if (ip_ipsec_fwd(m) != 0) { IPSTAT_INC(ips_cantforward); m_freem(m); return; } #endif /* IPSEC */ #ifdef IPSTEALTH if (!V_ipstealth) { #endif if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); return; } #ifdef IPSTEALTH } #endif bzero(&ro, sizeof(ro)); sin = (struct sockaddr_in *)&ro.ro_dst; sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = ip->ip_dst; #ifdef RADIX_MPATH rtalloc_mpath_fib(&ro, ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), M_GETFIB(m)); #else in_rtalloc_ign(&ro, 0, M_GETFIB(m)); #endif if (ro.ro_rt != NULL) { ia = ifatoia(ro.ro_rt->rt_ifa); ifa_ref(&ia->ia_ifa); } else ia = NULL; #ifndef IPSEC /* * 'ia' may be NULL if there is no route for this destination. * In case of IPsec, Don't discard it just yet, but pass it to * ip_output in case of outgoing IPsec policy. */ if (!srcrt && ia == NULL) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); RO_RTFREE(&ro); return; } #endif /* * Save the IP header and at most 8 bytes of the payload, * in case we need to generate an ICMP message to the src. * * XXX this can be optimized a lot by saving the data in a local * buffer on the stack (72 bytes at most), and only allocating the * mbuf if really necessary. The vast majority of the packets * are forwarded without having to send an ICMP back (either * because unnecessary, or because rate limited), so we are * really we are wasting a lot of work here. * * We don't use m_copy() because it might return a reference * to a shared cluster. Both this function and ip_output() * assume exclusive access to the IP header in `m', so any * data in a cluster may change before we reach icmp_error(). */ mcopy = m_gethdr(M_NOWAIT, m->m_type); if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { /* * It's probably ok if the pkthdr dup fails (because * the deep copy of the tag chain failed), but for now * be conservative and just discard the copy since * code below may some day want the tags. */ m_free(mcopy); mcopy = NULL; } if (mcopy != NULL) { mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); mcopy->m_pkthdr.len = mcopy->m_len; m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); } #ifdef IPSTEALTH if (!V_ipstealth) { #endif ip->ip_ttl -= IPTTLDEC; #ifdef IPSTEALTH } #endif /* * If forwarding packet using same interface that it came in on, * perhaps should send a redirect to sender to shortcut a hop. * Only send redirect if source is sending directly to us, * and if packet was not source routed (or has any options). * Also, don't send redirect if forwarding using a default route * or a route modified by a redirect. */ dest.s_addr = 0; if (!srcrt && V_ipsendredirects && ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { struct rtentry *rt; rt = ro.ro_rt; if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && satosin(rt_key(rt))->sin_addr.s_addr != 0) { #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) u_long src = ntohl(ip->ip_src.s_addr); if (RTA(rt) && (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { if (rt->rt_flags & RTF_GATEWAY) dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr; else dest.s_addr = ip->ip_dst.s_addr; /* Router requirements says to only send host redirects */ type = ICMP_REDIRECT; code = ICMP_REDIRECT_HOST; } } } error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); if (error == EMSGSIZE && ro.ro_rt) mtu = ro.ro_rt->rt_mtu; RO_RTFREE(&ro); if (error) IPSTAT_INC(ips_cantforward); else { IPSTAT_INC(ips_forward); if (type) IPSTAT_INC(ips_redirectsent); else { if (mcopy) m_freem(mcopy); if (ia != NULL) ifa_free(&ia->ia_ifa); return; } } if (mcopy == NULL) { if (ia != NULL) ifa_free(&ia->ia_ifa); return; } switch (error) { case 0: /* forwarded, but need redirect */ /* type, code set above */ break; case ENETUNREACH: case EHOSTUNREACH: case ENETDOWN: case EHOSTDOWN: default: type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; break; case EMSGSIZE: type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; #ifdef IPSEC /* * If IPsec is configured for this path, * override any possibly mtu value set by ip_output. */ mtu = ip_ipsec_mtu(mcopy, mtu); #endif /* IPSEC */ /* * If the MTU was set before make sure we are below the * interface MTU. * If the MTU wasn't set before use the interface mtu or * fall back to the next smaller mtu step compared to the * current packet size. */ if (mtu != 0) { if (ia != NULL) mtu = min(mtu, ia->ia_ifp->if_mtu); } else { if (ia != NULL) mtu = ia->ia_ifp->if_mtu; else mtu = ip_next_mtu(ntohs(ip->ip_len), 0); } IPSTAT_INC(ips_cantfrag); break; case ENOBUFS: case EACCES: /* ipfw denied packet */ m_freem(mcopy); if (ia != NULL) ifa_free(&ia->ia_ifa); return; } if (ia != NULL) ifa_free(&ia->ia_ifa); icmp_error(mcopy, type, code, dest.s_addr, mtu); } void ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, struct mbuf *m) { if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) { struct bintime bt; bintime(&bt); if (inp->inp_socket->so_options & SO_BINTIME) { *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), SCM_BINTIME, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_socket->so_options & SO_TIMESTAMP) { struct timeval tv; bintime2timeval(&bt, &tv); *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), SCM_TIMESTAMP, SOL_SOCKET); if (*mp) mp = &(*mp)->m_next; } } if (inp->inp_flags & INP_RECVDSTADDR) { *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVTTL) { *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, sizeof(u_char), IP_RECVTTL, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #ifdef notyet /* XXX * Moving these out of udp_input() made them even more broken * than they already were. */ /* options were tossed already */ if (inp->inp_flags & INP_RECVOPTS) { *mp = sbcreatecontrol((caddr_t)opts_deleted_above, sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } /* ip_srcroute doesn't do what we want here, need to fix */ if (inp->inp_flags & INP_RECVRETOPTS) { *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #endif if (inp->inp_flags & INP_RECVIF) { struct ifnet *ifp; struct sdlbuf { struct sockaddr_dl sdl; u_char pad[32]; } sdlbuf; struct sockaddr_dl *sdp; struct sockaddr_dl *sdl2 = &sdlbuf.sdl; if ((ifp = m->m_pkthdr.rcvif) && ifp->if_index && ifp->if_index <= V_if_index) { sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; /* * Change our mind and don't try copy. */ if (sdp->sdl_family != AF_LINK || sdp->sdl_len > sizeof(sdlbuf)) { goto makedummy; } bcopy(sdp, sdl2, sdp->sdl_len); } else { makedummy: sdl2->sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]); sdl2->sdl_family = AF_LINK; sdl2->sdl_index = 0; sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; } *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, IP_RECVIF, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags & INP_RECVTOS) { *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, sizeof(u_char), IP_RECVTOS, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } if (inp->inp_flags2 & INP_RECVFLOWID) { uint32_t flowid, flow_type; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); /* * XXX should handle the failure of one or the * other - don't populate both? */ *mp = sbcreatecontrol((caddr_t) &flowid, sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; *mp = sbcreatecontrol((caddr_t) &flow_type, sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } #ifdef RSS if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { uint32_t flowid, flow_type; uint32_t rss_bucketid; flowid = m->m_pkthdr.flowid; flow_type = M_HASHTYPE_GET(m); if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { *mp = sbcreatecontrol((caddr_t) &rss_bucketid, sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); if (*mp) mp = &(*mp)->m_next; } } #endif } /* * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on * locking. This code remains in ip_input.c as ip_mroute.c is optionally * compiled. */ static VNET_DEFINE(int, ip_rsvp_on); VNET_DEFINE(struct socket *, ip_rsvpd); #define V_ip_rsvp_on VNET(ip_rsvp_on) int ip_rsvp_init(struct socket *so) { if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) return EOPNOTSUPP; if (V_ip_rsvpd != NULL) return EADDRINUSE; V_ip_rsvpd = so; /* * This may seem silly, but we need to be sure we don't over-increment * the RSVP counter, in case something slips up. */ if (!V_ip_rsvp_on) { V_ip_rsvp_on = 1; V_rsvp_on++; } return 0; } int ip_rsvp_done(void) { V_ip_rsvpd = NULL; /* * This may seem silly, but we need to be sure we don't over-decrement * the RSVP counter, in case something slips up. */ if (V_ip_rsvp_on) { V_ip_rsvp_on = 0; V_rsvp_on--; } return 0; } int rsvp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m; m = *mp; *mp = NULL; if (rsvp_input_p) { /* call the real one if loaded */ *mp = m; rsvp_input_p(mp, offp, proto); return (IPPROTO_DONE); } /* Can still get packets with rsvp_on = 0 if there is a local member * of the group to which the RSVP packet is addressed. But in this * case we want to throw the packet away. */ if (!V_rsvp_on) { m_freem(m); return (IPPROTO_DONE); } if (V_ip_rsvpd != NULL) { *mp = m; rip_input(mp, offp, proto); return (IPPROTO_DONE); } /* Drop the packet */ m_freem(m); return (IPPROTO_DONE); }