Index: head/sys/net/if_me.c =================================================================== --- head/sys/net/if_me.c (revision 335759) +++ head/sys/net/if_me.c (revision 335760) @@ -1,629 +1,632 @@ /*- * Copyright (c) 2014, 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MEMTU (1500 - sizeof(struct mobhdr)) static const char mename[] = "me"; static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP"); /* Minimal forwarding header RFC 2004 */ struct mobhdr { uint8_t mob_proto; /* protocol */ uint8_t mob_flags; /* flags */ #define MOB_FLAGS_SP 0x80 /* source present */ uint16_t mob_csum; /* header checksum */ struct in_addr mob_dst; /* original destination address */ struct in_addr mob_src; /* original source addr (optional) */ } __packed; struct me_softc { struct ifnet *me_ifp; u_int me_fibnum; struct in_addr me_src; struct in_addr me_dst; CK_LIST_ENTRY(me_softc) chain; }; CK_LIST_HEAD(me_list, me_softc); #define ME2IFP(sc) ((sc)->me_ifp) #define ME_READY(sc) ((sc)->me_src.s_addr != 0) #define ME_RLOCK() epoch_enter_preempt(net_epoch_preempt) #define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt) #define ME_WAIT() epoch_wait_preempt(net_epoch_preempt) #ifndef ME_HASH_SIZE #define ME_HASH_SIZE (1 << 4) #endif static VNET_DEFINE(struct me_list *, me_hashtbl) = NULL; #define V_me_hashtbl VNET(me_hashtbl) #define ME_HASH(src, dst) (V_me_hashtbl[\ me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)]) static struct sx me_ioctl_sx; SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl"); static int me_clone_create(struct if_clone *, int, caddr_t); static void me_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, me_cloner); #define V_me_cloner VNET(me_cloner) static void me_qflush(struct ifnet *); static int me_transmit(struct ifnet *, struct mbuf *); static int me_ioctl(struct ifnet *, u_long, caddr_t); static int me_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int me_input(struct mbuf *, int, int, void *); static int me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t); static void me_delete_tunnel(struct me_softc *); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0, "Minimal Encapsulation for IP (RFC 2004)"); #ifndef MAX_ME_NEST #define MAX_ME_NEST 1 #endif static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST; #define V_max_me_nesting VNET(max_me_nesting) SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(max_me_nesting), 0, "Max nested tunnels"); static uint32_t me_hashval(in_addr_t src, in_addr_t dst) { uint32_t ret; ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); return (fnv_32_buf(&dst, sizeof(dst), ret)); } static struct me_list * me_hashinit(void) { struct me_list *hash; int i; hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE, M_IFME, M_WAITOK); for (i = 0; i < ME_HASH_SIZE; i++) CK_LIST_INIT(&hash[i]); return (hash); } static void vnet_me_init(const void *unused __unused) { V_me_cloner = if_clone_simple(mename, me_clone_create, me_clone_destroy, 0); } VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_me_init, NULL); static void vnet_me_uninit(const void *unused __unused) { if (V_me_hashtbl != NULL) free(V_me_hashtbl, M_IFME); if_clone_detach(V_me_cloner); } VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_me_uninit, NULL); static int me_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct me_softc *sc; sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO); sc->me_fibnum = curthread->td_proc->p_fibnum; ME2IFP(sc) = if_alloc(IFT_TUNNEL); ME2IFP(sc)->if_softc = sc; if_initname(ME2IFP(sc), mename, unit); ME2IFP(sc)->if_mtu = MEMTU;; ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; ME2IFP(sc)->if_output = me_output; ME2IFP(sc)->if_ioctl = me_ioctl; ME2IFP(sc)->if_transmit = me_transmit; ME2IFP(sc)->if_qflush = me_qflush; ME2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(ME2IFP(sc)); bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t)); return (0); } static void me_clone_destroy(struct ifnet *ifp) { struct me_softc *sc; sx_xlock(&me_ioctl_sx); sc = ifp->if_softc; me_delete_tunnel(sc); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&me_ioctl_sx); ME_WAIT(); if_free(ifp); free(sc, M_IFME); } static int me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct sockaddr_in *src, *dst; struct me_softc *sc; int error; switch (cmd) { case SIOCSIFMTU: if (ifr->ifr_mtu < 576) return (EINVAL); ifp->if_mtu = ifr->ifr_mtu; return (0); case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: return (0); } sx_xlock(&me_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { error = ENXIO; goto end; } error = 0; switch (cmd) { case SIOCSIFPHYADDR: src = &((struct in_aliasreq *)data)->ifra_addr; dst = &((struct in_aliasreq *)data)->ifra_dstaddr; if (src->sin_family != dst->sin_family || src->sin_family != AF_INET || src->sin_len != dst->sin_len || src->sin_len != sizeof(struct sockaddr_in)) { error = EINVAL; break; } if (src->sin_addr.s_addr == INADDR_ANY || dst->sin_addr.s_addr == INADDR_ANY) { error = EADDRNOTAVAIL; break; } error = me_set_tunnel(sc, src->sin_addr.s_addr, dst->sin_addr.s_addr); break; case SIOCDIFPHYADDR: me_delete_tunnel(sc); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (!ME_READY(sc)) { error = EADDRNOTAVAIL; break; } src = (struct sockaddr_in *)&ifr->ifr_addr; memset(src, 0, sizeof(*src)); src->sin_family = AF_INET; src->sin_len = sizeof(*src); switch (cmd) { case SIOCGIFPSRCADDR: src->sin_addr = sc->me_src; break; case SIOCGIFPDSTADDR: src->sin_addr = sc->me_dst; break; } error = prison_if(curthread->td_ucred, sintosa(src)); if (error != 0) memset(src, 0, sizeof(*src)); break; case SIOCGTUNFIB: ifr->ifr_fib = sc->me_fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->me_fibnum = ifr->ifr_fib; break; default: error = EINVAL; break; } end: sx_xunlock(&me_ioctl_sx); return (error); } static int me_lookup(const struct mbuf *m, int off, int proto, void **arg) { const struct ip *ip; struct me_softc *sc; + if (V_me_hashtbl == NULL) + return (0); + MPASS(in_epoch()); ip = mtod(m, const struct ip *); CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr, ip->ip_src.s_addr), chain) { if (sc->me_src.s_addr == ip->ip_dst.s_addr && sc->me_dst.s_addr == ip->ip_src.s_addr) { if ((ME2IFP(sc)->if_flags & IFF_UP) == 0) return (0); *arg = sc; return (ENCAP_DRV_LOOKUP); } } return (0); } static int me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst) { struct me_softc *tmp; sx_assert(&me_ioctl_sx, SA_XLOCKED); if (V_me_hashtbl == NULL) V_me_hashtbl = me_hashinit(); if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst) return (0); CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) { if (tmp == sc) continue; if (tmp->me_src.s_addr == src && tmp->me_dst.s_addr == dst) return (EADDRNOTAVAIL); } me_delete_tunnel(sc); sc->me_dst.s_addr = dst; sc->me_src.s_addr = src; CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain); ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; if_link_state_change(ME2IFP(sc), LINK_STATE_UP); return (0); } static void me_delete_tunnel(struct me_softc *sc) { sx_assert(&me_ioctl_sx, SA_XLOCKED); if (ME_READY(sc)) { CK_LIST_REMOVE(sc, chain); ME_WAIT(); sc->me_src.s_addr = 0; sc->me_dst.s_addr = 0; ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN); } } static uint16_t me_in_cksum(uint16_t *p, int nwords) { uint32_t sum = 0; while (nwords-- > 0) sum += *p++; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); return (~sum); } static int me_input(struct mbuf *m, int off, int proto, void *arg) { struct me_softc *sc = arg; struct mobhdr *mh; struct ifnet *ifp; struct ip *ip; int hlen; ifp = ME2IFP(sc); /* checks for short packets */ hlen = sizeof(struct mobhdr); if (m->m_pkthdr.len < sizeof(struct ip) + hlen) hlen -= sizeof(struct in_addr); if (m->m_len < sizeof(struct ip) + hlen) m = m_pullup(m, sizeof(struct ip) + hlen); if (m == NULL) goto drop; mh = (struct mobhdr *)mtodo(m, sizeof(struct ip)); /* check for wrong flags */ if (mh->mob_flags & (~MOB_FLAGS_SP)) { m_freem(m); goto drop; } if (mh->mob_flags) { if (hlen != sizeof(struct mobhdr)) { m_freem(m); goto drop; } } else hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); /* check mobile header checksum */ if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) { m_freem(m); goto drop; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif ip = mtod(m, struct ip *); ip->ip_dst = mh->mob_dst; ip->ip_p = mh->mob_proto; ip->ip_sum = 0; ip->ip_len = htons(m->m_pkthdr.len - hlen); if (mh->mob_flags) ip->ip_src = mh->mob_src; memmove(mtodo(m, hlen), ip, sizeof(struct ip)); m_adj(m, hlen); m_clrprotoflags(m); m->m_pkthdr.rcvif = ifp; m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); M_SETFIB(m, ifp->if_fib); hlen = AF_INET; BPF_MTAP2(ifp, &hlen, sizeof(hlen), m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if ((ifp->if_flags & IFF_MONITOR) != 0) m_freem(m); else netisr_dispatch(NETISR_IP, m); return (IPPROTO_DONE); drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return (IPPROTO_DONE); } #define MTAG_ME 1414491977 static int me_check_nesting(struct ifnet *ifp, struct mbuf *m) { struct m_tag *mtag; int count; count = 1; mtag = NULL; while ((mtag = m_tag_locate(m, MTAG_ME, 0, mtag)) != NULL) { if (*(struct ifnet **)(mtag + 1) == ifp) { log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname); return (EIO); } count++; } if (count > V_max_me_nesting) { log(LOG_NOTICE, "%s: if_output recursively called too many times(%d)\n", ifp->if_xname, count); return (EIO); } mtag = m_tag_alloc(MTAG_ME, 0, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) return (ENOMEM); *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); return (0); } static int me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro __unused) { uint32_t af; if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); } static int me_transmit(struct ifnet *ifp, struct mbuf *m) { struct mobhdr mh; struct me_softc *sc; struct ip *ip; uint32_t af; int error, hlen, plen; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error != 0) goto drop; #endif error = ENETDOWN; ME_RLOCK(); sc = ifp->if_softc; if (sc == NULL || !ME_READY(sc) || (ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || (error = me_check_nesting(ifp, m) != 0)) { m_freem(m); goto drop; } af = m->m_pkthdr.csum_data; if (af != AF_INET) { error = EAFNOSUPPORT; m_freem(m); goto drop; } if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) { error = ENOBUFS; goto drop; } ip = mtod(m, struct ip *); /* Fragmented datagramms shouldn't be encapsulated */ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { error = EINVAL; m_freem(m); goto drop; } mh.mob_proto = ip->ip_p; mh.mob_src = ip->ip_src; mh.mob_dst = ip->ip_dst; if (in_hosteq(sc->me_src, ip->ip_src)) { hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); mh.mob_flags = 0; } else { hlen = sizeof(struct mobhdr); mh.mob_flags = MOB_FLAGS_SP; } BPF_MTAP2(ifp, &af, sizeof(af), m); plen = m->m_pkthdr.len; ip->ip_src = sc->me_src; ip->ip_dst = sc->me_dst; m->m_flags &= ~(M_BCAST|M_MCAST); M_SETFIB(m, sc->me_fibnum); M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto drop; } if (m->m_len < sizeof(struct ip) + hlen) m = m_pullup(m, sizeof(struct ip) + hlen); if (m == NULL) { error = ENOBUFS; goto drop; } memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip)); ip = mtod(m, struct ip *); ip->ip_len = htons(m->m_pkthdr.len); ip->ip_p = IPPROTO_MOBILE; ip->ip_sum = 0; mh.mob_csum = 0; mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t)); bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen); error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); drop: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); } ME_RUNLOCK(); return (error); } static void me_qflush(struct ifnet *ifp __unused) { } static const struct encaptab *ecookie = NULL; static const struct encap_config me_encap_cfg = { .proto = IPPROTO_MOBILE, .min_length = sizeof(struct ip) + sizeof(struct mobhdr) - sizeof(in_addr_t), .exact_match = ENCAP_DRV_LOOKUP, .lookup = me_lookup, .input = me_input }; static int memodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK); break; case MOD_UNLOAD: ip_encap_detach(ecookie); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t me_mod = { "if_me", memodevent, 0 }; DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_me, 1); Index: head/sys/netinet/in_gif.c =================================================================== --- head/sys/netinet/in_gif.c (revision 335759) +++ head/sys/netinet/in_gif.c (revision 335760) @@ -1,403 +1,406 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in_gif.c,v 1.54 2001/05/14 14:02:16 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #define GIF_TTL 30 static VNET_DEFINE(int, ip_gif_ttl) = GIF_TTL; #define V_ip_gif_ttl VNET(ip_gif_ttl) SYSCTL_INT(_net_inet_ip, IPCTL_GIF_TTL, gifttl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_gif_ttl), 0, "Default TTL value for encapsulated packets"); /* * We keep interfaces in a hash table using src+dst as key. * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list. */ static VNET_DEFINE(struct gif_list *, ipv4_hashtbl) = NULL; static VNET_DEFINE(struct gif_list, ipv4_list) = CK_LIST_HEAD_INITIALIZER(); #define V_ipv4_hashtbl VNET(ipv4_hashtbl) #define V_ipv4_list VNET(ipv4_list) #define GIF_HASH(src, dst) (V_ipv4_hashtbl[\ in_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)]) #define GIF_HASH_SC(sc) GIF_HASH((sc)->gif_iphdr->ip_src.s_addr,\ (sc)->gif_iphdr->ip_dst.s_addr) static uint32_t in_gif_hashval(in_addr_t src, in_addr_t dst) { uint32_t ret; ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); return (fnv_32_buf(&dst, sizeof(dst), ret)); } static int in_gif_checkdup(const struct gif_softc *sc, in_addr_t src, in_addr_t dst) { struct gif_softc *tmp; if (sc->gif_family == AF_INET && sc->gif_iphdr->ip_src.s_addr == src && sc->gif_iphdr->ip_dst.s_addr == dst) return (EEXIST); CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) { if (tmp == sc) continue; if (tmp->gif_iphdr->ip_src.s_addr == src && tmp->gif_iphdr->ip_dst.s_addr == dst) return (EADDRNOTAVAIL); } return (0); } static void in_gif_attach(struct gif_softc *sc) { if (sc->gif_options & GIF_IGNORE_SOURCE) CK_LIST_INSERT_HEAD(&V_ipv4_list, sc, chain); else CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain); } int in_gif_setopts(struct gif_softc *sc, u_int options) { /* NOTE: we are protected with gif_ioctl_sx lock */ MPASS(sc->gif_family == AF_INET); MPASS(sc->gif_options != options); if ((options & GIF_IGNORE_SOURCE) != (sc->gif_options & GIF_IGNORE_SOURCE)) { CK_LIST_REMOVE(sc, chain); sc->gif_options = options; in_gif_attach(sc); } return (0); } int in_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct sockaddr_in *dst, *src; struct ip *ip; int error; /* NOTE: we are protected with gif_ioctl_sx lock */ error = EINVAL; switch (cmd) { case SIOCSIFPHYADDR: src = &((struct in_aliasreq *)data)->ifra_addr; dst = &((struct in_aliasreq *)data)->ifra_dstaddr; /* sanity checks */ if (src->sin_family != dst->sin_family || src->sin_family != AF_INET || src->sin_len != dst->sin_len || src->sin_len != sizeof(*src)) break; if (src->sin_addr.s_addr == INADDR_ANY || dst->sin_addr.s_addr == INADDR_ANY) { error = EADDRNOTAVAIL; break; } if (V_ipv4_hashtbl == NULL) V_ipv4_hashtbl = gif_hashinit(); error = in_gif_checkdup(sc, src->sin_addr.s_addr, dst->sin_addr.s_addr); if (error == EADDRNOTAVAIL) break; if (error == EEXIST) { /* Addresses are the same. Just return. */ error = 0; break; } ip = malloc(sizeof(*ip), M_GIF, M_WAITOK | M_ZERO); ip->ip_src.s_addr = src->sin_addr.s_addr; ip->ip_dst.s_addr = dst->sin_addr.s_addr; if (sc->gif_family != 0) { /* Detach existing tunnel first */ CK_LIST_REMOVE(sc, chain); GIF_WAIT(); free(sc->gif_hdr, M_GIF); /* XXX: should we notify about link state change? */ } sc->gif_family = AF_INET; sc->gif_iphdr = ip; in_gif_attach(sc); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (sc->gif_family != AF_INET) { error = EADDRNOTAVAIL; break; } src = (struct sockaddr_in *)&ifr->ifr_addr; memset(src, 0, sizeof(*src)); src->sin_family = AF_INET; src->sin_len = sizeof(*src); src->sin_addr = (cmd == SIOCGIFPSRCADDR) ? sc->gif_iphdr->ip_src: sc->gif_iphdr->ip_dst; error = prison_if(curthread->td_ucred, (struct sockaddr *)src); if (error != 0) memset(src, 0, sizeof(*src)); break; } return (error); } int in_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn) { struct gif_softc *sc = ifp->if_softc; struct ip *ip; int len; /* prepend new IP header */ MPASS(in_epoch()); len = sizeof(struct ip); #ifndef __NO_STRICT_ALIGNMENT if (proto == IPPROTO_ETHERIP) len += ETHERIP_ALIGN; #endif M_PREPEND(m, len, M_NOWAIT); if (m == NULL) return (ENOBUFS); #ifndef __NO_STRICT_ALIGNMENT if (proto == IPPROTO_ETHERIP) { len = mtod(m, vm_offset_t) & 3; KASSERT(len == 0 || len == ETHERIP_ALIGN, ("in_gif_output: unexpected misalignment")); m->m_data += len; m->m_len -= ETHERIP_ALIGN; } #endif ip = mtod(m, struct ip *); MPASS(sc->gif_family == AF_INET); bcopy(sc->gif_iphdr, ip, sizeof(struct ip)); ip->ip_p = proto; /* version will be set in ip_output() */ ip->ip_ttl = V_ip_gif_ttl; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_tos = ecn; return (ip_output(m, NULL, NULL, 0, NULL, NULL)); } static int in_gif_input(struct mbuf *m, int off, int proto, void *arg) { struct gif_softc *sc = arg; struct ifnet *gifp; struct ip *ip; uint8_t ecn; MPASS(in_epoch()); if (sc == NULL) { m_freem(m); KMOD_IPSTAT_INC(ips_nogif); return (IPPROTO_DONE); } gifp = GIF2IFP(sc); if ((gifp->if_flags & IFF_UP) != 0) { ip = mtod(m, struct ip *); ecn = ip->ip_tos; m_adj(m, off); gif_input(m, gifp, proto, ecn); } else { m_freem(m); KMOD_IPSTAT_INC(ips_nogif); } return (IPPROTO_DONE); } static int in_gif_lookup(const struct mbuf *m, int off, int proto, void **arg) { const struct ip *ip; struct gif_softc *sc; int ret; + if (V_ipv4_hashtbl == NULL) + return (0); + MPASS(in_epoch()); ip = mtod(m, const struct ip *); /* * NOTE: it is safe to iterate without any locking here, because softc * can be reclaimed only when we are not within net_epoch_preempt * section, but ip_encap lookup+input are executed in epoch section. */ ret = 0; CK_LIST_FOREACH(sc, &GIF_HASH(ip->ip_dst.s_addr, ip->ip_src.s_addr), chain) { /* * This is an inbound packet, its ip_dst is source address * in softc. */ if (sc->gif_iphdr->ip_src.s_addr == ip->ip_dst.s_addr && sc->gif_iphdr->ip_dst.s_addr == ip->ip_src.s_addr) { ret = ENCAP_DRV_LOOKUP; goto done; } } /* * No exact match. * Check the list of interfaces with GIF_IGNORE_SOURCE flag. */ CK_LIST_FOREACH(sc, &V_ipv4_list, chain) { if (sc->gif_iphdr->ip_src.s_addr == ip->ip_dst.s_addr) { ret = 32 + 8; /* src + proto */ goto done; } } return (0); done: if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0) return (0); /* ingress filters on outer source */ if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) { struct nhop4_basic nh4; struct in_addr dst; dst = ip->ip_src; if (fib4_lookup_nh_basic(sc->gif_fibnum, dst, 0, 0, &nh4) != 0) return (0); if (nh4.nh_ifp != m->m_pkthdr.rcvif) return (0); } *arg = sc; return (ret); } static struct { const struct encap_config encap; const struct encaptab *cookie; } ipv4_encap_cfg[] = { { .encap = { .proto = IPPROTO_IPV4, .min_length = 2 * sizeof(struct ip), .exact_match = ENCAP_DRV_LOOKUP, .lookup = in_gif_lookup, .input = in_gif_input }, }, #ifdef INET6 { .encap = { .proto = IPPROTO_IPV6, .min_length = sizeof(struct ip) + sizeof(struct ip6_hdr), .exact_match = ENCAP_DRV_LOOKUP, .lookup = in_gif_lookup, .input = in_gif_input }, }, #endif { .encap = { .proto = IPPROTO_ETHERIP, .min_length = sizeof(struct ip) + sizeof(struct etherip_header) + sizeof(struct ether_header), .exact_match = ENCAP_DRV_LOOKUP, .lookup = in_gif_lookup, .input = in_gif_input }, } }; void in_gif_init(void) { int i; if (!IS_DEFAULT_VNET(curvnet)) return; for (i = 0; i < nitems(ipv4_encap_cfg); i++) ipv4_encap_cfg[i].cookie = ip_encap_attach( &ipv4_encap_cfg[i].encap, NULL, M_WAITOK); } void in_gif_uninit(void) { int i; if (IS_DEFAULT_VNET(curvnet)) { for (i = 0; i < nitems(ipv4_encap_cfg); i++) ip_encap_detach(ipv4_encap_cfg[i].cookie); } if (V_ipv4_hashtbl != NULL) gif_hashdestroy(V_ipv4_hashtbl); } Index: head/sys/netinet/ip_gre.c =================================================================== --- head/sys/netinet/ip_gre.c (revision 335759) +++ head/sys/netinet/ip_gre.c (revision 335760) @@ -1,296 +1,299 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-NetBSD * * Copyright (c) 1998 The NetBSD Foundation, Inc. * Copyright (c) 2014, 2018 Andrey V. Elsukov * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Heiko W.Rupp * * IPv6-over-GRE contributed by Gert Doering * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #define GRE_TTL 30 VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL; #define V_ip_gre_ttl VNET(ip_gre_ttl) SYSCTL_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_gre_ttl), 0, "Default TTL value for encapsulated packets"); static VNET_DEFINE(struct gre_list *, ipv4_hashtbl) = NULL; #define V_ipv4_hashtbl VNET(ipv4_hashtbl) #define GRE_HASH(src, dst) (V_ipv4_hashtbl[\ in_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)]) #define GRE_HASH_SC(sc) GRE_HASH((sc)->gre_oip.ip_src.s_addr,\ (sc)->gre_oip.ip_dst.s_addr) static uint32_t in_gre_hashval(in_addr_t src, in_addr_t dst) { uint32_t ret; ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); return (fnv_32_buf(&dst, sizeof(dst), ret)); } static int in_gre_checkdup(const struct gre_softc *sc, in_addr_t src, in_addr_t dst) { struct gre_softc *tmp; if (sc->gre_family == AF_INET && sc->gre_oip.ip_src.s_addr == src && sc->gre_oip.ip_dst.s_addr == dst) return (EEXIST); CK_LIST_FOREACH(tmp, &GRE_HASH(src, dst), chain) { if (tmp == sc) continue; if (tmp->gre_oip.ip_src.s_addr == src && tmp->gre_oip.ip_dst.s_addr == dst) return (EADDRNOTAVAIL); } return (0); } static int in_gre_lookup(const struct mbuf *m, int off, int proto, void **arg) { const struct ip *ip; struct gre_softc *sc; + if (V_ipv4_hashtbl == NULL) + return (0); + MPASS(in_epoch()); ip = mtod(m, const struct ip *); CK_LIST_FOREACH(sc, &GRE_HASH(ip->ip_dst.s_addr, ip->ip_src.s_addr), chain) { /* * This is an inbound packet, its ip_dst is source address * in softc. */ if (sc->gre_oip.ip_src.s_addr == ip->ip_dst.s_addr && sc->gre_oip.ip_dst.s_addr == ip->ip_src.s_addr) { if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0) return (0); *arg = sc; return (ENCAP_DRV_LOOKUP); } } return (0); } static void in_gre_attach(struct gre_softc *sc) { sc->gre_hlen = sizeof(struct greip); sc->gre_oip.ip_v = IPVERSION; sc->gre_oip.ip_hl = sizeof(struct ip) >> 2; sc->gre_oip.ip_p = IPPROTO_GRE; gre_updatehdr(sc, &sc->gre_gihdr->gi_gre); CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain); } void in_gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t value) { MPASS(cmd == GRESKEY || cmd == GRESOPTS); /* NOTE: we are protected with gre_ioctl_sx lock */ MPASS(sc->gre_family == AF_INET); CK_LIST_REMOVE(sc, chain); GRE_WAIT(); if (cmd == GRESKEY) sc->gre_key = value; else sc->gre_options = value; in_gre_attach(sc); } int in_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct sockaddr_in *dst, *src; struct ip *ip; int error; /* NOTE: we are protected with gre_ioctl_sx lock */ error = EINVAL; switch (cmd) { case SIOCSIFPHYADDR: src = &((struct in_aliasreq *)data)->ifra_addr; dst = &((struct in_aliasreq *)data)->ifra_dstaddr; /* sanity checks */ if (src->sin_family != dst->sin_family || src->sin_family != AF_INET || src->sin_len != dst->sin_len || src->sin_len != sizeof(*src)) break; if (src->sin_addr.s_addr == INADDR_ANY || dst->sin_addr.s_addr == INADDR_ANY) { error = EADDRNOTAVAIL; break; } if (V_ipv4_hashtbl == NULL) V_ipv4_hashtbl = gre_hashinit(); error = in_gre_checkdup(sc, src->sin_addr.s_addr, dst->sin_addr.s_addr); if (error == EADDRNOTAVAIL) break; if (error == EEXIST) { /* Addresses are the same. Just return. */ error = 0; break; } ip = malloc(sizeof(struct greip) + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); ip->ip_src.s_addr = src->sin_addr.s_addr; ip->ip_dst.s_addr = dst->sin_addr.s_addr; if (sc->gre_family != 0) { /* Detach existing tunnel first */ CK_LIST_REMOVE(sc, chain); GRE_WAIT(); free(sc->gre_hdr, M_GRE); /* XXX: should we notify about link state change? */ } sc->gre_family = AF_INET; sc->gre_hdr = ip; sc->gre_oseq = 0; sc->gre_iseq = UINT32_MAX; in_gre_attach(sc); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (sc->gre_family != AF_INET) { error = EADDRNOTAVAIL; break; } src = (struct sockaddr_in *)&ifr->ifr_addr; memset(src, 0, sizeof(*src)); src->sin_family = AF_INET; src->sin_len = sizeof(*src); src->sin_addr = (cmd == SIOCGIFPSRCADDR) ? sc->gre_oip.ip_src: sc->gre_oip.ip_dst; error = prison_if(curthread->td_ucred, (struct sockaddr *)src); if (error != 0) memset(src, 0, sizeof(*src)); break; } return (error); } int in_gre_output(struct mbuf *m, int af, int hlen) { struct greip *gi; gi = mtod(m, struct greip *); switch (af) { case AF_INET: /* * gre_transmit() has used M_PREPEND() that doesn't guarantee * m_data is contiguous more than hlen bytes. Use m_copydata() * here to avoid m_pullup(). */ m_copydata(m, hlen + offsetof(struct ip, ip_tos), sizeof(u_char), &gi->gi_ip.ip_tos); m_copydata(m, hlen + offsetof(struct ip, ip_id), sizeof(u_short), (caddr_t)&gi->gi_ip.ip_id); break; #ifdef INET6 case AF_INET6: gi->gi_ip.ip_tos = 0; /* XXX */ ip_fillid(&gi->gi_ip); break; #endif } gi->gi_ip.ip_ttl = V_ip_gre_ttl; gi->gi_ip.ip_len = htons(m->m_pkthdr.len); return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL)); } static const struct encaptab *ecookie = NULL; static const struct encap_config ipv4_encap_cfg = { .proto = IPPROTO_GRE, .min_length = sizeof(struct greip) + sizeof(struct ip), .exact_match = ENCAP_DRV_LOOKUP, .lookup = in_gre_lookup, .input = gre_input }; void in_gre_init(void) { if (!IS_DEFAULT_VNET(curvnet)) return; ecookie = ip_encap_attach(&ipv4_encap_cfg, NULL, M_WAITOK); } void in_gre_uninit(void) { if (IS_DEFAULT_VNET(curvnet)) ip_encap_detach(ecookie); if (V_ipv4_hashtbl != NULL) gre_hashdestroy(V_ipv4_hashtbl); } Index: head/sys/netinet6/in6_gif.c =================================================================== --- head/sys/netinet6/in6_gif.c (revision 335759) +++ head/sys/netinet6/in6_gif.c (revision 335760) @@ -1,424 +1,427 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6_gif.c,v 1.49 2001/05/14 14:02:17 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #include #include #include #include #include #include #include #include #define GIF_HLIM 30 static VNET_DEFINE(int, ip6_gif_hlim) = GIF_HLIM; #define V_ip6_gif_hlim VNET(ip6_gif_hlim) SYSCTL_DECL(_net_inet6_ip6); SYSCTL_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_gif_hlim), 0, "Default hop limit for encapsulated packets"); /* * We keep interfaces in a hash table using src+dst as key. * Interfaces with GIF_IGNORE_SOURCE flag are linked into plain list. */ static VNET_DEFINE(struct gif_list *, ipv6_hashtbl) = NULL; static VNET_DEFINE(struct gif_list, ipv6_list) = CK_LIST_HEAD_INITIALIZER(); #define V_ipv6_hashtbl VNET(ipv6_hashtbl) #define V_ipv6_list VNET(ipv6_list) #define GIF_HASH(src, dst) (V_ipv6_hashtbl[\ in6_gif_hashval((src), (dst)) & (GIF_HASH_SIZE - 1)]) #define GIF_HASH_SC(sc) GIF_HASH(&(sc)->gif_ip6hdr->ip6_src,\ &(sc)->gif_ip6hdr->ip6_dst) static uint32_t in6_gif_hashval(const struct in6_addr *src, const struct in6_addr *dst) { uint32_t ret; ret = fnv_32_buf(src, sizeof(*src), FNV1_32_INIT); return (fnv_32_buf(dst, sizeof(*dst), ret)); } static int in6_gif_checkdup(const struct gif_softc *sc, const struct in6_addr *src, const struct in6_addr *dst) { struct gif_softc *tmp; if (sc->gif_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, src) && IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, dst)) return (EEXIST); CK_LIST_FOREACH(tmp, &GIF_HASH(src, dst), chain) { if (tmp == sc) continue; if (IN6_ARE_ADDR_EQUAL(&tmp->gif_ip6hdr->ip6_src, src) && IN6_ARE_ADDR_EQUAL(&tmp->gif_ip6hdr->ip6_dst, dst)) return (EADDRNOTAVAIL); } return (0); } static void in6_gif_attach(struct gif_softc *sc) { if (sc->gif_options & GIF_IGNORE_SOURCE) CK_LIST_INSERT_HEAD(&V_ipv6_list, sc, chain); else CK_LIST_INSERT_HEAD(&GIF_HASH_SC(sc), sc, chain); } int in6_gif_setopts(struct gif_softc *sc, u_int options) { /* NOTE: we are protected with gif_ioctl_sx lock */ MPASS(sc->gif_family == AF_INET6); MPASS(sc->gif_options != options); if ((options & GIF_IGNORE_SOURCE) != (sc->gif_options & GIF_IGNORE_SOURCE)) { CK_LIST_REMOVE(sc, chain); sc->gif_options = options; in6_gif_attach(sc); } return (0); } int in6_gif_ioctl(struct gif_softc *sc, u_long cmd, caddr_t data) { struct in6_ifreq *ifr = (struct in6_ifreq *)data; struct sockaddr_in6 *dst, *src; struct ip6_hdr *ip6; int error; /* NOTE: we are protected with gif_ioctl_sx lock */ error = EINVAL; switch (cmd) { case SIOCSIFPHYADDR_IN6: src = &((struct in6_aliasreq *)data)->ifra_addr; dst = &((struct in6_aliasreq *)data)->ifra_dstaddr; /* sanity checks */ if (src->sin6_family != dst->sin6_family || src->sin6_family != AF_INET6 || src->sin6_len != dst->sin6_len || src->sin6_len != sizeof(*src)) break; if (IN6_IS_ADDR_UNSPECIFIED(&src->sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr)) { error = EADDRNOTAVAIL; break; } /* * Check validity of the scope zone ID of the * addresses, and convert it into the kernel * internal form if necessary. */ if ((error = sa6_embedscope(src, 0)) != 0 || (error = sa6_embedscope(dst, 0)) != 0) break; if (V_ipv6_hashtbl == NULL) V_ipv6_hashtbl = gif_hashinit(); error = in6_gif_checkdup(sc, &src->sin6_addr, &dst->sin6_addr); if (error == EADDRNOTAVAIL) break; if (error == EEXIST) { /* Addresses are the same. Just return. */ error = 0; break; } ip6 = malloc(sizeof(*ip6), M_GIF, M_WAITOK | M_ZERO); ip6->ip6_src = src->sin6_addr; ip6->ip6_dst = dst->sin6_addr; if (sc->gif_family != 0) { /* Detach existing tunnel first */ CK_LIST_REMOVE(sc, chain); GIF_WAIT(); free(sc->gif_hdr, M_GIF); /* XXX: should we notify about link state change? */ } sc->gif_family = AF_INET6; sc->gif_ip6hdr = ip6; in6_gif_attach(sc); break; case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: if (sc->gif_family != AF_INET6) { error = EADDRNOTAVAIL; break; } src = (struct sockaddr_in6 *)&ifr->ifr_addr; memset(src, 0, sizeof(*src)); src->sin6_family = AF_INET6; src->sin6_len = sizeof(*src); src->sin6_addr = (cmd == SIOCGIFPSRCADDR_IN6) ? sc->gif_ip6hdr->ip6_src: sc->gif_ip6hdr->ip6_dst; error = prison_if(curthread->td_ucred, (struct sockaddr *)src); if (error == 0) error = sa6_recoverscope(src); if (error != 0) memset(src, 0, sizeof(*src)); break; } return (error); } int in6_gif_output(struct ifnet *ifp, struct mbuf *m, int proto, uint8_t ecn) { struct gif_softc *sc = ifp->if_softc; struct ip6_hdr *ip6; int len; /* prepend new IP header */ MPASS(in_epoch()); len = sizeof(struct ip6_hdr); #ifndef __NO_STRICT_ALIGNMENT if (proto == IPPROTO_ETHERIP) len += ETHERIP_ALIGN; #endif M_PREPEND(m, len, M_NOWAIT); if (m == NULL) return (ENOBUFS); #ifndef __NO_STRICT_ALIGNMENT if (proto == IPPROTO_ETHERIP) { len = mtod(m, vm_offset_t) & 3; KASSERT(len == 0 || len == ETHERIP_ALIGN, ("in6_gif_output: unexpected misalignment")); m->m_data += len; m->m_len -= ETHERIP_ALIGN; } #endif ip6 = mtod(m, struct ip6_hdr *); MPASS(sc->gif_family == AF_INET6); bcopy(sc->gif_ip6hdr, ip6, sizeof(struct ip6_hdr)); ip6->ip6_flow |= htonl((uint32_t)ecn << 20); ip6->ip6_nxt = proto; ip6->ip6_hlim = V_ip6_gif_hlim; /* * force fragmentation to minimum MTU, to avoid path MTU discovery. * it is too painful to ask for resend of inner packet, to achieve * path MTU discovery for encapsulated packets. */ return (ip6_output(m, 0, NULL, IPV6_MINMTU, 0, NULL, NULL)); } static int in6_gif_input(struct mbuf *m, int off, int proto, void *arg) { struct gif_softc *sc = arg; struct ifnet *gifp; struct ip6_hdr *ip6; uint8_t ecn; MPASS(in_epoch()); if (sc == NULL) { m_freem(m); IP6STAT_INC(ip6s_nogif); return (IPPROTO_DONE); } gifp = GIF2IFP(sc); if ((gifp->if_flags & IFF_UP) != 0) { ip6 = mtod(m, struct ip6_hdr *); ecn = (ntohl(ip6->ip6_flow) >> 20) & 0xff; m_adj(m, off); gif_input(m, gifp, proto, ecn); } else { m_freem(m); IP6STAT_INC(ip6s_nogif); } return (IPPROTO_DONE); } static int in6_gif_lookup(const struct mbuf *m, int off, int proto, void **arg) { const struct ip6_hdr *ip6; struct gif_softc *sc; int ret; + if (V_ipv6_hashtbl == NULL) + return (0); + MPASS(in_epoch()); /* * NOTE: it is safe to iterate without any locking here, because softc * can be reclaimed only when we are not within net_epoch_preempt * section, but ip_encap lookup+input are executed in epoch section. */ ip6 = mtod(m, const struct ip6_hdr *); ret = 0; CK_LIST_FOREACH(sc, &GIF_HASH(&ip6->ip6_dst, &ip6->ip6_src), chain) { /* * This is an inbound packet, its ip6_dst is source address * in softc. */ if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst) && IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_dst, &ip6->ip6_src)) { ret = ENCAP_DRV_LOOKUP; goto done; } } /* * No exact match. * Check the list of interfaces with GIF_IGNORE_SOURCE flag. */ CK_LIST_FOREACH(sc, &V_ipv6_list, chain) { if (IN6_ARE_ADDR_EQUAL(&sc->gif_ip6hdr->ip6_src, &ip6->ip6_dst)) { ret = 128 + 8; /* src + proto */ goto done; } } return (0); done: if ((GIF2IFP(sc)->if_flags & IFF_UP) == 0) return (0); /* ingress filters on outer source */ if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) { struct nhop6_basic nh6; if (fib6_lookup_nh_basic(sc->gif_fibnum, &ip6->ip6_src, ntohs(in6_getscope(&ip6->ip6_src)), 0, 0, &nh6) != 0) return (0); if (nh6.nh_ifp != m->m_pkthdr.rcvif) return (0); } *arg = sc; return (ret); } static struct { const struct encap_config encap; const struct encaptab *cookie; } ipv6_encap_cfg[] = { #ifdef INET { .encap = { .proto = IPPROTO_IPV4, .min_length = sizeof(struct ip6_hdr) + sizeof(struct ip), .exact_match = ENCAP_DRV_LOOKUP, .lookup = in6_gif_lookup, .input = in6_gif_input }, }, #endif { .encap = { .proto = IPPROTO_IPV6, .min_length = 2 * sizeof(struct ip6_hdr), .exact_match = ENCAP_DRV_LOOKUP, .lookup = in6_gif_lookup, .input = in6_gif_input }, }, { .encap = { .proto = IPPROTO_ETHERIP, .min_length = sizeof(struct ip6_hdr) + sizeof(struct etherip_header) + sizeof(struct ether_header), .exact_match = ENCAP_DRV_LOOKUP, .lookup = in6_gif_lookup, .input = in6_gif_input }, } }; void in6_gif_init(void) { int i; if (!IS_DEFAULT_VNET(curvnet)) return; for (i = 0; i < nitems(ipv6_encap_cfg); i++) ipv6_encap_cfg[i].cookie = ip6_encap_attach( &ipv6_encap_cfg[i].encap, NULL, M_WAITOK); } void in6_gif_uninit(void) { int i; if (IS_DEFAULT_VNET(curvnet)) { for (i = 0; i < nitems(ipv6_encap_cfg); i++) ip6_encap_detach(ipv6_encap_cfg[i].cookie); } if (V_ipv6_hashtbl != NULL) gif_hashdestroy(V_ipv6_hashtbl); } Index: head/sys/netinet6/ip6_gre.c =================================================================== --- head/sys/netinet6/ip6_gre.c (revision 335759) +++ head/sys/netinet6/ip6_gre.c (revision 335760) @@ -1,284 +1,287 @@ /*- * Copyright (c) 2014, 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #include #include #include #include #include #include VNET_DEFINE(int, ip6_gre_hlim) = IPV6_DEFHLIM; #define V_ip6_gre_hlim VNET(ip6_gre_hlim) SYSCTL_DECL(_net_inet6_ip6); SYSCTL_INT(_net_inet6_ip6, OID_AUTO, grehlim, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_gre_hlim), 0, "Default hop limit for encapsulated packets"); static VNET_DEFINE(struct gre_list *, ipv6_hashtbl) = NULL; #define V_ipv6_hashtbl VNET(ipv6_hashtbl) #define GRE_HASH(src, dst) (V_ipv6_hashtbl[\ in6_gre_hashval((src), (dst)) & (GRE_HASH_SIZE - 1)]) #define GRE_HASH_SC(sc) GRE_HASH(&(sc)->gre_oip6.ip6_src,\ &(sc)->gre_oip6.ip6_dst) static uint32_t in6_gre_hashval(const struct in6_addr *src, const struct in6_addr *dst) { uint32_t ret; ret = fnv_32_buf(src, sizeof(*src), FNV1_32_INIT); return (fnv_32_buf(dst, sizeof(*dst), ret)); } static int in6_gre_checkdup(const struct gre_softc *sc, const struct in6_addr *src, const struct in6_addr *dst) { struct gre_softc *tmp; if (sc->gre_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_src, src) && IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_dst, dst)) return (EEXIST); CK_LIST_FOREACH(tmp, &GRE_HASH(src, dst), chain) { if (tmp == sc) continue; if (IN6_ARE_ADDR_EQUAL(&tmp->gre_oip6.ip6_src, src) && IN6_ARE_ADDR_EQUAL(&tmp->gre_oip6.ip6_dst, dst)) return (EADDRNOTAVAIL); } return (0); } static int in6_gre_lookup(const struct mbuf *m, int off, int proto, void **arg) { const struct ip6_hdr *ip6; struct gre_softc *sc; + if (V_ipv6_hashtbl == NULL) + return (0); + MPASS(in_epoch()); ip6 = mtod(m, const struct ip6_hdr *); CK_LIST_FOREACH(sc, &GRE_HASH(&ip6->ip6_dst, &ip6->ip6_src), chain) { /* * This is an inbound packet, its ip6_dst is source address * in softc. */ if (IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_src, &ip6->ip6_dst) && IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_dst, &ip6->ip6_src)) { if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0) return (0); *arg = sc; return (ENCAP_DRV_LOOKUP); } } return (0); } static void in6_gre_attach(struct gre_softc *sc) { sc->gre_hlen = sizeof(struct greip6); sc->gre_oip6.ip6_vfc = IPV6_VERSION; sc->gre_oip6.ip6_nxt = IPPROTO_GRE; gre_updatehdr(sc, &sc->gre_gi6hdr->gi6_gre); CK_LIST_INSERT_HEAD(&GRE_HASH_SC(sc), sc, chain); } void in6_gre_setopts(struct gre_softc *sc, u_long cmd, uint32_t value) { MPASS(cmd == GRESKEY || cmd == GRESOPTS); /* NOTE: we are protected with gre_ioctl_sx lock */ MPASS(sc->gre_family == AF_INET6); CK_LIST_REMOVE(sc, chain); GRE_WAIT(); if (cmd == GRESKEY) sc->gre_key = value; else sc->gre_options = value; in6_gre_attach(sc); } int in6_gre_ioctl(struct gre_softc *sc, u_long cmd, caddr_t data) { struct in6_ifreq *ifr = (struct in6_ifreq *)data; struct sockaddr_in6 *dst, *src; struct ip6_hdr *ip6; int error; /* NOTE: we are protected with gre_ioctl_sx lock */ error = EINVAL; switch (cmd) { case SIOCSIFPHYADDR_IN6: src = &((struct in6_aliasreq *)data)->ifra_addr; dst = &((struct in6_aliasreq *)data)->ifra_dstaddr; /* sanity checks */ if (src->sin6_family != dst->sin6_family || src->sin6_family != AF_INET6 || src->sin6_len != dst->sin6_len || src->sin6_len != sizeof(*src)) break; if (IN6_IS_ADDR_UNSPECIFIED(&src->sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&dst->sin6_addr)) { error = EADDRNOTAVAIL; break; } /* * Check validity of the scope zone ID of the * addresses, and convert it into the kernel * internal form if necessary. */ if ((error = sa6_embedscope(src, 0)) != 0 || (error = sa6_embedscope(dst, 0)) != 0) break; if (V_ipv6_hashtbl == NULL) V_ipv6_hashtbl = gre_hashinit(); error = in6_gre_checkdup(sc, &src->sin6_addr, &dst->sin6_addr); if (error == EADDRNOTAVAIL) break; if (error == EEXIST) { /* Addresses are the same. Just return. */ error = 0; break; } ip6 = malloc(sizeof(struct greip6) + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); ip6->ip6_src = src->sin6_addr; ip6->ip6_dst = dst->sin6_addr; if (sc->gre_family != 0) { /* Detach existing tunnel first */ CK_LIST_REMOVE(sc, chain); GRE_WAIT(); free(sc->gre_hdr, M_GRE); /* XXX: should we notify about link state change? */ } sc->gre_family = AF_INET6; sc->gre_hdr = ip6; sc->gre_oseq = 0; sc->gre_iseq = UINT32_MAX; in6_gre_attach(sc); break; case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: if (sc->gre_family != AF_INET6) { error = EADDRNOTAVAIL; break; } src = (struct sockaddr_in6 *)&ifr->ifr_addr; memset(src, 0, sizeof(*src)); src->sin6_family = AF_INET6; src->sin6_len = sizeof(*src); src->sin6_addr = (cmd == SIOCGIFPSRCADDR_IN6) ? sc->gre_oip6.ip6_src: sc->gre_oip6.ip6_dst; error = prison_if(curthread->td_ucred, (struct sockaddr *)src); if (error == 0) error = sa6_recoverscope(src); if (error != 0) memset(src, 0, sizeof(*src)); break; } return (error); } int in6_gre_output(struct mbuf *m, int af __unused, int hlen __unused) { struct greip6 *gi6; gi6 = mtod(m, struct greip6 *); gi6->gi6_ip6.ip6_hlim = V_ip6_gre_hlim; return (ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL, NULL)); } static const struct encaptab *ecookie = NULL; static const struct encap_config ipv6_encap_cfg = { .proto = IPPROTO_GRE, .min_length = sizeof(struct greip6) + #ifdef INET sizeof(struct ip), #else sizeof(struct ip6_hdr), #endif .exact_match = ENCAP_DRV_LOOKUP, .lookup = in6_gre_lookup, .input = gre_input }; void in6_gre_init(void) { if (!IS_DEFAULT_VNET(curvnet)) return; ecookie = ip6_encap_attach(&ipv6_encap_cfg, NULL, M_WAITOK); } void in6_gre_uninit(void) { if (IS_DEFAULT_VNET(curvnet)) ip6_encap_detach(ecookie); if (V_ipv6_hashtbl != NULL) gre_hashdestroy(V_ipv6_hashtbl); }