Index: head/sys/net/if_gif.c =================================================================== --- head/sys/net/if_gif.c (revision 361748) +++ head/sys/net/if_gif.c (revision 361749) @@ -1,703 +1,724 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * Copyright (c) 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #ifdef INET6 #ifndef INET #include #endif #include #include #include #include #endif /* INET6 */ #include #include #include #include #include static const char gifname[] = "gif"; MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); static struct sx gif_ioctl_sx; SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl"); void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af); void (*ng_gif_attach_p)(struct ifnet *ifp); void (*ng_gif_detach_p)(struct ifnet *ifp); +#ifdef VIMAGE +static void gif_reassign(struct ifnet *, struct vnet *, char *); +#endif static void gif_delete_tunnel(struct gif_softc *); static int gif_ioctl(struct ifnet *, u_long, caddr_t); static int gif_transmit(struct ifnet *, struct mbuf *); static void gif_qflush(struct ifnet *); static int gif_clone_create(struct if_clone *, int, caddr_t); static void gif_clone_destroy(struct ifnet *); VNET_DEFINE_STATIC(struct if_clone *, gif_cloner); #define V_gif_cloner VNET(gif_cloner) SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Generic Tunnel Interface"); #ifndef MAX_GIF_NEST /* * This macro controls the default upper limitation on nesting of gif tunnels. * Since, setting a large value to this macro with a careless configuration * may introduce system crash, we don't allow any nestings by default. * If you need to configure nested gif tunnels, you can define this macro * in your kernel configuration file. However, if you do so, please be * careful to configure the tunnels so that it won't make a loop. */ #define MAX_GIF_NEST 1 #endif VNET_DEFINE_STATIC(int, max_gif_nesting) = MAX_GIF_NEST; #define V_max_gif_nesting VNET(max_gif_nesting) SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels"); static int gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct gif_softc *sc; sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO); sc->gif_fibnum = curthread->td_proc->p_fibnum; GIF2IFP(sc) = if_alloc(IFT_GIF); GIF2IFP(sc)->if_softc = sc; if_initname(GIF2IFP(sc), gifname, unit); GIF2IFP(sc)->if_addrlen = 0; GIF2IFP(sc)->if_mtu = GIF_MTU; GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; GIF2IFP(sc)->if_ioctl = gif_ioctl; GIF2IFP(sc)->if_transmit = gif_transmit; GIF2IFP(sc)->if_qflush = gif_qflush; GIF2IFP(sc)->if_output = gif_output; +#ifdef VIMAGE + GIF2IFP(sc)->if_reassign = gif_reassign; +#endif GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(GIF2IFP(sc)); bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t)); if (ng_gif_attach_p != NULL) (*ng_gif_attach_p)(GIF2IFP(sc)); return (0); } + +#ifdef VIMAGE +static void +gif_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused, + char *unused __unused) +{ + struct gif_softc *sc; + + sx_xlock(&gif_ioctl_sx); + sc = ifp->if_softc; + if (sc != NULL) + gif_delete_tunnel(sc); + sx_xunlock(&gif_ioctl_sx); +} +#endif /* VIMAGE */ static void gif_clone_destroy(struct ifnet *ifp) { struct gif_softc *sc; sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; gif_delete_tunnel(sc); if (ng_gif_detach_p != NULL) (*ng_gif_detach_p)(ifp); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&gif_ioctl_sx); GIF_WAIT(); if_free(ifp); free(sc, M_GIF); } static void vnet_gif_init(const void *unused __unused) { V_gif_cloner = if_clone_simple(gifname, gif_clone_create, gif_clone_destroy, 0); #ifdef INET in_gif_init(); #endif #ifdef INET6 in6_gif_init(); #endif } VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_init, NULL); static void vnet_gif_uninit(const void *unused __unused) { if_clone_detach(V_gif_cloner); #ifdef INET in_gif_uninit(); #endif #ifdef INET6 in6_gif_uninit(); #endif } VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_uninit, NULL); static int gifmodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: case MOD_UNLOAD: break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t gif_mod = { "if_gif", gifmodevent, 0 }; DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gif, 1); struct gif_list * gif_hashinit(void) { struct gif_list *hash; int i; hash = malloc(sizeof(struct gif_list) * GIF_HASH_SIZE, M_GIF, M_WAITOK); for (i = 0; i < GIF_HASH_SIZE; i++) CK_LIST_INIT(&hash[i]); return (hash); } void gif_hashdestroy(struct gif_list *hash) { free(hash, M_GIF); } #define MTAG_GIF 1080679712 static int gif_transmit(struct ifnet *ifp, struct mbuf *m) { struct gif_softc *sc; struct etherip_header *eth; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; uint32_t t; #endif uint32_t af; uint8_t proto, ecn; int error; NET_EPOCH_ASSERT(); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); goto err; } #endif error = ENETDOWN; sc = ifp->if_softc; if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->gif_family == 0 || (error = if_tunnel_check_nesting(ifp, m, MTAG_GIF, V_max_gif_nesting)) != 0) { m_freem(m); goto err; } /* Now pull back the af that we stashed in the csum_data. */ if (ifp->if_bridge) af = AF_LINK; else af = m->m_pkthdr.csum_data; m->m_flags &= ~(M_BCAST|M_MCAST); M_SETFIB(m, sc->gif_fibnum); BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); /* inner AF-specific encapsulation */ ecn = 0; switch (af) { #ifdef INET case AF_INET: proto = IPPROTO_IPV4; if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) { error = ENOBUFS; goto err; } ip = mtod(m, struct ip *); ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &ecn, &ip->ip_tos); break; #endif #ifdef INET6 case AF_INET6: proto = IPPROTO_IPV6; if (m->m_len < sizeof(struct ip6_hdr)) m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) { error = ENOBUFS; goto err; } t = 0; ip6 = mtod(m, struct ip6_hdr *); ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &t, &ip6->ip6_flow); ecn = (ntohl(t) >> 20) & 0xff; break; #endif case AF_LINK: proto = IPPROTO_ETHERIP; M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto err; } eth = mtod(m, struct etherip_header *); eth->eip_resvh = 0; eth->eip_ver = ETHERIP_VERSION; eth->eip_resvl = 0; break; default: error = EAFNOSUPPORT; m_freem(m); goto err; } /* XXX should we check if our outer source is legal? */ /* dispatch to output logic based on outer AF */ switch (sc->gif_family) { #ifdef INET case AF_INET: error = in_gif_output(ifp, m, proto, ecn); break; #endif #ifdef INET6 case AF_INET6: error = in6_gif_output(ifp, m, proto, ecn); break; #endif default: m_freem(m); } err: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } static void gif_qflush(struct ifnet *ifp __unused) { } int gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { uint32_t af; if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; /* * Now save the af in the inbound pkt csum data, this is a cheat since * we are using the inbound csum_data field to carry the af over to * the gif_transmit() routine, avoiding using yet another mtag. */ m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); } void gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn) { struct etherip_header *eip; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; uint32_t t; #endif struct ether_header *eh; struct ifnet *oldifp; int isr, n, af; NET_EPOCH_ASSERT(); if (ifp == NULL) { /* just in case */ m_freem(m); return; } m->m_pkthdr.rcvif = ifp; m_clrprotoflags(m); switch (proto) { #ifdef INET case IPPROTO_IPV4: af = AF_INET; if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) goto drop; ip = mtod(m, struct ip *); if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &ecn, &ip->ip_tos) == 0) { m_freem(m); goto drop; } break; #endif #ifdef INET6 case IPPROTO_IPV6: af = AF_INET6; if (m->m_len < sizeof(struct ip6_hdr)) m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) goto drop; t = htonl((uint32_t)ecn << 20); ip6 = mtod(m, struct ip6_hdr *); if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &t, &ip6->ip6_flow) == 0) { m_freem(m); goto drop; } break; #endif case IPPROTO_ETHERIP: af = AF_LINK; break; default: m_freem(m); goto drop; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif if (bpf_peers_present(ifp->if_bpf)) { uint32_t af1 = af; bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m); } if ((ifp->if_flags & IFF_MONITOR) != 0) { if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); m_freem(m); return; } if (ng_gif_input_p != NULL) { (*ng_gif_input_p)(ifp, &m, af); if (m == NULL) goto drop; } /* * Put the packet to the network layer input queue according to the * specified address family. * Note: older versions of gif_input directly called network layer * input functions, e.g. ip6_input, here. We changed the policy to * prevent too many recursive calls of such input functions, which * might cause kernel panic. But the change may introduce another * problem; if the input queue is full, packets are discarded. * The kernel stack overflow really happened, and we believed * queue-full rarely occurs, so we changed the policy. */ switch (af) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: isr = NETISR_IPV6; break; #endif case AF_LINK: n = sizeof(struct etherip_header) + sizeof(struct ether_header); if (n > m->m_len) m = m_pullup(m, n); if (m == NULL) goto drop; eip = mtod(m, struct etherip_header *); if (eip->eip_ver != ETHERIP_VERSION) { /* discard unknown versions */ m_freem(m); goto drop; } m_adj(m, sizeof(struct etherip_header)); m->m_flags &= ~(M_BCAST|M_MCAST); m->m_pkthdr.rcvif = ifp; if (ifp->if_bridge) { oldifp = ifp; eh = mtod(m, struct ether_header *); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (ETHER_IS_BROADCAST(eh->ether_dhost)) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } BRIDGE_INPUT(ifp, m); if (m != NULL && ifp != oldifp) { /* * The bridge gave us back itself or one of the * members for which the frame is addressed. */ ether_demux(ifp, m); return; } } if (m != NULL) m_freem(m); return; default: if (ng_gif_input_orphan_p != NULL) (*ng_gif_input_orphan_p)(ifp, m, af); else m_freem(m); return; } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } static int gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq*)data; struct gif_softc *sc; u_int options; int error; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCADDMULTI: case SIOCDELMULTI: case SIOCGIFMTU: case SIOCSIFFLAGS: return (0); case SIOCSIFMTU: if (ifr->ifr_mtu < GIF_MTU_MIN || ifr->ifr_mtu > GIF_MTU_MAX) return (EINVAL); else ifp->if_mtu = ifr->ifr_mtu; return (0); } sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { error = ENXIO; goto bad; } error = 0; switch (cmd) { case SIOCDIFPHYADDR: if (sc->gif_family == 0) break; gif_delete_tunnel(sc); break; #ifdef INET case SIOCSIFPHYADDR: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: error = in_gif_ioctl(sc, cmd, data); break; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: error = in6_gif_ioctl(sc, cmd, data); break; #endif case SIOCGTUNFIB: ifr->ifr_fib = sc->gif_fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->gif_fibnum = ifr->ifr_fib; break; case GIFGOPTS: options = sc->gif_options; error = copyout(&options, ifr_data_get_ptr(ifr), sizeof(options)); break; case GIFSOPTS: if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) break; error = copyin(ifr_data_get_ptr(ifr), &options, sizeof(options)); if (error) break; if (options & ~GIF_OPTMASK) { error = EINVAL; break; } if (sc->gif_options != options) { switch (sc->gif_family) { #ifdef INET case AF_INET: error = in_gif_setopts(sc, options); break; #endif #ifdef INET6 case AF_INET6: error = in6_gif_setopts(sc, options); break; #endif default: /* No need to invoke AF-handler */ sc->gif_options = options; } } break; default: error = EINVAL; break; } if (error == 0 && sc->gif_family != 0) { if ( #ifdef INET cmd == SIOCSIFPHYADDR || #endif #ifdef INET6 cmd == SIOCSIFPHYADDR_IN6 || #endif 0) { if_link_state_change(ifp, LINK_STATE_UP); } } bad: sx_xunlock(&gif_ioctl_sx); return (error); } static void gif_delete_tunnel(struct gif_softc *sc) { sx_assert(&gif_ioctl_sx, SA_XLOCKED); if (sc->gif_family != 0) { CK_LIST_REMOVE(sc, srchash); CK_LIST_REMOVE(sc, chain); /* Wait until it become safe to free gif_hdr */ GIF_WAIT(); free(sc->gif_hdr, M_GIF); } sc->gif_family = 0; GIF2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(GIF2IFP(sc), LINK_STATE_DOWN); } Index: head/sys/net/if_gre.c =================================================================== --- head/sys/net/if_gre.c (revision 361748) +++ head/sys/net/if_gre.c (revision 361749) @@ -1,820 +1,841 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1998 The NetBSD Foundation, Inc. * Copyright (c) 2014, 2018 Andrey V. Elsukov * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Heiko W.Rupp * * IPv6-over-GRE contributed by Gert Doering * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #ifdef RSS #include #endif #endif #ifdef INET6 #include #include #include #ifdef RSS #include #endif #endif #include #include #include #include #include #include #define GREMTU 1476 static const char grename[] = "gre"; MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); static struct sx gre_ioctl_sx; SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl"); static int gre_clone_create(struct if_clone *, int, caddr_t); static void gre_clone_destroy(struct ifnet *); VNET_DEFINE_STATIC(struct if_clone *, gre_cloner); #define V_gre_cloner VNET(gre_cloner) +#ifdef VIMAGE +static void gre_reassign(struct ifnet *, struct vnet *, char *); +#endif static void gre_qflush(struct ifnet *); static int gre_transmit(struct ifnet *, struct mbuf *); static int gre_ioctl(struct ifnet *, u_long, caddr_t); static int gre_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void gre_delete_tunnel(struct gre_softc *); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Generic Routing Encapsulation"); #ifndef MAX_GRE_NEST /* * This macro controls the default upper limitation on nesting of gre tunnels. * Since, setting a large value to this macro with a careless configuration * may introduce system crash, we don't allow any nestings by default. * If you need to configure nested gre tunnels, you can define this macro * in your kernel configuration file. However, if you do so, please be * careful to configure the tunnels so that it won't make a loop. */ #define MAX_GRE_NEST 1 #endif VNET_DEFINE_STATIC(int, max_gre_nesting) = MAX_GRE_NEST; #define V_max_gre_nesting VNET(max_gre_nesting) SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels"); static void vnet_gre_init(const void *unused __unused) { V_gre_cloner = if_clone_simple(grename, gre_clone_create, gre_clone_destroy, 0); #ifdef INET in_gre_init(); #endif #ifdef INET6 in6_gre_init(); #endif } VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gre_init, NULL); static void vnet_gre_uninit(const void *unused __unused) { if_clone_detach(V_gre_cloner); #ifdef INET in_gre_uninit(); #endif #ifdef INET6 in6_gre_uninit(); #endif /* XXX: epoch_call drain */ } VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gre_uninit, NULL); static int gre_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct gre_softc *sc; sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO); sc->gre_fibnum = curthread->td_proc->p_fibnum; GRE2IFP(sc) = if_alloc(IFT_TUNNEL); GRE2IFP(sc)->if_softc = sc; if_initname(GRE2IFP(sc), grename, unit); GRE2IFP(sc)->if_mtu = GREMTU; GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; GRE2IFP(sc)->if_output = gre_output; GRE2IFP(sc)->if_ioctl = gre_ioctl; GRE2IFP(sc)->if_transmit = gre_transmit; GRE2IFP(sc)->if_qflush = gre_qflush; +#ifdef VIMAGE + GRE2IFP(sc)->if_reassign = gre_reassign; +#endif GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(GRE2IFP(sc)); bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); return (0); } + +#ifdef VIMAGE +static void +gre_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused, + char *unused __unused) +{ + struct gre_softc *sc; + + sx_xlock(&gre_ioctl_sx); + sc = ifp->if_softc; + if (sc != NULL) + gre_delete_tunnel(sc); + sx_xunlock(&gre_ioctl_sx); +} +#endif /* VIMAGE */ static void gre_clone_destroy(struct ifnet *ifp) { struct gre_softc *sc; sx_xlock(&gre_ioctl_sx); sc = ifp->if_softc; gre_delete_tunnel(sc); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&gre_ioctl_sx); GRE_WAIT(); if_free(ifp); free(sc, M_GRE); } static int gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct gre_softc *sc; uint32_t opt; int error; switch (cmd) { case SIOCSIFMTU: /* XXX: */ if (ifr->ifr_mtu < 576) return (EINVAL); ifp->if_mtu = ifr->ifr_mtu; return (0); case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: return (0); case GRESADDRS: case GRESADDRD: case GREGADDRS: case GREGADDRD: case GRESPROTO: case GREGPROTO: return (EOPNOTSUPP); } sx_xlock(&gre_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { error = ENXIO; goto end; } error = 0; switch (cmd) { case SIOCDIFPHYADDR: if (sc->gre_family == 0) break; gre_delete_tunnel(sc); break; #ifdef INET case SIOCSIFPHYADDR: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: error = in_gre_ioctl(sc, cmd, data); break; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: error = in6_gre_ioctl(sc, cmd, data); break; #endif case SIOCGTUNFIB: ifr->ifr_fib = sc->gre_fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->gre_fibnum = ifr->ifr_fib; break; case GRESKEY: case GRESOPTS: case GRESPORT: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; if ((error = copyin(ifr_data_get_ptr(ifr), &opt, sizeof(opt))) != 0) break; if (cmd == GRESKEY) { if (sc->gre_key == opt) break; } else if (cmd == GRESOPTS) { if (opt & ~GRE_OPTMASK) { error = EINVAL; break; } if (sc->gre_options == opt) break; } else if (cmd == GRESPORT) { if (opt != 0 && (opt < V_ipport_hifirstauto || opt > V_ipport_hilastauto)) { error = EINVAL; break; } if (sc->gre_port == opt) break; if ((sc->gre_options & GRE_UDPENCAP) == 0) { /* * UDP encapsulation is not enabled, thus * there is no need to reattach softc. */ sc->gre_port = opt; break; } } switch (sc->gre_family) { #ifdef INET case AF_INET: error = in_gre_setopts(sc, cmd, opt); break; #endif #ifdef INET6 case AF_INET6: error = in6_gre_setopts(sc, cmd, opt); break; #endif default: /* * Tunnel is not yet configured. * We can just change any parameters. */ if (cmd == GRESKEY) sc->gre_key = opt; if (cmd == GRESOPTS) sc->gre_options = opt; if (cmd == GRESPORT) sc->gre_port = opt; break; } /* * XXX: Do we need to initiate change of interface * state here? */ break; case GREGKEY: error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr), sizeof(sc->gre_key)); break; case GREGOPTS: error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr), sizeof(sc->gre_options)); break; case GREGPORT: error = copyout(&sc->gre_port, ifr_data_get_ptr(ifr), sizeof(sc->gre_port)); break; default: error = EINVAL; break; } if (error == 0 && sc->gre_family != 0) { if ( #ifdef INET cmd == SIOCSIFPHYADDR || #endif #ifdef INET6 cmd == SIOCSIFPHYADDR_IN6 || #endif 0) { if_link_state_change(ifp, LINK_STATE_UP); } } end: sx_xunlock(&gre_ioctl_sx); return (error); } static void gre_delete_tunnel(struct gre_softc *sc) { struct gre_socket *gs; sx_assert(&gre_ioctl_sx, SA_XLOCKED); if (sc->gre_family != 0) { CK_LIST_REMOVE(sc, chain); CK_LIST_REMOVE(sc, srchash); GRE_WAIT(); free(sc->gre_hdr, M_GRE); sc->gre_family = 0; } /* * If this Tunnel was the last one that could use UDP socket, * we should unlink socket from hash table and close it. */ if ((gs = sc->gre_so) != NULL && CK_LIST_EMPTY(&gs->list)) { CK_LIST_REMOVE(gs, chain); soclose(gs->so); NET_EPOCH_CALL(gre_sofree, &gs->epoch_ctx); sc->gre_so = NULL; } GRE2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(GRE2IFP(sc), LINK_STATE_DOWN); } struct gre_list * gre_hashinit(void) { struct gre_list *hash; int i; hash = malloc(sizeof(struct gre_list) * GRE_HASH_SIZE, M_GRE, M_WAITOK); for (i = 0; i < GRE_HASH_SIZE; i++) CK_LIST_INIT(&hash[i]); return (hash); } void gre_hashdestroy(struct gre_list *hash) { free(hash, M_GRE); } void gre_sofree(epoch_context_t ctx) { struct gre_socket *gs; gs = __containerof(ctx, struct gre_socket, epoch_ctx); free(gs, M_GRE); } static __inline uint16_t gre_cksum_add(uint16_t sum, uint16_t a) { uint16_t res; res = sum + a; return (res + (res < a)); } void gre_update_udphdr(struct gre_softc *sc, struct udphdr *udp, uint16_t csum) { sx_assert(&gre_ioctl_sx, SA_XLOCKED); MPASS(sc->gre_options & GRE_UDPENCAP); udp->uh_dport = htons(GRE_UDPPORT); udp->uh_sport = htons(sc->gre_port); udp->uh_sum = csum; udp->uh_ulen = 0; } void gre_update_hdr(struct gre_softc *sc, struct grehdr *gh) { uint32_t *opts; uint16_t flags; sx_assert(&gre_ioctl_sx, SA_XLOCKED); flags = 0; opts = gh->gre_opts; if (sc->gre_options & GRE_ENABLE_CSUM) { flags |= GRE_FLAGS_CP; sc->gre_hlen += 2 * sizeof(uint16_t); *opts++ = 0; } if (sc->gre_key != 0) { flags |= GRE_FLAGS_KP; sc->gre_hlen += sizeof(uint32_t); *opts++ = htonl(sc->gre_key); } if (sc->gre_options & GRE_ENABLE_SEQ) { flags |= GRE_FLAGS_SP; sc->gre_hlen += sizeof(uint32_t); *opts++ = 0; } else sc->gre_oseq = 0; gh->gre_flags = htons(flags); } int gre_input(struct mbuf *m, int off, int proto, void *arg) { struct gre_softc *sc = arg; struct grehdr *gh; struct ifnet *ifp; uint32_t *opts; #ifdef notyet uint32_t key; #endif uint16_t flags; int hlen, isr, af; ifp = GRE2IFP(sc); hlen = off + sizeof(struct grehdr) + 4 * sizeof(uint32_t); if (m->m_pkthdr.len < hlen) goto drop; if (m->m_len < hlen) { m = m_pullup(m, hlen); if (m == NULL) goto drop; } gh = (struct grehdr *)mtodo(m, off); flags = ntohs(gh->gre_flags); if (flags & ~GRE_FLAGS_MASK) goto drop; opts = gh->gre_opts; hlen = 2 * sizeof(uint16_t); if (flags & GRE_FLAGS_CP) { /* reserved1 field must be zero */ if (((uint16_t *)opts)[1] != 0) goto drop; if (in_cksum_skip(m, m->m_pkthdr.len, off) != 0) goto drop; hlen += 2 * sizeof(uint16_t); opts++; } if (flags & GRE_FLAGS_KP) { #ifdef notyet /* * XXX: The current implementation uses the key only for outgoing * packets. But we can check the key value here, or even in the * encapcheck function. */ key = ntohl(*opts); #endif hlen += sizeof(uint32_t); opts++; } #ifdef notyet } else key = 0; if (sc->gre_key != 0 && (key != sc->gre_key || key != 0)) goto drop; #endif if (flags & GRE_FLAGS_SP) { #ifdef notyet seq = ntohl(*opts); #endif hlen += sizeof(uint32_t); } switch (ntohs(gh->gre_proto)) { case ETHERTYPE_WCCP: /* * For WCCP skip an additional 4 bytes if after GRE header * doesn't follow an IP header. */ if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40) hlen += sizeof(uint32_t); /* FALLTHROUGH */ case ETHERTYPE_IP: isr = NETISR_IP; af = AF_INET; break; case ETHERTYPE_IPV6: isr = NETISR_IPV6; af = AF_INET6; break; default: goto drop; } m_adj(m, off + hlen); m_clrprotoflags(m); m->m_pkthdr.rcvif = ifp; M_SETFIB(m, ifp->if_fib); #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if ((ifp->if_flags & IFF_MONITOR) != 0) m_freem(m); else netisr_dispatch(isr, m); return (IPPROTO_DONE); drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return (IPPROTO_DONE); } static int gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { uint32_t af; if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; /* * Now save the af in the inbound pkt csum data, this is a cheat since * we are using the inbound csum_data field to carry the af over to * the gre_transmit() routine, avoiding using yet another mtag. */ m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); } static void gre_setseqn(struct grehdr *gh, uint32_t seq) { uint32_t *opts; uint16_t flags; opts = gh->gre_opts; flags = ntohs(gh->gre_flags); KASSERT((flags & GRE_FLAGS_SP) != 0, ("gre_setseqn called, but GRE_FLAGS_SP isn't set ")); if (flags & GRE_FLAGS_CP) opts++; if (flags & GRE_FLAGS_KP) opts++; *opts = htonl(seq); } static uint32_t gre_flowid(struct gre_softc *sc, struct mbuf *m, uint32_t af) { uint32_t flowid; if ((sc->gre_options & GRE_UDPENCAP) == 0 || sc->gre_port != 0) return (0); #ifndef RSS switch (af) { #ifdef INET case AF_INET: flowid = mtod(m, struct ip *)->ip_src.s_addr ^ mtod(m, struct ip *)->ip_dst.s_addr; break; #endif #ifdef INET6 case AF_INET6: flowid = mtod(m, struct ip6_hdr *)->ip6_src.s6_addr32[3] ^ mtod(m, struct ip6_hdr *)->ip6_dst.s6_addr32[3]; break; #endif default: flowid = 0; } #else /* RSS */ switch (af) { #ifdef INET case AF_INET: flowid = rss_hash_ip4_2tuple(mtod(m, struct ip *)->ip_src, mtod(m, struct ip *)->ip_dst); break; #endif #ifdef INET6 case AF_INET6: flowid = rss_hash_ip6_2tuple( &mtod(m, struct ip6_hdr *)->ip6_src, &mtod(m, struct ip6_hdr *)->ip6_dst); break; #endif default: flowid = 0; } #endif return (flowid); } #define MTAG_GRE 1307983903 static int gre_transmit(struct ifnet *ifp, struct mbuf *m) { GRE_RLOCK_TRACKER; struct gre_softc *sc; struct grehdr *gh; struct udphdr *uh; uint32_t af, flowid; int error, len; uint16_t proto; len = 0; GRE_RLOCK(); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); goto drop; } #endif error = ENETDOWN; sc = ifp->if_softc; if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->gre_family == 0 || (error = if_tunnel_check_nesting(ifp, m, MTAG_GRE, V_max_gre_nesting)) != 0) { m_freem(m); goto drop; } af = m->m_pkthdr.csum_data; BPF_MTAP2(ifp, &af, sizeof(af), m); m->m_flags &= ~(M_BCAST|M_MCAST); flowid = gre_flowid(sc, m, af); M_SETFIB(m, sc->gre_fibnum); M_PREPEND(m, sc->gre_hlen, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto drop; } bcopy(sc->gre_hdr, mtod(m, void *), sc->gre_hlen); /* Determine GRE proto */ switch (af) { #ifdef INET case AF_INET: proto = htons(ETHERTYPE_IP); break; #endif #ifdef INET6 case AF_INET6: proto = htons(ETHERTYPE_IPV6); break; #endif default: m_freem(m); error = ENETDOWN; goto drop; } /* Determine offset of GRE header */ switch (sc->gre_family) { #ifdef INET case AF_INET: len = sizeof(struct ip); break; #endif #ifdef INET6 case AF_INET6: len = sizeof(struct ip6_hdr); break; #endif default: m_freem(m); error = ENETDOWN; goto drop; } if (sc->gre_options & GRE_UDPENCAP) { uh = (struct udphdr *)mtodo(m, len); uh->uh_sport |= htons(V_ipport_hifirstauto) | (flowid >> 16) | (flowid & 0xFFFF); uh->uh_sport = htons(ntohs(uh->uh_sport) % V_ipport_hilastauto); uh->uh_ulen = htons(m->m_pkthdr.len - len); uh->uh_sum = gre_cksum_add(uh->uh_sum, htons(m->m_pkthdr.len - len + IPPROTO_UDP)); m->m_pkthdr.csum_flags = sc->gre_csumflags; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); len += sizeof(struct udphdr); } gh = (struct grehdr *)mtodo(m, len); gh->gre_proto = proto; if (sc->gre_options & GRE_ENABLE_SEQ) gre_setseqn(gh, sc->gre_oseq++); if (sc->gre_options & GRE_ENABLE_CSUM) { *(uint16_t *)gh->gre_opts = in_cksum_skip(m, m->m_pkthdr.len, len); } len = m->m_pkthdr.len - len; switch (sc->gre_family) { #ifdef INET case AF_INET: error = in_gre_output(m, af, sc->gre_hlen); break; #endif #ifdef INET6 case AF_INET6: error = in6_gre_output(m, af, sc->gre_hlen, flowid); break; #endif default: m_freem(m); error = ENETDOWN; } drop: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); } GRE_RUNLOCK(); return (error); } static void gre_qflush(struct ifnet *ifp __unused) { } static int gremodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: case MOD_UNLOAD: break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t gre_mod = { "if_gre", gremodevent, 0 }; DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gre, 1); Index: head/sys/net/if_ipsec.c =================================================================== --- head/sys/net/if_ipsec.c (revision 361748) +++ head/sys/net/if_ipsec.c (revision 361749) @@ -1,1045 +1,1066 @@ /*- * Copyright (c) 2016-2018 Yandex LLC * Copyright (c) 2016-2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include static MALLOC_DEFINE(M_IPSEC, "ipsec", "IPsec Virtual Tunnel Interface"); static const char ipsecname[] = "ipsec"; #if defined(INET) && defined(INET6) #define IPSEC_SPCOUNT 4 #else #define IPSEC_SPCOUNT 2 #endif struct ipsec_softc { struct ifnet *ifp; struct secpolicy *sp[IPSEC_SPCOUNT]; uint32_t reqid; u_int family; u_int fibnum; CK_LIST_ENTRY(ipsec_softc) idhash; CK_LIST_ENTRY(ipsec_softc) srchash; }; #define IPSEC_RLOCK_TRACKER struct epoch_tracker ipsec_et #define IPSEC_RLOCK() epoch_enter_preempt(net_epoch_preempt, &ipsec_et) #define IPSEC_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &ipsec_et) #define IPSEC_WAIT() epoch_wait_preempt(net_epoch_preempt) #ifndef IPSEC_HASH_SIZE #define IPSEC_HASH_SIZE (1 << 5) #endif CK_LIST_HEAD(ipsec_iflist, ipsec_softc); VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec_idhtbl) = NULL; #define V_ipsec_idhtbl VNET(ipsec_idhtbl) #ifdef INET VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec4_srchtbl) = NULL; #define V_ipsec4_srchtbl VNET(ipsec4_srchtbl) static const struct srcaddrtab *ipsec4_srctab = NULL; #endif #ifdef INET6 VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec6_srchtbl) = NULL; #define V_ipsec6_srchtbl VNET(ipsec6_srchtbl) static const struct srcaddrtab *ipsec6_srctab = NULL; #endif static struct ipsec_iflist * ipsec_idhash(uint32_t id) { return (&V_ipsec_idhtbl[fnv_32_buf(&id, sizeof(id), FNV1_32_INIT) & (IPSEC_HASH_SIZE - 1)]); } static struct ipsec_iflist * ipsec_srchash(const struct sockaddr *sa) { uint32_t hval; switch (sa->sa_family) { #ifdef INET case AF_INET: hval = fnv_32_buf( &((const struct sockaddr_in *)sa)->sin_addr.s_addr, sizeof(in_addr_t), FNV1_32_INIT); return (&V_ipsec4_srchtbl[hval & (IPSEC_HASH_SIZE - 1)]); #endif #ifdef INET6 case AF_INET6: hval = fnv_32_buf( &((const struct sockaddr_in6 *)sa)->sin6_addr, sizeof(struct in6_addr), FNV1_32_INIT); return (&V_ipsec6_srchtbl[hval & (IPSEC_HASH_SIZE - 1)]); #endif } return (NULL); } /* * ipsec_ioctl_sx protects from concurrent ioctls. */ static struct sx ipsec_ioctl_sx; SX_SYSINIT(ipsec_ioctl_sx, &ipsec_ioctl_sx, "ipsec_ioctl"); static int ipsec_init_reqid(struct ipsec_softc *); static int ipsec_set_tunnel(struct ipsec_softc *, struct sockaddr *, struct sockaddr *, uint32_t); static void ipsec_delete_tunnel(struct ipsec_softc *); static int ipsec_set_addresses(struct ifnet *, struct sockaddr *, struct sockaddr *); static int ipsec_set_reqid(struct ipsec_softc *, uint32_t); static void ipsec_set_running(struct ipsec_softc *); +#ifdef VIMAGE +static void ipsec_reassign(struct ifnet *, struct vnet *, char *); +#endif static void ipsec_srcaddr(void *, const struct sockaddr *, int); static int ipsec_ioctl(struct ifnet *, u_long, caddr_t); static int ipsec_transmit(struct ifnet *, struct mbuf *); static int ipsec_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void ipsec_qflush(struct ifnet *); static int ipsec_clone_create(struct if_clone *, int, caddr_t); static void ipsec_clone_destroy(struct ifnet *); VNET_DEFINE_STATIC(struct if_clone *, ipsec_cloner); #define V_ipsec_cloner VNET(ipsec_cloner) static int ipsec_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct ipsec_softc *sc; struct ifnet *ifp; sc = malloc(sizeof(*sc), M_IPSEC, M_WAITOK | M_ZERO); sc->fibnum = curthread->td_proc->p_fibnum; sc->ifp = ifp = if_alloc(IFT_TUNNEL); ifp->if_softc = sc; if_initname(ifp, ipsecname, unit); ifp->if_addrlen = 0; ifp->if_mtu = IPSEC_MTU; ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; ifp->if_ioctl = ipsec_ioctl; ifp->if_transmit = ipsec_transmit; ifp->if_qflush = ipsec_qflush; ifp->if_output = ipsec_output; +#ifdef VIMAGE + ifp->if_reassign = ipsec_reassign; +#endif if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(uint32_t)); return (0); } + +#ifdef VIMAGE +static void +ipsec_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused, + char *unused __unused) +{ + struct ipsec_softc *sc; + + sx_xlock(&ipsec_ioctl_sx); + sc = ifp->if_softc; + if (sc != NULL) + ipsec_delete_tunnel(sc); + sx_xunlock(&ipsec_ioctl_sx); +} +#endif /* VIMAGE */ static void ipsec_clone_destroy(struct ifnet *ifp) { struct ipsec_softc *sc; sx_xlock(&ipsec_ioctl_sx); sc = ifp->if_softc; ipsec_delete_tunnel(sc); /* * Delete softc from idhash on interface destroy, since * ipsec_delete_tunnel() keeps reqid unchanged. */ if (sc->reqid != 0) CK_LIST_REMOVE(sc, idhash); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&ipsec_ioctl_sx); IPSEC_WAIT(); if_free(ifp); free(sc, M_IPSEC); } static struct ipsec_iflist * ipsec_hashinit(void) { struct ipsec_iflist *hash; int i; hash = malloc(sizeof(struct ipsec_iflist) * IPSEC_HASH_SIZE, M_IPSEC, M_WAITOK); for (i = 0; i < IPSEC_HASH_SIZE; i++) CK_LIST_INIT(&hash[i]); return (hash); } static void vnet_ipsec_init(const void *unused __unused) { V_ipsec_idhtbl = ipsec_hashinit(); #ifdef INET V_ipsec4_srchtbl = ipsec_hashinit(); if (IS_DEFAULT_VNET(curvnet)) ipsec4_srctab = ip_encap_register_srcaddr(ipsec_srcaddr, NULL, M_WAITOK); #endif #ifdef INET6 V_ipsec6_srchtbl = ipsec_hashinit(); if (IS_DEFAULT_VNET(curvnet)) ipsec6_srctab = ip6_encap_register_srcaddr(ipsec_srcaddr, NULL, M_WAITOK); #endif V_ipsec_cloner = if_clone_simple(ipsecname, ipsec_clone_create, ipsec_clone_destroy, 0); } VNET_SYSINIT(vnet_ipsec_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_ipsec_init, NULL); static void vnet_ipsec_uninit(const void *unused __unused) { if_clone_detach(V_ipsec_cloner); free(V_ipsec_idhtbl, M_IPSEC); /* * Use V_ipsec_idhtbl pointer as indicator that VNET is going to be * destroyed, it is used by ipsec_srcaddr() callback. */ V_ipsec_idhtbl = NULL; IPSEC_WAIT(); #ifdef INET if (IS_DEFAULT_VNET(curvnet)) ip_encap_unregister_srcaddr(ipsec4_srctab); free(V_ipsec4_srchtbl, M_IPSEC); #endif #ifdef INET6 if (IS_DEFAULT_VNET(curvnet)) ip6_encap_unregister_srcaddr(ipsec6_srctab); free(V_ipsec6_srchtbl, M_IPSEC); #endif } VNET_SYSUNINIT(vnet_ipsec_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_ipsec_uninit, NULL); static struct secpolicy * ipsec_getpolicy(struct ipsec_softc *sc, int dir, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 0: 1)]); #endif #ifdef INET6 case AF_INET6: return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 0: 1) #ifdef INET + 2 #endif ]); #endif } return (NULL); } static struct secasindex * ipsec_getsaidx(struct ipsec_softc *sc, int dir, sa_family_t af) { struct secpolicy *sp; sp = ipsec_getpolicy(sc, dir, af); if (sp == NULL) return (NULL); return (&sp->req[0]->saidx); } static int ipsec_transmit(struct ifnet *ifp, struct mbuf *m) { IPSEC_RLOCK_TRACKER; struct ipsec_softc *sc; struct secpolicy *sp; struct ip *ip; uint32_t af; int error; IPSEC_RLOCK(); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); goto err; } #endif error = ENETDOWN; sc = ifp->if_softc; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || sc->family == 0) { m_freem(m); goto err; } /* Determine address family to correctly handle packet in BPF */ ip = mtod(m, struct ip *); switch (ip->ip_v) { #ifdef INET case IPVERSION: af = AF_INET; break; #endif #ifdef INET6 case (IPV6_VERSION >> 4): af = AF_INET6; break; #endif default: error = EAFNOSUPPORT; m_freem(m); goto err; } /* * Loop prevention. * XXX: for now just check presence of IPSEC_OUT_DONE mbuf tag. * We can read full chain and compare destination address, * proto and mode from xform_history with values from softc. */ if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { m_freem(m); goto err; } sp = ipsec_getpolicy(sc, IPSEC_DIR_OUTBOUND, af); key_addref(sp); M_SETFIB(m, sc->fibnum); BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); switch (af) { #ifdef INET case AF_INET: error = ipsec4_process_packet(m, sp, NULL); break; #endif #ifdef INET6 case AF_INET6: error = ipsec6_process_packet(m, sp, NULL); break; #endif default: panic("%s: unknown address family\n", __func__); } err: if (error != 0) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); IPSEC_RUNLOCK(); return (error); } static void ipsec_qflush(struct ifnet *ifp __unused) { } static int ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { return (ifp->if_transmit(ifp, m)); } int ipsec_if_input(struct mbuf *m, struct secasvar *sav, uint32_t af) { IPSEC_RLOCK_TRACKER; struct secasindex *saidx; struct ipsec_softc *sc; struct ifnet *ifp; if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) { m_freem(m); return (ENETDOWN); } if (sav->sah->saidx.mode != IPSEC_MODE_TUNNEL || sav->sah->saidx.proto != IPPROTO_ESP) return (0); IPSEC_RLOCK(); CK_LIST_FOREACH(sc, ipsec_idhash(sav->sah->saidx.reqid), idhash) { if (sc->family == 0) continue; saidx = ipsec_getsaidx(sc, IPSEC_DIR_INBOUND, sav->sah->saidx.src.sa.sa_family); /* SA's reqid should match reqid in SP */ if (saidx == NULL || sav->sah->saidx.reqid != saidx->reqid) continue; /* SAH's addresses should match tunnel endpoints. */ if (key_sockaddrcmp(&sav->sah->saidx.dst.sa, &saidx->dst.sa, 0) != 0) continue; if (key_sockaddrcmp(&sav->sah->saidx.src.sa, &saidx->src.sa, 0) == 0) break; } if (sc == NULL) { IPSEC_RUNLOCK(); /* Tunnel was not found. Nothing to do. */ return (0); } ifp = sc->ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ifp->if_flags & IFF_UP) == 0) { IPSEC_RUNLOCK(); m_freem(m); return (ENETDOWN); } /* * We found matching and working tunnel. * Set its ifnet as receiving interface. */ m->m_pkthdr.rcvif = ifp; m_clrprotoflags(m); M_SETFIB(m, ifp->if_fib); BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if ((ifp->if_flags & IFF_MONITOR) != 0) { IPSEC_RUNLOCK(); m_freem(m); return (ENETDOWN); } IPSEC_RUNLOCK(); return (0); } static int ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq*)data; struct sockaddr *dst, *src; struct ipsec_softc *sc; struct secasindex *saidx; #ifdef INET struct sockaddr_in *sin = NULL; #endif #ifdef INET6 struct sockaddr_in6 *sin6 = NULL; #endif uint32_t reqid; int error; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCADDMULTI: case SIOCDELMULTI: case SIOCGIFMTU: case SIOCSIFFLAGS: return (0); case SIOCSIFMTU: if (ifr->ifr_mtu < IPSEC_MTU_MIN || ifr->ifr_mtu > IPSEC_MTU_MAX) return (EINVAL); else ifp->if_mtu = ifr->ifr_mtu; return (0); } sx_xlock(&ipsec_ioctl_sx); sc = ifp->if_softc; /* Check that softc is still here */ if (sc == NULL) { error = ENXIO; goto bad; } error = 0; switch (cmd) { case SIOCSIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif error = EINVAL; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: src = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_dstaddr); break; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: src = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_dstaddr); break; #endif default: goto bad; } /* sa_family must be equal */ if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) goto bad; /* validate sa_len */ switch (src->sa_family) { #ifdef INET case AF_INET: if (src->sa_len != sizeof(struct sockaddr_in)) goto bad; break; #endif #ifdef INET6 case AF_INET6: if (src->sa_len != sizeof(struct sockaddr_in6)) goto bad; break; #endif default: error = EAFNOSUPPORT; goto bad; } /* check sa_family looks sane for the cmd */ error = EAFNOSUPPORT; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: if (src->sa_family == AF_INET) break; goto bad; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: if (src->sa_family == AF_INET6) break; goto bad; #endif } error = EADDRNOTAVAIL; switch (src->sa_family) { #ifdef INET case AF_INET: if (satosin(src)->sin_addr.s_addr == INADDR_ANY || satosin(dst)->sin_addr.s_addr == INADDR_ANY) goto bad; break; #endif #ifdef INET6 case AF_INET6: if (IN6_IS_ADDR_UNSPECIFIED( &satosin6(src)->sin6_addr) || IN6_IS_ADDR_UNSPECIFIED( &satosin6(dst)->sin6_addr)) goto bad; /* * Check validity of the scope zone ID of the * addresses, and convert it into the kernel * internal form if necessary. */ error = sa6_embedscope(satosin6(src), 0); if (error != 0) goto bad; error = sa6_embedscope(satosin6(dst), 0); if (error != 0) goto bad; #endif }; error = ipsec_set_addresses(ifp, src, dst); break; case SIOCDIFPHYADDR: ipsec_delete_tunnel(sc); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: #endif if (sc->family == 0) { error = EADDRNOTAVAIL; break; } saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family); switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (saidx->src.sa.sa_family != AF_INET) { error = EADDRNOTAVAIL; break; } sin = (struct sockaddr_in *)&ifr->ifr_addr; memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: if (saidx->src.sa.sa_family != AF_INET6) { error = EADDRNOTAVAIL; break; } sin6 = (struct sockaddr_in6 *) &(((struct in6_ifreq *)data)->ifr_addr); memset(sin6, 0, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); break; #endif default: error = EAFNOSUPPORT; } if (error == 0) { switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: sin->sin_addr = saidx->src.sin.sin_addr; break; case SIOCGIFPDSTADDR: sin->sin_addr = saidx->dst.sin.sin_addr; break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: sin6->sin6_addr = saidx->src.sin6.sin6_addr; break; case SIOCGIFPDSTADDR_IN6: sin6->sin6_addr = saidx->dst.sin6.sin6_addr; break; #endif } } if (error != 0) break; switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin); if (error != 0) memset(sin, 0, sizeof(*sin)); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin6); if (error == 0) error = sa6_recoverscope(sin6); if (error != 0) memset(sin6, 0, sizeof(*sin6)); #endif } break; case SIOCGTUNFIB: ifr->ifr_fib = sc->fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_SETIFFIB)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->fibnum = ifr->ifr_fib; break; case IPSECGREQID: reqid = sc->reqid; error = copyout(&reqid, ifr_data_get_ptr(ifr), sizeof(reqid)); break; case IPSECSREQID: if ((error = priv_check(curthread, PRIV_NET_SETIFCAP)) != 0) break; error = copyin(ifr_data_get_ptr(ifr), &reqid, sizeof(reqid)); if (error != 0) break; error = ipsec_set_reqid(sc, reqid); break; default: error = EINVAL; break; } bad: sx_xunlock(&ipsec_ioctl_sx); return (error); } /* * Check that ingress address belongs to local host. */ static void ipsec_set_running(struct ipsec_softc *sc) { struct secasindex *saidx; int localip; saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family); localip = 0; switch (sc->family) { #ifdef INET case AF_INET: localip = in_localip(saidx->src.sin.sin_addr); break; #endif #ifdef INET6 case AF_INET6: localip = in6_localip(&saidx->src.sin6.sin6_addr); break; #endif } if (localip != 0) sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; else sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; } /* * ifaddr_event handler. * Clear IFF_DRV_RUNNING flag when ingress address disappears to prevent * source address spoofing. */ static void ipsec_srcaddr(void *arg __unused, const struct sockaddr *sa, int event __unused) { struct ipsec_softc *sc; struct secasindex *saidx; /* Check that VNET is ready */ if (V_ipsec_idhtbl == NULL) return; NET_EPOCH_ASSERT(); CK_LIST_FOREACH(sc, ipsec_srchash(sa), srchash) { if (sc->family == 0) continue; saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sa->sa_family); if (saidx == NULL || key_sockaddrcmp(&saidx->src.sa, sa, 0) != 0) continue; ipsec_set_running(sc); } } /* * Allocate new private security policies for tunneling interface. * Each tunneling interface has following security policies for * both AF: * 0.0.0.0/0[any] 0.0.0.0/0[any] -P in \ * ipsec esp/tunnel/RemoteIP-LocalIP/unique:reqid * 0.0.0.0/0[any] 0.0.0.0/0[any] -P out \ * ipsec esp/tunnel/LocalIP-RemoteIP/unique:reqid */ static int ipsec_newpolicies(struct ipsec_softc *sc, struct secpolicy *sp[IPSEC_SPCOUNT], const struct sockaddr *src, const struct sockaddr *dst, uint32_t reqid) { struct ipsecrequest *isr; int i; memset(sp, 0, sizeof(struct secpolicy *) * IPSEC_SPCOUNT); for (i = 0; i < IPSEC_SPCOUNT; i++) { if ((sp[i] = key_newsp()) == NULL) goto fail; if ((isr = ipsec_newisr()) == NULL) goto fail; sp[i]->policy = IPSEC_POLICY_IPSEC; sp[i]->state = IPSEC_SPSTATE_DEAD; sp[i]->req[sp[i]->tcount++] = isr; sp[i]->created = time_second; /* Use priority field to store if_index */ sp[i]->priority = sc->ifp->if_index; isr->level = IPSEC_LEVEL_UNIQUE; isr->saidx.proto = IPPROTO_ESP; isr->saidx.mode = IPSEC_MODE_TUNNEL; isr->saidx.reqid = reqid; if (i % 2 == 0) { sp[i]->spidx.dir = IPSEC_DIR_INBOUND; bcopy(src, &isr->saidx.dst, src->sa_len); bcopy(dst, &isr->saidx.src, dst->sa_len); } else { sp[i]->spidx.dir = IPSEC_DIR_OUTBOUND; bcopy(src, &isr->saidx.src, src->sa_len); bcopy(dst, &isr->saidx.dst, dst->sa_len); } sp[i]->spidx.ul_proto = IPSEC_ULPROTO_ANY; #ifdef INET if (i < 2) { sp[i]->spidx.src.sa.sa_family = sp[i]->spidx.dst.sa.sa_family = AF_INET; sp[i]->spidx.src.sa.sa_len = sp[i]->spidx.dst.sa.sa_len = sizeof(struct sockaddr_in); continue; } #endif #ifdef INET6 sp[i]->spidx.src.sa.sa_family = sp[i]->spidx.dst.sa.sa_family = AF_INET6; sp[i]->spidx.src.sa.sa_len = sp[i]->spidx.dst.sa.sa_len = sizeof(struct sockaddr_in6); #endif } return (0); fail: for (i = 0; i < IPSEC_SPCOUNT; i++) key_freesp(&sp[i]); return (ENOMEM); } static int ipsec_check_reqid(uint32_t reqid) { struct ipsec_softc *sc; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); CK_LIST_FOREACH(sc, ipsec_idhash(reqid), idhash) { if (sc->reqid == reqid) return (EEXIST); } return (0); } /* * We use key_newreqid() to automatically obtain unique reqid. * Then we check that given id is unique, i.e. it is not used by * another if_ipsec(4) interface. This macro limits the number of * tries to get unique id. */ #define IPSEC_REQID_TRYCNT 64 static int ipsec_init_reqid(struct ipsec_softc *sc) { uint32_t reqid; int trycount; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); if (sc->reqid != 0) /* already initialized */ return (0); trycount = IPSEC_REQID_TRYCNT; while (--trycount > 0) { reqid = key_newreqid(); if (ipsec_check_reqid(reqid) == 0) break; } if (trycount == 0) return (EEXIST); sc->reqid = reqid; CK_LIST_INSERT_HEAD(ipsec_idhash(reqid), sc, idhash); return (0); } /* * Set or update reqid for given tunneling interface. * When specified reqid is zero, generate new one. * We are protected by ioctl_sx lock from concurrent id generation. * Also softc would not disappear while we hold ioctl_sx lock. */ static int ipsec_set_reqid(struct ipsec_softc *sc, uint32_t reqid) { struct secasindex *saidx; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); if (sc->reqid == reqid && reqid != 0) return (0); if (reqid != 0) { /* Check that specified reqid doesn't exist */ if (ipsec_check_reqid(reqid) != 0) return (EEXIST); if (sc->reqid != 0) { CK_LIST_REMOVE(sc, idhash); IPSEC_WAIT(); } sc->reqid = reqid; CK_LIST_INSERT_HEAD(ipsec_idhash(reqid), sc, idhash); } else { /* Generate new reqid */ if (ipsec_init_reqid(sc) != 0) return (EEXIST); } /* Tunnel isn't fully configured, just return. */ if (sc->family == 0) return (0); saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family); KASSERT(saidx != NULL, ("saidx is NULL, but family is %d", sc->family)); return (ipsec_set_tunnel(sc, &saidx->src.sa, &saidx->dst.sa, sc->reqid)); } /* * Set tunnel endpoints addresses. */ static int ipsec_set_addresses(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) { struct ipsec_softc *sc; struct secasindex *saidx; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); sc = ifp->if_softc; if (sc->family != 0) { saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, src->sa_family); if (saidx != NULL && saidx->reqid == sc->reqid && key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 && key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0) return (0); /* Nothing has been changed. */ } /* If reqid is not set, generate new one. */ if (ipsec_init_reqid(sc) != 0) return (EEXIST); return (ipsec_set_tunnel(sc, src, dst, sc->reqid)); } static int ipsec_set_tunnel(struct ipsec_softc *sc, struct sockaddr *src, struct sockaddr *dst, uint32_t reqid) { struct secpolicy *sp[IPSEC_SPCOUNT]; int i; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); /* Allocate SP with new addresses. */ if (ipsec_newpolicies(sc, sp, src, dst, reqid) == 0) { /* Add new policies to SPDB */ if (key_register_ifnet(sp, IPSEC_SPCOUNT) != 0) { for (i = 0; i < IPSEC_SPCOUNT; i++) key_freesp(&sp[i]); return (EAGAIN); } if (sc->family != 0) ipsec_delete_tunnel(sc); for (i = 0; i < IPSEC_SPCOUNT; i++) sc->sp[i] = sp[i]; sc->family = src->sa_family; CK_LIST_INSERT_HEAD(ipsec_srchash(src), sc, srchash); } else { sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; return (ENOMEM); } ipsec_set_running(sc); return (0); } static void ipsec_delete_tunnel(struct ipsec_softc *sc) { int i; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if (sc->family != 0) { CK_LIST_REMOVE(sc, srchash); sc->family = 0; /* * Make sure that ipsec_if_input() will not do access * to softc's policies. */ IPSEC_WAIT(); key_unregister_ifnet(sc->sp, IPSEC_SPCOUNT); for (i = 0; i < IPSEC_SPCOUNT; i++) key_freesp(&sc->sp[i]); } } Index: head/sys/net/if_me.c =================================================================== --- head/sys/net/if_me.c (revision 361748) +++ head/sys/net/if_me.c (revision 361749) @@ -1,665 +1,686 @@ /*- * Copyright (c) 2014, 2018 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MEMTU (1500 - sizeof(struct mobhdr)) static const char mename[] = "me"; static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP"); /* Minimal forwarding header RFC 2004 */ struct mobhdr { uint8_t mob_proto; /* protocol */ uint8_t mob_flags; /* flags */ #define MOB_FLAGS_SP 0x80 /* source present */ uint16_t mob_csum; /* header checksum */ struct in_addr mob_dst; /* original destination address */ struct in_addr mob_src; /* original source addr (optional) */ } __packed; struct me_softc { struct ifnet *me_ifp; u_int me_fibnum; struct in_addr me_src; struct in_addr me_dst; CK_LIST_ENTRY(me_softc) chain; CK_LIST_ENTRY(me_softc) srchash; }; CK_LIST_HEAD(me_list, me_softc); #define ME2IFP(sc) ((sc)->me_ifp) #define ME_READY(sc) ((sc)->me_src.s_addr != 0) #define ME_RLOCK_TRACKER struct epoch_tracker me_et #define ME_RLOCK() epoch_enter_preempt(net_epoch_preempt, &me_et) #define ME_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &me_et) #define ME_WAIT() epoch_wait_preempt(net_epoch_preempt) #ifndef ME_HASH_SIZE #define ME_HASH_SIZE (1 << 4) #endif VNET_DEFINE_STATIC(struct me_list *, me_hashtbl) = NULL; VNET_DEFINE_STATIC(struct me_list *, me_srchashtbl) = NULL; #define V_me_hashtbl VNET(me_hashtbl) #define V_me_srchashtbl VNET(me_srchashtbl) #define ME_HASH(src, dst) (V_me_hashtbl[\ me_hashval((src), (dst)) & (ME_HASH_SIZE - 1)]) #define ME_SRCHASH(src) (V_me_srchashtbl[\ fnv_32_buf(&(src), sizeof(src), FNV1_32_INIT) & (ME_HASH_SIZE - 1)]) static struct sx me_ioctl_sx; SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl"); static int me_clone_create(struct if_clone *, int, caddr_t); static void me_clone_destroy(struct ifnet *); VNET_DEFINE_STATIC(struct if_clone *, me_cloner); #define V_me_cloner VNET(me_cloner) +#ifdef VIMAGE +static void me_reassign(struct ifnet *, struct vnet *, char *); +#endif static void me_qflush(struct ifnet *); static int me_transmit(struct ifnet *, struct mbuf *); static int me_ioctl(struct ifnet *, u_long, caddr_t); static int me_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int me_input(struct mbuf *, int, int, void *); static int me_set_tunnel(struct me_softc *, in_addr_t, in_addr_t); static void me_delete_tunnel(struct me_softc *); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Minimal Encapsulation for IP (RFC 2004)"); #ifndef MAX_ME_NEST #define MAX_ME_NEST 1 #endif VNET_DEFINE_STATIC(int, max_me_nesting) = MAX_ME_NEST; #define V_max_me_nesting VNET(max_me_nesting) SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(max_me_nesting), 0, "Max nested tunnels"); static uint32_t me_hashval(in_addr_t src, in_addr_t dst) { uint32_t ret; ret = fnv_32_buf(&src, sizeof(src), FNV1_32_INIT); return (fnv_32_buf(&dst, sizeof(dst), ret)); } static struct me_list * me_hashinit(void) { struct me_list *hash; int i; hash = malloc(sizeof(struct me_list) * ME_HASH_SIZE, M_IFME, M_WAITOK); for (i = 0; i < ME_HASH_SIZE; i++) CK_LIST_INIT(&hash[i]); return (hash); } static void vnet_me_init(const void *unused __unused) { V_me_cloner = if_clone_simple(mename, me_clone_create, me_clone_destroy, 0); } VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_me_init, NULL); static void vnet_me_uninit(const void *unused __unused) { if (V_me_hashtbl != NULL) { free(V_me_hashtbl, M_IFME); V_me_hashtbl = NULL; ME_WAIT(); free(V_me_srchashtbl, M_IFME); } if_clone_detach(V_me_cloner); } VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_me_uninit, NULL); static int me_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct me_softc *sc; sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO); sc->me_fibnum = curthread->td_proc->p_fibnum; ME2IFP(sc) = if_alloc(IFT_TUNNEL); ME2IFP(sc)->if_softc = sc; if_initname(ME2IFP(sc), mename, unit); ME2IFP(sc)->if_mtu = MEMTU; ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; ME2IFP(sc)->if_output = me_output; ME2IFP(sc)->if_ioctl = me_ioctl; ME2IFP(sc)->if_transmit = me_transmit; ME2IFP(sc)->if_qflush = me_qflush; +#ifdef VIMAGE + ME2IFP(sc)->if_reassign = me_reassign; +#endif ME2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; ME2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(ME2IFP(sc)); bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t)); return (0); } + +#ifdef VIMAGE +static void +me_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused, + char *unused __unused) +{ + struct me_softc *sc; + + sx_xlock(&me_ioctl_sx); + sc = ifp->if_softc; + if (sc != NULL) + me_delete_tunnel(sc); + sx_xunlock(&me_ioctl_sx); +} +#endif /* VIMAGE */ static void me_clone_destroy(struct ifnet *ifp) { struct me_softc *sc; sx_xlock(&me_ioctl_sx); sc = ifp->if_softc; me_delete_tunnel(sc); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&me_ioctl_sx); ME_WAIT(); if_free(ifp); free(sc, M_IFME); } static int me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct sockaddr_in *src, *dst; struct me_softc *sc; int error; switch (cmd) { case SIOCSIFMTU: if (ifr->ifr_mtu < 576) return (EINVAL); ifp->if_mtu = ifr->ifr_mtu; return (0); case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: return (0); } sx_xlock(&me_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { error = ENXIO; goto end; } error = 0; switch (cmd) { case SIOCSIFPHYADDR: src = &((struct in_aliasreq *)data)->ifra_addr; dst = &((struct in_aliasreq *)data)->ifra_dstaddr; if (src->sin_family != dst->sin_family || src->sin_family != AF_INET || src->sin_len != dst->sin_len || src->sin_len != sizeof(struct sockaddr_in)) { error = EINVAL; break; } if (src->sin_addr.s_addr == INADDR_ANY || dst->sin_addr.s_addr == INADDR_ANY) { error = EADDRNOTAVAIL; break; } error = me_set_tunnel(sc, src->sin_addr.s_addr, dst->sin_addr.s_addr); break; case SIOCDIFPHYADDR: me_delete_tunnel(sc); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (!ME_READY(sc)) { error = EADDRNOTAVAIL; break; } src = (struct sockaddr_in *)&ifr->ifr_addr; memset(src, 0, sizeof(*src)); src->sin_family = AF_INET; src->sin_len = sizeof(*src); switch (cmd) { case SIOCGIFPSRCADDR: src->sin_addr = sc->me_src; break; case SIOCGIFPDSTADDR: src->sin_addr = sc->me_dst; break; } error = prison_if(curthread->td_ucred, sintosa(src)); if (error != 0) memset(src, 0, sizeof(*src)); break; case SIOCGTUNFIB: ifr->ifr_fib = sc->me_fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->me_fibnum = ifr->ifr_fib; break; default: error = EINVAL; break; } end: sx_xunlock(&me_ioctl_sx); return (error); } static int me_lookup(const struct mbuf *m, int off, int proto, void **arg) { const struct ip *ip; struct me_softc *sc; if (V_me_hashtbl == NULL) return (0); NET_EPOCH_ASSERT(); ip = mtod(m, const struct ip *); CK_LIST_FOREACH(sc, &ME_HASH(ip->ip_dst.s_addr, ip->ip_src.s_addr), chain) { if (sc->me_src.s_addr == ip->ip_dst.s_addr && sc->me_dst.s_addr == ip->ip_src.s_addr) { if ((ME2IFP(sc)->if_flags & IFF_UP) == 0) return (0); *arg = sc; return (ENCAP_DRV_LOOKUP); } } return (0); } /* * Check that ingress address belongs to local host. */ static void me_set_running(struct me_softc *sc) { if (in_localip(sc->me_src)) ME2IFP(sc)->if_drv_flags |= IFF_DRV_RUNNING; else ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; } /* * ifaddr_event handler. * Clear IFF_DRV_RUNNING flag when ingress address disappears to prevent * source address spoofing. */ static void me_srcaddr(void *arg __unused, const struct sockaddr *sa, int event __unused) { const struct sockaddr_in *sin; struct me_softc *sc; /* Check that VNET is ready */ if (V_me_hashtbl == NULL) return; NET_EPOCH_ASSERT(); sin = (const struct sockaddr_in *)sa; CK_LIST_FOREACH(sc, &ME_SRCHASH(sin->sin_addr.s_addr), srchash) { if (sc->me_src.s_addr != sin->sin_addr.s_addr) continue; me_set_running(sc); } } static int me_set_tunnel(struct me_softc *sc, in_addr_t src, in_addr_t dst) { struct me_softc *tmp; sx_assert(&me_ioctl_sx, SA_XLOCKED); if (V_me_hashtbl == NULL) { V_me_hashtbl = me_hashinit(); V_me_srchashtbl = me_hashinit(); } if (sc->me_src.s_addr == src && sc->me_dst.s_addr == dst) return (0); CK_LIST_FOREACH(tmp, &ME_HASH(src, dst), chain) { if (tmp == sc) continue; if (tmp->me_src.s_addr == src && tmp->me_dst.s_addr == dst) return (EADDRNOTAVAIL); } me_delete_tunnel(sc); sc->me_dst.s_addr = dst; sc->me_src.s_addr = src; CK_LIST_INSERT_HEAD(&ME_HASH(src, dst), sc, chain); CK_LIST_INSERT_HEAD(&ME_SRCHASH(src), sc, srchash); me_set_running(sc); if_link_state_change(ME2IFP(sc), LINK_STATE_UP); return (0); } static void me_delete_tunnel(struct me_softc *sc) { sx_assert(&me_ioctl_sx, SA_XLOCKED); if (ME_READY(sc)) { CK_LIST_REMOVE(sc, chain); CK_LIST_REMOVE(sc, srchash); ME_WAIT(); sc->me_src.s_addr = 0; sc->me_dst.s_addr = 0; ME2IFP(sc)->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(ME2IFP(sc), LINK_STATE_DOWN); } } static uint16_t me_in_cksum(uint16_t *p, int nwords) { uint32_t sum = 0; while (nwords-- > 0) sum += *p++; sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); return (~sum); } static int me_input(struct mbuf *m, int off, int proto, void *arg) { struct me_softc *sc = arg; struct mobhdr *mh; struct ifnet *ifp; struct ip *ip; int hlen; NET_EPOCH_ASSERT(); ifp = ME2IFP(sc); /* checks for short packets */ hlen = sizeof(struct mobhdr); if (m->m_pkthdr.len < sizeof(struct ip) + hlen) hlen -= sizeof(struct in_addr); if (m->m_len < sizeof(struct ip) + hlen) m = m_pullup(m, sizeof(struct ip) + hlen); if (m == NULL) goto drop; mh = (struct mobhdr *)mtodo(m, sizeof(struct ip)); /* check for wrong flags */ if (mh->mob_flags & (~MOB_FLAGS_SP)) { m_freem(m); goto drop; } if (mh->mob_flags) { if (hlen != sizeof(struct mobhdr)) { m_freem(m); goto drop; } } else hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); /* check mobile header checksum */ if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) { m_freem(m); goto drop; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif ip = mtod(m, struct ip *); ip->ip_dst = mh->mob_dst; ip->ip_p = mh->mob_proto; ip->ip_sum = 0; ip->ip_len = htons(m->m_pkthdr.len - hlen); if (mh->mob_flags) ip->ip_src = mh->mob_src; memmove(mtodo(m, hlen), ip, sizeof(struct ip)); m_adj(m, hlen); m_clrprotoflags(m); m->m_pkthdr.rcvif = ifp; m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); M_SETFIB(m, ifp->if_fib); hlen = AF_INET; BPF_MTAP2(ifp, &hlen, sizeof(hlen), m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if ((ifp->if_flags & IFF_MONITOR) != 0) m_freem(m); else netisr_dispatch(NETISR_IP, m); return (IPPROTO_DONE); drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return (IPPROTO_DONE); } static int me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro __unused) { uint32_t af; if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); } #define MTAG_ME 1414491977 static int me_transmit(struct ifnet *ifp, struct mbuf *m) { ME_RLOCK_TRACKER; struct mobhdr mh; struct me_softc *sc; struct ip *ip; uint32_t af; int error, hlen, plen; ME_RLOCK(); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error != 0) goto drop; #endif error = ENETDOWN; sc = ifp->if_softc; if (sc == NULL || !ME_READY(sc) || (ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (error = if_tunnel_check_nesting(ifp, m, MTAG_ME, V_max_me_nesting)) != 0) { m_freem(m); goto drop; } af = m->m_pkthdr.csum_data; if (af != AF_INET) { error = EAFNOSUPPORT; m_freem(m); goto drop; } if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) { error = ENOBUFS; goto drop; } ip = mtod(m, struct ip *); /* Fragmented datagramms shouldn't be encapsulated */ if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { error = EINVAL; m_freem(m); goto drop; } mh.mob_proto = ip->ip_p; mh.mob_src = ip->ip_src; mh.mob_dst = ip->ip_dst; if (in_hosteq(sc->me_src, ip->ip_src)) { hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); mh.mob_flags = 0; } else { hlen = sizeof(struct mobhdr); mh.mob_flags = MOB_FLAGS_SP; } BPF_MTAP2(ifp, &af, sizeof(af), m); plen = m->m_pkthdr.len; ip->ip_src = sc->me_src; ip->ip_dst = sc->me_dst; m->m_flags &= ~(M_BCAST|M_MCAST); M_SETFIB(m, sc->me_fibnum); M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto drop; } if (m->m_len < sizeof(struct ip) + hlen) m = m_pullup(m, sizeof(struct ip) + hlen); if (m == NULL) { error = ENOBUFS; goto drop; } memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip)); ip = mtod(m, struct ip *); ip->ip_len = htons(m->m_pkthdr.len); ip->ip_p = IPPROTO_MOBILE; ip->ip_sum = 0; mh.mob_csum = 0; mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t)); bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen); error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); drop: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); } ME_RUNLOCK(); return (error); } static void me_qflush(struct ifnet *ifp __unused) { } static const struct srcaddrtab *me_srcaddrtab = NULL; static const struct encaptab *ecookie = NULL; static const struct encap_config me_encap_cfg = { .proto = IPPROTO_MOBILE, .min_length = sizeof(struct ip) + sizeof(struct mobhdr) - sizeof(in_addr_t), .exact_match = ENCAP_DRV_LOOKUP, .lookup = me_lookup, .input = me_input }; static int memodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: me_srcaddrtab = ip_encap_register_srcaddr(me_srcaddr, NULL, M_WAITOK); ecookie = ip_encap_attach(&me_encap_cfg, NULL, M_WAITOK); break; case MOD_UNLOAD: ip_encap_detach(ecookie); ip_encap_unregister_srcaddr(me_srcaddrtab); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t me_mod = { "if_me", memodevent, 0 }; DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_me, 1);