diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index afbdc14e47c4..00c95c451966 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -1,1208 +1,1214 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include VNET_DEFINE(int, ip_defttl) = IPDEFTTL; SYSCTL_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_defttl), 0, "Maximum TTL on IP packets"); VNET_DEFINE(struct inpcbhead, ripcb); VNET_DEFINE(struct inpcbinfo, ripcbinfo); #define V_ripcb VNET(ripcb) #define V_ripcbinfo VNET(ripcbinfo) /* * Control and data hooks for ipfw, dummynet, divert and so on. * The data hooks are not used here but it is convenient * to keep them all in one place. */ VNET_DEFINE(ip_fw_chk_ptr_t, ip_fw_chk_ptr) = NULL; VNET_DEFINE(ip_fw_ctl_ptr_t, ip_fw_ctl_ptr) = NULL; int (*ip_dn_ctl_ptr)(struct sockopt *); int (*ip_dn_io_ptr)(struct mbuf **, struct ip_fw_args *); void (*ip_divert_ptr)(struct mbuf *, bool); int (*ng_ipfw_input_p)(struct mbuf **, struct ip_fw_args *, bool); #ifdef INET /* * Hooks for multicast routing. They all default to NULL, so leave them not * initialized and rely on BSS being set to 0. */ /* * The socket used to communicate with the multicast routing daemon. */ VNET_DEFINE(struct socket *, ip_mrouter); /* * The various mrouter and rsvp functions. 
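 * These pointers are filled in at runtime when the multicast routing / RSVP
 * code registers itself and stay NULL otherwise (BSS default, per the
 * comment above), so every caller has to test the pointer before
 * dispatching; see rip_ctloutput() and rip_detach() below.  A minimal
 * standalone sketch of that pattern, with hypothetical names that are not
 * part of this file:
 */
static int (*example_hook)(int);		/* set by an optional module */

static int
example_dispatch(int arg)
{

	/* Report "not supported" while the module is not loaded. */
	return (example_hook != NULL ? example_hook(arg) : EOPNOTSUPP);
}
/*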
*/ int (*ip_mrouter_set)(struct socket *, struct sockopt *); int (*ip_mrouter_get)(struct socket *, struct sockopt *); int (*ip_mrouter_done)(void); int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *); int (*mrt_ioctl)(u_long, caddr_t, int); int (*legal_vif_num)(int); u_long (*ip_mcast_src)(int); int (*rsvp_input_p)(struct mbuf **, int *, int); int (*ip_rsvp_vif)(struct socket *, struct sockopt *); void (*ip_rsvp_force_done)(struct socket *); #endif /* INET */ extern struct protosw inetsw[]; u_long rip_sendspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, maxdgram, CTLFLAG_RW, &rip_sendspace, 0, "Maximum outgoing raw IP datagram size"); u_long rip_recvspace = 9216; SYSCTL_ULONG(_net_inet_raw, OID_AUTO, recvspace, CTLFLAG_RW, &rip_recvspace, 0, "Maximum space for incoming raw IP datagrams"); /* * Hash functions */ #define INP_PCBHASH_RAW_SIZE 256 #define INP_PCBHASH_RAW(proto, laddr, faddr, mask) \ (((proto) + (laddr) + (faddr)) % (mask) + 1) #ifdef INET static void rip_inshash(struct inpcb *inp) { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbhead *pcbhash; int hash; INP_INFO_WLOCK_ASSERT(pcbinfo); INP_WLOCK_ASSERT(inp); if (inp->inp_ip_p != 0 && inp->inp_laddr.s_addr != INADDR_ANY && inp->inp_faddr.s_addr != INADDR_ANY) { hash = INP_PCBHASH_RAW(inp->inp_ip_p, inp->inp_laddr.s_addr, inp->inp_faddr.s_addr, pcbinfo->ipi_hashmask); } else hash = 0; pcbhash = &pcbinfo->ipi_hashbase[hash]; CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash); } static void rip_delhash(struct inpcb *inp) { INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); INP_WLOCK_ASSERT(inp); CK_LIST_REMOVE(inp, inp_hash); } #endif /* INET */ /* * Raw interface to IP protocol. */ /* * Initialize raw connection block q. */ static void rip_zone_change(void *tag) { uma_zone_set_max(V_ripcbinfo.ipi_zone, maxsockets); } static int rip_inpcb_init(void *mem, int size, int flags) { struct inpcb *inp = mem; INP_LOCK_INIT(inp, "inp", "rawinp"); return (0); } void rip_init(void) { in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE, 1, "ripcb", rip_inpcb_init, IPI_HASHFIELDS_NONE); EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, EVENTHANDLER_PRI_ANY); } #ifdef VIMAGE static void rip_destroy(void *unused __unused) { in_pcbinfo_destroy(&V_ripcbinfo); } VNET_SYSUNINIT(raw_ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, rip_destroy, NULL); #endif #ifdef INET static int rip_append(struct inpcb *last, struct ip *ip, struct mbuf *n, struct sockaddr_in *ripsrc) { int policyfail = 0; INP_LOCK_ASSERT(last); #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* check AH/ESP integrity. */ if (IPSEC_ENABLED(ipv4)) { if (IPSEC_CHECK_POLICY(ipv4, n, last) != 0) policyfail = 1; } #endif /* IPSEC */ #ifdef MAC if (!policyfail && mac_inpcb_check_deliver(last, n) != 0) policyfail = 1; #endif /* Check the minimum TTL for socket. */ if (last->inp_ip_minttl && last->inp_ip_minttl > ip->ip_ttl) policyfail = 1; if (!policyfail) { struct mbuf *opts = NULL; struct socket *so; so = last->inp_socket; if ((last->inp_flags & INP_CONTROLOPTS) || (so->so_options & (SO_TIMESTAMP | SO_BINTIME))) ip_savecontrol(last, &opts, ip, n); SOCKBUF_LOCK(&so->so_rcv); if (sbappendaddr_locked(&so->so_rcv, (struct sockaddr *)ripsrc, n, opts) == 0) { soroverflow_locked(so); m_freem(n); if (opts) m_freem(opts); } else sorwakeup_locked(so); } else m_freem(n); return (policyfail); } /* * Setup generic address and protocol structures for raw_input routine, then * pass them along with mbuf chain. 
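 *
 * The PCB lookup below is two-tiered: fully bound/connected raw PCBs are
 * kept in the bucket picked by INP_PCBHASH_RAW() above, while anything
 * bound to a wildcard address or protocol sits in bucket 0, which is
 * scanned second.  A standalone sketch of the bucket selection performed
 * by rip_inshash() (hypothetical name, not part of this file):
 */
static int
example_raw_bucket(int proto, uint32_t laddr, uint32_t faddr, u_int hashmask)
{

	/* Wildcard PCBs always live in bucket 0. */
	if (proto == 0 || laddr == INADDR_ANY || faddr == INADDR_ANY)
		return (0);
	/* Fully specified PCBs hash into buckets 1..hashmask. */
	return ((proto + laddr + faddr) % hashmask + 1);
}
/*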
*/ int rip_input(struct mbuf **mp, int *offp, int proto) { struct ifnet *ifp; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct inpcb *inp, *last; struct sockaddr_in ripsrc; int hash; NET_EPOCH_ASSERT(); *mp = NULL; bzero(&ripsrc, sizeof(ripsrc)); ripsrc.sin_len = sizeof(ripsrc); ripsrc.sin_family = AF_INET; ripsrc.sin_addr = ip->ip_src; last = NULL; ifp = m->m_pkthdr.rcvif; hash = INP_PCBHASH_RAW(proto, ip->ip_src.s_addr, ip->ip_dst.s_addr, V_ripcbinfo.ipi_hashmask); CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[hash], inp_hash) { if (inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; if (inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (last != NULL) { struct mbuf *n; n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); last = NULL; } INP_RLOCK(inp); if (__predict_false(inp->inp_flags2 & INP_FREED)) goto skip_1; if (jailed_without_vnet(inp->inp_cred)) { /* * XXX: If faddr was bound to multicast group, * jailed raw socket will drop datagram. */ if (prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) goto skip_1; } last = inp; continue; skip_1: INP_RUNLOCK(inp); } CK_LIST_FOREACH(inp, &V_ripcbinfo.ipi_hashbase[0], inp_hash) { if (inp->inp_ip_p && inp->inp_ip_p != proto) continue; #ifdef INET6 /* XXX inp locking */ if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif if (!in_nullhost(inp->inp_laddr) && !in_hosteq(inp->inp_laddr, ip->ip_dst)) continue; if (!in_nullhost(inp->inp_faddr) && !in_hosteq(inp->inp_faddr, ip->ip_src)) continue; if (last != NULL) { struct mbuf *n; n = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (n != NULL) (void) rip_append(last, ip, n, &ripsrc); /* XXX count dropped packet */ INP_RUNLOCK(last); last = NULL; } INP_RLOCK(inp); if (__predict_false(inp->inp_flags2 & INP_FREED)) goto skip_2; if (jailed_without_vnet(inp->inp_cred)) { /* * Allow raw socket in jail to receive multicast; * assume process had PRIV_NETINET_RAW at attach, * and fall through into normal filter path if so. */ if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && prison_check_ip4(inp->inp_cred, &ip->ip_dst) != 0) goto skip_2; } /* * If this raw socket has multicast state, and we * have received a multicast, check if this socket * should receive it, as multicast filtering is now * the responsibility of the transport layer. */ if (inp->inp_moptions != NULL && IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { /* * If the incoming datagram is for IGMP, allow it * through unconditionally to the raw socket. * * In the case of IGMPv2, we may not have explicitly * joined the group, and may have set IFF_ALLMULTI * on the interface. imo_multi_filter() may discard * control traffic we actually need to see. * * Userland multicast routing daemons should continue * filter the control traffic appropriately. 
*/ int blocked; blocked = MCAST_PASS; if (proto != IPPROTO_IGMP) { struct sockaddr_in group; bzero(&group, sizeof(struct sockaddr_in)); group.sin_len = sizeof(struct sockaddr_in); group.sin_family = AF_INET; group.sin_addr = ip->ip_dst; blocked = imo_multi_filter(inp->inp_moptions, ifp, (struct sockaddr *)&group, (struct sockaddr *)&ripsrc); } if (blocked != MCAST_PASS) { IPSTAT_INC(ips_notmember); goto skip_2; } } last = inp; continue; skip_2: INP_RUNLOCK(inp); } if (last != NULL) { if (rip_append(last, ip, m, &ripsrc) != 0) IPSTAT_INC(ips_delivered); INP_RUNLOCK(last); } else { if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) { IPSTAT_INC(ips_noproto); IPSTAT_DEC(ips_delivered); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0); } else { m_freem(m); } } return (IPPROTO_DONE); } /* * Generate IP header and pass packet to ip_output. Tack on options user may * have setup with control call. */ int rip_output(struct mbuf *m, struct socket *so, ...) { struct epoch_tracker et; struct ip *ip; int error; struct inpcb *inp = sotoinpcb(so); va_list ap; u_long dst; int flags = ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0) | IP_ALLOWBROADCAST; int cnt, hlen; u_char opttype, optlen, *cp; va_start(ap, so); dst = va_arg(ap, u_long); va_end(ap); /* * If the user handed us a complete IP packet, use it. Otherwise, * allocate an mbuf for a header and fill it in. */ if ((inp->inp_flags & INP_HDRINCL) == 0) { if (m->m_pkthdr.len + sizeof(struct ip) > IP_MAXPACKET) { m_freem(m); return(EMSGSIZE); } M_PREPEND(m, sizeof(struct ip), M_NOWAIT); if (m == NULL) return(ENOBUFS); INP_RLOCK(inp); ip = mtod(m, struct ip *); ip->ip_tos = inp->inp_ip_tos; if (inp->inp_flags & INP_DONTFRAG) ip->ip_off = htons(IP_DF); else ip->ip_off = htons(0); ip->ip_p = inp->inp_ip_p; ip->ip_len = htons(m->m_pkthdr.len); ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; #ifdef ROUTE_MPATH if (CALC_FLOWID_OUTBOUND) { uint32_t hash_type, hash_val; hash_val = fib4_calc_software_hash(ip->ip_src, ip->ip_dst, 0, 0, ip->ip_p, &hash_type); m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); flags |= IP_NODEFAULTFLOWID; } #endif if (jailed(inp->inp_cred)) { /* * prison_local_ip4() would be good enough but would * let a source of INADDR_ANY pass, which we do not * want to see from jails. */ if (ip->ip_src.s_addr == INADDR_ANY) { NET_EPOCH_ENTER(et); error = in_pcbladdr(inp, &ip->ip_dst, &ip->ip_src, inp->inp_cred); NET_EPOCH_EXIT(et); } else { error = prison_local_ip4(inp->inp_cred, &ip->ip_src); } if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } } ip->ip_ttl = inp->inp_ip_ttl; } else { if (m->m_pkthdr.len > IP_MAXPACKET) { m_freem(m); return (EMSGSIZE); } if (m->m_pkthdr.len < sizeof(*ip)) { m_freem(m); return (EINVAL); } m = m_pullup(m, sizeof(*ip)); if (m == NULL) return (ENOMEM); ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; if (m->m_len < hlen) { m = m_pullup(m, hlen); if (m == NULL) return (EINVAL); ip = mtod(m, struct ip *); } #ifdef ROUTE_MPATH if (CALC_FLOWID_OUTBOUND) { uint32_t hash_type, hash_val; hash_val = fib4_calc_software_hash(ip->ip_dst, ip->ip_src, 0, 0, ip->ip_p, &hash_type); m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); flags |= IP_NODEFAULTFLOWID; } #endif INP_RLOCK(inp); /* * Don't allow both user specified and setsockopt options, * and don't allow packet length sizes that will crash. 
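 *
 * For reference, the userland code that reaches this INP_HDRINCL branch
 * looks roughly like the following (hypothetical sketch, error handling
 * omitted; note that ip_len must be supplied in network byte order to
 * satisfy the ntohs() comparison just below):
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *	int on = 1;
 *
 *	setsockopt(s, IPPROTO_IP, IP_HDRINCL, &on, sizeof(on));
 *	(build a complete struct ip header plus payload in buf)
 *	sendto(s, buf, len, 0, (struct sockaddr *)&dst, sizeof(dst));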
*/ if ((hlen < sizeof (*ip)) || ((hlen > sizeof (*ip)) && inp->inp_options) || (ntohs(ip->ip_len) != m->m_pkthdr.len)) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } error = prison_check_ip4(inp->inp_cred, &ip->ip_src); if (error != 0) { INP_RUNLOCK(inp); m_freem(m); return (error); } /* * Don't allow IP options which do not have the required * structure as specified in section 3.1 of RFC 791 on * pages 15-23. */ cp = (u_char *)(ip + 1); cnt = hlen - sizeof (struct ip); for (; cnt > 0; cnt -= optlen, cp += optlen) { opttype = cp[IPOPT_OPTVAL]; if (opttype == IPOPT_EOL) break; if (opttype == IPOPT_NOP) { optlen = 1; continue; } if (cnt < IPOPT_OLEN + sizeof(u_char)) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } optlen = cp[IPOPT_OLEN]; if (optlen < IPOPT_OLEN + sizeof(u_char) || optlen > cnt) { INP_RUNLOCK(inp); m_freem(m); return (EINVAL); } } /* * This doesn't allow application to specify ID of zero, * but we got this limitation from the beginning of history. */ if (ip->ip_id == 0) ip_fillid(ip); /* * XXX prevent ip_output from overwriting header fields. */ flags |= IP_RAWOUTPUT; IPSTAT_INC(ips_rawout); } if (inp->inp_flags & INP_ONESBCAST) flags |= IP_SENDONES; #ifdef MAC mac_inpcb_create_mbuf(inp, m); #endif NET_EPOCH_ENTER(et); error = ip_output(m, inp->inp_options, NULL, flags, inp->inp_moptions, inp); NET_EPOCH_EXIT(et); INP_RUNLOCK(inp); return (error); } /* * Raw IP socket option processing. * * IMPORTANT NOTE regarding access control: Traditionally, raw sockets could * only be created by a privileged process, and as such, socket option * operations to manage system properties on any raw socket were allowed to * take place without explicit additional access control checks. However, * raw sockets can now also be created in jail(), and therefore explicit * checks are now required. Likewise, raw sockets can be used by a process * after it gives up privilege, so some caution is required. For options * passed down to the IP layer via ip_ctloutput(), checks are assumed to be * performed in ip_ctloutput() and therefore no check occurs here. * Unilaterally checking priv_check() here breaks normal IP socket option * operations on raw sockets. * * When adding new socket options here, make sure to add access control * checks here as necessary. * * XXX-BZ inp locking? */ int rip_ctloutput(struct socket *so, struct sockopt *sopt) { struct inpcb *inp = sotoinpcb(so); int error, optval; if (sopt->sopt_level != IPPROTO_IP) { if ((sopt->sopt_level == SOL_SOCKET) && (sopt->sopt_name == SO_SETFIB)) { inp->inp_inc.inc_fibnum = so->so_fibnum; return (0); } return (EINVAL); } error = 0; switch (sopt->sopt_dir) { case SOPT_GET: switch (sopt->sopt_name) { case IP_HDRINCL: optval = inp->inp_flags & INP_HDRINCL; error = sooptcopyout(sopt, &optval, sizeof optval); break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: /* ADD actually returns the body... 
*/ case IP_FW_GET: case IP_FW_TABLE_GETSIZE: case IP_FW_TABLE_LIST: case IP_FW_NAT_GET_CONFIG: case IP_FW_NAT_GET_LOG: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_GET: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT; break ; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_get ? ip_mrouter_get(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; case SOPT_SET: switch (sopt->sopt_name) { case IP_HDRINCL: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) break; if (optval) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; break; case IP_FW3: /* generic ipfw v.3 functions */ case IP_FW_ADD: case IP_FW_DEL: case IP_FW_FLUSH: case IP_FW_ZERO: case IP_FW_RESETLOG: case IP_FW_TABLE_ADD: case IP_FW_TABLE_DEL: case IP_FW_TABLE_FLUSH: case IP_FW_NAT_CFG: case IP_FW_NAT_DEL: if (V_ip_fw_ctl_ptr != NULL) error = V_ip_fw_ctl_ptr(sopt); else error = ENOPROTOOPT; break; case IP_DUMMYNET3: /* generic dummynet v.3 functions */ case IP_DUMMYNET_CONFIGURE: case IP_DUMMYNET_DEL: case IP_DUMMYNET_FLUSH: if (ip_dn_ctl_ptr != NULL) error = ip_dn_ctl_ptr(sopt); else error = ENOPROTOOPT ; break ; case IP_RSVP_ON: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_init(so); break; case IP_RSVP_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_done(); break; case IP_RSVP_VIF_ON: case IP_RSVP_VIF_OFF: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_rsvp_vif ? ip_rsvp_vif(so, sopt) : EINVAL; break; case MRT_INIT: case MRT_DONE: case MRT_ADD_VIF: case MRT_DEL_VIF: case MRT_ADD_MFC: case MRT_DEL_MFC: case MRT_VERSION: case MRT_ASSERT: case MRT_API_SUPPORT: case MRT_API_CONFIG: case MRT_ADD_BW_UPCALL: case MRT_DEL_BW_UPCALL: error = priv_check(curthread, PRIV_NETINET_MROUTE); if (error != 0) return (error); error = ip_mrouter_set ? ip_mrouter_set(so, sopt) : EOPNOTSUPP; break; default: error = ip_ctloutput(so, sopt); break; } break; } return (error); } /* * This function exists solely to receive the PRC_IFDOWN messages which are * sent by if_down(). It looks for an ifaddr whose ifa_addr is sa, and calls * in_ifadown() to remove all routes corresponding to that address. It also * receives the PRC_IFUP messages from if_up() and reinstalls the interface * routes. */ void rip_ctlinput(int cmd, struct sockaddr *sa, void *vip) { struct rm_priotracker in_ifa_tracker; struct in_ifaddr *ia; struct ifnet *ifp; int err; int flags; switch (cmd) { case PRC_IFDOWN: IN_IFADDR_RLOCK(&in_ifa_tracker); CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa && (ia->ia_flags & IFA_ROUTE)) { ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * in_scrubprefix() kills the interface route. */ in_scrubprefix(ia, 0); /* * in_ifadown gets rid of all the rest of the * routes. This is not quite the right thing * to do, but at least if we are running a * routing process they will come back. 
*/ in_ifadown(&ia->ia_ifa, 0); ifa_free(&ia->ia_ifa); break; } } if (ia == NULL) /* If ia matched, already unlocked. */ IN_IFADDR_RUNLOCK(&in_ifa_tracker); break; case PRC_IFUP: IN_IFADDR_RLOCK(&in_ifa_tracker); CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { if (ia->ia_ifa.ifa_addr == sa) break; } if (ia == NULL || (ia->ia_flags & IFA_ROUTE)) { IN_IFADDR_RUNLOCK(&in_ifa_tracker); return; } ifa_ref(&ia->ia_ifa); IN_IFADDR_RUNLOCK(&in_ifa_tracker); flags = RTF_UP; ifp = ia->ia_ifa.ifa_ifp; if ((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_flags & IFF_POINTOPOINT)) flags |= RTF_HOST; err = ifa_del_loopback_route((struct ifaddr *)ia, sa); rt_addrmsg(RTM_ADD, &ia->ia_ifa, ia->ia_ifp->if_fib); err = in_handle_ifaddr_route(RTM_ADD, ia); if (err == 0) ia->ia_flags |= IFA_ROUTE; err = ifa_add_loopback_route((struct ifaddr *)ia, sa); ifa_free(&ia->ia_ifa); break; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + case PRC_MSGSIZE: + if (IPSEC_ENABLED(ipv4)) + IPSEC_CTLINPUT(ipv4, cmd, sa, vip); + break; +#endif } } static int rip_attach(struct socket *so, int proto, struct thread *td) { struct inpcb *inp; int error; inp = sotoinpcb(so); KASSERT(inp == NULL, ("rip_attach: inp != NULL")); error = priv_check(td, PRIV_NETINET_RAW); if (error) return (error); if (proto >= IPPROTO_MAX || proto < 0) return EPROTONOSUPPORT; error = soreserve(so, rip_sendspace, rip_recvspace); if (error) return (error); INP_INFO_WLOCK(&V_ripcbinfo); error = in_pcballoc(so, &V_ripcbinfo); if (error) { INP_INFO_WUNLOCK(&V_ripcbinfo); return (error); } inp = (struct inpcb *)so->so_pcb; inp->inp_vflag |= INP_IPV4; inp->inp_ip_p = proto; inp->inp_ip_ttl = V_ip_defttl; rip_inshash(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); INP_WUNLOCK(inp); return (0); } static void rip_detach(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_detach: inp == NULL")); KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, ("rip_detach: not closed")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); if (so == V_ip_mrouter && ip_mrouter_done) ip_mrouter_done(); if (ip_rsvp_force_done) ip_rsvp_force_done(so); if (so == V_ip_rsvpd) ip_rsvp_done(); in_pcbdetach(inp); in_pcbfree(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); } static void rip_dodisconnect(struct socket *so, struct inpcb *inp) { struct inpcbinfo *pcbinfo; pcbinfo = inp->inp_pcbinfo; INP_INFO_WLOCK(pcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr.s_addr = INADDR_ANY; rip_inshash(inp); SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTED; SOCK_UNLOCK(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(pcbinfo); } static void rip_abort(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_abort: inp == NULL")); rip_dodisconnect(so, inp); } static void rip_close(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_close: inp == NULL")); rip_dodisconnect(so, inp); } static int rip_disconnect(struct socket *so) { struct inpcb *inp; if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_disconnect: inp == NULL")); rip_dodisconnect(so, inp); return (0); } static int rip_bind(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; int error; if (nam->sa_family != AF_INET) return (EAFNOSUPPORT); if (nam->sa_len != sizeof(*addr)) return (EINVAL); error = prison_check_ip4(td->td_ucred, &addr->sin_addr); if (error != 0) return (error); inp = sotoinpcb(so); KASSERT(inp != NULL, 
("rip_bind: inp == NULL")); if (CK_STAILQ_EMPTY(&V_ifnet) || (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) || (addr->sin_addr.s_addr && (inp->inp_flags & INP_BINDANY) == 0 && ifa_ifwithaddr_check((struct sockaddr *)addr) == 0)) return (EADDRNOTAVAIL); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_laddr = addr->sin_addr; rip_inshash(inp); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_connect(struct socket *so, struct sockaddr *nam, struct thread *td) { struct sockaddr_in *addr = (struct sockaddr_in *)nam; struct inpcb *inp; if (nam->sa_len != sizeof(*addr)) return (EINVAL); if (CK_STAILQ_EMPTY(&V_ifnet)) return (EADDRNOTAVAIL); if (addr->sin_family != AF_INET && addr->sin_family != AF_IMPLINK) return (EAFNOSUPPORT); inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_connect: inp == NULL")); INP_INFO_WLOCK(&V_ripcbinfo); INP_WLOCK(inp); rip_delhash(inp); inp->inp_faddr = addr->sin_addr; rip_inshash(inp); soisconnected(so); INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_ripcbinfo); return (0); } static int rip_shutdown(struct socket *so) { struct inpcb *inp; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_shutdown: inp == NULL")); INP_WLOCK(inp); socantsendmore(so); INP_WUNLOCK(inp); return (0); } static int rip_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct thread *td) { struct inpcb *inp; u_long dst; int error; inp = sotoinpcb(so); KASSERT(inp != NULL, ("rip_send: inp == NULL")); if (control != NULL) { m_freem(control); control = NULL; } /* * Note: 'dst' reads below are unlocked. */ if (so->so_state & SS_ISCONNECTED) { if (nam) { error = EISCONN; goto release; } dst = inp->inp_faddr.s_addr; /* Unlocked read. */ } else { error = 0; if (nam == NULL) error = ENOTCONN; else if (nam->sa_family != AF_INET) error = EAFNOSUPPORT; else if (nam->sa_len != sizeof(struct sockaddr_in)) error = EINVAL; if (error != 0) goto release; dst = ((struct sockaddr_in *)nam)->sin_addr.s_addr; } return (rip_output(m, so, dst)); release: m_freem(m); return (error); } #endif /* INET */ static int rip_pcblist(SYSCTL_HANDLER_ARGS) { struct xinpgen xig; struct epoch_tracker et; struct inpcb *inp; int error; if (req->newptr != 0) return (EPERM); if (req->oldptr == 0) { int n; n = V_ripcbinfo.ipi_count; n += imax(n / 8, 10); req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xinpcb); return (0); } if ((error = sysctl_wire_old_buffer(req, 0)) != 0) return (error); bzero(&xig, sizeof(xig)); xig.xig_len = sizeof xig; xig.xig_count = V_ripcbinfo.ipi_count; xig.xig_gen = V_ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; error = SYSCTL_OUT(req, &xig, sizeof xig); if (error) return (error); NET_EPOCH_ENTER(et); for (inp = CK_LIST_FIRST(V_ripcbinfo.ipi_listhead); inp != NULL; inp = CK_LIST_NEXT(inp, inp_list)) { INP_RLOCK(inp); if (inp->inp_gencnt <= xig.xig_gen && cr_canseeinpcb(req->td->td_ucred, inp) == 0) { struct xinpcb xi; in_pcbtoxinpcb(inp, &xi); INP_RUNLOCK(inp); error = SYSCTL_OUT(req, &xi, sizeof xi); if (error) break; } else INP_RUNLOCK(inp); } NET_EPOCH_EXIT(et); if (!error) { /* * Give the user an updated idea of our state. If the * generation differs from what we told her before, she knows * that something happened while we were processing this * request, and it might be necessary to retry. 
*/ xig.xig_gen = V_ripcbinfo.ipi_gencnt; xig.xig_sogen = so_gencnt; xig.xig_count = V_ripcbinfo.ipi_count; error = SYSCTL_OUT(req, &xig, sizeof xig); } return (error); } SYSCTL_PROC(_net_inet_raw, OID_AUTO/*XXX*/, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, rip_pcblist, "S,xinpcb", "List of active raw IP sockets"); #ifdef INET struct pr_usrreqs rip_usrreqs = { .pru_abort = rip_abort, .pru_attach = rip_attach, .pru_bind = rip_bind, .pru_connect = rip_connect, .pru_control = in_control, .pru_detach = rip_detach, .pru_disconnect = rip_disconnect, .pru_peeraddr = in_getpeeraddr, .pru_send = rip_send, .pru_shutdown = rip_shutdown, .pru_sockaddr = in_getsockaddr, .pru_sosetlabel = in_pcbsosetlabel, .pru_close = rip_close, }; #endif /* INET */ diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c index cd24750607ea..dcc607a5b617 100644 --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -1,1530 +1,1534 @@ /* $FreeBSD$ */ /* $KAME: ipsec.c,v 1.103 2001/05/24 07:14:18 sakane Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPsec controller part. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #ifdef INET6 #include #endif #include #include #include /*XXX*/ #include #include #include #include #include #include #include #include /* NB: name changed so netstat doesn't use it. */ VNET_PCPUSTAT_DEFINE(struct ipsecstat, ipsec4stat); VNET_PCPUSTAT_SYSINIT(ipsec4stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ipsec4stat); #endif /* VIMAGE */ /* DF bit on encap. 
0: clear 1: set 2: copy */ VNET_DEFINE(int, ip4_ipsec_dfbit) = 0; +VNET_DEFINE(int, ip4_ipsec_min_pmtu) = 576; VNET_DEFINE(int, ip4_esp_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_esp_net_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_ah_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip4_ah_net_deflev) = IPSEC_LEVEL_USE; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ VNET_DEFINE(int, ip4_ipsec_ecn) = 0; VNET_DEFINE_STATIC(int, ip4_filtertunnel) = 0; #define V_ip4_filtertunnel VNET(ip4_filtertunnel) VNET_DEFINE_STATIC(int, check_policy_history) = 0; #define V_check_policy_history VNET(check_policy_history) VNET_DEFINE_STATIC(struct secpolicy *, def_policy) = NULL; #define V_def_policy VNET(def_policy) static int sysctl_def_policy(SYSCTL_HANDLER_ARGS) { int error, value; value = V_def_policy->policy; error = sysctl_handle_int(oidp, &value, 0, req); if (error == 0) { if (value != IPSEC_POLICY_DISCARD && value != IPSEC_POLICY_NONE) return (EINVAL); V_def_policy->policy = value; } return (error); } /* * Crypto support requirements: * * 1 require hardware support * -1 require software support * 0 take anything */ VNET_DEFINE(int, crypto_support) = CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE; /* * Use asynchronous mode to parallelize crypto jobs: * * 0 - disabled * 1 - enabled */ VNET_DEFINE(int, async_crypto) = 0; /* * TCP/UDP checksum handling policy for transport mode NAT-T (RFC3948) * * 0 - auto: incrementally recompute, when checksum delta is known; * if checksum delta isn't known, reset checksum to zero for UDP, * and mark csum_flags as valid for TCP. * 1 - fully recompute TCP/UDP checksum. */ VNET_DEFINE(int, natt_cksum_policy) = 0; FEATURE(ipsec, "Internet Protocol Security (IPsec)"); FEATURE(ipsec_natt, "UDP Encapsulation of IPsec ESP Packets ('NAT-T')"); SYSCTL_DECL(_net_inet_ipsec); /* net.inet.ipsec */ SYSCTL_PROC(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 0, 0, sysctl_def_policy, "I", "IPsec default policy."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_esp_trans_deflev), 0, "Default ESP transport mode level"); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_esp_net_deflev), 0, "Default ESP tunnel mode level."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_trans_deflev), 0, "AH transfer mode default level."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_net_deflev), 0, "AH tunnel mode default level."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_CLEARTOS, ah_cleartos, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ah_cleartos), 0, "If set, clear type-of-service field when doing AH computation."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DFBIT, dfbit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_dfbit), 0, "Do not fragment bit on encap."); +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_MIN_PMTU, min_pmtu, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_min_pmtu), 0, + "Lowest acceptable PMTU value."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ECN, ecn, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_ecn), 0, "Explicit Congestion Notification handling."); SYSCTL_INT(_net_inet_ipsec, OID_AUTO, crypto_support, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(crypto_support), 0, "Crypto driver selection."); SYSCTL_INT(_net_inet_ipsec, OID_AUTO, async_crypto, CTLFLAG_VNET | 
CTLFLAG_RW, &VNET_NAME(async_crypto), 0, "Use asynchronous mode to parallelize crypto jobs."); SYSCTL_INT(_net_inet_ipsec, OID_AUTO, check_policy_history, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(check_policy_history), 0, "Use strict check of inbound packets to security policy compliance."); SYSCTL_INT(_net_inet_ipsec, OID_AUTO, natt_cksum_policy, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(natt_cksum_policy), 0, "Method to fix TCP/UDP checksum for transport mode IPsec after NAT."); SYSCTL_INT(_net_inet_ipsec, OID_AUTO, filtertunnel, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_filtertunnel), 0, "If set, filter packets from an IPsec tunnel."); SYSCTL_VNET_PCPUSTAT(_net_inet_ipsec, OID_AUTO, ipsecstats, struct ipsecstat, ipsec4stat, "IPsec IPv4 statistics."); #ifdef REGRESSION /* * When set to 1, IPsec will send packets with the same sequence number. * This allows to verify if the other side has proper replay attacks detection. */ VNET_DEFINE(int, ipsec_replay) = 0; SYSCTL_INT(_net_inet_ipsec, OID_AUTO, test_replay, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_replay), 0, "Emulate replay attack"); /* * When set 1, IPsec will send packets with corrupted HMAC. * This allows to verify if the other side properly detects modified packets. */ VNET_DEFINE(int, ipsec_integrity) = 0; SYSCTL_INT(_net_inet_ipsec, OID_AUTO, test_integrity, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_integrity), 0, "Emulate man-in-the-middle attack"); #endif #ifdef INET6 VNET_PCPUSTAT_DEFINE(struct ipsecstat, ipsec6stat); VNET_PCPUSTAT_SYSINIT(ipsec6stat); #ifdef VIMAGE VNET_PCPUSTAT_SYSUNINIT(ipsec6stat); #endif /* VIMAGE */ VNET_DEFINE(int, ip6_esp_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_esp_net_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_ah_trans_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_ah_net_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_ipsec_ecn) = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ VNET_DEFINE_STATIC(int, ip6_filtertunnel) = 0; #define V_ip6_filtertunnel VNET(ip6_filtertunnel) SYSCTL_DECL(_net_inet6_ipsec6); /* net.inet6.ipsec6 */ SYSCTL_PROC(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY, def_policy, CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 0, 0, sysctl_def_policy, "I", "IPsec default policy."); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_esp_trans_deflev), 0, "Default ESP transport mode level."); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_NETLEV, esp_net_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_esp_net_deflev), 0, "Default ESP tunnel mode level."); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_TRANSLEV, ah_trans_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ah_trans_deflev), 0, "AH transfer mode default level."); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_AH_NETLEV, ah_net_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ah_net_deflev), 0, "AH tunnel mode default level."); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ECN, ecn, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ipsec_ecn), 0, "Explicit Congestion Notification handling."); SYSCTL_INT(_net_inet6_ipsec6, OID_AUTO, filtertunnel, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_filtertunnel), 0, "If set, filter packets from an IPsec tunnel."); SYSCTL_VNET_PCPUSTAT(_net_inet6_ipsec6, IPSECCTL_STATS, ipsecstats, struct ipsecstat, ipsec6stat, "IPsec IPv6 statistics."); #endif /* INET6 */ static int ipsec_in_reject(struct secpolicy *, struct inpcb *, const struct mbuf *); #ifdef INET static void ipsec4_get_ulp(const struct mbuf *, struct 
secpolicyindex *, int); static void ipsec4_setspidx_ipaddr(const struct mbuf *, struct secpolicyindex *); #endif #ifdef INET6 static void ipsec6_get_ulp(const struct mbuf *m, struct secpolicyindex *, int); static void ipsec6_setspidx_ipaddr(const struct mbuf *, struct secpolicyindex *); #endif /* * Return a held reference to the default SP. */ static struct secpolicy * key_allocsp_default(void) { key_addref(V_def_policy); return (V_def_policy); } static void ipsec_invalidate_cache(struct inpcb *inp, u_int dir) { struct secpolicy *sp; INP_WLOCK_ASSERT(inp); if (dir == IPSEC_DIR_OUTBOUND) { if (inp->inp_sp->flags & INP_INBOUND_POLICY) return; sp = inp->inp_sp->sp_in; inp->inp_sp->sp_in = NULL; } else { if (inp->inp_sp->flags & INP_OUTBOUND_POLICY) return; sp = inp->inp_sp->sp_out; inp->inp_sp->sp_out = NULL; } if (sp != NULL) key_freesp(&sp); /* release extra reference */ } static void ipsec_cachepolicy(struct inpcb *inp, struct secpolicy *sp, u_int dir) { uint32_t genid; int downgrade; INP_LOCK_ASSERT(inp); if (dir == IPSEC_DIR_OUTBOUND) { /* Do we have configured PCB policy? */ if (inp->inp_sp->flags & INP_OUTBOUND_POLICY) return; /* Another thread has already set cached policy */ if (inp->inp_sp->sp_out != NULL) return; /* * Do not cache OUTBOUND policy if PCB isn't connected, * i.e. foreign address is INADDR_ANY/UNSPECIFIED. */ #ifdef INET if ((inp->inp_vflag & INP_IPV4) != 0 && inp->inp_faddr.s_addr == INADDR_ANY) return; #endif #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) return; #endif } else { /* Do we have configured PCB policy? */ if (inp->inp_sp->flags & INP_INBOUND_POLICY) return; /* Another thread has already set cached policy */ if (inp->inp_sp->sp_in != NULL) return; /* * Do not cache INBOUND policy for listen socket, * that is bound to INADDR_ANY/UNSPECIFIED address. */ #ifdef INET if ((inp->inp_vflag & INP_IPV4) != 0 && inp->inp_faddr.s_addr == INADDR_ANY) return; #endif #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0 && IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) return; #endif } downgrade = 0; if (!INP_WLOCKED(inp)) { if ((downgrade = INP_TRY_UPGRADE(inp)) == 0) return; } if (dir == IPSEC_DIR_OUTBOUND) inp->inp_sp->sp_out = sp; else inp->inp_sp->sp_in = sp; /* * SP is already referenced by the lookup code. * We take extra reference here to avoid race in the * ipsec_getpcbpolicy() function - SP will not be freed in the * time between we take SP pointer from the cache and key_addref() * call. */ key_addref(sp); genid = key_getspgen(); if (genid != inp->inp_sp->genid) { ipsec_invalidate_cache(inp, dir); inp->inp_sp->genid = genid; } KEYDBG(IPSEC_STAMP, printf("%s: PCB(%p): cached %s SP(%p)\n", __func__, inp, dir == IPSEC_DIR_OUTBOUND ? "OUTBOUND": "INBOUND", sp)); if (downgrade != 0) INP_DOWNGRADE(inp); } static struct secpolicy * ipsec_checkpolicy(struct secpolicy *sp, struct inpcb *inp, int *error) { /* Save found OUTBOUND policy into PCB SP cache. */ if (inp != NULL && inp->inp_sp != NULL && inp->inp_sp->sp_out == NULL) ipsec_cachepolicy(inp, sp, IPSEC_DIR_OUTBOUND); switch (sp->policy) { default: printf("%s: invalid policy %u\n", __func__, sp->policy); /* FALLTHROUGH */ case IPSEC_POLICY_DISCARD: *error = -EINVAL; /* Packet is discarded by caller. */ /* FALLTHROUGH */ case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: key_freesp(&sp); sp = NULL; /* NB: force NULL result. 
*/ break; case IPSEC_POLICY_IPSEC: /* XXXAE: handle LARVAL SP */ break; } KEYDBG(IPSEC_DUMP, printf("%s: get SP(%p), error %d\n", __func__, sp, *error)); return (sp); } static struct secpolicy * ipsec_getpcbpolicy(struct inpcb *inp, u_int dir) { struct secpolicy *sp; int flags, downgrade; if (inp == NULL || inp->inp_sp == NULL) return (NULL); INP_LOCK_ASSERT(inp); flags = inp->inp_sp->flags; if (dir == IPSEC_DIR_OUTBOUND) { sp = inp->inp_sp->sp_out; flags &= INP_OUTBOUND_POLICY; } else { sp = inp->inp_sp->sp_in; flags &= INP_INBOUND_POLICY; } /* * Check flags. If we have PCB SP, just return it. * Otherwise we need to check that cached SP entry isn't stale. */ if (flags == 0) { if (sp == NULL) return (NULL); if (inp->inp_sp->genid != key_getspgen()) { /* Invalidate the cache. */ downgrade = 0; if (!INP_WLOCKED(inp)) { if ((downgrade = INP_TRY_UPGRADE(inp)) == 0) return (NULL); } ipsec_invalidate_cache(inp, IPSEC_DIR_OUTBOUND); ipsec_invalidate_cache(inp, IPSEC_DIR_INBOUND); if (downgrade != 0) INP_DOWNGRADE(inp); return (NULL); } KEYDBG(IPSEC_STAMP, printf("%s: PCB(%p): cache hit SP(%p)\n", __func__, inp, sp)); /* Return referenced cached policy */ } key_addref(sp); return (sp); } #ifdef INET static void ipsec4_get_ulp(const struct mbuf *m, struct secpolicyindex *spidx, int needport) { uint8_t nxt; int off; /* Sanity check. */ IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip), ("packet too short")); if (m->m_len >= sizeof (struct ip)) { const struct ip *ip = mtod(m, const struct ip *); if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) goto done; off = ip->ip_hl << 2; nxt = ip->ip_p; } else { struct ip ih; m_copydata(m, 0, sizeof (struct ip), (caddr_t) &ih); if (ih.ip_off & htons(IP_MF | IP_OFFMASK)) goto done; off = ih.ip_hl << 2; nxt = ih.ip_p; } while (off < m->m_pkthdr.len) { struct ip6_ext ip6e; struct tcphdr th; struct udphdr uh; switch (nxt) { case IPPROTO_TCP: spidx->ul_proto = nxt; if (!needport) goto done_proto; if (off + sizeof(struct tcphdr) > m->m_pkthdr.len) goto done; m_copydata(m, off, sizeof (th), (caddr_t) &th); spidx->src.sin.sin_port = th.th_sport; spidx->dst.sin.sin_port = th.th_dport; return; case IPPROTO_UDP: spidx->ul_proto = nxt; if (!needport) goto done_proto; if (off + sizeof(struct udphdr) > m->m_pkthdr.len) goto done; m_copydata(m, off, sizeof (uh), (caddr_t) &uh); spidx->src.sin.sin_port = uh.uh_sport; spidx->dst.sin.sin_port = uh.uh_dport; return; case IPPROTO_AH: if (off + sizeof(ip6e) > m->m_pkthdr.len) goto done; /* XXX Sigh, this works but is totally bogus. */ m_copydata(m, off, sizeof(ip6e), (caddr_t) &ip6e); off += (ip6e.ip6e_len + 2) << 2; nxt = ip6e.ip6e_nxt; break; case IPPROTO_ICMP: default: /* XXX Intermediate headers??? */ spidx->ul_proto = nxt; goto done_proto; } } done: spidx->ul_proto = IPSEC_ULPROTO_ANY; done_proto: spidx->src.sin.sin_port = IPSEC_PORT_ANY; spidx->dst.sin.sin_port = IPSEC_PORT_ANY; KEYDBG(IPSEC_DUMP, printf("%s: ", __func__); kdebug_secpolicyindex(spidx, NULL)); } static void ipsec4_setspidx_ipaddr(const struct mbuf *m, struct secpolicyindex *spidx) { ipsec4_setsockaddrs(m, &spidx->src, &spidx->dst); spidx->prefs = sizeof(struct in_addr) << 3; spidx->prefd = sizeof(struct in_addr) << 3; } static struct secpolicy * ipsec4_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir, int needport) { struct secpolicyindex spidx; struct secpolicy *sp; sp = ipsec_getpcbpolicy(inp, dir); if (sp == NULL && key_havesp(dir)) { /* Make an index to look for a policy. 
*/ ipsec4_setspidx_ipaddr(m, &spidx); ipsec4_get_ulp(m, &spidx, needport); spidx.dir = dir; sp = key_allocsp(&spidx, dir); } if (sp == NULL) /* No SP found, use system default. */ sp = key_allocsp_default(); return (sp); } /* * Check security policy for *OUTBOUND* IPv4 packet. */ struct secpolicy * ipsec4_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error, int needport) { struct secpolicy *sp; *error = 0; sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_OUTBOUND, needport); if (sp != NULL) sp = ipsec_checkpolicy(sp, inp, error); if (sp == NULL) { switch (*error) { case 0: /* No IPsec required: BYPASS or NONE */ break; case -EINVAL: IPSECSTAT_INC(ips_out_polvio); break; default: IPSECSTAT_INC(ips_out_inval); } } KEYDBG(IPSEC_STAMP, printf("%s: using SP(%p), error %d\n", __func__, sp, *error)); if (sp != NULL) KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); return (sp); } /* * Check IPv4 packet against *INBOUND* security policy. * This function is called from tcp_input(), udp_input(), * rip_input() and sctp_input(). */ int ipsec4_in_reject(const struct mbuf *m, struct inpcb *inp) { struct secpolicy *sp; int result; sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); if (result != 0) IPSECSTAT_INC(ips_in_polvio); return (result); } /* * IPSEC_CAP() method implementation for IPv4. */ int ipsec4_capability(struct mbuf *m, u_int cap) { switch (cap) { case IPSEC_CAP_BYPASS_FILTER: /* * Bypass packet filtering for packets previously handled * by IPsec. */ if (!V_ip4_filtertunnel && m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) return (1); return (0); case IPSEC_CAP_OPERABLE: /* Do we have active security policies? */ if (key_havesp(IPSEC_DIR_INBOUND) != 0 || key_havesp(IPSEC_DIR_OUTBOUND) != 0) return (1); return (0); }; return (EOPNOTSUPP); } #endif /* INET */ #ifdef INET6 static void ipsec6_get_ulp(const struct mbuf *m, struct secpolicyindex *spidx, int needport) { struct tcphdr th; struct udphdr uh; struct icmp6_hdr ih; int off, nxt; IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip6_hdr), ("packet too short")); /* Set default. */ spidx->ul_proto = IPSEC_ULPROTO_ANY; spidx->src.sin6.sin6_port = IPSEC_PORT_ANY; spidx->dst.sin6.sin6_port = IPSEC_PORT_ANY; nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off < 0 || m->m_pkthdr.len < off) return; switch (nxt) { case IPPROTO_TCP: spidx->ul_proto = nxt; if (!needport) break; if (off + sizeof(struct tcphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(th), (caddr_t)&th); spidx->src.sin6.sin6_port = th.th_sport; spidx->dst.sin6.sin6_port = th.th_dport; break; case IPPROTO_UDP: spidx->ul_proto = nxt; if (!needport) break; if (off + sizeof(struct udphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(uh), (caddr_t)&uh); spidx->src.sin6.sin6_port = uh.uh_sport; spidx->dst.sin6.sin6_port = uh.uh_dport; break; case IPPROTO_ICMPV6: spidx->ul_proto = nxt; if (off + sizeof(struct icmp6_hdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(ih), (caddr_t)&ih); spidx->src.sin6.sin6_port = htons((uint16_t)ih.icmp6_type); spidx->dst.sin6.sin6_port = htons((uint16_t)ih.icmp6_code); break; default: /* XXX Intermediate headers??? 
*/ spidx->ul_proto = nxt; break; } KEYDBG(IPSEC_DUMP, printf("%s: ", __func__); kdebug_secpolicyindex(spidx, NULL)); } static void ipsec6_setspidx_ipaddr(const struct mbuf *m, struct secpolicyindex *spidx) { ipsec6_setsockaddrs(m, &spidx->src, &spidx->dst); spidx->prefs = sizeof(struct in6_addr) << 3; spidx->prefd = sizeof(struct in6_addr) << 3; } static struct secpolicy * ipsec6_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir, int needport) { struct secpolicyindex spidx; struct secpolicy *sp; sp = ipsec_getpcbpolicy(inp, dir); if (sp == NULL && key_havesp(dir)) { /* Make an index to look for a policy. */ ipsec6_setspidx_ipaddr(m, &spidx); ipsec6_get_ulp(m, &spidx, needport); spidx.dir = dir; sp = key_allocsp(&spidx, dir); } if (sp == NULL) /* No SP found, use system default. */ sp = key_allocsp_default(); return (sp); } /* * Check security policy for *OUTBOUND* IPv6 packet. */ struct secpolicy * ipsec6_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error, int needport) { struct secpolicy *sp; *error = 0; sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_OUTBOUND, needport); if (sp != NULL) sp = ipsec_checkpolicy(sp, inp, error); if (sp == NULL) { switch (*error) { case 0: /* No IPsec required: BYPASS or NONE */ break; case -EINVAL: IPSEC6STAT_INC(ips_out_polvio); break; default: IPSEC6STAT_INC(ips_out_inval); } } KEYDBG(IPSEC_STAMP, printf("%s: using SP(%p), error %d\n", __func__, sp, *error)); if (sp != NULL) KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); return (sp); } /* * Check IPv6 packet against inbound security policy. * This function is called from tcp6_input(), udp6_input(), * rip6_input() and sctp_input(). */ int ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) { struct secpolicy *sp; int result; sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); if (result) IPSEC6STAT_INC(ips_in_polvio); return (result); } /* * IPSEC_CAP() method implementation for IPv6. */ int ipsec6_capability(struct mbuf *m, u_int cap) { switch (cap) { case IPSEC_CAP_BYPASS_FILTER: /* * Bypass packet filtering for packets previously handled * by IPsec. */ if (!V_ip6_filtertunnel && m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) return (1); return (0); case IPSEC_CAP_OPERABLE: /* Do we have active security policies? */ if (key_havesp(IPSEC_DIR_INBOUND) != 0 || key_havesp(IPSEC_DIR_OUTBOUND) != 0) return (1); return (0); }; return (EOPNOTSUPP); } #endif /* INET6 */ int ipsec_run_hhooks(struct ipsec_ctx_data *ctx, int type) { int idx; switch (ctx->af) { #ifdef INET case AF_INET: idx = HHOOK_IPSEC_INET; break; #endif #ifdef INET6 case AF_INET6: idx = HHOOK_IPSEC_INET6; break; #endif default: return (EPFNOSUPPORT); } if (type == HHOOK_TYPE_IPSEC_IN) HHOOKS_RUN_IF(V_ipsec_hhh_in[idx], ctx, NULL); else HHOOKS_RUN_IF(V_ipsec_hhh_out[idx], ctx, NULL); if (*ctx->mp == NULL) return (EACCES); return (0); } /* * Return current level. * Either IPSEC_LEVEL_USE or IPSEC_LEVEL_REQUIRE are always returned. */ u_int ipsec_get_reqlevel(struct secpolicy *sp, u_int idx) { struct ipsecrequest *isr; u_int esp_trans_deflev, esp_net_deflev; u_int ah_trans_deflev, ah_net_deflev; u_int level = 0; IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); /* XXX Note that we have ipseclog() expanded here - code sync issue. */ #define IPSEC_CHECK_DEFAULT(lev) \ (((lev) != IPSEC_LEVEL_USE && (lev) != IPSEC_LEVEL_REQUIRE && \ (lev) != IPSEC_LEVEL_UNIQUE) \ ? (V_ipsec_debug ? 
\ log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\ (lev), IPSEC_LEVEL_REQUIRE) : 0), \ (lev) = IPSEC_LEVEL_REQUIRE, (lev) : (lev)) /* * IPsec VTI uses unique security policy with fake spidx filled * with zeroes. Just return IPSEC_LEVEL_REQUIRE instead of doing * full level lookup for such policies. */ if (sp->state == IPSEC_SPSTATE_IFNET) { IPSEC_ASSERT(sp->req[idx]->level == IPSEC_LEVEL_UNIQUE, ("Wrong IPsec request level %d", sp->req[idx]->level)); return (IPSEC_LEVEL_REQUIRE); } /* Set default level. */ switch (sp->spidx.src.sa.sa_family) { #ifdef INET case AF_INET: esp_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip4_esp_trans_deflev); esp_net_deflev = IPSEC_CHECK_DEFAULT(V_ip4_esp_net_deflev); ah_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip4_ah_trans_deflev); ah_net_deflev = IPSEC_CHECK_DEFAULT(V_ip4_ah_net_deflev); break; #endif #ifdef INET6 case AF_INET6: esp_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip6_esp_trans_deflev); esp_net_deflev = IPSEC_CHECK_DEFAULT(V_ip6_esp_net_deflev); ah_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip6_ah_trans_deflev); ah_net_deflev = IPSEC_CHECK_DEFAULT(V_ip6_ah_net_deflev); break; #endif /* INET6 */ default: panic("%s: unknown af %u", __func__, sp->spidx.src.sa.sa_family); } #undef IPSEC_CHECK_DEFAULT isr = sp->req[idx]; /* Set level. */ switch (isr->level) { case IPSEC_LEVEL_DEFAULT: switch (isr->saidx.proto) { case IPPROTO_ESP: if (isr->saidx.mode == IPSEC_MODE_TUNNEL) level = esp_net_deflev; else level = esp_trans_deflev; break; case IPPROTO_AH: if (isr->saidx.mode == IPSEC_MODE_TUNNEL) level = ah_net_deflev; else level = ah_trans_deflev; break; case IPPROTO_IPCOMP: /* * We don't really care, as IPcomp document says that * we shouldn't compress small packets. */ level = IPSEC_LEVEL_USE; break; default: panic("%s: Illegal protocol defined %u\n", __func__, isr->saidx.proto); } break; case IPSEC_LEVEL_USE: case IPSEC_LEVEL_REQUIRE: level = isr->level; break; case IPSEC_LEVEL_UNIQUE: level = IPSEC_LEVEL_REQUIRE; break; default: panic("%s: Illegal IPsec level %u\n", __func__, isr->level); } return (level); } static int ipsec_check_history(const struct mbuf *m, struct secpolicy *sp, u_int idx) { struct xform_history *xh; struct m_tag *mtag; mtag = NULL; while ((mtag = m_tag_find(__DECONST(struct mbuf *, m), PACKET_TAG_IPSEC_IN_DONE, mtag)) != NULL) { xh = (struct xform_history *)(mtag + 1); KEYDBG(IPSEC_DATA, char buf[IPSEC_ADDRSTRLEN]; printf("%s: mode %s proto %u dst %s\n", __func__, kdebug_secasindex_mode(xh->mode), xh->proto, ipsec_address(&xh->dst, buf, sizeof(buf)))); if (xh->proto != sp->req[idx]->saidx.proto) continue; /* If SA had IPSEC_MODE_ANY, consider this as match. */ if (xh->mode != sp->req[idx]->saidx.mode && xh->mode != IPSEC_MODE_ANY) continue; /* * For transport mode IPsec request doesn't contain * addresses. We need to use address from spidx. */ if (sp->req[idx]->saidx.mode == IPSEC_MODE_TRANSPORT) { if (key_sockaddrcmp_withmask(&xh->dst.sa, &sp->spidx.dst.sa, sp->spidx.prefd) != 0) continue; } else { if (key_sockaddrcmp(&xh->dst.sa, &sp->req[idx]->saidx.dst.sa, 0) != 0) continue; } return (0); /* matched */ } return (1); } /* * Check security policy requirements against the actual * packet contents. Return one if the packet should be * reject as "invalid"; otherwiser return zero to have the * packet treated as "valid". 
* * OUT: * 0: valid * 1: invalid */ static int ipsec_in_reject(struct secpolicy *sp, struct inpcb *inp, const struct mbuf *m) { int i; KEYDBG(IPSEC_STAMP, printf("%s: PCB(%p): using SP(%p)\n", __func__, inp, sp)); KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); if (inp != NULL && inp->inp_sp != NULL && inp->inp_sp->sp_in == NULL) ipsec_cachepolicy(inp, sp, IPSEC_DIR_INBOUND); /* Check policy. */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: return (1); case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: return (0); } IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC, ("invalid policy %u", sp->policy)); /* * ipsec[46]_common_input_cb after each transform adds * PACKET_TAG_IPSEC_IN_DONE mbuf tag. It contains SPI, proto, mode * and destination address from saidx. We can compare info from * these tags with requirements in SP. */ for (i = 0; i < sp->tcount; i++) { /* * Do not check IPcomp, since IPcomp document * says that we shouldn't compress small packets. * IPComp policy should always be treated as being * in "use" level. */ if (sp->req[i]->saidx.proto == IPPROTO_IPCOMP || ipsec_get_reqlevel(sp, i) != IPSEC_LEVEL_REQUIRE) continue; if (V_check_policy_history != 0 && ipsec_check_history(m, sp, i) != 0) return (1); else switch (sp->req[i]->saidx.proto) { case IPPROTO_ESP: if ((m->m_flags & M_DECRYPTED) == 0) { KEYDBG(IPSEC_DUMP, printf("%s: ESP m_flags:%x\n", __func__, m->m_flags)); return (1); } break; case IPPROTO_AH: if ((m->m_flags & M_AUTHIPHDR) == 0) { KEYDBG(IPSEC_DUMP, printf("%s: AH m_flags:%x\n", __func__, m->m_flags)); return (1); } break; } } return (0); /* Valid. */ } /* * Compute the byte size to be occupied by IPsec header. * In case it is tunnelled, it includes the size of outer IP header. */ static size_t ipsec_hdrsiz_internal(struct secpolicy *sp) { size_t size; int i; KEYDBG(IPSEC_STAMP, printf("%s: using SP(%p)\n", __func__, sp)); KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); switch (sp->policy) { case IPSEC_POLICY_DISCARD: case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: return (0); } IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC, ("invalid policy %u", sp->policy)); /* * XXX: for each transform we need to lookup suitable SA * and use info from SA to calculate headers size. * XXX: for NAT-T we need to cosider UDP header size. */ size = 0; for (i = 0; i < sp->tcount; i++) { switch (sp->req[i]->saidx.proto) { case IPPROTO_ESP: size += esp_hdrsiz(NULL); break; case IPPROTO_AH: size += ah_hdrsiz(NULL); break; case IPPROTO_IPCOMP: size += sizeof(struct ipcomp); break; } if (sp->req[i]->saidx.mode == IPSEC_MODE_TUNNEL) { switch (sp->req[i]->saidx.dst.sa.sa_family) { #ifdef INET case AF_INET: size += sizeof(struct ip); break; #endif #ifdef INET6 case AF_INET6: size += sizeof(struct ip6_hdr); break; #endif default: ipseclog((LOG_ERR, "%s: unknown AF %d in " "IPsec tunnel SA\n", __func__, sp->req[i]->saidx.dst.sa.sa_family)); break; } } } return (size); } /* * Compute ESP/AH header size for protocols with PCB, including * outer IP header. Currently only tcp_output() uses it. 
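 * Callers typically subtract this estimate from the path MTU when sizing
 * segments.  A minimal hypothetical sketch of that use (made-up name, not
 * the actual tcp_output() code):
 */
static u_long
example_mss_after_ipsec(struct inpcb *inp, u_long mtu, u_long iphlen)
{

	/* Leave room for the ESP/AH (and tunnel) headers IPsec will add. */
	return (mtu - iphlen - ipsec_hdrsiz_inpcb(inp));
}
/*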
*/ size_t ipsec_hdrsiz_inpcb(struct inpcb *inp) { struct secpolicyindex spidx; struct secpolicy *sp; size_t sz; sp = ipsec_getpcbpolicy(inp, IPSEC_DIR_OUTBOUND); if (sp == NULL && key_havesp(IPSEC_DIR_OUTBOUND)) { ipsec_setspidx_inpcb(inp, &spidx, IPSEC_DIR_OUTBOUND); sp = key_allocsp(&spidx, IPSEC_DIR_OUTBOUND); } if (sp == NULL) sp = key_allocsp_default(); sz = ipsec_hdrsiz_internal(sp); key_freesp(&sp); return (sz); } #define IPSEC_BITMAP_INDEX_MASK(w) (w - 1) #define IPSEC_REDUNDANT_BIT_SHIFTS 5 #define IPSEC_REDUNDANT_BITS (1 << IPSEC_REDUNDANT_BIT_SHIFTS) #define IPSEC_BITMAP_LOC_MASK (IPSEC_REDUNDANT_BITS - 1) /* * Functions below are responsible for checking and updating bitmap. * These are used to separate ipsec_chkreplay() and ipsec_updatereplay() * from window implementation * * Based on RFC 6479. Blocks are 32 bits unsigned integers */ static inline int check_window(const struct secreplay *replay, uint64_t seq) { int index, bit_location; bit_location = seq & IPSEC_BITMAP_LOC_MASK; index = (seq >> IPSEC_REDUNDANT_BIT_SHIFTS) & IPSEC_BITMAP_INDEX_MASK(replay->bitmap_size); /* This packet already seen? */ return ((replay->bitmap)[index] & (1 << bit_location)); } static inline void advance_window(const struct secreplay *replay, uint64_t seq) { int i; uint64_t index, index_cur, diff; index_cur = replay->last >> IPSEC_REDUNDANT_BIT_SHIFTS; index = seq >> IPSEC_REDUNDANT_BIT_SHIFTS; diff = index - index_cur; if (diff > replay->bitmap_size) { /* something unusual in this case */ diff = replay->bitmap_size; } for (i = 0; i < diff; i++) { replay->bitmap[(i + index_cur + 1) & IPSEC_BITMAP_INDEX_MASK(replay->bitmap_size)] = 0; } } static inline void set_window(const struct secreplay *replay, uint64_t seq) { int index, bit_location; bit_location = seq & IPSEC_BITMAP_LOC_MASK; index = (seq >> IPSEC_REDUNDANT_BIT_SHIFTS) & IPSEC_BITMAP_INDEX_MASK(replay->bitmap_size); replay->bitmap[index] |= (1 << bit_location); } /* * Check the variable replay window. * ipsec_chkreplay() performs replay check before ICV verification. * ipsec_updatereplay() updates replay bitmap. This must be called after * ICV verification (it also performs replay check, which is usually done * beforehand). * 0 (zero) is returned if packet disallowed, 1 if packet permitted. * * Based on RFC 4303 */ int ipsec_chkreplay(uint32_t seq, uint32_t *seqhigh, struct secasvar *sav) { char buf[128]; struct secreplay *replay; uint32_t window; uint32_t tl, th, bl; uint32_t seqh; IPSEC_ASSERT(sav != NULL, ("Null SA")); IPSEC_ASSERT(sav->replay != NULL, ("Null replay state")); replay = sav->replay; /* No need to check replay if disabled. */ if (replay->wsize == 0) return (1); /* Zero sequence number is not allowed. */ if (seq == 0 && replay->last == 0) return (0); window = replay->wsize << 3; /* Size of window */ tl = (uint32_t)replay->last; /* Top of window, lower part */ th = (uint32_t)(replay->last >> 32); /* Top of window, high part */ bl = tl - window + 1; /* Bottom of window, lower part */ /* * We keep the high part intact when: * 1) the seq is within [bl, 0xffffffff] and the whole window is * within one subspace; * 2) the seq is within [0, bl) and window spans two subspaces. 
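 * In both cases the received seq is taken to share the high-order
 * 32 bits (th) with the top of the window.  For example, with
 * wsize = 128 (window = 1024) and replay->last = 0x100000200,
 * tl = 0x200 and bl wraps to 0xfffffe01, so the window spans two
 * subspaces (case 2).  Within the window, seq is tracked by bit
 * (seq & 31) of bitmap word (seq >> 5) & (bitmap_size - 1), as
 * implemented by check_window()/set_window() above (RFC 6479).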
*/ if ((tl >= window - 1 && seq >= bl) || (tl < window - 1 && seq < bl)) { *seqhigh = th; if (seq <= tl) { /* Sequence number inside window - check against replay */ if (check_window(replay, seq)) return (0); } /* Sequence number above top of window or not found in bitmap */ return (1); } /* * If ESN is not enabled and packet with highest sequence number * was received we should report overflow */ if (tl == 0xffffffff && !(sav->flags & SADB_X_SAFLAGS_ESN)) { /* Set overflow flag. */ replay->overflow++; if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0) { if (sav->sah->saidx.proto == IPPROTO_ESP) ESPSTAT_INC(esps_wrap); else if (sav->sah->saidx.proto == IPPROTO_AH) AHSTAT_INC(ahs_wrap); return (0); } ipseclog((LOG_WARNING, "%s: replay counter made %d cycle. %s\n", __func__, replay->overflow, ipsec_sa2str(sav, buf, sizeof(buf)))); } /* * Seq is within [bl, 0xffffffff] and bl is within * [0xffffffff-window, 0xffffffff]. This means we got a seq * which is within our replay window, but in the previous * subspace. */ if (tl < window - 1 && seq >= bl) { if (th == 0) return (0); *seqhigh = th - 1; seqh = th - 1; if (check_window(replay, seq)) return (0); return (1); } /* * Seq is within [0, bl) but the whole window is within one subspace. * This means that seq has wrapped and is in next subspace */ *seqhigh = th + 1; seqh = th + 1; /* Don't let high part wrap. */ if (seqh == 0) { /* Set overflow flag. */ replay->overflow++; if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0) { if (sav->sah->saidx.proto == IPPROTO_ESP) ESPSTAT_INC(esps_wrap); else if (sav->sah->saidx.proto == IPPROTO_AH) AHSTAT_INC(ahs_wrap); return (0); } ipseclog((LOG_WARNING, "%s: replay counter made %d cycle. %s\n", __func__, replay->overflow, ipsec_sa2str(sav, buf, sizeof(buf)))); } return (1); } /* * Check replay counter whether to update or not. * OUT: 0: OK * 1: NG */ int ipsec_updatereplay(uint32_t seq, struct secasvar *sav) { struct secreplay *replay; uint32_t window; uint32_t tl, th, bl; uint32_t seqh; IPSEC_ASSERT(sav != NULL, ("Null SA")); IPSEC_ASSERT(sav->replay != NULL, ("Null replay state")); replay = sav->replay; /* No need to check replay if disabled. */ if (replay->wsize == 0) return (0); /* Zero sequence number is not allowed. */ if (seq == 0 && replay->last == 0) return (1); window = replay->wsize << 3; /* Size of window */ tl = (uint32_t)replay->last; /* Top of window, lower part */ th = (uint32_t)(replay->last >> 32); /* Top of window, high part */ bl = tl - window + 1; /* Bottom of window, lower part */ /* * We keep the high part intact when: * 1) the seq is within [bl, 0xffffffff] and the whole window is * within one subspace; * 2) the seq is within [0, bl) and window spans two subspaces. */ if ((tl >= window - 1 && seq >= bl) || (tl < window - 1 && seq < bl)) { seqh = th; if (seq <= tl) { /* Sequence number inside window - check against replay */ if (check_window(replay, seq)) return (1); set_window(replay, seq); } else { advance_window(replay, ((uint64_t)seqh << 32) | seq); set_window(replay, seq); replay->last = ((uint64_t)seqh << 32) | seq; } /* Sequence number above top of window or not found in bitmap */ replay->count++; return (0); } if (!(sav->flags & SADB_X_SAFLAGS_ESN)) return (1); /* * Seq is within [bl, 0xffffffff] and bl is within * [0xffffffff-window, 0xffffffff]. This means we got a seq * which is within our replay window, but in the previous * subspace. 
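 * Unlike ipsec_chkreplay(), this and the following case are only
 * reachable when extended sequence numbers (SADB_X_SAFLAGS_ESN) are
 * enabled, as checked just above.  Both cases also mark the bit in
 * the bitmap once the sequence number is accepted, and the wrap into
 * the next subspace additionally advances the window and replay->last.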
*/ if (tl < window - 1 && seq >= bl) { if (th == 0) return (1); if (check_window(replay, seq)) return (1); set_window(replay, seq); replay->count++; return (0); } /* * Seq is within [0, bl) but the whole window is within one subspace. * This means that seq has wrapped and is in next subspace */ seqh = th + 1; /* Don't let high part wrap. */ if (seqh == 0) return (1); advance_window(replay, ((uint64_t)seqh << 32) | seq); set_window(replay, seq); replay->last = ((uint64_t)seqh << 32) | seq; replay->count++; return (0); } int ipsec_updateid(struct secasvar *sav, crypto_session_t *new, crypto_session_t *old) { crypto_session_t tmp; /* * tdb_cryptoid is initialized by xform_init(). * Then it can be changed only when some crypto error occurred or * when SA is deleted. We stored used cryptoid in the xform_data * structure. In case when crypto error occurred and crypto * subsystem has reinited the session, it returns new cryptoid * and EAGAIN error code. * * This function will be called when we got EAGAIN from crypto * subsystem. * *new is cryptoid that was returned by crypto subsystem in * the crp_sid. * *old is the original cryptoid that we stored in xform_data. * * For first failed request *old == sav->tdb_cryptoid, then * we update sav->tdb_cryptoid and redo crypto_dispatch(). * For next failed request *old != sav->tdb_cryptoid, then * we store cryptoid from first request into the *new variable * and crp_sid from this second session will be returned via * *old pointer, so caller can release second session. * * XXXAE: check this more carefully. */ KEYDBG(IPSEC_STAMP, printf("%s: SA(%p) moves cryptoid %p -> %p\n", __func__, sav, *old, *new)); KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); SECASVAR_LOCK(sav); if (sav->tdb_cryptoid != *old) { /* cryptoid was already updated */ tmp = *new; *new = sav->tdb_cryptoid; *old = tmp; SECASVAR_UNLOCK(sav); return (1); } sav->tdb_cryptoid = *new; SECASVAR_UNLOCK(sav); return (0); } int ipsec_initialized(void) { return (V_def_policy != NULL); } static void def_policy_init(const void *unused __unused) { V_def_policy = key_newsp(); if (V_def_policy != NULL) { V_def_policy->policy = IPSEC_POLICY_NONE; /* Force INPCB SP cache invalidation */ key_bumpspgen(); } else printf("%s: failed to initialize default policy\n", __func__); } static void def_policy_uninit(const void *unused __unused) { if (V_def_policy != NULL) { key_freesp(&V_def_policy); key_bumpspgen(); } } VNET_SYSINIT(def_policy_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, def_policy_init, NULL); VNET_SYSUNINIT(def_policy_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, def_policy_uninit, NULL); diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h index c7a44d60f082..1f5aa1a43edc 100644 --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -1,364 +1,368 @@ /* $FreeBSD$ */ /* $KAME: ipsec.h,v 1.53 2001/11/20 08:32:38 itojun Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPsec controller part. */ #ifndef _NETIPSEC_IPSEC_H_ #define _NETIPSEC_IPSEC_H_ #include #include #ifdef _KERNEL #include #include #include #define IPSEC_ASSERT(_c,_m) KASSERT(_c, _m) /* * Security Policy Index * Ensure that both address families in the "src" and "dst" are same. * When the value of the ul_proto is ICMPv6, the port field in "src" * specifies ICMPv6 type, and the port field in "dst" specifies ICMPv6 code. */ struct secpolicyindex { union sockaddr_union src; /* IP src address for SP */ union sockaddr_union dst; /* IP dst address for SP */ uint8_t ul_proto; /* upper layer Protocol */ uint8_t dir; /* direction of packet flow */ uint8_t prefs; /* prefix length in bits for src */ uint8_t prefd; /* prefix length in bits for dst */ }; /* Request for IPsec */ struct ipsecrequest { struct secasindex saidx;/* hint for search proper SA */ /* if __ss_len == 0 then no address specified.*/ u_int level; /* IPsec level defined below. */ }; /* Security Policy Data Base */ struct secpolicy { TAILQ_ENTRY(secpolicy) chain; LIST_ENTRY(secpolicy) idhash; LIST_ENTRY(secpolicy) drainq; struct secpolicyindex spidx; /* selector */ #define IPSEC_MAXREQ 4 struct ipsecrequest *req[IPSEC_MAXREQ]; u_int tcount; /* IPsec transforms count */ volatile u_int refcnt; /* reference count */ u_int policy; /* policy_type per pfkeyv2.h */ u_int state; #define IPSEC_SPSTATE_DEAD 0 #define IPSEC_SPSTATE_LARVAL 1 #define IPSEC_SPSTATE_ALIVE 2 #define IPSEC_SPSTATE_PCB 3 #define IPSEC_SPSTATE_IFNET 4 uint32_t priority; /* priority of this policy */ uint32_t id; /* It's unique number on the system. */ /* * lifetime handler. * the policy can be used without limitiation if both lifetime and * validtime are zero. * "lifetime" is passed by sadb_lifetime.sadb_lifetime_addtime. * "validtime" is passed by sadb_lifetime.sadb_lifetime_usetime. */ time_t created; /* time created the policy */ time_t lastused; /* updated every when kernel sends a packet */ long lifetime; /* duration of the lifetime of this policy */ long validtime; /* duration this policy is valid without use */ }; /* * PCB security policies. * Application can setup private security policies for socket. * Such policies can have IPSEC, BYPASS and ENTRUST type. * By default, policies are set to NULL. This means that they have ENTRUST type. * When application sets BYPASS or IPSEC type policy, the flags field * is also updated. When flags is not set, the system could store * used security policy into the sp_in/sp_out pointer to speed up further * lookups. 
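 * The genid field ties the cached sp_in/sp_out pointers to the current
 * SP generation; key_bumpspgen() (called e.g. when the default policy
 * changes, see ipsec.c) bumps the generation so that stale cached
 * pointers are refreshed on the next lookup.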
*/ struct inpcbpolicy { struct secpolicy *sp_in; struct secpolicy *sp_out; uint32_t genid; uint16_t flags; #define INP_INBOUND_POLICY 0x0001 #define INP_OUTBOUND_POLICY 0x0002 uint16_t hdrsz; }; /* SP acquiring list table. */ struct secspacq { LIST_ENTRY(secspacq) chain; struct secpolicyindex spidx; time_t created; /* for lifetime */ int count; /* for lifetime */ /* XXX: here is mbuf place holder to be sent ? */ }; #endif /* _KERNEL */ /* buffer size for formatted output of ipsec address */ #define IPSEC_ADDRSTRLEN (INET6_ADDRSTRLEN + 11) /* according to IANA assignment, port 0x0000 and proto 0xff are reserved. */ #define IPSEC_PORT_ANY 0 #define IPSEC_ULPROTO_ANY 255 #define IPSEC_PROTO_ANY 255 /* mode of security protocol */ /* NOTE: DON'T use IPSEC_MODE_ANY at SPD. It's only use in SAD */ #define IPSEC_MODE_ANY 0 /* i.e. wildcard. */ #define IPSEC_MODE_TRANSPORT 1 #define IPSEC_MODE_TUNNEL 2 #define IPSEC_MODE_TCPMD5 3 /* TCP MD5 mode */ /* * Direction of security policy. * NOTE: Since INVALID is used just as flag. * The other are used for loop counter too. */ #define IPSEC_DIR_ANY 0 #define IPSEC_DIR_INBOUND 1 #define IPSEC_DIR_OUTBOUND 2 #define IPSEC_DIR_MAX 3 #define IPSEC_DIR_INVALID 4 /* Policy level */ /* * IPSEC, ENTRUST and BYPASS are allowed for setsockopt() in PCB, * DISCARD, IPSEC and NONE are allowed for setkey() in SPD. * DISCARD and NONE are allowed for system default. */ #define IPSEC_POLICY_DISCARD 0 /* discarding packet */ #define IPSEC_POLICY_NONE 1 /* through IPsec engine */ #define IPSEC_POLICY_IPSEC 2 /* do IPsec */ #define IPSEC_POLICY_ENTRUST 3 /* consulting SPD if present. */ #define IPSEC_POLICY_BYPASS 4 /* only for privileged socket. */ /* Policy scope */ #define IPSEC_POLICYSCOPE_ANY 0x00 /* unspecified */ #define IPSEC_POLICYSCOPE_GLOBAL 0x01 /* global scope */ #define IPSEC_POLICYSCOPE_IFNET 0x02 /* if_ipsec(4) scope */ #define IPSEC_POLICYSCOPE_PCB 0x04 /* PCB scope */ /* Security protocol level */ #define IPSEC_LEVEL_DEFAULT 0 /* reference to system default */ #define IPSEC_LEVEL_USE 1 /* use SA if present. */ #define IPSEC_LEVEL_REQUIRE 2 /* require SA. */ #define IPSEC_LEVEL_UNIQUE 3 /* unique SA. */ #define IPSEC_MANUAL_REQID_MAX 0x3fff /* * if security policy level == unique, this id * indicate to a relative SA for use, else is * zero. * 1 - 0x3fff are reserved for manual keying. * 0 are reserved for above reason. Others is * for kernel use. * Note that this id doesn't identify SA * by only itself. */ #define IPSEC_REPLAYWSIZE 32 /* statistics for ipsec processing */ struct ipsecstat { uint64_t ips_in_polvio; /* input: sec policy violation */ uint64_t ips_in_nomem; /* input: no memory available */ uint64_t ips_in_inval; /* input: generic error */ uint64_t ips_out_polvio; /* output: sec policy violation */ uint64_t ips_out_nosa; /* output: SA unavailable */ uint64_t ips_out_nomem; /* output: no memory available */ uint64_t ips_out_noroute; /* output: no route available */ uint64_t ips_out_inval; /* output: generic error */ uint64_t ips_out_bundlesa; /* output: bundled SA processed */ uint64_t ips_spdcache_hits; /* SPD cache hits */ uint64_t ips_spdcache_misses; /* SPD cache misses */ uint64_t ips_clcopied; /* clusters copied during clone */ uint64_t ips_mbinserted; /* mbufs inserted during makespace */ /* * Temporary statistics for performance analysis. */ /* See where ESP/AH/IPCOMP header land in mbuf on input */ uint64_t ips_input_front; uint64_t ips_input_middle; uint64_t ips_input_end; }; /* * Definitions for IPsec & Key sysctl operations. 
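 * IPSECCTL_MIN_PMTU (added below) names the knob for the smallest path
 * MTU that ipsec4_ctlinput() will accept from an ICMP need-fragmentation
 * notification before handing it to the TCP host cache; smaller values
 * are ignored.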
*/ #define IPSECCTL_STATS 1 /* stats */ #define IPSECCTL_DEF_POLICY 2 #define IPSECCTL_DEF_ESP_TRANSLEV 3 /* int; ESP transport mode */ #define IPSECCTL_DEF_ESP_NETLEV 4 /* int; ESP tunnel mode */ #define IPSECCTL_DEF_AH_TRANSLEV 5 /* int; AH transport mode */ #define IPSECCTL_DEF_AH_NETLEV 6 /* int; AH tunnel mode */ #if 0 /* obsolete, do not reuse */ #define IPSECCTL_INBOUND_CALL_IKE 7 #endif #define IPSECCTL_AH_CLEARTOS 8 #define IPSECCTL_AH_OFFSETMASK 9 #define IPSECCTL_DFBIT 10 #define IPSECCTL_ECN 11 #define IPSECCTL_DEBUG 12 #define IPSECCTL_ESP_RANDPAD 13 +#define IPSECCTL_MIN_PMTU 14 #ifdef _KERNEL #include struct ipsec_ctx_data; #define IPSEC_INIT_CTX(_ctx, _mp, _inp, _sav, _af, _enc) do { \ (_ctx)->mp = (_mp); \ (_ctx)->inp = (_inp); \ (_ctx)->sav = (_sav); \ (_ctx)->af = (_af); \ (_ctx)->enc = (_enc); \ } while(0) int ipsec_run_hhooks(struct ipsec_ctx_data *ctx, int direction); VNET_DECLARE(int, ipsec_debug); #define V_ipsec_debug VNET(ipsec_debug) #ifdef REGRESSION VNET_DECLARE(int, ipsec_replay); VNET_DECLARE(int, ipsec_integrity); #define V_ipsec_replay VNET(ipsec_replay) #define V_ipsec_integrity VNET(ipsec_integrity) #endif VNET_PCPUSTAT_DECLARE(struct ipsecstat, ipsec4stat); VNET_DECLARE(int, ip4_esp_trans_deflev); VNET_DECLARE(int, ip4_esp_net_deflev); VNET_DECLARE(int, ip4_ah_trans_deflev); VNET_DECLARE(int, ip4_ah_net_deflev); VNET_DECLARE(int, ip4_ipsec_dfbit); +VNET_DECLARE(int, ip4_ipsec_min_pmtu); VNET_DECLARE(int, ip4_ipsec_ecn); VNET_DECLARE(int, crypto_support); VNET_DECLARE(int, async_crypto); VNET_DECLARE(int, natt_cksum_policy); #define IPSECSTAT_INC(name) \ VNET_PCPUSTAT_ADD(struct ipsecstat, ipsec4stat, name, 1) #define V_ip4_esp_trans_deflev VNET(ip4_esp_trans_deflev) #define V_ip4_esp_net_deflev VNET(ip4_esp_net_deflev) #define V_ip4_ah_trans_deflev VNET(ip4_ah_trans_deflev) #define V_ip4_ah_net_deflev VNET(ip4_ah_net_deflev) #define V_ip4_ipsec_dfbit VNET(ip4_ipsec_dfbit) +#define V_ip4_ipsec_min_pmtu VNET(ip4_ipsec_min_pmtu) #define V_ip4_ipsec_ecn VNET(ip4_ipsec_ecn) #define V_crypto_support VNET(crypto_support) #define V_async_crypto VNET(async_crypto) #define V_natt_cksum_policy VNET(natt_cksum_policy) #define ipseclog(x) do { if (V_ipsec_debug) log x; } while (0) /* for openbsd compatibility */ #ifdef IPSEC_DEBUG #define IPSEC_DEBUG_DECLARE(x) x #define DPRINTF(x) do { if (V_ipsec_debug) printf x; } while (0) #else #define IPSEC_DEBUG_DECLARE(x) #define DPRINTF(x) #endif struct inpcb; struct m_tag; struct secasvar; struct sockopt; struct tcphdr; union sockaddr_union; int ipsec_if_input(struct mbuf *, struct secasvar *, uint32_t); struct ipsecrequest *ipsec_newisr(void); void ipsec_delisr(struct ipsecrequest *); struct secpolicy *ipsec4_checkpolicy(const struct mbuf *, struct inpcb *, int *, int); u_int ipsec_get_reqlevel(struct secpolicy *, u_int); void udp_ipsec_adjust_cksum(struct mbuf *, struct secasvar *, int, int); int udp_ipsec_output(struct mbuf *, struct secasvar *); int udp_ipsec_input(struct mbuf *, int, int); int udp_ipsec_pcbctl(struct inpcb *, struct sockopt *); int ipsec_chkreplay(uint32_t, uint32_t *, struct secasvar *); int ipsec_updatereplay(uint32_t, struct secasvar *); int ipsec_updateid(struct secasvar *, crypto_session_t *, crypto_session_t *); int ipsec_initialized(void); void ipsec_setspidx_inpcb(struct inpcb *, struct secpolicyindex *, u_int); void ipsec4_setsockaddrs(const struct mbuf *, union sockaddr_union *, union sockaddr_union *); int ipsec4_in_reject(const struct mbuf *, struct inpcb *); int ipsec4_input(struct mbuf *, 
int, int); int ipsec4_forward(struct mbuf *); int ipsec4_pcbctl(struct inpcb *, struct sockopt *); int ipsec4_output(struct mbuf *, struct inpcb *); int ipsec4_capability(struct mbuf *, u_int); int ipsec4_common_input_cb(struct mbuf *, struct secasvar *, int, int); +int ipsec4_ctlinput(int, struct sockaddr *, void *); int ipsec4_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); int ipsec_process_done(struct mbuf *, struct secpolicy *, struct secasvar *, u_int); extern void m_checkalignment(const char* where, struct mbuf *m0, int off, int len); extern struct mbuf *m_makespace(struct mbuf *m0, int skip, int hlen, int *off); extern caddr_t m_pad(struct mbuf *m, int n); extern int m_striphdr(struct mbuf *m, int skip, int hlen); #endif /* _KERNEL */ #ifndef _KERNEL extern caddr_t ipsec_set_policy(char *, int); extern int ipsec_get_policylen(caddr_t); extern char *ipsec_dump_policy(caddr_t, char *); extern const char *ipsec_strerror(void); #endif /* ! KERNEL */ #endif /* _NETIPSEC_IPSEC_H_ */ diff --git a/sys/netipsec/ipsec6.h b/sys/netipsec/ipsec6.h index 6d44f9891928..aaba405ce0db 100644 --- a/sys/netipsec/ipsec6.h +++ b/sys/netipsec/ipsec6.h @@ -1,82 +1,83 @@ /* $FreeBSD$ */ /* $KAME: ipsec.h,v 1.44 2001/03/23 08:08:47 itojun Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPsec controller part. 
*/ #ifndef _NETIPSEC_IPSEC6_H_ #define _NETIPSEC_IPSEC6_H_ #include #include #ifdef _KERNEL #include VNET_PCPUSTAT_DECLARE(struct ipsecstat, ipsec6stat); VNET_DECLARE(int, ip6_esp_trans_deflev); VNET_DECLARE(int, ip6_esp_net_deflev); VNET_DECLARE(int, ip6_ah_trans_deflev); VNET_DECLARE(int, ip6_ah_net_deflev); VNET_DECLARE(int, ip6_ipsec_ecn); #define IPSEC6STAT_INC(name) \ VNET_PCPUSTAT_ADD(struct ipsecstat, ipsec6stat, name, 1) #define V_ip6_esp_trans_deflev VNET(ip6_esp_trans_deflev) #define V_ip6_esp_net_deflev VNET(ip6_esp_net_deflev) #define V_ip6_ah_trans_deflev VNET(ip6_ah_trans_deflev) #define V_ip6_ah_net_deflev VNET(ip6_ah_net_deflev) #define V_ip6_ipsec_ecn VNET(ip6_ipsec_ecn) struct inpcb; struct secpolicy *ipsec6_checkpolicy(const struct mbuf *, struct inpcb *, int *, int); void ipsec6_setsockaddrs(const struct mbuf *, union sockaddr_union *, union sockaddr_union *); int ipsec6_input(struct mbuf *, int, int); int ipsec6_in_reject(const struct mbuf *, struct inpcb *); int ipsec6_forward(struct mbuf *); int ipsec6_pcbctl(struct inpcb *, struct sockopt *); int ipsec6_output(struct mbuf *, struct inpcb *); int ipsec6_capability(struct mbuf *, u_int); int ipsec6_common_input_cb(struct mbuf *, struct secasvar *, int, int); +int ipsec6_ctlinput(int, struct sockaddr *, void *); int ipsec6_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); int ip6_ipsec_filtertunnel(struct mbuf *); int ip6_ipsec_pcbctl(struct inpcb *, struct sockopt *); #endif /*_KERNEL*/ #endif /*_NETIPSEC_IPSEC6_H_*/ diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c index 48acba68a1fe..07a3ef583be8 100644 --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -1,686 +1,728 @@ /* $OpenBSD: ipsec_input.c,v 1.63 2003/02/20 18:35:43 deraadt Exp $ */ /*- * The authors of this code are John Ioannidis (ji@tla.org), * Angelos D. Keromytis (kermit@csd.uch.gr) and * Niels Provos (provos@physnet.uni-hamburg.de). * * This code was written by John Ioannidis for BSD/OS in Athens, Greece, * in November 1995. * * Ported to OpenBSD and NetBSD, with additional transforms, in December 1996, * by Angelos D. Keromytis. * * Additional transforms and features in 1997 and 1998 by Angelos D. Keromytis * and Niels Provos. * * Additional features in 1999 by Angelos D. Keromytis. * * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, * Angelos D. Keromytis and Niels Provos. * Copyright (c) 2001, Angelos D. Keromytis. * Copyright (c) 2016 Andrey V. Elsukov * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in * all copies of any software which is or includes a copy or * modification of this software. * You may use this code under the GNU public license if you so wish. Please * contribute changes back to the authors under this freer than GPL license * so that we may further the use of strong encryption without limitations to * all. * * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR * IMPLIED WARRANTY. IN PARTICULAR, NONE OF THE AUTHORS MAKES ANY * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE * MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR * PURPOSE. */ /* * IPsec input processing. 
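 * ipsec_common_input() locates the SA for an inbound ESP/AH/IPCOMP
 * packet and hands it to the matching transform; the per-AF callbacks
 * ipsec4_common_input_cb()/ipsec6_common_input_cb() finish the work and
 * re-queue the decapsulated packet through netisr.  This change also
 * adds ipsec4_ctlinput(), which consumes ICMP "need fragmentation"
 * notifications for IPsec traffic.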
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include +#include #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include #include #include #include #define IPSEC_ISTAT(proto, name) do { \ if ((proto) == IPPROTO_ESP) \ ESPSTAT_INC(esps_##name); \ else if ((proto) == IPPROTO_AH) \ AHSTAT_INC(ahs_##name); \ else \ IPCOMPSTAT_INC(ipcomps_##name); \ } while (0) /* * ipsec_common_input gets called when an IPsec-protected packet * is received by IPv4 or IPv6. Its job is to find the right SA * and call the appropriate transform. The transform callback * takes care of further processing (like ingress filtering). */ static int ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) { IPSEC_DEBUG_DECLARE(char buf[IPSEC_ADDRSTRLEN]); union sockaddr_union dst_address; struct secasvar *sav; uint32_t spi; int error; IPSEC_ISTAT(sproto, input); IPSEC_ASSERT(m != NULL, ("null packet")); IPSEC_ASSERT(sproto == IPPROTO_ESP || sproto == IPPROTO_AH || sproto == IPPROTO_IPCOMP, ("unexpected security protocol %u", sproto)); if ((sproto == IPPROTO_ESP && !V_esp_enable) || (sproto == IPPROTO_AH && !V_ah_enable) || (sproto == IPPROTO_IPCOMP && !V_ipcomp_enable)) { m_freem(m); IPSEC_ISTAT(sproto, pdrops); return EOPNOTSUPP; } if (m->m_pkthdr.len - skip < 2 * sizeof (u_int32_t)) { m_freem(m); IPSEC_ISTAT(sproto, hdrops); DPRINTF(("%s: packet too small\n", __func__)); return EINVAL; } /* Retrieve the SPI from the relevant IPsec header */ if (sproto == IPPROTO_ESP) m_copydata(m, skip, sizeof(u_int32_t), (caddr_t) &spi); else if (sproto == IPPROTO_AH) m_copydata(m, skip + sizeof(u_int32_t), sizeof(u_int32_t), (caddr_t) &spi); else if (sproto == IPPROTO_IPCOMP) { u_int16_t cpi; m_copydata(m, skip + sizeof(u_int16_t), sizeof(u_int16_t), (caddr_t) &cpi); spi = ntohl(htons(cpi)); } /* * Find the SA and (indirectly) call the appropriate * kernel crypto routine. The resulting mbuf chain is a valid * IP packet ready to go through input processing. 
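 * The SA lookup key is the destination address extracted below plus
 * the security protocol and the SPI parsed above; see the RFC 2401
 * note at the key_allocsa() call.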
*/ bzero(&dst_address, sizeof (dst_address)); dst_address.sa.sa_family = af; switch (af) { #ifdef INET case AF_INET: dst_address.sin.sin_len = sizeof(struct sockaddr_in); m_copydata(m, offsetof(struct ip, ip_dst), sizeof(struct in_addr), (caddr_t) &dst_address.sin.sin_addr); break; #endif /* INET */ #ifdef INET6 case AF_INET6: dst_address.sin6.sin6_len = sizeof(struct sockaddr_in6); m_copydata(m, offsetof(struct ip6_hdr, ip6_dst), sizeof(struct in6_addr), (caddr_t) &dst_address.sin6.sin6_addr); /* We keep addresses in SADB without embedded scope id */ if (IN6_IS_SCOPE_LINKLOCAL(&dst_address.sin6.sin6_addr)) { /* XXX: sa6_recoverscope() */ dst_address.sin6.sin6_scope_id = ntohs(dst_address.sin6.sin6_addr.s6_addr16[1]); dst_address.sin6.sin6_addr.s6_addr16[1] = 0; } break; #endif /* INET6 */ default: DPRINTF(("%s: unsupported protocol family %u\n", __func__, af)); m_freem(m); IPSEC_ISTAT(sproto, nopf); return EPFNOSUPPORT; } /* NB: only pass dst since key_allocsa follows RFC2401 */ sav = key_allocsa(&dst_address, sproto, spi); if (sav == NULL) { DPRINTF(("%s: no key association found for SA %s/%08lx/%u\n", __func__, ipsec_address(&dst_address, buf, sizeof(buf)), (u_long) ntohl(spi), sproto)); IPSEC_ISTAT(sproto, notdb); m_freem(m); return ENOENT; } if (sav->tdb_xform == NULL) { DPRINTF(("%s: attempted to use uninitialized SA %s/%08lx/%u\n", __func__, ipsec_address(&dst_address, buf, sizeof(buf)), (u_long) ntohl(spi), sproto)); IPSEC_ISTAT(sproto, noxform); key_freesav(&sav); m_freem(m); return ENXIO; } /* * Call appropriate transform and return -- callback takes care of * everything else. */ error = (*sav->tdb_xform->xf_input)(m, sav, skip, protoff); return (error); } #ifdef INET extern struct protosw inetsw[]; /* * IPSEC_INPUT() method implementation for IPv4. * 0 - Permitted by inbound security policy for further processing. * EACCES - Forbidden by inbound security policy. * EINPROGRESS - consumed by IPsec. */ int ipsec4_input(struct mbuf *m, int offset, int proto) { switch (proto) { case IPPROTO_AH: case IPPROTO_ESP: case IPPROTO_IPCOMP: /* Do inbound IPsec processing for AH/ESP/IPCOMP */ ipsec_common_input(m, offset, offsetof(struct ip, ip_p), AF_INET, proto); return (EINPROGRESS); /* mbuf consumed by IPsec */ default: /* * Protocols with further headers get their IPsec treatment * within the protocol specific processing. */ if ((inetsw[ip_protox[proto]].pr_flags & PR_LASTHDR) == 0) return (0); /* FALLTHROUGH */ }; /* * Enforce IPsec policy checking if we are seeing last header. */ if (ipsec4_in_reject(m, NULL) != 0) { /* Forbidden by inbound security policy */ m_freem(m); return (EACCES); } return (0); } +int +ipsec4_ctlinput(int code, struct sockaddr *sa, void *v) +{ + struct in_conninfo inc; + struct secasvar *sav; + struct icmp *icp; + struct ip *ip = v; + uint32_t pmtu, spi; + + if (code != PRC_MSGSIZE || ip == NULL) + return (EINVAL); + if (sa->sa_family != AF_INET || + sa->sa_len != sizeof(struct sockaddr_in)) + return (EAFNOSUPPORT); + + icp = __containerof(ip, struct icmp, icmp_ip); + pmtu = ntohs(icp->icmp_nextmtu); + + if (pmtu < V_ip4_ipsec_min_pmtu) + return (EINVAL); + + memcpy(&spi, (caddr_t)ip + (ip->ip_hl << 2), sizeof(spi)); + sav = key_allocsa((union sockaddr_union *)sa, ip->ip_p, spi); + if (sav == NULL) + return (ENOENT); + + key_freesav(&sav); + + memset(&inc, 0, sizeof(inc)); + inc.inc_faddr = satosin(sa)->sin_addr; + tcp_hc_updatemtu(&inc, pmtu); + return (0); +} + /* * IPsec input callback for INET protocols. * This routine is called as the transform callback. 
* Takes care of filtering and other sanity checks on * the processed packet. */ int ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { IPSEC_DEBUG_DECLARE(char buf[IPSEC_ADDRSTRLEN]); struct epoch_tracker et; struct ipsec_ctx_data ctx; struct xform_history *xh; struct secasindex *saidx; struct m_tag *mtag; struct ip *ip; int error, prot, af, sproto, isr_prot; IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); saidx = &sav->sah->saidx; af = saidx->dst.sa.sa_family; IPSEC_ASSERT(af == AF_INET, ("unexpected af %u", af)); sproto = saidx->proto; IPSEC_ASSERT(sproto == IPPROTO_ESP || sproto == IPPROTO_AH || sproto == IPPROTO_IPCOMP, ("unexpected security protocol %u", sproto)); if (skip != 0) { /* * Fix IPv4 header */ if (m->m_len < skip && (m = m_pullup(m, skip)) == NULL) { DPRINTF(("%s: processing failed for SA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); IPSEC_ISTAT(sproto, hdrops); error = ENOBUFS; goto bad; } ip = mtod(m, struct ip *); ip->ip_len = htons(m->m_pkthdr.len); ip->ip_sum = 0; ip->ip_sum = in_cksum(m, ip->ip_hl << 2); } else { ip = mtod(m, struct ip *); } prot = ip->ip_p; /* * Check that we have NAT-T enabled and apply transport mode * decapsulation NAT procedure (RFC3948). * Do this before invoking into the PFIL. */ if (sav->natt != NULL && (prot == IPPROTO_UDP || prot == IPPROTO_TCP)) udp_ipsec_adjust_cksum(m, sav, prot, skip); IPSEC_INIT_CTX(&ctx, &m, NULL, sav, AF_INET, IPSEC_ENC_BEFORE); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; ip = mtod(m, struct ip *); /* update pointer */ /* IP-in-IP encapsulation */ if (prot == IPPROTO_IPIP && saidx->mode != IPSEC_MODE_TRANSPORT) { if (m->m_pkthdr.len - skip < sizeof(struct ip)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; goto bad; } /* enc0: strip outer IPv4 header */ m_striphdr(m, 0, ip->ip_hl << 2); } #ifdef INET6 /* IPv6-in-IP encapsulation. */ else if (prot == IPPROTO_IPV6 && saidx->mode != IPSEC_MODE_TRANSPORT) { if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; goto bad; } /* enc0: strip IPv4 header, keep IPv6 header only */ m_striphdr(m, 0, ip->ip_hl << 2); } #endif /* INET6 */ else if (prot != IPPROTO_IPV6 && saidx->mode == IPSEC_MODE_ANY) { /* * When mode is wildcard, inner protocol is IPv6 and * we have no INET6 support - drop this packet a bit later. * In other cases we assume transport mode. Set prot to * correctly choose netisr. */ prot = IPPROTO_IPIP; } /* * Record what we've done to the packet (under what SA it was * processed). */ if (sproto != IPPROTO_IPCOMP) { mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE, sizeof(struct xform_history), M_NOWAIT); if (mtag == NULL) { DPRINTF(("%s: failed to get tag\n", __func__)); IPSEC_ISTAT(sproto, hdrops); error = ENOMEM; goto bad; } xh = (struct xform_history *)(mtag + 1); bcopy(&saidx->dst, &xh->dst, saidx->dst.sa.sa_len); xh->spi = sav->spi; xh->proto = sproto; xh->mode = saidx->mode; m_tag_prepend(m, mtag); } key_sa_recordxfer(sav, m); /* record data transfer */ /* * In transport mode requeue decrypted mbuf back to IPv4 protocol * handler. This is necessary to correctly expose rcvif. */ if (saidx->mode == IPSEC_MODE_TRANSPORT) prot = IPPROTO_IPIP; /* * Re-dispatch via software interrupt. 
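 * The inner packet is queued to NETISR_IP or NETISR_IPV6 according to
 * the inner protocol; netisr_queue_src() is called with the SPI as the
 * source identifier.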
*/ switch (prot) { case IPPROTO_IPIP: isr_prot = NETISR_IP; af = AF_INET; break; #ifdef INET6 case IPPROTO_IPV6: isr_prot = NETISR_IPV6; af = AF_INET6; break; #endif default: DPRINTF(("%s: cannot handle inner ip proto %d\n", __func__, prot)); IPSEC_ISTAT(sproto, nopf); error = EPFNOSUPPORT; goto bad; } IPSEC_INIT_CTX(&ctx, &m, NULL, sav, af, IPSEC_ENC_AFTER); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; /* Handle virtual tunneling interfaces */ if (saidx->mode == IPSEC_MODE_TUNNEL) error = ipsec_if_input(m, sav, af); if (error == 0) { NET_EPOCH_ENTER(et); error = netisr_queue_src(isr_prot, (uintptr_t)sav->spi, m); NET_EPOCH_EXIT(et); if (error) { IPSEC_ISTAT(sproto, qfull); DPRINTF(("%s: queue full; proto %u packet dropped\n", __func__, sproto)); } } key_freesav(&sav); return (error); bad: key_freesav(&sav); if (m != NULL) m_freem(m); return (error); } #endif /* INET */ #ifdef INET6 /* * IPSEC_INPUT() method implementation for IPv6. * 0 - Permitted by inbound security policy for further processing. * EACCES - Forbidden by inbound security policy. * EINPROGRESS - consumed by IPsec. */ int ipsec6_input(struct mbuf *m, int offset, int proto) { switch (proto) { case IPPROTO_AH: case IPPROTO_ESP: case IPPROTO_IPCOMP: /* Do inbound IPsec processing for AH/ESP/IPCOMP */ ipsec_common_input(m, offset, offsetof(struct ip6_hdr, ip6_nxt), AF_INET6, proto); return (EINPROGRESS); /* mbuf consumed by IPsec */ default: /* * Protocols with further headers get their IPsec treatment * within the protocol specific processing. */ if ((inet6sw[ip6_protox[proto]].pr_flags & PR_LASTHDR) == 0) return (0); /* FALLTHROUGH */ }; /* * Enforce IPsec policy checking if we are seeing last header. */ if (ipsec6_in_reject(m, NULL) != 0) { /* Forbidden by inbound security policy */ m_freem(m); return (EACCES); } return (0); } +int +ipsec6_ctlinput(int code, struct sockaddr *sa, void *v) +{ + return (0); +} + /* * IPsec input callback, called by the transform callback. Takes care of * filtering and other sanity checks on the processed packet. 
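 * (ipsec6_ctlinput() above is currently a stub; only the IPv4 path
 * consumes ICMP need-fragmentation notifications in this change.)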
*/ int ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { IPSEC_DEBUG_DECLARE(char buf[IPSEC_ADDRSTRLEN]); struct epoch_tracker et; struct ipsec_ctx_data ctx; struct xform_history *xh; struct secasindex *saidx; struct ip6_hdr *ip6; struct m_tag *mtag; int prot, af, sproto; int nxt, isr_prot; int error, nest; uint8_t nxt8; IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); saidx = &sav->sah->saidx; af = saidx->dst.sa.sa_family; IPSEC_ASSERT(af == AF_INET6, ("unexpected af %u", af)); sproto = saidx->proto; IPSEC_ASSERT(sproto == IPPROTO_ESP || sproto == IPPROTO_AH || sproto == IPPROTO_IPCOMP, ("unexpected security protocol %u", sproto)); /* Fix IPv6 header */ if (m->m_len < sizeof(struct ip6_hdr) && (m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) { DPRINTF(("%s: processing failed for SA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); IPSEC_ISTAT(sproto, hdrops); error = EACCES; goto bad; } IPSEC_INIT_CTX(&ctx, &m, NULL, sav, af, IPSEC_ENC_BEFORE); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); /* Save protocol */ m_copydata(m, protoff, 1, &nxt8); prot = nxt8; /* IPv6-in-IP encapsulation */ if (prot == IPPROTO_IPV6 && saidx->mode != IPSEC_MODE_TRANSPORT) { if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; goto bad; } /* ip6n will now contain the inner IPv6 header. */ m_striphdr(m, 0, skip); skip = 0; } #ifdef INET /* IP-in-IP encapsulation */ else if (prot == IPPROTO_IPIP && saidx->mode != IPSEC_MODE_TRANSPORT) { if (m->m_pkthdr.len - skip < sizeof(struct ip)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; goto bad; } /* ipn will now contain the inner IPv4 header */ m_striphdr(m, 0, skip); skip = 0; } #endif /* INET */ else { prot = IPPROTO_IPV6; /* for correct BPF processing */ } /* * Record what we've done to the packet (under what SA it was * processed). */ if (sproto != IPPROTO_IPCOMP) { mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE, sizeof(struct xform_history), M_NOWAIT); if (mtag == NULL) { DPRINTF(("%s: failed to get tag\n", __func__)); IPSEC_ISTAT(sproto, hdrops); error = ENOMEM; goto bad; } xh = (struct xform_history *)(mtag + 1); bcopy(&saidx->dst, &xh->dst, saidx->dst.sa.sa_len); xh->spi = sav->spi; xh->proto = sproto; xh->mode = saidx->mode; m_tag_prepend(m, mtag); } key_sa_recordxfer(sav, m); #ifdef INET if (prot == IPPROTO_IPIP) af = AF_INET; else #endif af = AF_INET6; IPSEC_INIT_CTX(&ctx, &m, NULL, sav, af, IPSEC_ENC_AFTER); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; if (skip == 0) { /* * We stripped outer IPv6 header. * Now we should requeue decrypted packet via netisr. 
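 * If the outer header was not stripped (transport mode), processing
 * instead continues through the IPv6 extension-header chain further
 * below, mirroring the loop at the end of ip6_input().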
*/ switch (prot) { #ifdef INET case IPPROTO_IPIP: isr_prot = NETISR_IP; break; #endif case IPPROTO_IPV6: isr_prot = NETISR_IPV6; break; default: DPRINTF(("%s: cannot handle inner ip proto %d\n", __func__, prot)); IPSEC_ISTAT(sproto, nopf); error = EPFNOSUPPORT; goto bad; } /* Handle virtual tunneling interfaces */ if (saidx->mode == IPSEC_MODE_TUNNEL) error = ipsec_if_input(m, sav, af); if (error == 0) { NET_EPOCH_ENTER(et); error = netisr_queue_src(isr_prot, (uintptr_t)sav->spi, m); NET_EPOCH_EXIT(et); if (error) { IPSEC_ISTAT(sproto, qfull); DPRINTF(("%s: queue full; proto %u packet" " dropped\n", __func__, sproto)); } } key_freesav(&sav); return (error); } /* * See the end of ip6_input for this logic. * IPPROTO_IPV[46] case will be processed just like other ones */ nest = 0; nxt = nxt8; NET_EPOCH_ENTER(et); while (nxt != IPPROTO_DONE) { if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) { IP6STAT_INC(ip6s_toomanyhdr); error = EINVAL; goto bad_epoch; } /* * Protection against faulty packet - there should be * more sanity checks in header chain processing. */ if (m->m_pkthdr.len < skip) { IP6STAT_INC(ip6s_tooshort); in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated); error = EINVAL; goto bad_epoch; } /* * Enforce IPsec policy checking if we are seeing last header. * note that we do not visit this with protocols with pcb layer * code - like udp/tcp/raw ip. */ if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0 && ipsec6_in_reject(m, NULL)) { error = EINVAL; goto bad_epoch; } nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &skip, nxt); } NET_EPOCH_EXIT(et); key_freesav(&sav); return (0); bad_epoch: NET_EPOCH_EXIT(et); bad: key_freesav(&sav); if (m) m_freem(m); return (error); } #endif /* INET6 */ diff --git a/sys/netipsec/ipsec_mod.c b/sys/netipsec/ipsec_mod.c index 06523337f9fb..90cffde21968 100644 --- a/sys/netipsec/ipsec_mod.c +++ b/sys/netipsec/ipsec_mod.c @@ -1,148 +1,150 @@ /*- * Copyright (c) 2016 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET static const struct ipsec_methods ipv4_methods = { .input = ipsec4_input, .forward = ipsec4_forward, .output = ipsec4_output, .pcbctl = ipsec4_pcbctl, .capability = ipsec4_capability, .check_policy = ipsec4_in_reject, + .ctlinput = ipsec4_ctlinput, .hdrsize = ipsec_hdrsiz_inpcb, .udp_input = udp_ipsec_input, .udp_pcbctl = udp_ipsec_pcbctl, }; #ifndef KLD_MODULE static const struct ipsec_support ipv4_ipsec = { .enabled = IPSEC_MODULE_ENABLED, .methods = &ipv4_methods }; const struct ipsec_support * const ipv4_ipsec_support = &ipv4_ipsec; #endif /* !KLD_MODULE */ #endif /* INET */ #ifdef INET6 static const struct ipsec_methods ipv6_methods = { .input = ipsec6_input, .forward = ipsec6_forward, .output = ipsec6_output, .pcbctl = ipsec6_pcbctl, .capability = ipsec6_capability, .check_policy = ipsec6_in_reject, + .ctlinput = ipsec6_ctlinput, .hdrsize = ipsec_hdrsiz_inpcb, }; #ifndef KLD_MODULE static const struct ipsec_support ipv6_ipsec = { .enabled = IPSEC_MODULE_ENABLED, .methods = &ipv6_methods }; const struct ipsec_support * const ipv6_ipsec_support = &ipv6_ipsec; #endif /* !KLD_MODULE */ #endif /* INET6 */ /* * Always register ipsec module. * Even when IPsec is build in the kernel, we need to have * module registered. This will prevent to load ipsec.ko. */ static int ipsec_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: /* All xforms are registered via SYSINIT */ if (!ipsec_initialized()) return (ENOMEM); #ifdef KLD_MODULE #ifdef INET ipsec_support_enable(ipv4_ipsec_support, &ipv4_methods); #endif #ifdef INET6 ipsec_support_enable(ipv6_ipsec_support, &ipv6_methods); #endif #endif /* KLD_MODULE */ break; case MOD_UNLOAD: /* All xforms are unregistered via SYSUNINIT */ #ifdef KLD_MODULE #ifdef INET ipsec_support_disable(ipv4_ipsec_support); #endif #ifdef INET6 ipsec_support_disable(ipv6_ipsec_support); #endif #endif /* KLD_MODULE */ break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t ipsec_mod = { "ipsec", ipsec_modevent, 0 }; DECLARE_MODULE(ipsec, ipsec_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); MODULE_VERSION(ipsec, 1); #ifdef KLD_MODULE MODULE_DEPEND(ipsec, ipsec_support, 1, 1, 1); #endif diff --git a/sys/netipsec/ipsec_support.h b/sys/netipsec/ipsec_support.h index b72aee20a7ae..e4d0aea906f1 100644 --- a/sys/netipsec/ipsec_support.h +++ b/sys/netipsec/ipsec_support.h @@ -1,190 +1,196 @@ /*- * Copyright (c) 2016 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NETIPSEC_IPSEC_SUPPORT_H_ #define _NETIPSEC_IPSEC_SUPPORT_H_ #ifdef _KERNEL #if defined(IPSEC) || defined(IPSEC_SUPPORT) struct mbuf; struct inpcb; struct tcphdr; struct sockopt; struct sockaddr; struct ipsec_support; struct tcpmd5_support; size_t ipsec_hdrsiz_inpcb(struct inpcb *); int ipsec_init_pcbpolicy(struct inpcb *); int ipsec_delete_pcbpolicy(struct inpcb *); int ipsec_copy_pcbpolicy(struct inpcb *, struct inpcb *); struct ipsec_methods { int (*input)(struct mbuf *, int, int); int (*check_policy)(const struct mbuf *, struct inpcb *); int (*forward)(struct mbuf *); int (*output)(struct mbuf *, struct inpcb *); int (*pcbctl)(struct inpcb *, struct sockopt *); size_t (*hdrsize)(struct inpcb *); int (*capability)(struct mbuf *, u_int); int (*ctlinput)(int, struct sockaddr *, void *); int (*udp_input)(struct mbuf *, int, int); int (*udp_pcbctl)(struct inpcb *, struct sockopt *); }; #define IPSEC_CAP_OPERABLE 1 #define IPSEC_CAP_BYPASS_FILTER 2 struct tcpmd5_methods { int (*input)(struct mbuf *, struct tcphdr *, u_char *); int (*output)(struct mbuf *, struct tcphdr *, u_char *); int (*pcbctl)(struct inpcb *, struct sockopt *); }; #define IPSEC_MODULE_ENABLED 0x0001 #define IPSEC_ENABLED(proto) \ ((proto ## _ipsec_support)->enabled & IPSEC_MODULE_ENABLED) #define TCPMD5_ENABLED() IPSEC_ENABLED(tcp) #ifdef TCP_SIGNATURE /* TCP-MD5 build in the kernel */ struct tcpmd5_support { const u_int enabled; const struct tcpmd5_methods * const methods; }; extern const struct tcpmd5_support * const tcp_ipsec_support; #define TCPMD5_INPUT(m, ...) \ (*tcp_ipsec_support->methods->input)(m, __VA_ARGS__) #define TCPMD5_OUTPUT(m, ...) \ (*tcp_ipsec_support->methods->output)(m, __VA_ARGS__) #define TCPMD5_PCBCTL(inp, sopt) \ (*tcp_ipsec_support->methods->pcbctl)(inp, sopt) #elif defined(IPSEC_SUPPORT) /* TCP-MD5 build as module */ struct tcpmd5_support { volatile u_int enabled; const struct tcpmd5_methods * volatile methods; }; extern struct tcpmd5_support * const tcp_ipsec_support; void tcpmd5_support_enable(const struct tcpmd5_methods * const); void tcpmd5_support_disable(void); int tcpmd5_kmod_pcbctl(struct tcpmd5_support * const, struct inpcb *, struct sockopt *); int tcpmd5_kmod_input(struct tcpmd5_support * const, struct mbuf *, struct tcphdr *, u_char *); int tcpmd5_kmod_output(struct tcpmd5_support * const, struct mbuf *, struct tcphdr *, u_char *); #define TCPMD5_INPUT(m, ...) \ tcpmd5_kmod_input(tcp_ipsec_support, m, __VA_ARGS__) #define TCPMD5_OUTPUT(m, ...) \ tcpmd5_kmod_output(tcp_ipsec_support, m, __VA_ARGS__) #define TCPMD5_PCBCTL(inp, sopt) \ tcpmd5_kmod_pcbctl(tcp_ipsec_support, inp, sopt) #endif #endif /* IPSEC || IPSEC_SUPPORT */ #if defined(IPSEC) struct ipsec_support { const u_int enabled; const struct ipsec_methods * const methods; }; extern const struct ipsec_support * const ipv4_ipsec_support; extern const struct ipsec_support * const ipv6_ipsec_support; #define IPSEC_INPUT(proto, m, ...) 
\ (*(proto ## _ipsec_support)->methods->input)(m, __VA_ARGS__) #define IPSEC_CHECK_POLICY(proto, m, ...) \ (*(proto ## _ipsec_support)->methods->check_policy)(m, __VA_ARGS__) #define IPSEC_FORWARD(proto, m) \ (*(proto ## _ipsec_support)->methods->forward)(m) #define IPSEC_OUTPUT(proto, m, ...) \ (*(proto ## _ipsec_support)->methods->output)(m, __VA_ARGS__) #define IPSEC_PCBCTL(proto, inp, sopt) \ (*(proto ## _ipsec_support)->methods->pcbctl)(inp, sopt) #define IPSEC_CAPS(proto, m, ...) \ (*(proto ## _ipsec_support)->methods->capability)(m, __VA_ARGS__) #define IPSEC_HDRSIZE(proto, inp) \ (*(proto ## _ipsec_support)->methods->hdrsize)(inp) +#define IPSEC_CTLINPUT(proto, code, sa, v) \ + (*(proto ## _ipsec_support)->methods->ctlinput)(code, sa, v) #define UDPENCAP_INPUT(m, ...) \ (*ipv4_ipsec_support->methods->udp_input)(m, __VA_ARGS__) #define UDPENCAP_PCBCTL(inp, sopt) \ (*ipv4_ipsec_support->methods->udp_pcbctl)(inp, sopt) #elif defined(IPSEC_SUPPORT) struct ipsec_support { volatile u_int enabled; const struct ipsec_methods * volatile methods; }; extern struct ipsec_support * const ipv4_ipsec_support; extern struct ipsec_support * const ipv6_ipsec_support; void ipsec_support_enable(struct ipsec_support * const, const struct ipsec_methods * const); void ipsec_support_disable(struct ipsec_support * const); int ipsec_kmod_input(struct ipsec_support * const, struct mbuf *, int, int); int ipsec_kmod_check_policy(struct ipsec_support * const, struct mbuf *, struct inpcb *); int ipsec_kmod_forward(struct ipsec_support * const, struct mbuf *); int ipsec_kmod_output(struct ipsec_support * const, struct mbuf *, struct inpcb *); int ipsec_kmod_pcbctl(struct ipsec_support * const, struct inpcb *, struct sockopt *); int ipsec_kmod_capability(struct ipsec_support * const, struct mbuf *, u_int); size_t ipsec_kmod_hdrsize(struct ipsec_support * const, struct inpcb *); +int ipsec_kmod_ctlinput(struct ipsec_support * const, int, + struct sockaddr *, void *); int ipsec_kmod_udp_input(struct ipsec_support * const, struct mbuf *, int, int); int ipsec_kmod_udp_pcbctl(struct ipsec_support * const, struct inpcb *, struct sockopt *); #define UDPENCAP_INPUT(m, ...) \ ipsec_kmod_udp_input(ipv4_ipsec_support, m, __VA_ARGS__) #define UDPENCAP_PCBCTL(inp, sopt) \ ipsec_kmod_udp_pcbctl(ipv4_ipsec_support, inp, sopt) #define IPSEC_INPUT(proto, ...) \ ipsec_kmod_input(proto ## _ipsec_support, __VA_ARGS__) #define IPSEC_CHECK_POLICY(proto, ...) \ ipsec_kmod_check_policy(proto ## _ipsec_support, __VA_ARGS__) #define IPSEC_FORWARD(proto, ...) \ ipsec_kmod_forward(proto ## _ipsec_support, __VA_ARGS__) #define IPSEC_OUTPUT(proto, ...) \ ipsec_kmod_output(proto ## _ipsec_support, __VA_ARGS__) #define IPSEC_PCBCTL(proto, ...) \ ipsec_kmod_pcbctl(proto ## _ipsec_support, __VA_ARGS__) #define IPSEC_CAPS(proto, ...) \ ipsec_kmod_capability(proto ## _ipsec_support, __VA_ARGS__) #define IPSEC_HDRSIZE(proto, ...) \ ipsec_kmod_hdrsize(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_CTLINPUT(proto, ...) \ + ipsec_kmod_ctlinput(proto ## _ipsec_support, __VA_ARGS__) #endif /* IPSEC_SUPPORT */ #endif /* _KERNEL */ #endif /* _NETIPSEC_IPSEC_SUPPORT_H_ */ diff --git a/sys/netipsec/subr_ipsec.c b/sys/netipsec/subr_ipsec.c index 37b686a7e91e..13c7336154e4 100644 --- a/sys/netipsec/subr_ipsec.c +++ b/sys/netipsec/subr_ipsec.c @@ -1,424 +1,429 @@ /*- * Copyright (c) 2016 Andrey V. Elsukov * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * This file is build in the kernel only when 'options IPSEC' or * 'options IPSEC_SUPPORT' is enabled. */ #ifdef INET void ipsec4_setsockaddrs(const struct mbuf *m, union sockaddr_union *src, union sockaddr_union *dst) { static const struct sockaddr_in template = { sizeof (struct sockaddr_in), AF_INET, 0, { 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 } }; src->sin = template; dst->sin = template; if (m->m_len < sizeof (struct ip)) { m_copydata(m, offsetof(struct ip, ip_src), sizeof (struct in_addr), (caddr_t) &src->sin.sin_addr); m_copydata(m, offsetof(struct ip, ip_dst), sizeof (struct in_addr), (caddr_t) &dst->sin.sin_addr); } else { const struct ip *ip = mtod(m, const struct ip *); src->sin.sin_addr = ip->ip_src; dst->sin.sin_addr = ip->ip_dst; } } #endif #ifdef INET6 void ipsec6_setsockaddrs(const struct mbuf *m, union sockaddr_union *src, union sockaddr_union *dst) { struct ip6_hdr ip6buf; const struct ip6_hdr *ip6; if (m->m_len >= sizeof(*ip6)) ip6 = mtod(m, const struct ip6_hdr *); else { m_copydata(m, 0, sizeof(ip6buf), (caddr_t)&ip6buf); ip6 = &ip6buf; } bzero(&src->sin6, sizeof(struct sockaddr_in6)); src->sin6.sin6_family = AF_INET6; src->sin6.sin6_len = sizeof(struct sockaddr_in6); bcopy(&ip6->ip6_src, &src->sin6.sin6_addr, sizeof(ip6->ip6_src)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { src->sin6.sin6_addr.s6_addr16[1] = 0; src->sin6.sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); } bzero(&dst->sin6, sizeof(struct sockaddr_in6)); dst->sin6.sin6_family = AF_INET6; dst->sin6.sin6_len = sizeof(struct sockaddr_in6); bcopy(&ip6->ip6_dst, &dst->sin6.sin6_addr, sizeof(ip6->ip6_dst)); if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { dst->sin6.sin6_addr.s6_addr16[1] = 0; dst->sin6.sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); } } #endif #define IPSEC_MODULE_INCR 2 static int ipsec_kmod_enter(volatile u_int *cntr) { u_int old, new; do { old = *cntr; if ((old & IPSEC_MODULE_ENABLED) == 0) return (ENXIO); new = old + IPSEC_MODULE_INCR; } while(atomic_cmpset_acq_int(cntr, old, new) == 0); 
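	/*
	 * The counter now holds IPSEC_MODULE_ENABLED in bit 0 plus one
	 * IPSEC_MODULE_INCR (2) per in-flight method call; the reference
	 * taken here is dropped by ipsec_kmod_exit().
	 */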
    return (0);
}

static void
ipsec_kmod_exit(volatile u_int *cntr)
{
    u_int old, new;

    do {
        old = *cntr;
        new = old - IPSEC_MODULE_INCR;
    } while (atomic_cmpset_rel_int(cntr, old, new) == 0);
}

static void
ipsec_kmod_drain(volatile u_int *cntr)
{
    u_int old, new;

    do {
        old = *cntr;
        new = old & ~IPSEC_MODULE_ENABLED;
    } while (atomic_cmpset_acq_int(cntr, old, new) == 0);
    while (atomic_cmpset_int(cntr, 0, 0) == 0)
        pause("ipsecd", hz/2);
}

static LIST_HEAD(xforms_list, xformsw) xforms = LIST_HEAD_INITIALIZER();
static struct mtx xforms_lock;
MTX_SYSINIT(xfroms_list, &xforms_lock, "IPsec transforms list", MTX_DEF);
#define XFORMS_LOCK()   mtx_lock(&xforms_lock)
#define XFORMS_UNLOCK() mtx_unlock(&xforms_lock)

void
xform_attach(void *data)
{
    struct xformsw *xsp, *entry;

    xsp = (struct xformsw *)data;
    XFORMS_LOCK();
    LIST_FOREACH(entry, &xforms, chain) {
        if (entry->xf_type == xsp->xf_type) {
            XFORMS_UNLOCK();
            printf("%s: failed to register %s xform\n", __func__,
                xsp->xf_name);
            return;
        }
    }
    LIST_INSERT_HEAD(&xforms, xsp, chain);
    xsp->xf_cntr = IPSEC_MODULE_ENABLED;
    XFORMS_UNLOCK();
}

void
xform_detach(void *data)
{
    struct xformsw *xsp = (struct xformsw *)data;

    XFORMS_LOCK();
    LIST_REMOVE(xsp, chain);
    XFORMS_UNLOCK();

    /* Delete all SAs related to this xform. */
    key_delete_xform(xsp);
    if (xsp->xf_cntr & IPSEC_MODULE_ENABLED)
        ipsec_kmod_drain(&xsp->xf_cntr);
}

/*
 * Initialize transform support in an sav.
 */
int
xform_init(struct secasvar *sav, u_short xftype)
{
    struct xformsw *entry;
    int ret;

    IPSEC_ASSERT(sav->tdb_xform == NULL,
        ("tdb_xform is already initialized"));

    XFORMS_LOCK();
    LIST_FOREACH(entry, &xforms, chain) {
        if (entry->xf_type == xftype) {
            ret = ipsec_kmod_enter(&entry->xf_cntr);
            XFORMS_UNLOCK();
            if (ret != 0)
                return (ret);
            ret = (*entry->xf_init)(sav, entry);
            ipsec_kmod_exit(&entry->xf_cntr);
            return (ret);
        }
    }
    XFORMS_UNLOCK();
    return (EINVAL);
}

#ifdef IPSEC_SUPPORT
/*
 * IPSEC_SUPPORT - loading of ipsec.ko and tcpmd5.ko is supported.
 * IPSEC + IPSEC_SUPPORT - loading of tcpmd5.ko is supported.
 * IPSEC + TCP_SIGNATURE - everything is built into the kernel; do not
 * build IPSEC_SUPPORT.
 */
#if !defined(IPSEC) || !defined(TCP_SIGNATURE)
#define METHOD_DECL(...)    __VA_ARGS__
#define METHOD_ARGS(...)    __VA_ARGS__
#define IPSEC_KMOD_METHOD(type, name, sc, method, decl, args)      \
type name (decl)                                                    \
{                                                                   \
    type ret = (type)ipsec_kmod_enter(&sc->enabled);                \
                                                                    \
    if (ret == 0) {                                                 \
        ret = (*sc->methods->method)(args);                         \
        ipsec_kmod_exit(&sc->enabled);                              \
    }                                                               \
    return (ret);                                                   \
}

static int
ipsec_support_modevent(module_t mod, int type, void *data)
{

    switch (type) {
    case MOD_LOAD:
        return (0);
    case MOD_UNLOAD:
        return (EBUSY);
    default:
        return (EOPNOTSUPP);
    }
}

static moduledata_t ipsec_support_mod = {
    "ipsec_support",
    ipsec_support_modevent,
    0
};
DECLARE_MODULE(ipsec_support, ipsec_support_mod, SI_SUB_PROTO_DOMAIN,
    SI_ORDER_ANY);
MODULE_VERSION(ipsec_support, 1);
#endif /* !IPSEC || !TCP_SIGNATURE */

#ifndef TCP_SIGNATURE
/* Declare TCP-MD5 support as a kernel module.
 */
static struct tcpmd5_support tcpmd5_ipsec = {
    .enabled = 0,
    .methods = NULL
};
struct tcpmd5_support * const tcp_ipsec_support = &tcpmd5_ipsec;

IPSEC_KMOD_METHOD(int, tcpmd5_kmod_input, sc,
    input, METHOD_DECL(struct tcpmd5_support * const sc, struct mbuf *m,
    struct tcphdr *th, u_char *buf), METHOD_ARGS(m, th, buf)
)

IPSEC_KMOD_METHOD(int, tcpmd5_kmod_output, sc,
    output, METHOD_DECL(struct tcpmd5_support * const sc, struct mbuf *m,
    struct tcphdr *th, u_char *buf), METHOD_ARGS(m, th, buf)
)

IPSEC_KMOD_METHOD(int, tcpmd5_kmod_pcbctl, sc,
    pcbctl, METHOD_DECL(struct tcpmd5_support * const sc, struct inpcb *inp,
    struct sockopt *sopt), METHOD_ARGS(inp, sopt)
)

void
tcpmd5_support_enable(const struct tcpmd5_methods * const methods)
{

    KASSERT(tcp_ipsec_support->enabled == 0, ("TCP-MD5 already enabled"));
    tcp_ipsec_support->methods = methods;
    tcp_ipsec_support->enabled |= IPSEC_MODULE_ENABLED;
}

void
tcpmd5_support_disable(void)
{

    if (tcp_ipsec_support->enabled & IPSEC_MODULE_ENABLED) {
        ipsec_kmod_drain(&tcp_ipsec_support->enabled);
        tcp_ipsec_support->methods = NULL;
    }
}
#endif /* !TCP_SIGNATURE */

#ifndef IPSEC
/*
 * IPsec support is built as a kernel module.
 */
#ifdef INET
static struct ipsec_support ipv4_ipsec = {
    .enabled = 0,
    .methods = NULL
};
struct ipsec_support * const ipv4_ipsec_support = &ipv4_ipsec;

IPSEC_KMOD_METHOD(int, ipsec_kmod_udp_input, sc,
    udp_input, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m,
    int off, int af), METHOD_ARGS(m, off, af)
)

IPSEC_KMOD_METHOD(int, ipsec_kmod_udp_pcbctl, sc,
    udp_pcbctl, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp,
    struct sockopt *sopt), METHOD_ARGS(inp, sopt)
)
#endif

#ifdef INET6
static struct ipsec_support ipv6_ipsec = {
    .enabled = 0,
    .methods = NULL
};
struct ipsec_support * const ipv6_ipsec_support = &ipv6_ipsec;
#endif

IPSEC_KMOD_METHOD(int, ipsec_kmod_input, sc,
    input, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m,
    int offset, int proto), METHOD_ARGS(m, offset, proto)
)

IPSEC_KMOD_METHOD(int, ipsec_kmod_check_policy, sc,
    check_policy, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m,
    struct inpcb *inp), METHOD_ARGS(m, inp)
)

IPSEC_KMOD_METHOD(int, ipsec_kmod_forward, sc,
    forward, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m), (m)
)

+IPSEC_KMOD_METHOD(int, ipsec_kmod_ctlinput, sc,
+    ctlinput, METHOD_DECL(struct ipsec_support * const sc, int code,
+    struct sockaddr *sa, void *v), METHOD_ARGS(code, sa, v)
+)
+
IPSEC_KMOD_METHOD(int, ipsec_kmod_output, sc,
    output, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m,
    struct inpcb *inp), METHOD_ARGS(m, inp)
)

IPSEC_KMOD_METHOD(int, ipsec_kmod_pcbctl, sc,
    pcbctl, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp,
    struct sockopt *sopt), METHOD_ARGS(inp, sopt)
)

IPSEC_KMOD_METHOD(size_t, ipsec_kmod_hdrsize, sc,
    hdrsize, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp),
    (inp)
)

static IPSEC_KMOD_METHOD(int, ipsec_kmod_caps, sc,
    capability, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m,
    u_int cap), METHOD_ARGS(m, cap)
)

int
ipsec_kmod_capability(struct ipsec_support * const sc, struct mbuf *m,
    u_int cap)
{

    /*
     * Since PF_KEY is built into the kernel, we can call
     * key_havesp() directly without additional synchronization.
     */
    if (cap == IPSEC_CAP_OPERABLE)
        return (key_havesp(IPSEC_DIR_INBOUND) != 0 ||
            key_havesp(IPSEC_DIR_OUTBOUND) != 0);
    return (ipsec_kmod_caps(sc, m, cap));
}

void
ipsec_support_enable(struct ipsec_support * const sc,
    const struct ipsec_methods * const methods)
{

    KASSERT(sc->enabled == 0, ("IPsec already enabled"));
    sc->methods = methods;
    sc->enabled |= IPSEC_MODULE_ENABLED;
}

void
ipsec_support_disable(struct ipsec_support * const sc)
{

    if (sc->enabled & IPSEC_MODULE_ENABLED) {
        ipsec_kmod_drain(&sc->enabled);
        sc->methods = NULL;
    }
}
#endif /* !IPSEC */
#endif /* IPSEC_SUPPORT */
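
The hunks above only add the ctlinput method slot, its IPSEC_CTLINPUT() dispatch macro, and the ipsec_kmod_ctlinput() trampoline; how a protocol's control-input path would use the new hook is not shown in this portion of the diff. The sketch below is illustrative only and is not part of the patch: example_ipsec4_ctlinput() is a hypothetical caller, while IPSEC_ENABLED() and IPSEC_CTLINPUT() come from netipsec/ipsec_support.h as modified above.

/* Illustrative sketch, not part of the diff. */
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>

#include <netipsec/ipsec_support.h>

/*
 * Hypothetical hand-off of a PRC_* notification (e.g. an ICMP
 * need-frag or unreachable) to the IPsec code via the new hook.
 */
static void
example_ipsec4_ctlinput(int code, struct sockaddr *sa, void *vip)
{
    /*
     * With 'options IPSEC' the macro expands to a direct call through
     * the method table; with 'options IPSEC_SUPPORT' it goes through
     * the reference-counted ipsec_kmod_ctlinput() trampoline, which
     * returns ENXIO when ipsec.ko is not loaded.
     */
    if (IPSEC_ENABLED(ipv4))
        (void)IPSEC_CTLINPUT(ipv4, code, sa, vip);
}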