Index: head/contrib/netcat/netcat.c =================================================================== --- head/contrib/netcat/netcat.c +++ head/contrib/netcat/netcat.c @@ -131,7 +131,7 @@ ssize_t fillbuf(int, unsigned char *, size_t *); #ifdef IPSEC -void add_ipsec_policy(int, char *); +void add_ipsec_policy(int, int, char *); char *ipsec_policy[2]; #endif @@ -642,12 +642,6 @@ if ((s = socket(res0->ai_family, res0->ai_socktype, res0->ai_protocol)) < 0) continue; -#ifdef IPSEC - if (ipsec_policy[0] != NULL) - add_ipsec_policy(s, ipsec_policy[0]); - if (ipsec_policy[1] != NULL) - add_ipsec_policy(s, ipsec_policy[1]); -#endif if (rtableid >= 0 && (setsockopt(s, SOL_SOCKET, SO_SETFIB, &rtableid, sizeof(rtableid)) == -1)) @@ -765,12 +759,7 @@ ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &x, sizeof(x)); if (ret == -1) err(1, NULL); -#ifdef IPSEC - if (ipsec_policy[0] != NULL) - add_ipsec_policy(s, ipsec_policy[0]); - if (ipsec_policy[1] != NULL) - add_ipsec_policy(s, ipsec_policy[1]); -#endif + if (FreeBSD_Oflag) { if (setsockopt(s, IPPROTO_TCP, TCP_NOOPT, &FreeBSD_Oflag, sizeof(FreeBSD_Oflag)) == -1) @@ -1235,6 +1224,12 @@ &FreeBSD_Oflag, sizeof(FreeBSD_Oflag)) == -1) err(1, "disable TCP options"); } +#ifdef IPSEC + if (ipsec_policy[0] != NULL) + add_ipsec_policy(s, af, ipsec_policy[0]); + if (ipsec_policy[1] != NULL) + add_ipsec_policy(s, af, ipsec_policy[1]); +#endif } int @@ -1360,7 +1355,7 @@ #ifdef IPSEC void -add_ipsec_policy(int s, char *policy) +add_ipsec_policy(int s, int af, char *policy) { char *raw; int e; @@ -1369,8 +1364,12 @@ if (raw == NULL) errx(1, "ipsec_set_policy `%s': %s", policy, ipsec_strerror()); - e = setsockopt(s, IPPROTO_IP, IP_IPSEC_POLICY, raw, - ipsec_get_policylen(raw)); + if (af == AF_INET) + e = setsockopt(s, IPPROTO_IP, IP_IPSEC_POLICY, raw, + ipsec_get_policylen(raw)); + if (af == AF_INET6) + e = setsockopt(s, IPPROTO_IPV6, IPV6_IPSEC_POLICY, raw, + ipsec_get_policylen(raw)); if (e < 0) err(1, "ipsec policy cannot be configured"); free(raw); Index: head/lib/libipsec/pfkey.c =================================================================== --- head/lib/libipsec/pfkey.c +++ head/lib/libipsec/pfkey.c @@ -1776,21 +1776,17 @@ case SADB_EXT_SPIRANGE: case SADB_X_EXT_POLICY: case SADB_X_EXT_SA2: - case SADB_X_EXT_SA_REPLAY: - mhp[ext->sadb_ext_type] = (caddr_t)ext; - break; case SADB_X_EXT_NAT_T_TYPE: case SADB_X_EXT_NAT_T_SPORT: case SADB_X_EXT_NAT_T_DPORT: - /* case SADB_X_EXT_NAT_T_OA: is OAI */ case SADB_X_EXT_NAT_T_OAI: case SADB_X_EXT_NAT_T_OAR: case SADB_X_EXT_NAT_T_FRAG: - if (feature_present("ipsec_natt")) { - mhp[ext->sadb_ext_type] = (caddr_t)ext; - break; - } - /* FALLTHROUGH */ + case SADB_X_EXT_SA_REPLAY: + case SADB_X_EXT_NEW_ADDRESS_SRC: + case SADB_X_EXT_NEW_ADDRESS_DST: + mhp[ext->sadb_ext_type] = (caddr_t)ext; + break; default: __ipsec_errcode = EIPSEC_INVAL_EXTTYPE; return -1; Index: head/lib/libipsec/pfkey_dump.c =================================================================== --- head/lib/libipsec/pfkey_dump.c +++ head/lib/libipsec/pfkey_dump.c @@ -220,6 +220,9 @@ struct sadb_ident *m_sid, *m_did; struct sadb_sens *m_sens; struct sadb_x_sa_replay *m_sa_replay; + struct sadb_x_nat_t_type *natt_type; + struct sadb_x_nat_t_port *natt_sport, *natt_dport; + struct sadb_address *natt_oai, *natt_oar; /* check pfkey message. */ if (pfkey_align(m, mhp)) { @@ -245,33 +248,46 @@ m_did = (struct sadb_ident *)mhp[SADB_EXT_IDENTITY_DST]; m_sens = (struct sadb_sens *)mhp[SADB_EXT_SENSITIVITY]; m_sa_replay = (struct sadb_x_sa_replay *)mhp[SADB_X_EXT_SA_REPLAY]; + natt_type = (struct sadb_x_nat_t_type *)mhp[SADB_X_EXT_NAT_T_TYPE]; + natt_sport = (struct sadb_x_nat_t_port *)mhp[SADB_X_EXT_NAT_T_SPORT]; + natt_dport = (struct sadb_x_nat_t_port *)mhp[SADB_X_EXT_NAT_T_DPORT]; + natt_oai = (struct sadb_address *)mhp[SADB_X_EXT_NAT_T_OAI]; + natt_oar = (struct sadb_address *)mhp[SADB_X_EXT_NAT_T_OAR]; + /* source address */ if (m_saddr == NULL) { printf("no ADDRESS_SRC extension.\n"); return; } - printf("%s ", str_ipaddr((struct sockaddr *)(m_saddr + 1))); + printf("%s", str_ipaddr((struct sockaddr *)(m_saddr + 1))); + if (natt_type != NULL && natt_sport != NULL) + printf("[%u]", ntohs(natt_sport->sadb_x_nat_t_port_port)); /* destination address */ if (m_daddr == NULL) { - printf("no ADDRESS_DST extension.\n"); + printf("\nno ADDRESS_DST extension.\n"); return; } - printf("%s ", str_ipaddr((struct sockaddr *)(m_daddr + 1))); + printf(" %s", str_ipaddr((struct sockaddr *)(m_daddr + 1))); + if (natt_type != NULL && natt_dport != NULL) + printf("[%u]", ntohs(natt_dport->sadb_x_nat_t_port_port)); /* SA type */ if (m_sa == NULL) { - printf("no SA extension.\n"); + printf("\nno SA extension.\n"); return; } if (m_sa2 == NULL) { - printf("no SA2 extension.\n"); + printf("\nno SA2 extension.\n"); return; } printf("\n\t"); - GETMSGSTR(str_satype, m->sadb_msg_satype); + if (m->sadb_msg_satype == SADB_SATYPE_ESP && natt_type != NULL) + printf("esp-udp "); + else + GETMSGSTR(str_satype, m->sadb_msg_satype); printf("mode="); GETMSGSTR(str_mode, m_sa2->sadb_x_sa2_mode); @@ -281,6 +297,18 @@ (u_int32_t)ntohl(m_sa->sadb_sa_spi), (u_int32_t)m_sa2->sadb_x_sa2_reqid, (u_int32_t)m_sa2->sadb_x_sa2_reqid); + + /* other NAT-T information */ + if (natt_type != NULL && (natt_oai != NULL || natt_oar != NULL)) { + printf("\tNAT:"); + if (natt_oai != NULL) + printf(" OAI=%s", + str_ipaddr((struct sockaddr *)(natt_oai + 1))); + if (natt_oar != NULL) + printf(" OAR=%s", + str_ipaddr((struct sockaddr *)(natt_oar + 1))); + printf("\n"); + } /* encryption key */ if (m->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) { Index: head/sbin/ifconfig/Makefile =================================================================== --- head/sbin/ifconfig/Makefile +++ head/sbin/ifconfig/Makefile @@ -34,6 +34,7 @@ SRCS+= ifvxlan.c # VXLAN support SRCS+= ifgre.c # GRE keys etc SRCS+= ifgif.c # GIF reversed header workaround +SRCS+= ifipsec.c # IPsec VTI SRCS+= sfp.c # SFP/SFP+ information LIBADD+= m Index: head/sbin/ifconfig/ifipsec.c =================================================================== --- head/sbin/ifconfig/ifipsec.c +++ head/sbin/ifconfig/ifipsec.c @@ -0,0 +1,101 @@ +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "ifconfig.h" + +static void +ipsec_status(int s) +{ + uint32_t reqid; + + ifr.ifr_data = (caddr_t)&reqid; + if (ioctl(s, IPSECGREQID, &ifr) == -1) + return; + printf("\treqid: %u\n", reqid); +} + +static +DECL_CMD_FUNC(setreqid, val, arg) +{ + char *ep; + uint32_t v; + + v = strtoul(val, &ep, 0); + if (*ep != '\0') { + warn("Invalid reqid value %s", val); + return; + } + ifr.ifr_data = (char *)&v; + if (ioctl(s, IPSECSREQID, &ifr) == -1) { + warn("ioctl(IPSECSREQID)"); + return; + } +} + +static struct cmd ipsec_cmds[] = { + DEF_CMD_ARG("reqid", setreqid), +}; + +static struct afswtch af_ipsec = { + .af_name = "af_ipsec", + .af_af = AF_UNSPEC, + .af_other_status = ipsec_status, +}; + +static __constructor void +ipsec_ctor(void) +{ + size_t i; + + for (i = 0; i < nitems(ipsec_cmds); i++) + cmd_register(&ipsec_cmds[i]); + af_register(&af_ipsec); +#undef N +} Index: head/sbin/setkey/setkey.8 =================================================================== --- head/sbin/setkey/setkey.8 +++ head/sbin/setkey/setkey.8 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 3, 2016 +.Dd February 6, 2017 .Dt SETKEY 8 .Os .\" @@ -270,8 +270,6 @@ prefix. SPI values between 0 and 255 are reserved for future use by IANA and they cannot be used. -TCP-MD5 associations must use 0x1000 and therefore only have per-host -granularity at this time. .\" .Pp .It Ar extensions Index: head/share/man/man4/Makefile =================================================================== --- head/share/man/man4/Makefile +++ head/share/man/man4/Makefile @@ -201,6 +201,7 @@ icmp.4 \ icmp6.4 \ ida.4 \ + if_ipsec.4 \ ifmib.4 \ ig4.4 \ igb.4 \ Index: head/share/man/man4/if_ipsec.4 =================================================================== --- head/share/man/man4/if_ipsec.4 +++ head/share/man/man4/if_ipsec.4 @@ -0,0 +1,141 @@ +.\" Copyright (c) 2017 Andrey V. Elsukov +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd February 6, 2017 +.Dt if_ipsec 4 +.Os +.Sh NAME +.Nm if_ipsec +.Nd IPsec virtual tunneling interface +.Sh SYNOPSIS +The +.Cm if_ipsec +network interface is a part of the +.Fx +IPsec implementation. +To compile it into the kernel, place this line in the kernel +configuration file: +.Bd -ragged -offset indent +.Cd "options IPSEC" +.Ed +.Pp +It can also be loaded as part of the +.Cm ipsec +kernel module if the kernel was compiled with +.Bd -ragged -offset indent +.Cd "options IPSEC_SUPPORT" +.Ed +.Sh DESCRIPTION +The +.Nm +network interface is targeted for creating route-based VPNs. +It can tunnel IPv4 and IPv6 traffic over either IPv4 or IPv6 and secure +it with ESP. +.Pp +.Nm +interfaces are dynamically created and destroyed with the +.Xr ifconfig 8 +.Cm create +and +.Cm destroy +subcommands. +The administrator must configure IPsec +.Cm tunnel +endpoint addresses. +These addresses will be used for the outer IP header of ESP packets. +The administrator can also configure the protocol and addresses for the inner +IP header with +.Xr ifconfig 8 , +and modify the routing table to route the packets through the +.Nm +interface. +.Pp +When the +.Nm +interface is configured, it automatically creates special security policies. +These policies can be used to acquire security associations from the IKE daemon, +which are needed for establishing an IPsec tunnel. +It is also possible to create needed security associations manually with the +.Xr setkey 8 +utility. +.Pp +Each +.Nm +interface has an additional numeric configuration option +.Cm reqid Ar id . +This +.Ar id +is used to distinguish traffic and security policies between several +.Nm +interfaces. +The +.Cm reqid +can be specified on interface creation and changed later. +If not specified, it is automatically assigned. +Note that changing +.Cm reqid +will lead to generation of new security policies, and this +may require creating new security associations. +.Sh EXAMPLES +The example below shows manual configuration of an IPsec tunnel +between two FreeBSD hosts. +Host A has the IP address 192.168.0.3, and host B has the IP address +192.168.0.5. +.Pp +On host A: +.Bd -literal -offset indent +ifconfig ipsec0 create reqid 100 +ifconfig ipsec0 inet tunnel 192.168.0.3 192.168.0.5 +ifconfig ipsec0 inet 172.16.0.3/16 172.16.0.5 +setkey -c +add 192.168.0.3 192.168.0.5 esp 10000 -m tunnel -u 100 -E rijndael-cbc "VerySecureKey!!1"; +add 192.168.0.5 192.168.0.3 esp 10001 -m tunnel -u 100 -E rijndael-cbc "VerySecureKey!!2"; +^D +.Ed +.Pp +On host B: +.Bd -literal -offset indent +ifconfig ipsec0 create reqid 200 +ifconfig ipsec0 inet tunnel 192.168.0.5 192.168.0.3 +ifconfig ipsec0 inet 172.16.0.5/16 172.16.0.3 +setkey -c +add 192.168.0.3 192.168.0.5 esp 10000 -m tunnel -u 200 -E rijndael-cbc "VerySecureKey!!1"; +add 192.168.0.5 192.168.0.3 esp 10001 -m tunnel -u 200 -E rijndael-cbc "VerySecureKey!!2"; +^D +.Ed +.Pp +Note the value 100 on host A and value 200 on host B are used as reqid. +The same value must be used as identifier of the policy entry in the +.Xr setkey 8 +command. +.Sh SEE ALSO +.Xr gif 4 , +.Xr gre 4 , +.Xr ipsec 4 , +.Xr ifconfig 8 , +.Xr setkey 8 +.Sh AUTHORS +.An Andrey V. Elsukov Aq Mt ae@FreeBSD.org Index: head/share/man/man4/ipsec.4 =================================================================== --- head/share/man/man4/ipsec.4 +++ head/share/man/man4/ipsec.4 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd November 29, 2009 +.Dd February 6, 2017 .Dt IPSEC 4 .Os .Sh NAME @@ -37,6 +37,7 @@ .Nd Internet Protocol Security protocol .Sh SYNOPSIS .Cd "options IPSEC" +.Cd "options IPSEC_SUPPORT" .Cd "device crypto" .Pp .In sys/types.h @@ -151,6 +152,16 @@ .Xr setkey 8 on how to use it. .Pp +Depending on the socket's address family, IPPROTO_IP or IPPROTO_IPV6 +transport level and IP_IPSEC_POLICY or IPV6_IPSEC_POLICY socket options +may be used to configure per-socket security policies. +A properly-formed IPsec policy specification structure can be +created using +.Xr ipsec_set_policy 3 +function and used as socket option value for the +.Xr setsockopt 2 +call. +.Pp When setting policies using the .Xr setkey 8 command, the @@ -228,6 +239,8 @@ .It "net.inet.ipsec.dfbit integer yes" .It "net.inet.ipsec.ecn integer yes" .It "net.inet.ipsec.debug integer yes" +.It "net.inet.ipsec.natt_cksum_policy integer yes" +.It "net.inet.ipsec.check_policy_history integer yes" .It "net.inet6.ipsec6.ecn integer yes" .It "net.inet6.ipsec6.debug integer yes" .El @@ -270,6 +283,23 @@ .It Li ipsec.debug If set to non-zero, debug messages will be generated via .Xr syslog 3 . +.It Li ipsec.natt_cksum_policy +Controls how the kernel handles TCP and UDP checksums when ESP in UDP +encapsulation is used for IPsec transport mode. +If set to a non-zero value, the kernel fully recomputes checksums for +inbound TCP segments and UDP datagrams after they are decapsulated and +decrypted. +If set to 0 and original addresses were configured for corresponding SA +by the IKE daemon, the kernel incrementally recomputes checksums for +inbound TCP segments and UDP datagrams. +If addresses were not configured, the checksums are ignored. +.It Li ipsec.check_policy_history +Enables strict policy checking for inbound packets. +By default, inbound security policies check that packets handled by IPsec +have been decrypted and authenticated. +If this variable is set to a non-zero value, each packet handled by IPsec +is checked against the history of IPsec security associations. +The IPsec security protocol, mode, and SA addresses must match. .El .Pp Variables under the @@ -305,6 +335,7 @@ .Xr ipsec_set_policy 3 , .Xr crypto 4 , .Xr enc 4 , +.Xr if_ipsec 4 , .Xr icmp6 4 , .Xr intro 4 , .Xr ip6 4 , Index: head/share/man/man4/tcp.4 =================================================================== --- head/share/man/man4/tcp.4 +++ head/share/man/man4/tcp.4 @@ -34,7 +34,7 @@ .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd Jan 29, 2017 +.Dd February 6, 2017 .Dt TCP 4 .Os .Sh NAME @@ -272,33 +272,27 @@ This option enables the use of MD5 digests (also known as TCP-MD5) on writes to the specified socket. Outgoing traffic is digested; -digests on incoming traffic are verified if the -.Va net.inet.tcp.signature_verify_input -sysctl is nonzero. -The current default behavior for the system is to respond to a system -advertising this option with TCP-MD5; this may change. +digests on incoming traffic are verified. +When this option is enabled on a socket, all inbound and outgoing +TCP segments must be signed with MD5 digests. .Pp One common use for this in a .Fx router deployment is to enable based routers to interwork with Cisco equipment at peering points. Support for this feature conforms to RFC 2385. -Only IPv4 -.Pq Dv AF_INET -sessions are supported. .Pp In order for this option to function correctly, it is necessary for the administrator to add a tcp-md5 key entry to the system's security associations database (SADB) using the .Xr setkey 8 utility. -This entry must have an SPI of 0x1000 and can therefore only be specified -on a per-host basis at this time. +This entry can only be specified on a per-host basis at this time. .Pp -If an SADB entry cannot be found for the destination, the outgoing traffic -will have an invalid digest option prepended, and the following error message -will be visible on the system console: -.Em "tcp_signature_compute: SADB lookup failed for %d.%d.%d.%d" . +If an SADB entry cannot be found for the destination, +the system does not send any outgoing segments and drops any inbound segments. +.Pp +Each dropped segment is taken into account in the TCP protocol statistics. .El .Pp The option level for the Index: head/share/man/man4/udp.4 =================================================================== --- head/share/man/man4/udp.4 +++ head/share/man/man4/udp.4 @@ -28,7 +28,7 @@ .\" @(#)udp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd June 5, 1993 +.Dd February 6, 2017 .Dt UDP 4 .Os .Sh NAME @@ -99,6 +99,17 @@ .Tn UDP ; see .Xr ip 4 . +.Tn UDP_ENCAP +socket option may be used at the +.Tn IPPROTO_UDP +level to encapsulate +.Tn ESP +packets in +.Tn UDP . +Only one value is supported for this option: +.Tn UDP_ENCAP_ESPINUDP +from RFC 3948, defined in +.In netinet/udp.h . .Sh MIB VARIABLES The .Nm @@ -158,7 +169,8 @@ .Xr blackhole 4 , .Xr inet 4 , .Xr intro 4 , -.Xr ip 4 +.Xr ip 4 , +.Xr udplite 4 .Sh HISTORY The .Nm Index: head/sys/conf/NOTES =================================================================== --- head/sys/conf/NOTES +++ head/sys/conf/NOTES @@ -627,12 +627,12 @@ # In order to enable IPSEC you MUST also add device crypto to # your kernel configuration options IPSEC #IP security (requires device crypto) + +# Option IPSEC_SUPPORT does not enable IPsec, but makes it possible to +# load it as a kernel module. You still MUST add device crypto to your kernel +# configuration. +options IPSEC_SUPPORT #options IPSEC_DEBUG #debug for IP security -# -# Set IPSEC_NAT_T to enable NAT-Traversal support. This enables -# optional UDP encapsulation of ESP packets. -# -options IPSEC_NAT_T #NAT-T support, UDP encap of ESP # # SMB/CIFS requester @@ -1027,7 +1027,8 @@ # carried in TCP option 19. This option is commonly used to protect # TCP sessions (e.g. BGP) where IPSEC is not available nor desirable. # This is enabled on a per-socket basis using the TCP_MD5SIG socket option. -# This requires the use of 'device crypto' and 'options IPSEC'. +# This requires the use of 'device crypto' and either 'options IPSEC' or +# 'options IPSEC_SUPPORT'. options TCP_SIGNATURE #include support for RFC 2385 # DUMMYNET enables the "dummynet" bandwidth limiter. You need IPFIREWALL Index: head/sys/conf/files =================================================================== --- head/sys/conf/files +++ head/sys/conf/files @@ -587,22 +587,24 @@ compile-with "${NORMAL_C} -I$S/contrib/ngatm" contrib/ngatm/netnatm/sig/sig_verify.c optional ngatm_uni \ compile-with "${NORMAL_C} -I$S/contrib/ngatm" -crypto/blowfish/bf_ecb.c optional ipsec -crypto/blowfish/bf_skey.c optional crypto | ipsec -crypto/camellia/camellia.c optional crypto | ipsec -crypto/camellia/camellia-api.c optional crypto | ipsec -crypto/des/des_ecb.c optional crypto | ipsec | netsmb -crypto/des/des_setkey.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_ecb.c optional ipsec | ipsec_support +crypto/blowfish/bf_skey.c optional crypto | ipsec | ipsec_support +crypto/camellia/camellia.c optional crypto | ipsec | ipsec_support +crypto/camellia/camellia-api.c optional crypto | ipsec | ipsec_support +crypto/des/des_ecb.c optional crypto | ipsec | ipsec_support | netsmb +crypto/des/des_setkey.c optional crypto | ipsec | ipsec_support | netsmb crypto/rc4/rc4.c optional netgraph_mppc_encryption | kgssapi crypto/rijndael/rijndael-alg-fst.c optional crypto | ekcd | geom_bde | \ - ipsec | random !random_loadable | wlan_ccmp + ipsec | ipsec_support | random !random_loadable | wlan_ccmp crypto/rijndael/rijndael-api-fst.c optional ekcd | geom_bde | random !random_loadable -crypto/rijndael/rijndael-api.c optional crypto | ipsec | wlan_ccmp +crypto/rijndael/rijndael-api.c optional crypto | ipsec | ipsec_support | \ + wlan_ccmp crypto/sha1.c optional carp | crypto | ipsec | \ - netgraph_mppc_encryption | sctp -crypto/sha2/sha256c.c optional crypto | ekcd | geom_bde | ipsec | random !random_loadable | \ - sctp | zfs -crypto/sha2/sha512c.c optional crypto | geom_bde | ipsec | zfs + ipsec_support | netgraph_mppc_encryption | sctp +crypto/sha2/sha256c.c optional crypto | ekcd | geom_bde | ipsec | \ + ipsec_support | random !random_loadable | sctp | zfs +crypto/sha2/sha512c.c optional crypto | geom_bde | ipsec | \ + ipsec_support | zfs crypto/skein/skein.c optional crypto | zfs crypto/skein/skein_block.c optional crypto | zfs crypto/siphash/siphash.c optional inet | inet6 @@ -3879,8 +3881,7 @@ libkern/strvalid.c standard libkern/timingsafe_bcmp.c standard libkern/zlib.c optional crypto | geom_uzip | ipsec | \ - mxge | netgraph_deflate | \ - ddb_ctf | gzio + ipsec_support | mxge | netgraph_deflate | ddb_ctf | gzio net/altq/altq_cbq.c optional altq net/altq/altq_cdnr.c optional altq net/altq/altq_codel.c optional altq @@ -3916,6 +3917,7 @@ net/if_gif.c optional gif inet | gif inet6 | \ netgraph_gif inet | netgraph_gif inet6 net/if_gre.c optional gre inet | gre inet6 +net/if_ipsec.c optional inet ipsec | inet6 ipsec net/if_iso88025subr.c optional token net/if_lagg.c optional lagg net/if_loop.c optional loop @@ -4104,7 +4106,6 @@ netinet/ip_fastfwd.c optional inet netinet/ip_icmp.c optional inet | inet6 netinet/ip_input.c optional inet -netinet/ip_ipsec.c optional inet ipsec netinet/ip_mroute.c optional mrouting inet netinet/ip_options.c optional inet netinet/ip_output.c optional inet @@ -4174,7 +4175,6 @@ netinet6/ip6_input.c optional inet6 netinet6/ip6_mroute.c optional mrouting inet6 netinet6/ip6_output.c optional inet6 -netinet6/ip6_ipsec.c optional inet6 ipsec netinet6/mld6.c optional inet6 netinet6/nd6.c optional inet6 netinet6/nd6_nbr.c optional inet6 @@ -4187,15 +4187,25 @@ netipsec/ipsec.c optional ipsec inet | ipsec inet6 netipsec/ipsec_input.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mbuf.c optional ipsec inet | ipsec inet6 +netipsec/ipsec_mod.c optional ipsec inet | ipsec inet6 netipsec/ipsec_output.c optional ipsec inet | ipsec inet6 -netipsec/key.c optional ipsec inet | ipsec inet6 -netipsec/key_debug.c optional ipsec inet | ipsec inet6 -netipsec/keysock.c optional ipsec inet | ipsec inet6 +netipsec/ipsec_pcb.c optional ipsec inet | ipsec inet6 | \ + ipsec_support inet | ipsec_support inet6 +netipsec/key.c optional ipsec inet | ipsec inet6 | \ + ipsec_support inet | ipsec_support inet6 +netipsec/key_debug.c optional ipsec inet | ipsec inet6 | \ + ipsec_support inet | ipsec_support inet6 +netipsec/keysock.c optional ipsec inet | ipsec inet6 | \ + ipsec_support inet | ipsec_support inet6 +netipsec/subr_ipsec.c optional ipsec inet | ipsec inet6 | \ + ipsec_support inet | ipsec_support inet6 +netipsec/udpencap.c optional ipsec inet netipsec/xform_ah.c optional ipsec inet | ipsec inet6 netipsec/xform_esp.c optional ipsec inet | ipsec inet6 netipsec/xform_ipcomp.c optional ipsec inet | ipsec inet6 netipsec/xform_tcp.c optional ipsec inet tcp_signature | \ - ipsec inet6 tcp_signature + ipsec inet6 tcp_signature | ipsec_support inet tcp_signature | \ + ipsec_support inet6 tcp_signature netnatm/natm.c optional natm netnatm/natm_pcb.c optional natm netnatm/natm_proto.c optional natm @@ -4547,18 +4557,18 @@ compile-with "${OFED_C}" # crypto support -opencrypto/cast.c optional crypto | ipsec -opencrypto/criov.c optional crypto | ipsec -opencrypto/crypto.c optional crypto | ipsec +opencrypto/cast.c optional crypto | ipsec | ipsec_support +opencrypto/criov.c optional crypto | ipsec | ipsec_support +opencrypto/crypto.c optional crypto | ipsec | ipsec_support opencrypto/cryptodev.c optional cryptodev -opencrypto/cryptodev_if.m optional crypto | ipsec -opencrypto/cryptosoft.c optional crypto | ipsec -opencrypto/cryptodeflate.c optional crypto | ipsec -opencrypto/gmac.c optional crypto | ipsec -opencrypto/gfmult.c optional crypto | ipsec -opencrypto/rmd160.c optional crypto | ipsec -opencrypto/skipjack.c optional crypto | ipsec -opencrypto/xform.c optional crypto | ipsec +opencrypto/cryptodev_if.m optional crypto | ipsec | ipsec_support +opencrypto/cryptosoft.c optional crypto | ipsec | ipsec_support +opencrypto/cryptodeflate.c optional crypto | ipsec | ipsec_support +opencrypto/gmac.c optional crypto | ipsec | ipsec_support +opencrypto/gfmult.c optional crypto | ipsec | ipsec_support +opencrypto/rmd160.c optional crypto | ipsec | ipsec_support +opencrypto/skipjack.c optional crypto | ipsec | ipsec_support +opencrypto/xform.c optional crypto | ipsec | ipsec_support rpc/auth_none.c optional krpc | nfslockd | nfscl | nfsd rpc/auth_unix.c optional krpc | nfslockd | nfscl | nfsd rpc/authunix_prot.c optional krpc | nfslockd | nfscl | nfsd Index: head/sys/conf/files.amd64 =================================================================== --- head/sys/conf/files.amd64 +++ head/sys/conf/files.amd64 @@ -180,8 +180,9 @@ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | \ + ipsec_support | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock Index: head/sys/conf/files.arm =================================================================== --- head/sys/conf/files.arm +++ head/sys/conf/files.arm @@ -112,8 +112,8 @@ cddl/dev/dtrace/arm/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/arm/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/arm/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/cpufreq/cpufreq_dt.c optional cpufreq fdt dev/dwc/if_dwc.c optional dwc dev/dwc/if_dwc_if.m optional dwc Index: head/sys/conf/files.arm64 =================================================================== --- head/sys/conf/files.arm64 +++ head/sys/conf/files.arm64 @@ -142,8 +142,8 @@ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -march=armv8a+crypto ${.IMPSRC}" \ no-implicit-rule \ clean "armv8_crypto_wrap.o" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/acpica/acpi_if.m optional acpi dev/ahci/ahci_generic.c optional ahci dev/cpufreq/cpufreq_dt.c optional cpufreq fdt Index: head/sys/conf/files.i386 =================================================================== --- head/sys/conf/files.i386 +++ head/sys/conf/files.i386 @@ -143,7 +143,7 @@ compat/svr4/svr4_sysent.c optional compat_svr4 compat/svr4/svr4_sysvec.c optional compat_svr4 compat/svr4/svr4_termios.c optional compat_svr4 -bf_enc.o optional crypto | ipsec \ +bf_enc.o optional crypto | ipsec | ipsec_support \ dependency "$S/crypto/blowfish/arch/i386/bf_enc.S $S/crypto/blowfish/arch/i386/bf_enc_586.S $S/crypto/blowfish/arch/i386/bf_enc_686.S" \ compile-with "${CC} -c -I$S/crypto/blowfish/arch/i386 ${ASM_CFLAGS} ${WERROR} ${.IMPSRC}" \ no-implicit-rule @@ -159,7 +159,7 @@ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${NO_WCAST_QUAL} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" -crypto/des/arch/i386/des_enc.S optional crypto | ipsec | netsmb +crypto/des/arch/i386/des_enc.S optional crypto | ipsec | ipsec_support | netsmb crypto/via/padlock.c optional padlock crypto/via/padlock_cipher.c optional padlock crypto/via/padlock_hash.c optional padlock Index: head/sys/conf/files.mips =================================================================== --- head/sys/conf/files.mips +++ head/sys/conf/files.mips @@ -83,8 +83,10 @@ dev/uart/uart_cpu_fdt.c optional uart fdt # crypto support -- use generic -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | \ + ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | \ + ipsec_support | netsmb # AP common nvram interface MIPS specific, but maybe should be more generic dev/nvram2env/nvram2env_mips.c optional nvram2env Index: head/sys/conf/files.powerpc =================================================================== --- head/sys/conf/files.powerpc +++ head/sys/conf/files.powerpc @@ -20,8 +20,8 @@ cddl/dev/dtrace/powerpc/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/powerpc/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/powerpc/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/bm/if_bm.c optional bm powermac dev/adb/adb_bus.c optional adb dev/adb/adb_kbd.c optional adb Index: head/sys/conf/files.riscv =================================================================== --- head/sys/conf/files.riscv +++ head/sys/conf/files.riscv @@ -3,8 +3,8 @@ cddl/dev/dtrace/riscv/dtrace_asm.S optional dtrace compile-with "${DTRACE_S}" cddl/dev/dtrace/riscv/dtrace_subr.c optional dtrace compile-with "${DTRACE_C}" cddl/dev/fbt/riscv/fbt_isa.c optional dtrace_fbt | dtraceall compile-with "${FBT_C}" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/ofw/ofw_cpu.c optional fdt dev/uart/uart_cpu_fdt.c optional uart fdt dev/xilinx/axi_quad_spi.c optional xilinx_spi Index: head/sys/conf/files.sparc64 =================================================================== --- head/sys/conf/files.sparc64 +++ head/sys/conf/files.sparc64 @@ -23,8 +23,8 @@ clean "ukbdmap.h" # cddl/contrib/opensolaris/common/atomic/sparc64/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" -crypto/blowfish/bf_enc.c optional crypto | ipsec -crypto/des/des_enc.c optional crypto | ipsec | netsmb +crypto/blowfish/bf_enc.c optional crypto | ipsec | ipsec_support +crypto/des/des_enc.c optional crypto | ipsec | ipsec_support | netsmb dev/atkbdc/atkbd.c optional atkbd atkbdc dev/atkbdc/atkbd_atkbdc.c optional atkbd atkbdc dev/atkbdc/atkbdc.c optional atkbdc Index: head/sys/conf/kern.opts.mk =================================================================== --- head/sys/conf/kern.opts.mk +++ head/sys/conf/kern.opts.mk @@ -34,6 +34,7 @@ INET \ INET6 \ IPFILTER \ + IPSEC_SUPPORT \ ISCSI \ KERNEL_SYMBOLS \ NETGRAPH \ Index: head/sys/conf/options =================================================================== --- head/sys/conf/options +++ head/sys/conf/options @@ -428,7 +428,7 @@ IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h IPSEC opt_ipsec.h IPSEC_DEBUG opt_ipsec.h -IPSEC_NAT_T opt_ipsec.h +IPSEC_SUPPORT opt_ipsec.h IPSTEALTH KRPC LIBALIAS @@ -451,7 +451,7 @@ TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading TCP_RFC7413 opt_inet.h TCP_RFC7413_MAX_KEYS opt_inet.h -TCP_SIGNATURE opt_inet.h +TCP_SIGNATURE opt_ipsec.h VLAN_ARRAY opt_vlan.h XBONEHACK FLOWTABLE opt_route.h Index: head/sys/modules/Makefile =================================================================== --- head/sys/modules/Makefile +++ head/sys/modules/Makefile @@ -177,6 +177,7 @@ ip6_mroute_mod \ ip_mroute_mod \ ${_ips} \ + ${_ipsec} \ ${_ipw} \ ${_ipwfw} \ ${_isci} \ @@ -357,6 +358,7 @@ sysvipc \ ${_ti} \ ${_tcp_fastpath} \ + ${_tcpmd5} \ tests/framework \ tests/callout_test \ tl \ @@ -447,6 +449,10 @@ _if_enc= if_enc _if_gif= if_gif _if_gre= if_gre +.if ${MK_IPSEC_SUPPORT} != "no" +_ipsec= ipsec +_tcpmd5= tcp/tcpmd5 +.endif .endif .if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no") || \ Index: head/sys/modules/ipsec/Makefile =================================================================== --- head/sys/modules/ipsec/Makefile +++ head/sys/modules/ipsec/Makefile @@ -0,0 +1,14 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../net ${.CURDIR}/../../netipsec + +KMOD= ipsec +SRCS= if_ipsec.c ipsec.c ipsec_input.c ipsec_mbuf.c ipsec_mod.c \ + ipsec_output.c xform_ah.c xform_esp.c xform_ipcomp.c \ + opt_inet.h opt_inet6.h opt_ipsec.h opt_sctp.h +SRCS.INET= udpencap.c + +opt_ipsec.h: + @echo "#define IPSEC_SUPPORT 1" > ${.TARGET} + +.include Index: head/sys/modules/tcp/tcpmd5/Makefile =================================================================== --- head/sys/modules/tcp/tcpmd5/Makefile +++ head/sys/modules/tcp/tcpmd5/Makefile @@ -0,0 +1,11 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../../netipsec + +KMOD= tcpmd5 +SRCS= xform_tcp.c opt_inet.h opt_inet6.h opt_ipsec.h + +opt_ipsec.h: + @echo "#define IPSEC_SUPPORT 1" > ${.TARGET} + +.include Index: head/sys/net/if_ipsec.h =================================================================== --- head/sys/net/if_ipsec.h +++ head/sys/net/if_ipsec.h @@ -0,0 +1,39 @@ +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NET_IF_IPSEC_H_ +#define _NET_IF_IPSEC_H_ + +#define IPSEC_MTU 1400 +#define IPSEC_MTU_MIN 1280 +#define IPSEC_MTU_MAX 8192 +#define IPSECGREQID _IOWR('i', 160, struct ifreq) +#define IPSECSREQID _IOW('i', 161, struct ifreq) + +#endif /* _NET_IF_IPSEC_H_ */ Index: head/sys/net/if_ipsec.c =================================================================== --- head/sys/net/if_ipsec.c +++ head/sys/net/if_ipsec.c @@ -0,0 +1,1000 @@ +/*- + * Copyright (c) 2016 Yandex LLC + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#ifdef INET6 +#include +#endif + +#include +#include + +#include + +static MALLOC_DEFINE(M_IPSEC, "ipsec", "IPsec Virtual Tunnel Interface"); +static const char ipsecname[] = "ipsec"; + +#if defined(INET) && defined(INET6) +#define IPSEC_SPCOUNT 4 +#else +#define IPSEC_SPCOUNT 2 +#endif + +struct ipsec_softc { + struct ifnet *ifp; + + struct rmlock lock; + struct secpolicy *sp[IPSEC_SPCOUNT]; + + uint32_t reqid; + u_int family; + u_int fibnum; + LIST_ENTRY(ipsec_softc) chain; + LIST_ENTRY(ipsec_softc) hash; +}; + +#define IPSEC_LOCK_INIT(sc) rm_init(&(sc)->lock, "if_ipsec softc") +#define IPSEC_LOCK_DESTROY(sc) rm_destroy(&(sc)->lock) +#define IPSEC_RLOCK_TRACKER struct rm_priotracker ipsec_tracker +#define IPSEC_RLOCK(sc) rm_rlock(&(sc)->lock, &ipsec_tracker) +#define IPSEC_RUNLOCK(sc) rm_runlock(&(sc)->lock, &ipsec_tracker) +#define IPSEC_RLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_RLOCKED) +#define IPSEC_WLOCK(sc) rm_wlock(&(sc)->lock) +#define IPSEC_WUNLOCK(sc) rm_wunlock(&(sc)->lock) +#define IPSEC_WLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_WLOCKED) + +static struct rmlock ipsec_sc_lock; +RM_SYSINIT(ipsec_sc_lock, &ipsec_sc_lock, "if_ipsec softc list"); + +#define IPSEC_SC_RLOCK_TRACKER struct rm_priotracker ipsec_sc_tracker +#define IPSEC_SC_RLOCK() rm_rlock(&ipsec_sc_lock, &ipsec_sc_tracker) +#define IPSEC_SC_RUNLOCK() rm_runlock(&ipsec_sc_lock, &ipsec_sc_tracker) +#define IPSEC_SC_RLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_RLOCKED) +#define IPSEC_SC_WLOCK() rm_wlock(&ipsec_sc_lock) +#define IPSEC_SC_WUNLOCK() rm_wunlock(&ipsec_sc_lock) +#define IPSEC_SC_WLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_WLOCKED) + +LIST_HEAD(ipsec_iflist, ipsec_softc); +static VNET_DEFINE(struct ipsec_iflist, ipsec_sc_list); +static VNET_DEFINE(struct ipsec_iflist *, ipsec_sc_htbl); +static VNET_DEFINE(u_long, ipsec_sc_hmask); +#define V_ipsec_sc_list VNET(ipsec_sc_list) +#define V_ipsec_sc_htbl VNET(ipsec_sc_htbl) +#define V_ipsec_sc_hmask VNET(ipsec_sc_hmask) + +static uint32_t +ipsec_hash(uint32_t id) +{ + + return (fnv_32_buf(&id, sizeof(id), FNV1_32_INIT)); +} + +#define SCHASH_NHASH_LOG2 5 +#define SCHASH_NHASH (1 << SCHASH_NHASH_LOG2) +#define SCHASH_HASHVAL(id) (ipsec_hash((id)) & V_ipsec_sc_hmask) +#define SCHASH_HASH(id) &V_ipsec_sc_htbl[SCHASH_HASHVAL(id)] + +/* + * ipsec_ioctl_sx protects from concurrent ioctls. + */ +static struct sx ipsec_ioctl_sx; +SX_SYSINIT(ipsec_ioctl_sx, &ipsec_ioctl_sx, "ipsec_ioctl"); + +static int ipsec_init_reqid(struct ipsec_softc *); +static int ipsec_set_tunnel(struct ipsec_softc *, struct sockaddr *, + struct sockaddr *, uint32_t); +static void ipsec_delete_tunnel(struct ifnet *, int); + +static int ipsec_set_addresses(struct ifnet *, struct sockaddr *, + struct sockaddr *); +static int ipsec_set_reqid(struct ifnet *, uint32_t); + +static int ipsec_ioctl(struct ifnet *, u_long, caddr_t); +static int ipsec_transmit(struct ifnet *, struct mbuf *); +static int ipsec_output(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); +static void ipsec_qflush(struct ifnet *); +static int ipsec_clone_create(struct if_clone *, int, caddr_t); +static void ipsec_clone_destroy(struct ifnet *); + +static VNET_DEFINE(struct if_clone *, ipsec_cloner); +#define V_ipsec_cloner VNET(ipsec_cloner) + +static int +ipsec_clone_create(struct if_clone *ifc, int unit, caddr_t params) +{ + struct ipsec_softc *sc; + struct ifnet *ifp; + + sc = malloc(sizeof(*sc), M_IPSEC, M_WAITOK | M_ZERO); + sc->fibnum = curthread->td_proc->p_fibnum; + sc->ifp = ifp = if_alloc(IFT_TUNNEL); + IPSEC_LOCK_INIT(sc); + ifp->if_softc = sc; + if_initname(ifp, ipsecname, unit); + + ifp->if_addrlen = 0; + ifp->if_mtu = IPSEC_MTU; + ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; + ifp->if_ioctl = ipsec_ioctl; + ifp->if_transmit = ipsec_transmit; + ifp->if_qflush = ipsec_qflush; + ifp->if_output = ipsec_output; + if_attach(ifp); + bpfattach(ifp, DLT_NULL, sizeof(uint32_t)); + + IPSEC_SC_WLOCK(); + LIST_INSERT_HEAD(&V_ipsec_sc_list, sc, chain); + IPSEC_SC_WUNLOCK(); + return (0); +} + +static void +ipsec_clone_destroy(struct ifnet *ifp) +{ + struct ipsec_softc *sc; + + sx_xlock(&ipsec_ioctl_sx); + sc = ifp->if_softc; + + IPSEC_SC_WLOCK(); + ipsec_delete_tunnel(ifp, 1); + LIST_REMOVE(sc, chain); + IPSEC_SC_WUNLOCK(); + + bpfdetach(ifp); + if_detach(ifp); + ifp->if_softc = NULL; + sx_xunlock(&ipsec_ioctl_sx); + + if_free(ifp); + IPSEC_LOCK_DESTROY(sc); + free(sc, M_IPSEC); +} + +static void +vnet_ipsec_init(const void *unused __unused) +{ + + LIST_INIT(&V_ipsec_sc_list); + V_ipsec_sc_htbl = hashinit(SCHASH_NHASH, M_IPSEC, &V_ipsec_sc_hmask); + V_ipsec_cloner = if_clone_simple(ipsecname, ipsec_clone_create, + ipsec_clone_destroy, 0); +} +VNET_SYSINIT(vnet_ipsec_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_ipsec_init, NULL); + +static void +vnet_ipsec_uninit(const void *unused __unused) +{ + + if_clone_detach(V_ipsec_cloner); + hashdestroy(V_ipsec_sc_htbl, M_IPSEC, V_ipsec_sc_hmask); +} +VNET_SYSUNINIT(vnet_ipsec_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_ipsec_uninit, NULL); + +static struct secpolicy * +ipsec_getpolicy(struct ipsec_softc *sc, int dir, sa_family_t af) +{ + + switch (af) { +#ifdef INET + case AF_INET: + return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 0: 1)]); +#endif +#ifdef INET6 + case AF_INET6: + return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 0: 1) +#ifdef INET + + 2 +#endif + ]); +#endif + } + return (NULL); +} + +static struct secasindex * +ipsec_getsaidx(struct ipsec_softc *sc, int dir, sa_family_t af) +{ + struct secpolicy *sp; + + sp = ipsec_getpolicy(sc, dir, af); + if (sp == NULL) + return (NULL); + return (&sp->req[0]->saidx); +} + +static int +ipsec_transmit(struct ifnet *ifp, struct mbuf *m) +{ + IPSEC_RLOCK_TRACKER; + struct ipsec_softc *sc; + struct secpolicy *sp; + struct ip *ip; + uint32_t af; + int error; + +#ifdef MAC + error = mac_ifnet_check_transmit(ifp, m); + if (error) { + m_freem(m); + goto err; + } +#endif + error = ENETDOWN; + sc = ifp->if_softc; + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + (ifp->if_flags & IFF_MONITOR) != 0 || + (ifp->if_flags & IFF_UP) == 0) { + m_freem(m); + goto err; + } + + /* Determine address family to correctly handle packet in BPF */ + ip = mtod(m, struct ip *); + switch (ip->ip_v) { +#ifdef INET + case IPVERSION: + af = AF_INET; + break; +#endif +#ifdef INET6 + case (IPV6_VERSION >> 4): + af = AF_INET6; + break; +#endif + default: + error = EAFNOSUPPORT; + m_freem(m); + goto err; + } + + /* + * Loop prevention. + * XXX: for now just check presence of IPSEC_OUT_DONE mbuf tag. + * We can read full chain and compare destination address, + * proto and mode from xform_history with values from softc. + */ + if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { + m_freem(m); + goto err; + } + + IPSEC_RLOCK(sc); + if (sc->family == 0) { + IPSEC_RUNLOCK(sc); + m_freem(m); + goto err; + } + sp = ipsec_getpolicy(sc, IPSEC_DIR_OUTBOUND, af); + key_addref(sp); + M_SETFIB(m, sc->fibnum); + IPSEC_RUNLOCK(sc); + + BPF_MTAP2(ifp, &af, sizeof(af), m); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); + + switch (af) { +#ifdef INET + case AF_INET: + error = ipsec4_process_packet(m, sp, NULL); + break; +#endif +#ifdef INET6 + case AF_INET6: + error = ipsec6_process_packet(m, sp, NULL); + break; +#endif + default: + panic("%s: unknown address family\n", __func__); + } +err: + if (error != 0) + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (error); +} + +static void +ipsec_qflush(struct ifnet *ifp __unused) +{ + +} + +static int +ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) +{ + + return (ifp->if_transmit(ifp, m)); +} + +int +ipsec_if_input(struct mbuf *m, struct secasvar *sav, uint32_t af) +{ + IPSEC_SC_RLOCK_TRACKER; + struct secasindex *saidx; + struct ipsec_softc *sc; + struct ifnet *ifp; + + if (sav->state != SADB_SASTATE_MATURE && + sav->state != SADB_SASTATE_DYING) { + m_freem(m); + return (ENETDOWN); + } + + if (sav->sah->saidx.mode != IPSEC_MODE_TUNNEL || + sav->sah->saidx.proto != IPPROTO_ESP) + return (0); + + IPSEC_SC_RLOCK(); + /* + * We only acquire SC_RLOCK() while we are doing search in + * ipsec_sc_htbl. It is safe, because removing softc or changing + * of reqid/addresses requires removing from hash table. + */ + LIST_FOREACH(sc, SCHASH_HASH(sav->sah->saidx.reqid), hash) { + saidx = ipsec_getsaidx(sc, IPSEC_DIR_INBOUND, + sav->sah->saidx.src.sa.sa_family); + /* SA's reqid should match reqid in SP */ + if (saidx == NULL || + sav->sah->saidx.reqid != saidx->reqid) + continue; + /* SAH's addresses should match tunnel endpoints. */ + if (key_sockaddrcmp(&sav->sah->saidx.dst.sa, + &saidx->dst.sa, 0) != 0) + continue; + if (key_sockaddrcmp(&sav->sah->saidx.src.sa, + &saidx->src.sa, 0) == 0) + break; + } + if (sc == NULL) { + IPSEC_SC_RUNLOCK(); + /* Tunnel was not found. Nothing to do. */ + return (0); + } + ifp = sc->ifp; + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + (ifp->if_flags & IFF_UP) == 0) { + IPSEC_SC_RUNLOCK(); + m_freem(m); + return (ENETDOWN); + } + /* + * We found matching and working tunnel. + * Set its ifnet as receiving interface. + */ + m->m_pkthdr.rcvif = ifp; + IPSEC_SC_RUNLOCK(); + + /* m_clrprotoflags(m); */ + M_SETFIB(m, ifp->if_fib); + BPF_MTAP2(ifp, &af, sizeof(af), m); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + if ((ifp->if_flags & IFF_MONITOR) != 0) { + m_freem(m); + return (ENETDOWN); + } + return (0); +} + +/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ +int +ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + IPSEC_RLOCK_TRACKER; + struct ifreq *ifr = (struct ifreq*)data; + struct sockaddr *dst, *src; + struct ipsec_softc *sc; + struct secasindex *saidx; +#ifdef INET + struct sockaddr_in *sin = NULL; +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6 = NULL; +#endif + uint32_t reqid; + int error; + + switch (cmd) { + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + case SIOCADDMULTI: + case SIOCDELMULTI: + case SIOCGIFMTU: + case SIOCSIFFLAGS: + return (0); + case SIOCSIFMTU: + if (ifr->ifr_mtu < IPSEC_MTU_MIN || + ifr->ifr_mtu > IPSEC_MTU_MAX) + return (EINVAL); + else + ifp->if_mtu = ifr->ifr_mtu; + return (0); + } + sx_xlock(&ipsec_ioctl_sx); + sc = ifp->if_softc; + /* Check that softc is still here */ + if (sc == NULL) { + error = ENXIO; + goto bad; + } + error = 0; + switch (cmd) { + case SIOCSIFPHYADDR: +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: +#endif + error = EINVAL; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + src = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_dstaddr); + break; +#endif +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: + src = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_dstaddr); + break; +#endif + default: + goto bad; + } + /* sa_family must be equal */ + if (src->sa_family != dst->sa_family || + src->sa_len != dst->sa_len) + goto bad; + + /* validate sa_len */ + switch (src->sa_family) { +#ifdef INET + case AF_INET: + if (src->sa_len != sizeof(struct sockaddr_in)) + goto bad; + break; +#endif +#ifdef INET6 + case AF_INET6: + if (src->sa_len != sizeof(struct sockaddr_in6)) + goto bad; + break; +#endif + default: + error = EAFNOSUPPORT; + goto bad; + } + /* check sa_family looks sane for the cmd */ + error = EAFNOSUPPORT; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + if (src->sa_family == AF_INET) + break; + goto bad; +#endif +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: + if (src->sa_family == AF_INET6) + break; + goto bad; +#endif + } + error = EADDRNOTAVAIL; + switch (src->sa_family) { +#ifdef INET + case AF_INET: + if (satosin(src)->sin_addr.s_addr == INADDR_ANY || + satosin(dst)->sin_addr.s_addr == INADDR_ANY) + goto bad; + break; +#endif +#ifdef INET6 + case AF_INET6: + if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) + || + IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) + goto bad; + /* + * Check validity of the scope zone ID of the + * addresses, and convert it into the kernel + * internal form if necessary. + */ + error = sa6_embedscope(satosin6(src), 0); + if (error != 0) + goto bad; + error = sa6_embedscope(satosin6(dst), 0); + if (error != 0) + goto bad; +#endif + }; + error = ipsec_set_addresses(ifp, src, dst); + break; + case SIOCDIFPHYADDR: + ipsec_delete_tunnel(ifp, 0); + break; + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: +#endif + IPSEC_RLOCK(sc); + if (sc->family == 0) { + IPSEC_RUNLOCK(sc); + error = EADDRNOTAVAIL; + break; + } + saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family); + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + if (saidx->src.sa.sa_family != AF_INET) { + error = EADDRNOTAVAIL; + break; + } + sin = (struct sockaddr_in *)&ifr->ifr_addr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + if (saidx->src.sa.sa_family != AF_INET6) { + error = EADDRNOTAVAIL; + break; + } + sin6 = (struct sockaddr_in6 *) + &(((struct in6_ifreq *)data)->ifr_addr); + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); + break; +#endif + default: + error = EAFNOSUPPORT; + } + if (error == 0) { + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + sin->sin_addr = saidx->src.sin.sin_addr; + break; + case SIOCGIFPDSTADDR: + sin->sin_addr = saidx->dst.sin.sin_addr; + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + sin6->sin6_addr = saidx->src.sin6.sin6_addr; + break; + case SIOCGIFPDSTADDR_IN6: + sin6->sin6_addr = saidx->dst.sin6.sin6_addr; + break; +#endif + } + } + IPSEC_RUNLOCK(sc); + if (error != 0) + break; + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin); + if (error != 0) + memset(sin, 0, sizeof(*sin)); + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin6); + if (error == 0) + error = sa6_recoverscope(sin6); + if (error != 0) + memset(sin6, 0, sizeof(*sin6)); +#endif + } + break; + case SIOCGTUNFIB: + ifr->ifr_fib = sc->fibnum; + break; + case SIOCSTUNFIB: + if ((error = priv_check(curthread, PRIV_NET_SETIFFIB)) != 0) + break; + if (ifr->ifr_fib >= rt_numfibs) + error = EINVAL; + else + sc->fibnum = ifr->ifr_fib; + break; + case IPSECGREQID: + reqid = sc->reqid; + error = copyout(&reqid, ifr->ifr_data, sizeof(reqid)); + break; + case IPSECSREQID: + if ((error = priv_check(curthread, PRIV_NET_SETIFCAP)) != 0) + break; + error = copyin(ifr->ifr_data, &reqid, sizeof(reqid)); + if (error != 0) + break; + error = ipsec_set_reqid(ifp, reqid); + break; + default: + error = EINVAL; + break; + } +bad: + sx_xunlock(&ipsec_ioctl_sx); + return (error); +} + +/* + * Allocate new private security policies for tunneling interface. + * Each tunneling interface has following security policies for + * both AF: + * 0.0.0.0/0[any] 0.0.0.0/0[any] -P in \ + * ipsec esp/tunnel/RemoteIP-LocalIP/unique:reqid + * 0.0.0.0/0[any] 0.0.0.0/0[any] -P out \ + * ipsec esp/tunnel/LocalIP-RemoteIP/unique:reqid + */ +static int +ipsec_newpolicies(struct secpolicy *sp[IPSEC_SPCOUNT], + const struct sockaddr *src, const struct sockaddr *dst, uint32_t reqid) +{ + struct ipsecrequest *isr; + int i; + + memset(sp, 0, sizeof(struct secpolicy *) * IPSEC_SPCOUNT); + for (i = 0; i < IPSEC_SPCOUNT; i++) { + if ((sp[i] = key_newsp()) == NULL) + goto fail; + if ((isr = ipsec_newisr()) == NULL) + goto fail; + + sp[i]->policy = IPSEC_POLICY_IPSEC; + sp[i]->state = IPSEC_SPSTATE_DEAD; + sp[i]->req[sp[i]->tcount++] = isr; + sp[i]->created = time_second; + isr->level = IPSEC_LEVEL_UNIQUE; + isr->saidx.proto = IPPROTO_ESP; + isr->saidx.mode = IPSEC_MODE_TUNNEL; + isr->saidx.reqid = reqid; + if (i % 2 == 0) { + sp[i]->spidx.dir = IPSEC_DIR_INBOUND; + bcopy(src, &isr->saidx.dst, src->sa_len); + bcopy(dst, &isr->saidx.src, dst->sa_len); + } else { + sp[i]->spidx.dir = IPSEC_DIR_OUTBOUND; + bcopy(src, &isr->saidx.src, src->sa_len); + bcopy(dst, &isr->saidx.dst, dst->sa_len); + } + sp[i]->spidx.ul_proto = IPSEC_ULPROTO_ANY; +#ifdef INET + if (i < 2) { + sp[i]->spidx.src.sa.sa_family = + sp[i]->spidx.dst.sa.sa_family = AF_INET; + sp[i]->spidx.src.sa.sa_len = + sp[i]->spidx.dst.sa.sa_len = + sizeof(struct sockaddr_in); + continue; + } +#endif +#ifdef INET6 + sp[i]->spidx.src.sa.sa_family = + sp[i]->spidx.dst.sa.sa_family = AF_INET6; + sp[i]->spidx.src.sa.sa_len = + sp[i]->spidx.dst.sa.sa_len = sizeof(struct sockaddr_in6); +#endif + } + return (0); +fail: + for (i = 0; i < IPSEC_SPCOUNT; i++) + key_freesp(&sp[i]); + return (ENOMEM); +} + +static int +ipsec_check_reqid(uint32_t reqid) +{ + struct ipsec_softc *sc; + + IPSEC_SC_RLOCK_ASSERT(); + LIST_FOREACH(sc, &V_ipsec_sc_list, chain) { + if (sc->reqid == reqid) + return (EEXIST); + } + return (0); +} + +/* + * We use key_newreqid() to automatically obtain unique reqid. + * Then we check that given id is unique, i.e. it is not used by + * another if_ipsec(4) interface. This macro limits the number of + * tries to get unique id. + */ +#define IPSEC_REQID_TRYCNT 64 +static int +ipsec_init_reqid(struct ipsec_softc *sc) +{ + uint32_t reqid; + int trycount; + + IPSEC_SC_RLOCK_ASSERT(); + + if (sc->reqid != 0) /* already initialized */ + return (0); + + trycount = IPSEC_REQID_TRYCNT; + while (--trycount > 0) { + reqid = key_newreqid(); + if (ipsec_check_reqid(reqid) == 0) + break; + } + if (trycount == 0) + return (EEXIST); + sc->reqid = reqid; + return (0); +} + +/* + * Set or update reqid for given tunneling interface. + * When specified reqid is zero, generate new one. + * We are protected by ioctl_sx lock from concurrent id generation. + * Also softc would not disappear while we hold ioctl_sx lock. + */ +static int +ipsec_set_reqid(struct ifnet *ifp, uint32_t reqid) +{ + IPSEC_SC_RLOCK_TRACKER; + struct ipsec_softc *sc; + struct secasindex *saidx; + + sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); + + sc = ifp->if_softc; + if (sc->reqid == reqid && reqid != 0) + return (0); + + IPSEC_SC_RLOCK(); + if (reqid != 0) { + /* Check that specified reqid doesn't exist */ + if (ipsec_check_reqid(reqid) != 0) { + IPSEC_SC_RUNLOCK(); + return (EEXIST); + } + sc->reqid = reqid; + } else { + /* Generate new reqid */ + if (ipsec_init_reqid(sc) != 0) { + IPSEC_SC_RUNLOCK(); + return (EEXIST); + } + } + IPSEC_SC_RUNLOCK(); + + /* Tunnel isn't fully configured, just return. */ + if (sc->family == 0) + return (0); + + saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family); + KASSERT(saidx != NULL, + ("saidx is NULL, but family is %d", sc->family)); + return (ipsec_set_tunnel(sc, &saidx->src.sa, &saidx->dst.sa, + sc->reqid)); +} + +/* + * Set tunnel endpoints addresses. + */ +static int +ipsec_set_addresses(struct ifnet *ifp, struct sockaddr *src, + struct sockaddr *dst) +{ + IPSEC_SC_RLOCK_TRACKER; + struct ipsec_softc *sc, *tsc; + struct secasindex *saidx; + + sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); + + sc = ifp->if_softc; + if (sc->family != 0) { + saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, + src->sa_family); + if (saidx != NULL && saidx->reqid == sc->reqid && + key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 && + key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0) + return (0); /* Nothing has been changed. */ + + } + /* + * We cannot service IPsec tunnel when source address is + * not our own. + */ +#ifdef INET + if (src->sa_family == AF_INET && + in_localip(satosin(src)->sin_addr) == 0) + return (EADDRNOTAVAIL); +#endif +#ifdef INET6 + /* + * NOTE: IPv6 addresses are in kernel internal form with + * embedded scope zone id. + */ + if (src->sa_family == AF_INET6 && + in6_localip(&satosin6(src)->sin6_addr) == 0) + return (EADDRNOTAVAIL); +#endif + /* Check that given addresses aren't already configured */ + IPSEC_SC_RLOCK(); + LIST_FOREACH(tsc, &V_ipsec_sc_list, chain) { + if (tsc == sc || tsc->family != src->sa_family) + continue; + saidx = ipsec_getsaidx(tsc, IPSEC_DIR_OUTBOUND, tsc->family); + if (key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 && + key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0) { + /* We already have tunnel with such addresses */ + IPSEC_SC_RUNLOCK(); + return (EADDRNOTAVAIL); + } + } + /* If reqid is not set, generate new one. */ + if (ipsec_init_reqid(sc) != 0) { + IPSEC_SC_RUNLOCK(); + return (EEXIST); + } + IPSEC_SC_RUNLOCK(); + return (ipsec_set_tunnel(sc, src, dst, sc->reqid)); +} + +static int +ipsec_set_tunnel(struct ipsec_softc *sc, struct sockaddr *src, + struct sockaddr *dst, uint32_t reqid) +{ + struct secpolicy *sp[IPSEC_SPCOUNT]; + struct secpolicy *oldsp[IPSEC_SPCOUNT]; + int i, f; + + sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); + + /* Allocate SP with new addresses. */ + if (ipsec_newpolicies(sp, src, dst, reqid) == 0) { + /* Add new policies to SPDB */ + if (key_register_ifnet(sp, IPSEC_SPCOUNT) != 0) { + for (i = 0; i < IPSEC_SPCOUNT; i++) + key_freesp(&sp[i]); + return (EAGAIN); + } + IPSEC_SC_WLOCK(); + if ((f = sc->family) != 0) + LIST_REMOVE(sc, hash); + IPSEC_WLOCK(sc); + for (i = 0; i < IPSEC_SPCOUNT; i++) { + oldsp[i] = sc->sp[i]; + sc->sp[i] = sp[i]; + } + sc->family = src->sa_family; + IPSEC_WUNLOCK(sc); + LIST_INSERT_HEAD(SCHASH_HASH(sc->reqid), sc, hash); + IPSEC_SC_WUNLOCK(); + } else { + sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + return (ENOMEM); + } + + sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; + if (f != 0) { + key_unregister_ifnet(oldsp, IPSEC_SPCOUNT); + for (i = 0; i < IPSEC_SPCOUNT; i++) + key_freesp(&oldsp[i]); + } + return (0); +} + +static void +ipsec_delete_tunnel(struct ifnet *ifp, int locked) +{ + struct ipsec_softc *sc = ifp->if_softc; + struct secpolicy *oldsp[IPSEC_SPCOUNT]; + int i; + + sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); + + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + if (sc->family != 0) { + if (!locked) + IPSEC_SC_WLOCK(); + /* Remove from hash table */ + LIST_REMOVE(sc, hash); + IPSEC_WLOCK(sc); + for (i = 0; i < IPSEC_SPCOUNT; i++) { + oldsp[i] = sc->sp[i]; + sc->sp[i] = NULL; + } + sc->family = 0; + IPSEC_WUNLOCK(sc); + if (!locked) + IPSEC_SC_WUNLOCK(); + key_unregister_ifnet(oldsp, IPSEC_SPCOUNT); + for (i = 0; i < IPSEC_SPCOUNT; i++) + key_freesp(&oldsp[i]); + } +} Index: head/sys/net/pfkeyv2.h =================================================================== --- head/sys/net/pfkeyv2.h +++ head/sys/net/pfkeyv2.h @@ -320,7 +320,9 @@ #define SADB_X_EXT_NAT_T_OAR 24 /* Peer's NAT_OA for dst of SA. */ #define SADB_X_EXT_NAT_T_FRAG 25 /* Manual MTU override. */ #define SADB_X_EXT_SA_REPLAY 26 /* Replay window override. */ -#define SADB_EXT_MAX 26 +#define SADB_X_EXT_NEW_ADDRESS_SRC 27 +#define SADB_X_EXT_NEW_ADDRESS_DST 28 +#define SADB_EXT_MAX 28 #define SADB_SATYPE_UNSPEC 0 #define SADB_SATYPE_AH 2 Index: head/sys/netinet/in_pcb.c =================================================================== --- head/sys/netinet/in_pcb.c +++ head/sys/netinet/in_pcb.c @@ -98,12 +98,8 @@ #include #endif /* INET6 */ +#include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ - #include static struct callout ipport_tick_callout; @@ -305,8 +301,8 @@ goto out; mac_inpcb_create(so, inp); #endif -#ifdef IPSEC - error = ipsec_init_policy(so, &inp->inp_sp); +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + error = ipsec_init_pcbpolicy(inp); if (error != 0) { #ifdef MAC mac_inpcb_destroy(inp); @@ -1284,7 +1280,7 @@ INP_WLOCK_ASSERT(inp); /* XXXRW: Do as much as possible here. */ -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) if (inp->inp_sp != NULL) ipsec_delete_pcbpolicy(inp); #endif Index: head/sys/netinet/in_proto.c =================================================================== --- head/sys/netinet/in_proto.c +++ head/sys/netinet/in_proto.c @@ -90,10 +90,6 @@ static struct pr_usrreqs nousrreqs; -#ifdef IPSEC -#include -#endif /* IPSEC */ - #ifdef SCTP #include #include @@ -222,37 +218,9 @@ .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }, -#ifdef IPSEC { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_AH, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ah4_input, - .pr_ctlinput = ah4_ctlinput, - .pr_usrreqs = &nousrreqs -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_ESP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = esp4_input, - .pr_ctlinput = esp4_ctlinput, - .pr_usrreqs = &nousrreqs -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_IPCOMP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipcomp4_input, - .pr_usrreqs = &nousrreqs -}, -#endif /* IPSEC */ -{ - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, .pr_protocol = IPPROTO_IPV4, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap4_input, @@ -366,7 +334,7 @@ SYSCTL_NODE(_net_inet, IPPROTO_SCTP, sctp, CTLFLAG_RW, 0, "SCTP"); #endif SYSCTL_NODE(_net_inet, IPPROTO_IGMP, igmp, CTLFLAG_RW, 0, "IGMP"); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* XXX no protocol # to use, pick something "reserved" */ SYSCTL_NODE(_net_inet, 253, ipsec, CTLFLAG_RW, 0, "IPSEC"); SYSCTL_NODE(_net_inet, IPPROTO_AH, ah, CTLFLAG_RW, 0, "AH"); Index: head/sys/netinet/ip_input.c =================================================================== --- head/sys/netinet/ip_input.c +++ head/sys/netinet/ip_input.c @@ -77,13 +77,10 @@ #include #include #include -#ifdef IPSEC -#include -#include -#include -#endif /* IPSEC */ #include +#include + #include #include @@ -430,6 +427,12 @@ ip = mtod(m, struct ip *); hlen = ip->ip_hl << 2; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) + return; + } +#endif /* IPSEC */ IPSTAT_INC(ips_delivered); (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); return; @@ -559,11 +562,11 @@ * ip pointer. */ if (V_ipforwarding != 0 -#ifdef IPSEC - && !key_havesp(IPSEC_DIR_INBOUND) - && !key_havesp(IPSEC_DIR_OUTBOUND) +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + && (!IPSEC_ENABLED(ipv4) || + IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0) #endif - ) { + ) { if ((m = ip_tryforward(m)) == NULL) return; if (m->m_flags & M_FASTFWD_OURS) { @@ -572,13 +575,16 @@ goto ours; } } -#ifdef IPSEC + +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Bypass packet filtering for packets previously handled by IPsec. */ - if (ip_ipsec_filtertunnel(m)) - goto passin; + if (IPSEC_ENABLED(ipv4) && + IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0) + goto passin; #endif + /* * Run through list of hooks for input packets. * @@ -802,14 +808,11 @@ hlen = ip->ip_hl << 2; } -#ifdef IPSEC - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if (ip_ipsec_input(m, ip->ip_p) != 0) - goto bad; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) + return; + } #endif /* IPSEC */ /* @@ -951,24 +954,14 @@ m_freem(m); return; } -#ifdef IPSEC - if (ip_ipsec_fwd(m) != 0) { - IPSTAT_INC(ips_cantforward); - m_freem(m); - return; - } -#endif /* IPSEC */ + if ( #ifdef IPSTEALTH - if (!V_ipstealth) { + V_ipstealth == 0 && #endif - if (ip->ip_ttl <= IPTTLDEC) { - icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, - 0, 0); - return; - } -#ifdef IPSTEALTH + ip->ip_ttl <= IPTTLDEC) { + icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); + return; } -#endif bzero(&ro, sizeof(ro)); sin = (struct sockaddr_in *)&ro.ro_dst; @@ -987,20 +980,7 @@ ifa_ref(&ia->ia_ifa); } else ia = NULL; -#ifndef IPSEC /* - * 'ia' may be NULL if there is no route for this destination. - * In case of IPsec, Don't discard it just yet, but pass it to - * ip_output in case of outgoing IPsec policy. - */ - if (!srcrt && ia == NULL) { - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); - RO_RTFREE(&ro); - return; - } -#endif - - /* * Save the IP header and at most 8 bytes of the payload, * in case we need to generate an ICMP message to the src. * @@ -1032,15 +1012,22 @@ mcopy->m_pkthdr.len = mcopy->m_len; m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); } - #ifdef IPSTEALTH - if (!V_ipstealth) { + if (V_ipstealth == 0) #endif ip->ip_ttl -= IPTTLDEC; -#ifdef IPSTEALTH +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if ((error = IPSEC_FORWARD(ipv4, m)) != 0) { + /* mbuf consumed by IPsec */ + m_freem(mcopy); + if (error != EINPROGRESS) + IPSTAT_INC(ips_cantforward); + return; + } + /* No IPsec processing required */ } -#endif - +#endif /* IPSEC */ /* * If forwarding packet using same interface that it came in on, * perhaps should send a redirect to sender to shortcut a hop. @@ -1118,14 +1105,6 @@ case EMSGSIZE: type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; - -#ifdef IPSEC - /* - * If IPsec is configured for this path, - * override any possibly mtu value set by ip_output. - */ - mtu = ip_ipsec_mtu(mcopy, mtu); -#endif /* IPSEC */ /* * If the MTU was set before make sure we are below the * interface MTU. Index: head/sys/netinet/ip_ipsec.h =================================================================== --- head/sys/netinet/ip_ipsec.h +++ head/sys/netinet/ip_ipsec.h @@ -1,40 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _NETINET_IP_IPSEC_H_ -#define _NETINET_IP_IPSEC_H_ - -int ip_ipsec_filtertunnel(struct mbuf *); -int ip_ipsec_fwd(struct mbuf *); -int ip_ipsec_input(struct mbuf *, int); -int ip_ipsec_mtu(struct mbuf *, int); -int ip_ipsec_output(struct mbuf **, struct inpcb *, int *); -#endif Index: head/sys/netinet/ip_ipsec.c =================================================================== --- head/sys/netinet/ip_ipsec.c +++ head/sys/netinet/ip_ipsec.c @@ -1,245 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include "opt_ipsec.h" -#include "opt_sctp.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef SCTP -#include -#endif - -#include - -#include -#include -#include - -extern struct protosw inetsw[]; - -static VNET_DEFINE(int, ip4_ipsec_filtertunnel) = 0; -#define V_ip4_ipsec_filtertunnel VNET(ip4_ipsec_filtertunnel) - -SYSCTL_DECL(_net_inet_ipsec); -SYSCTL_INT(_net_inet_ipsec, OID_AUTO, filtertunnel, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_filtertunnel), 0, - "If set filter packets from an IPsec tunnel."); - -/* - * Check if we have to jump over firewall processing for this packet. - * Called from ip_input(). - * 1 = jump over firewall, 0 = packet goes through firewall. - */ -int -ip_ipsec_filtertunnel(struct mbuf *m) -{ - - /* - * Bypass packet filtering for packets previously handled by IPsec. - */ - if (!V_ip4_ipsec_filtertunnel && - m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) - return 1; - return 0; -} - -/* - * Check if this packet has an active SA and needs to be dropped instead - * of forwarded. - * Called from ip_forward(). - * 1 = drop packet, 0 = forward packet. - */ -int -ip_ipsec_fwd(struct mbuf *m) -{ - - return (ipsec4_in_reject(m, NULL)); -} - -/* - * Check if protocol type doesn't have a further header and do IPSEC - * decryption or reject right now. Protocols with further headers get - * their IPSEC treatment within the protocol specific processing. - * Called from ip_input(). - * 1 = drop packet, 0 = continue processing packet. - */ -int -ip_ipsec_input(struct mbuf *m, int nxt) -{ - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if ((inetsw[ip_protox[nxt]].pr_flags & PR_LASTHDR) != 0) - return (ipsec4_in_reject(m, NULL)); - return (0); -} - -/* - * Compute the MTU for a forwarded packet that gets IPSEC encapsulated. - * Called from ip_forward(). - * Returns MTU suggestion for ICMP needfrag reply. - */ -int -ip_ipsec_mtu(struct mbuf *m, int mtu) -{ - /* - * If the packet is routed over IPsec tunnel, tell the - * originator the tunnel MTU. - * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz - * XXX quickhack!!! - */ - return (mtu - ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL)); -} - -/* - * - * Called from ip_output(). - * 1 = drop packet, 0 = continue processing packet, - * -1 = packet was reinjected and stop processing packet - */ -int -ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *error) -{ - struct secpolicy *sp; - - if (!key_havesp(IPSEC_DIR_OUTBOUND)) - return 0; - - /* - * Check the security policy (SP) for the packet and, if - * required, do IPsec-related processing. There are two - * cases here; the first time a packet is sent through - * it will be untagged and handled by ipsec4_checkpolicy. - * If the packet is resubmitted to ip_output (e.g. after - * AH, ESP, etc. processing), there will be a tag to bypass - * the lookup and related policy checking. - */ - if (m_tag_find(*m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { - *error = 0; - return (0); - } - sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, error, inp); - /* - * There are four return cases: - * sp != NULL apply IPsec policy - * sp == NULL, error == 0 no IPsec handling needed - * sp == NULL, error == -EINVAL discard packet w/o error - * sp == NULL, error != 0 discard packet, report error - */ - if (sp != NULL) { - /* - * Do delayed checksums now because we send before - * this is done in the normal processing path. - */ - if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(*m); - (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } -#ifdef SCTP - if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP) { - struct ip *ip = mtod(*m, struct ip *); - - sctp_delayed_cksum(*m, (uint32_t)(ip->ip_hl << 2)); - (*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP; - } -#endif - - /* NB: callee frees mbuf */ - *error = ipsec4_process_packet(*m, sp->req); - KEY_FREESP(&sp); - if (*error == EJUSTRETURN) { - /* - * We had a SP with a level of 'use' and no SA. We - * will just continue to process the packet without - * IPsec processing and return without error. - */ - *error = 0; - goto done; - } - /* - * Preserve KAME behaviour: ENOENT can be returned - * when an SA acquire is in progress. Don't propagate - * this to user-level; it confuses applications. - * - * XXX this will go away when the SADB is redone. - */ - if (*error == ENOENT) - *error = 0; - goto reinjected; - } else { /* sp == NULL */ - - if (*error != 0) { - /* - * Hack: -EINVAL is used to signal that a packet - * should be silently discarded. This is typically - * because we asked key management for an SA and - * it was delayed (e.g. kicked up to IKE). - */ - if (*error == -EINVAL) - *error = 0; - goto bad; - } - /* No IPsec processing for this packet. */ - } -done: - return (0); -reinjected: - return (-1); -bad: - if (sp != NULL) - KEY_FREESP(&sp); - return 1; -} Index: head/sys/netinet/ip_output.c =================================================================== --- head/sys/netinet/ip_output.c +++ head/sys/netinet/ip_output.c @@ -84,10 +84,7 @@ #include #endif -#ifdef IPSEC -#include -#include -#endif /* IPSEC*/ +#include #include @@ -228,7 +225,7 @@ struct rtentry *rte; /* cache for ro->ro_rt */ uint32_t fibnum; int have_ia_ref; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) int no_route_but_check_spd = 0; #endif M_ASSERTPKTHDR(m); @@ -384,7 +381,7 @@ (rte->rt_flags & RTF_UP) == 0 || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * There is no route for this packet, but it is * possible that a matching SPD entry exists. @@ -556,15 +553,13 @@ } sendit: -#ifdef IPSEC - switch(ip_ipsec_output(&m, inp, &error)) { - case 1: - goto bad; - case -1: - goto done; - case 0: - default: - break; /* Continue with packet processing. */ +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv4)) { + if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) { + if (error == EINPROGRESS) + error = 0; + goto done; + } } /* * Check if there was a route for this packet; return error if not. @@ -1230,23 +1225,13 @@ INP_WUNLOCK(inp); break; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) case IP_IPSEC_POLICY: - { - caddr_t req; - struct mbuf *m; - - if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ + if (IPSEC_ENABLED(ipv4)) { + error = IPSEC_PCBCTL(ipv4, inp, sopt); break; - if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ - break; - req = mtod(m, caddr_t); - error = ipsec_set_policy(inp, sopt->sopt_name, req, - m->m_len, (sopt->sopt_td != NULL) ? - sopt->sopt_td->td_ucred : NULL); - m_freem(m); - break; - } + } + /* FALLTHROUGH */ #endif /* IPSEC */ default: @@ -1389,24 +1374,13 @@ error = inp_getmoptions(inp, sopt); break; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) case IP_IPSEC_POLICY: - { - struct mbuf *m = NULL; - caddr_t req = NULL; - size_t len = 0; - - if (m != NULL) { - req = mtod(m, caddr_t); - len = m->m_len; + if (IPSEC_ENABLED(ipv4)) { + error = IPSEC_PCBCTL(ipv4, inp, sopt); + break; } - error = ipsec_get_policy(sotoinpcb(so), req, len, &m); - if (error == 0) - error = soopt_mcopyout(sopt, m); /* XXX */ - if (error == 0) - m_freem(m); - break; - } + /* FALLTHROUGH */ #endif /* IPSEC */ default: Index: head/sys/netinet/raw_ip.c =================================================================== --- head/sys/netinet/raw_ip.c +++ head/sys/netinet/raw_ip.c @@ -73,9 +73,7 @@ #include #include -#ifdef IPSEC -#include -#endif /*IPSEC*/ +#include #include #include @@ -236,10 +234,11 @@ INP_LOCK_ASSERT(last); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* check AH/ESP integrity. */ - if (ipsec4_in_reject(n, last)) { - policyfail = 1; + if (IPSEC_ENABLED(ipv4)) { + if (IPSEC_CHECK_POLICY(ipv4, n, last) != 0) + policyfail = 1; } #endif /* IPSEC */ #ifdef MAC Index: head/sys/netinet/sctp_input.c =================================================================== --- head/sys/netinet/sctp_input.c +++ head/sys/netinet/sctp_input.c @@ -5790,7 +5790,7 @@ } else if (stcb == NULL) { inp_decr = inp; } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /*- * I very much doubt any of the IPSEC stuff will work but I have no * idea, so I will leave it in place. @@ -5799,17 +5799,23 @@ switch (dst->sa_family) { #ifdef INET case AF_INET: - if (ipsec4_in_reject(m, &inp->ip_inp.inp)) { - SCTP_STAT_INCR(sctps_hdrops); - goto out; + if (IPSEC_ENABLED(ipv4)) { + if (IPSEC_CHECK_POLICY(ipv4, m, + &inp->ip_inp.inp) != 0) { + SCTP_STAT_INCR(sctps_hdrops); + goto out; + } } break; #endif #ifdef INET6 case AF_INET6: - if (ipsec6_in_reject(m, &inp->ip_inp.inp)) { - SCTP_STAT_INCR(sctps_hdrops); - goto out; + if (IPSEC_ENABLED(ipv6)) { + if (IPSEC_CHECK_POLICY(ipv6, m, + &inp->ip_inp.inp) != 0) { + SCTP_STAT_INCR(sctps_hdrops); + goto out; + } } break; #endif @@ -5817,7 +5823,7 @@ break; } } -#endif +#endif /* IPSEC */ SCTPDBG(SCTP_DEBUG_INPUT1, "Ok, Common input processing called, m:%p iphlen:%d offset:%d length:%d stcb:%p\n", (void *)m, iphlen, offset, length, (void *)stcb); if (stcb) { Index: head/sys/netinet/sctp_os_bsd.h =================================================================== --- head/sys/netinet/sctp_os_bsd.h +++ head/sys/netinet/sctp_os_bsd.h @@ -82,16 +82,10 @@ #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ +#include #ifdef INET6 #include -#ifdef IPSEC -#include -#endif #include #include #include Index: head/sys/netinet/sctp_pcb.c =================================================================== --- head/sys/netinet/sctp_pcb.c +++ head/sys/netinet/sctp_pcb.c @@ -2469,8 +2469,8 @@ SCTP_INP_INFO_WUNLOCK(); return (ENOBUFS); } -#ifdef IPSEC - error = ipsec_init_policy(so, &inp->ip_inp.inp.inp_sp); +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + error = ipsec_init_pcbpolicy(&inp->ip_inp.inp); if (error != 0) { crfree(inp->ip_inp.inp.inp_cred); SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); @@ -2504,7 +2504,7 @@ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, EOPNOTSUPP); so->so_pcb = NULL; crfree(inp->ip_inp.inp.inp_cred); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipsec_delete_pcbpolicy(&inp->ip_inp.inp); #endif SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); @@ -2527,7 +2527,7 @@ SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_PCB, ENOBUFS); so->so_pcb = NULL; crfree(inp->ip_inp.inp.inp_cred); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipsec_delete_pcbpolicy(&inp->ip_inp.inp); #endif SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_ep), inp); @@ -3641,7 +3641,7 @@ * macro here since le_next will get freed as part of the * sctp_free_assoc() call. */ -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipsec_delete_pcbpolicy(ip_pcb); #endif if (ip_pcb->inp_options) { Index: head/sys/netinet/tcp_input.c =================================================================== --- head/sys/netinet/tcp_input.c +++ head/sys/netinet/tcp_input.c @@ -120,10 +120,7 @@ #include #endif -#ifdef IPSEC -#include -#include -#endif /*IPSEC*/ +#include #include @@ -487,20 +484,6 @@ tp->t_bytes_acked = 0; } -#ifdef TCP_SIGNATURE -static inline int -tcp_signature_verify_input(struct mbuf *m, int off0, int tlen, int optlen, - struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) -{ - int ret; - - tcp_fields_to_net(th); - ret = tcp_signature_verify(m, off0, tlen, optlen, to, th, tcpbflag); - tcp_fields_to_host(th); - return (ret); -} -#endif - /* * Indicate whether this ack should be delayed. We can delay the ack if * following conditions are met: @@ -611,9 +594,6 @@ int drop_hdrlen; int thflags; int rstreason = 0; /* For badport_bandlim accounting purposes */ -#ifdef TCP_SIGNATURE - uint8_t sig_checked = 0; -#endif uint8_t iptos; struct m_tag *fwd_tag = NULL; #ifdef INET6 @@ -944,15 +924,22 @@ inp->inp_flowid = m->m_pkthdr.flowid; inp->inp_flowtype = M_HASHTYPE_GET(m); } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) #ifdef INET6 - if (isipv6 && ipsec6_in_reject(m, inp)) { + if (isipv6 && IPSEC_ENABLED(ipv6) && + IPSEC_CHECK_POLICY(ipv6, m, inp) != 0) { goto dropunlock; - } else + } +#ifdef INET + else +#endif #endif /* INET6 */ - if (ipsec4_in_reject(m, inp) != 0) { +#ifdef INET + if (IPSEC_ENABLED(ipv4) && + IPSEC_CHECK_POLICY(ipv4, m, inp) != 0) { goto dropunlock; } +#endif /* INET */ #endif /* IPSEC */ /* @@ -1135,8 +1122,17 @@ * NB: syncache_expand() doesn't unlock * inp and tcpinfo locks. */ - if (!syncache_expand(&inc, &to, th, &so, m)) { + rstreason = syncache_expand(&inc, &to, th, &so, m); + if (rstreason < 0) { /* + * A failing TCP MD5 signature comparison + * must result in the segment being dropped + * and must not produce any response back + * to the sender. + */ + goto dropunlock; + } else if (rstreason == 0) { + /* * No syncache entry or ACK was not * for our SYN/ACK. Send a RST. * NB: syncache did its own logging @@ -1187,26 +1183,6 @@ tp = intotcpcb(inp); KASSERT(tp->t_state == TCPS_SYN_RECEIVED, ("%s: ", __func__)); -#ifdef TCP_SIGNATURE - if (sig_checked == 0) { - tcp_dooptions(&to, optp, optlen, - (thflags & TH_SYN) ? TO_SYN : 0); - if (!tcp_signature_verify_input(m, off0, tlen, - optlen, &to, th, tp->t_flags)) { - - /* - * In SYN_SENT state if it receives an - * RST, it is allowed for further - * processing. - */ - if ((thflags & TH_RST) == 0 || - (tp->t_state == TCPS_SYN_SENT) == 0) - goto dropunlock; - } - sig_checked = 1; - } -#endif - /* * Process the segment and the data it * contains. tcp_do_segment() consumes @@ -1436,26 +1412,18 @@ */ goto dropunlock; } - -#ifdef TCP_SIGNATURE - if (sig_checked == 0) { - tcp_dooptions(&to, optp, optlen, - (thflags & TH_SYN) ? TO_SYN : 0); - if (!tcp_signature_verify_input(m, off0, tlen, optlen, &to, - th, tp->t_flags)) { - - /* - * In SYN_SENT state if it receives an RST, it is - * allowed for further processing. - */ - if ((thflags & TH_RST) == 0 || - (tp->t_state == TCPS_SYN_SENT) == 0) - goto dropunlock; +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (tp->t_flags & TF_SIGNATURE) { + tcp_dooptions(&to, optp, optlen, thflags); + if ((to.to_flags & TOF_SIGNATURE) == 0) { + TCPSTAT_INC(tcps_sig_err_nosigopt); + goto dropunlock; } - sig_checked = 1; + if (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to.to_signature) != 0) + goto dropunlock; } #endif - TCP_PROBE5(receive, NULL, tp, m, tp, th); /* @@ -1632,6 +1600,13 @@ (th->th_off << 2) - sizeof(struct tcphdr), (thflags & TH_SYN) ? TO_SYN : 0); +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if ((tp->t_flags & TF_SIGNATURE) != 0 && + (to.to_flags & TOF_SIGNATURE) == 0) { + TCPSTAT_INC(tcps_sig_err_sigopt); + /* XXX: should drop? */ + } +#endif /* * If echoed timestamp is later than the current time, * fall back to non RFC1323 RTT calculation. Normalize @@ -3418,20 +3393,19 @@ (char *)&to->to_tsecr, sizeof(to->to_tsecr)); to->to_tsecr = ntohl(to->to_tsecr); break; -#ifdef TCP_SIGNATURE - /* - * XXX In order to reply to a host which has set the - * TCP_SIGNATURE option in its initial SYN, we have to - * record the fact that the option was observed here - * for the syncache code to perform the correct response. - */ case TCPOPT_SIGNATURE: + /* + * In order to reply to a host which has set the + * TCP_SIGNATURE option in its initial SYN, we have + * to record the fact that the option was observed + * here for the syncache code to perform the correct + * response. + */ if (optlen != TCPOLEN_SIGNATURE) continue; to->to_flags |= TOF_SIGNATURE; to->to_signature = cp + 2; break; -#endif case TCPOPT_SACK_PERMITTED: if (optlen != TCPOLEN_SACK_PERMITTED) continue; Index: head/sys/netinet/tcp_output.c =================================================================== --- head/sys/netinet/tcp_output.c +++ head/sys/netinet/tcp_output.c @@ -90,9 +90,7 @@ #include #endif -#ifdef IPSEC -#include -#endif /*IPSEC*/ +#include #include @@ -200,7 +198,7 @@ struct tcphdr *th; u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) unsigned ipsec_optlen = 0; #endif int idle, sendalot; @@ -551,14 +549,23 @@ * the right thing below to provide length of just ip options and thus * checking for ipoptlen is enough to decide if ip options are present. */ -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Pre-calculate here as we save another lookup into the darknesses * of IPsec that way and can actually decide if TSO is ok. */ - ipsec_optlen = ipsec_hdrsiz_tcp(tp); +#ifdef INET6 + if (isipv6 && IPSEC_ENABLED(ipv6)) + ipsec_optlen = IPSEC_HDRSIZE(ipv6, tp->t_inpcb); +#ifdef INET + else #endif - +#endif /* INET6 */ +#ifdef INET + if (IPSEC_ENABLED(ipv4)) + ipsec_optlen = IPSEC_HDRSIZE(ipv4, tp->t_inpcb); +#endif /* INET */ +#endif /* IPSEC */ #ifdef INET6 if (isipv6) ipoptlen = ip6_optlen(tp->t_inpcb); @@ -569,7 +576,7 @@ offsetof(struct ipoption, ipopt_list); else ipoptlen = 0; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipoptlen += ipsec_optlen; #endif @@ -839,8 +846,12 @@ to.to_sacks = (u_char *)tp->sackblks; } } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* TCP-MD5 (RFC2385). */ + /* + * Check that TCP_MD5SIG is enabled in tcpcb to + * account the size needed to set this TCP option. + */ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif /* TCP_SIGNATURE */ @@ -1253,20 +1264,31 @@ */ tp->snd_up = tp->snd_una; /* drag it along */ -#ifdef TCP_SIGNATURE - if (to.to_flags & TOF_SIGNATURE) { - int sigoff = to.to_signature - opt; - tcp_signature_compute(m, 0, len, optlen, - (u_char *)(th + 1) + sigoff, IPSEC_DIR_OUTBOUND); - } -#endif - /* * Put TCP length in extended header, and then * checksum extended header and data. */ m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (to.to_flags & TOF_SIGNATURE) { + /* + * Calculate MD5 signature and put it into the place + * determined before. + * NOTE: since TCP options buffer doesn't point into + * mbuf's data, calculate offset and use it. + */ + if (!TCPMD5_ENABLED() || TCPMD5_OUTPUT(m, th, + (u_char *)(th + 1) + (to.to_signature - opt)) != 0) { + /* + * Do not send segment if the calculation of MD5 + * digest has failed. + */ + goto out; + } + } +#endif #ifdef INET6 if (isipv6) { /* @@ -1304,7 +1326,7 @@ m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) KASSERT(len + hdrlen + ipoptlen - ipsec_optlen == m_length(m, NULL), ("%s: mbuf chain shorter than expected: %d + %u + %u - %u != %u", __func__, len, hdrlen, ipoptlen, ipsec_optlen, m_length(m, NULL))); @@ -1563,6 +1585,9 @@ } SOCKBUF_UNLOCK_ASSERT(&so->so_snd); /* Check gotos. */ switch (error) { + case EACCES: + tp->t_softerror = error; + return (0); case EPERM: tp->t_softerror = error; return (error); @@ -1730,7 +1755,6 @@ bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr)); optp += sizeof(to->to_tsecr); break; -#ifdef TCP_SIGNATURE case TOF_SIGNATURE: { int siglen = TCPOLEN_SIGNATURE - 2; @@ -1739,8 +1763,10 @@ optlen += TCPOLEN_NOP; *optp++ = TCPOPT_NOP; } - if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE) + if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE) { + to->to_flags &= ~TOF_SIGNATURE; continue; + } optlen += TCPOLEN_SIGNATURE; *optp++ = TCPOPT_SIGNATURE; *optp++ = TCPOLEN_SIGNATURE; @@ -1749,7 +1775,6 @@ *optp++ = 0; break; } -#endif case TOF_SACK: { int sackblks = 0; Index: head/sys/netinet/tcp_stacks/fastpath.c =================================================================== --- head/sys/netinet/tcp_stacks/fastpath.c +++ head/sys/netinet/tcp_stacks/fastpath.c @@ -56,7 +56,6 @@ #include "opt_inet.h" #include "opt_inet6.h" -#include "opt_ipsec.h" #include "opt_tcpdebug.h" #include @@ -116,11 +115,6 @@ #ifdef TCP_OFFLOAD #include #endif - -#ifdef IPSEC -#include -#include -#endif /*IPSEC*/ #include Index: head/sys/netinet/tcp_subr.c =================================================================== --- head/sys/netinet/tcp_subr.c +++ head/sys/netinet/tcp_subr.c @@ -118,15 +118,7 @@ #include #endif -#ifdef IPSEC -#include -#include -#ifdef INET6 -#include -#endif -#include -#include -#endif /*IPSEC*/ +#include #include #include @@ -233,12 +225,6 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN, &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets"); -#ifdef TCP_SIGNATURE -static int tcp_sig_checksigs = 1; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, signature_verify_input, CTLFLAG_RW, - &tcp_sig_checksigs, 0, "Verify RFC2385 digests on inbound traffic"); -#endif - VNET_DEFINE(uma_zone_t, sack_hole_zone); #define V_sack_hole_zone VNET(sack_hole_zone) @@ -1064,12 +1050,11 @@ to.to_tsecr = tp->ts_recent; to.to_flags |= TOF_TS; } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* TCP-MD5 (RFC2385). */ if (tp->t_flags & TF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif - /* Add the options. */ tlen += optlen = tcp_addoptions(&to, optp); @@ -1125,10 +1110,13 @@ nth->th_win = htons((u_short)win); nth->th_urp = 0; -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (to.to_flags & TOF_SIGNATURE) { - tcp_signature_compute(m, 0, 0, optlen, to.to_signature, - IPSEC_DIR_OUTBOUND); + if (!TCPMD5_ENABLED() || + TCPMD5_OUTPUT(m, nth, to.to_signature) != 0) { + m_freem(m); + return; + } } #endif @@ -2501,7 +2489,7 @@ optlen = TCPOLEN_TSTAMP_APPA; else optlen = 0; -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif @@ -2517,7 +2505,7 @@ optlen = PAD(TCPOLEN_MAXSEG); if (tp->t_flags & TF_REQ_SCALE) optlen += PAD(TCPOLEN_WINDOW); -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) optlen += PAD(TCPOLEN_SIGNATURE); #endif @@ -2528,343 +2516,6 @@ optlen = min(optlen, TCP_MAXOLEN); return (tp->t_maxseg - optlen); } - -#ifdef IPSEC -/* compute ESP/AH header size for TCP, including outer IP header. */ -size_t -ipsec_hdrsiz_tcp(struct tcpcb *tp) -{ - struct inpcb *inp; - struct mbuf *m; - size_t hdrsiz; - struct ip *ip; -#ifdef INET6 - struct ip6_hdr *ip6; -#endif - struct tcphdr *th; - - if ((tp == NULL) || ((inp = tp->t_inpcb) == NULL) || - (!key_havesp(IPSEC_DIR_OUTBOUND))) - return (0); - m = m_gethdr(M_NOWAIT, MT_DATA); - if (!m) - return (0); - -#ifdef INET6 - if ((inp->inp_vflag & INP_IPV6) != 0) { - ip6 = mtod(m, struct ip6_hdr *); - th = (struct tcphdr *)(ip6 + 1); - m->m_pkthdr.len = m->m_len = - sizeof(struct ip6_hdr) + sizeof(struct tcphdr); - tcpip_fillheaders(inp, ip6, th); - hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); - } else -#endif /* INET6 */ - { - ip = mtod(m, struct ip *); - th = (struct tcphdr *)(ip + 1); - m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); - tcpip_fillheaders(inp, ip, th); - hdrsiz = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); - } - - m_free(m); - return (hdrsiz); -} -#endif /* IPSEC */ - -#ifdef TCP_SIGNATURE -/* - * Callback function invoked by m_apply() to digest TCP segment data - * contained within an mbuf chain. - */ -static int -tcp_signature_apply(void *fstate, void *data, u_int len) -{ - - MD5Update(fstate, (u_char *)data, len); - return (0); -} - -/* - * XXX The key is retrieved from the system's PF_KEY SADB, by keying a - * search with the destination IP address, and a 'magic SPI' to be - * determined by the application. This is hardcoded elsewhere to 1179 -*/ -struct secasvar * -tcp_get_sav(struct mbuf *m, u_int direction) -{ - union sockaddr_union dst; - struct secasvar *sav; - struct ip *ip; -#ifdef INET6 - struct ip6_hdr *ip6; - char ip6buf[INET6_ADDRSTRLEN]; -#endif - - /* Extract the destination from the IP header in the mbuf. */ - bzero(&dst, sizeof(union sockaddr_union)); - ip = mtod(m, struct ip *); -#ifdef INET6 - ip6 = NULL; /* Make the compiler happy. */ -#endif - switch (ip->ip_v) { -#ifdef INET - case IPVERSION: - dst.sa.sa_len = sizeof(struct sockaddr_in); - dst.sa.sa_family = AF_INET; - dst.sin.sin_addr = (direction == IPSEC_DIR_INBOUND) ? - ip->ip_src : ip->ip_dst; - break; -#endif -#ifdef INET6 - case (IPV6_VERSION >> 4): - ip6 = mtod(m, struct ip6_hdr *); - dst.sa.sa_len = sizeof(struct sockaddr_in6); - dst.sa.sa_family = AF_INET6; - dst.sin6.sin6_addr = (direction == IPSEC_DIR_INBOUND) ? - ip6->ip6_src : ip6->ip6_dst; - break; -#endif - default: - return (NULL); - /* NOTREACHED */ - break; - } - - /* Look up an SADB entry which matches the address of the peer. */ - sav = KEY_ALLOCSA(&dst, IPPROTO_TCP, htonl(TCP_SIG_SPI)); - if (sav == NULL) { - ipseclog((LOG_ERR, "%s: SADB lookup failed for %s\n", __func__, - (ip->ip_v == IPVERSION) ? inet_ntoa(dst.sin.sin_addr) : -#ifdef INET6 - (ip->ip_v == (IPV6_VERSION >> 4)) ? - ip6_sprintf(ip6buf, &dst.sin6.sin6_addr) : -#endif - "(unsupported)")); - } - - return (sav); -} - -/* - * Compute TCP-MD5 hash of a TCP segment. (RFC2385) - * - * Parameters: - * m pointer to head of mbuf chain - * len length of TCP segment data, excluding options - * optlen length of TCP segment options - * buf pointer to storage for computed MD5 digest - * sav pointer to security assosiation - * - * We do this over ip, tcphdr, segment data, and the key in the SADB. - * When called from tcp_input(), we can be sure that th_sum has been - * zeroed out and verified already. - * - * Releases reference to SADB key before return. - * - * Return 0 if successful, otherwise return -1. - * - */ -int -tcp_signature_do_compute(struct mbuf *m, int len, int optlen, - u_char *buf, struct secasvar *sav) -{ -#ifdef INET - struct ippseudo ippseudo; -#endif - MD5_CTX ctx; - int doff; - struct ip *ip; -#ifdef INET - struct ipovly *ipovly; -#endif - struct tcphdr *th; -#ifdef INET6 - struct ip6_hdr *ip6; - struct in6_addr in6; - uint32_t plen; - uint16_t nhdr; -#endif - u_short savecsum; - - KASSERT(m != NULL, ("NULL mbuf chain")); - KASSERT(buf != NULL, ("NULL signature pointer")); - - /* Extract the destination from the IP header in the mbuf. */ - ip = mtod(m, struct ip *); -#ifdef INET6 - ip6 = NULL; /* Make the compiler happy. */ -#endif - - MD5Init(&ctx); - /* - * Step 1: Update MD5 hash with IP(v6) pseudo-header. - * - * XXX The ippseudo header MUST be digested in network byte order, - * or else we'll fail the regression test. Assume all fields we've - * been doing arithmetic on have been in host byte order. - * XXX One cannot depend on ipovly->ih_len here. When called from - * tcp_output(), the underlying ip_len member has not yet been set. - */ - switch (ip->ip_v) { -#ifdef INET - case IPVERSION: - ipovly = (struct ipovly *)ip; - ippseudo.ippseudo_src = ipovly->ih_src; - ippseudo.ippseudo_dst = ipovly->ih_dst; - ippseudo.ippseudo_pad = 0; - ippseudo.ippseudo_p = IPPROTO_TCP; - ippseudo.ippseudo_len = htons(len + sizeof(struct tcphdr) + - optlen); - MD5Update(&ctx, (char *)&ippseudo, sizeof(struct ippseudo)); - - th = (struct tcphdr *)((u_char *)ip + sizeof(struct ip)); - doff = sizeof(struct ip) + sizeof(struct tcphdr) + optlen; - break; -#endif -#ifdef INET6 - /* - * RFC 2385, 2.0 Proposal - * For IPv6, the pseudo-header is as described in RFC 2460, namely the - * 128-bit source IPv6 address, 128-bit destination IPv6 address, zero- - * extended next header value (to form 32 bits), and 32-bit segment - * length. - * Note: Upper-Layer Packet Length comes before Next Header. - */ - case (IPV6_VERSION >> 4): - ip6 = mtod(m, struct ip6_hdr *); - in6 = ip6->ip6_src; - in6_clearscope(&in6); - MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); - in6 = ip6->ip6_dst; - in6_clearscope(&in6); - MD5Update(&ctx, (char *)&in6, sizeof(struct in6_addr)); - plen = htonl(len + sizeof(struct tcphdr) + optlen); - MD5Update(&ctx, (char *)&plen, sizeof(uint32_t)); - nhdr = 0; - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - nhdr = IPPROTO_TCP; - MD5Update(&ctx, (char *)&nhdr, sizeof(uint8_t)); - - th = (struct tcphdr *)((u_char *)ip6 + sizeof(struct ip6_hdr)); - doff = sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + optlen; - break; -#endif - default: - KEY_FREESAV(&sav); - return (-1); - /* NOTREACHED */ - break; - } - - - /* - * Step 2: Update MD5 hash with TCP header, excluding options. - * The TCP checksum must be set to zero. - */ - savecsum = th->th_sum; - th->th_sum = 0; - MD5Update(&ctx, (char *)th, sizeof(struct tcphdr)); - th->th_sum = savecsum; - - /* - * Step 3: Update MD5 hash with TCP segment data. - * Use m_apply() to avoid an early m_pullup(). - */ - if (len > 0) - m_apply(m, doff, len, tcp_signature_apply, &ctx); - - /* - * Step 4: Update MD5 hash with shared secret. - */ - MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth)); - MD5Final(buf, &ctx); - - key_sa_recordxfer(sav, m); - KEY_FREESAV(&sav); - return (0); -} - -/* - * Compute TCP-MD5 hash of a TCP segment. (RFC2385) - * - * Return 0 if successful, otherwise return -1. - */ -int -tcp_signature_compute(struct mbuf *m, int _unused, int len, int optlen, - u_char *buf, u_int direction) -{ - struct secasvar *sav; - - if ((sav = tcp_get_sav(m, direction)) == NULL) - return (-1); - - return (tcp_signature_do_compute(m, len, optlen, buf, sav)); -} - -/* - * Verify the TCP-MD5 hash of a TCP segment. (RFC2385) - * - * Parameters: - * m pointer to head of mbuf chain - * len length of TCP segment data, excluding options - * optlen length of TCP segment options - * buf pointer to storage for computed MD5 digest - * direction direction of flow (IPSEC_DIR_INBOUND or OUTBOUND) - * - * Return 1 if successful, otherwise return 0. - */ -int -tcp_signature_verify(struct mbuf *m, int off0, int tlen, int optlen, - struct tcpopt *to, struct tcphdr *th, u_int tcpbflag) -{ - char tmpdigest[TCP_SIGLEN]; - - if (tcp_sig_checksigs == 0) - return (1); - if ((tcpbflag & TF_SIGNATURE) == 0) { - if ((to->to_flags & TOF_SIGNATURE) != 0) { - - /* - * If this socket is not expecting signature but - * the segment contains signature just fail. - */ - TCPSTAT_INC(tcps_sig_err_sigopt); - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - - /* Signature is not expected, and not present in segment. */ - return (1); - } - - /* - * If this socket is expecting signature but the segment does not - * contain any just fail. - */ - if ((to->to_flags & TOF_SIGNATURE) == 0) { - TCPSTAT_INC(tcps_sig_err_nosigopt); - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - if (tcp_signature_compute(m, off0, tlen, optlen, &tmpdigest[0], - IPSEC_DIR_INBOUND) == -1) { - TCPSTAT_INC(tcps_sig_err_buildsig); - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - - if (bcmp(to->to_signature, &tmpdigest[0], TCP_SIGLEN) != 0) { - TCPSTAT_INC(tcps_sig_rcvbadsig); - return (0); - } - TCPSTAT_INC(tcps_sig_rcvgoodsig); - return (1); -} -#endif /* TCP_SIGNATURE */ static int sysctl_drop(SYSCTL_HANDLER_ARGS) Index: head/sys/netinet/tcp_syncache.c =================================================================== --- head/sys/netinet/tcp_syncache.c +++ head/sys/netinet/tcp_syncache.c @@ -96,13 +96,7 @@ #include #endif -#ifdef IPSEC -#include -#ifdef INET6 -#include -#endif -#include -#endif /*IPSEC*/ +#include #include @@ -744,11 +738,6 @@ INP_HASH_WUNLOCK(&V_tcbinfo); goto abort; } -#ifdef IPSEC - /* Copy old policy into new socket's. */ - if (ipsec_copy_policy(sotoinpcb(lso)->inp_sp, inp->inp_sp)) - printf("syncache_socket: could not copy policy\n"); -#endif #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) { struct inpcb *oinp = sotoinpcb(lso); @@ -830,6 +819,11 @@ } } #endif /* INET */ +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + /* Copy old policy into new socket's. */ + if (ipsec_copy_pcbpolicy(sotoinpcb(lso), inp) != 0) + printf("syncache_socket: could not copy policy\n"); +#endif INP_HASH_WUNLOCK(&V_tcbinfo); tp = intotcpcb(inp); tcp_state_change(tp, TCPS_SYN_RECEIVED); @@ -880,7 +874,7 @@ tp->ts_recent_age = tcp_ts_getticks(); tp->ts_offset = sc->sc_tsoff; } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (sc->sc_flags & SCF_SIGNATURE) tp->t_flags |= TF_SIGNATURE; #endif @@ -1004,8 +998,58 @@ "(probably spoofed)\n", s, __func__); goto failed; } +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + /* If received ACK has MD5 signature, check it. */ + if ((to->to_flags & TOF_SIGNATURE) != 0 && + (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to->to_signature) != 0)) { + /* Drop the ACK. */ + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: Segment rejected, " + "MD5 signature doesn't match.\n", + s, __func__); + free(s, M_TCPLOG); + } + TCPSTAT_INC(tcps_sig_err_sigopt); + return (-1); /* Do not send RST */ + } +#endif /* TCP_SIGNATURE */ } else { +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* + * If listening socket requested TCP digests, check that + * received ACK has signature and it is correct. + * If not, drop the ACK and leave sc entry in th cache, + * because SYN was received with correct signature. + */ + if (sc->sc_flags & SCF_SIGNATURE) { + if ((to->to_flags & TOF_SIGNATURE) == 0) { + /* No signature */ + TCPSTAT_INC(tcps_sig_err_nosigopt); + SCH_UNLOCK(sch); + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: Segment " + "rejected, MD5 signature wasn't " + "provided.\n", s, __func__); + free(s, M_TCPLOG); + } + return (-1); /* Do not send RST */ + } + if (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to->to_signature) != 0) { + /* Doesn't match or no SA */ + SCH_UNLOCK(sch); + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, "%s; %s: Segment " + "rejected, MD5 signature doesn't " + "match.\n", s, __func__); + free(s, M_TCPLOG); + } + return (-1); /* Do not send RST */ + } + } +#endif /* TCP_SIGNATURE */ + /* * Pull out the entry to unlock the bucket row. * * NOTE: We must decrease TCPS_SYN_RECEIVED count here, not @@ -1274,7 +1318,23 @@ ipopts = NULL; #endif +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* + * If listening socket requested TCP digests, check that received + * SYN has signature and it is correct. If signature doesn't match + * or TCP_SIGNATURE support isn't enabled, drop the packet. + */ + if (ltflags & TF_SIGNATURE) { + if ((to->to_flags & TOF_SIGNATURE) == 0) { + TCPSTAT_INC(tcps_sig_err_nosigopt); + goto done; + } + if (!TCPMD5_ENABLED() || + TCPMD5_INPUT(m, th, to->to_signature) != 0) + goto done; + } +#endif /* TCP_SIGNATURE */ + /* * See if we already have an entry for this connection. * If we do, resend the SYN,ACK, and reset the retransmit timer. * @@ -1449,15 +1509,15 @@ sc->sc_flags |= SCF_WINSCALE; } } -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* - * If listening socket requested TCP digests, OR received SYN - * contains the option, flag this in the syncache so that - * syncache_respond() will do the right thing with the SYN+ACK. + * If listening socket requested TCP digests, flag this in the + * syncache so that syncache_respond() will do the right thing + * with the SYN+ACK. */ - if (to->to_flags & TOF_SIGNATURE || ltflags & TF_SIGNATURE) + if (ltflags & TF_SIGNATURE) sc->sc_flags |= SCF_SIGNATURE; -#endif +#endif /* TCP_SIGNATURE */ if (to->to_flags & TOF_SACKPERM) sc->sc_flags |= SCF_SACK; if (to->to_flags & TOF_MSS) @@ -1548,10 +1608,6 @@ #ifdef INET6 struct ip6_hdr *ip6 = NULL; #endif -#ifdef TCP_SIGNATURE - struct secasvar *sav; -#endif - hlen = #ifdef INET6 (sc->sc_inc.inc_flags & INC_ISIPV6) ? sizeof(struct ip6_hdr) : @@ -1660,32 +1716,10 @@ } if (sc->sc_flags & SCF_SACK) to.to_flags |= TOF_SACKPERM; -#ifdef TCP_SIGNATURE - sav = NULL; - if (sc->sc_flags & SCF_SIGNATURE) { - sav = tcp_get_sav(m, IPSEC_DIR_OUTBOUND); - if (sav != NULL) - to.to_flags |= TOF_SIGNATURE; - else { - - /* - * We've got SCF_SIGNATURE flag - * inherited from listening socket, - * but no SADB key for given source - * address. Assume signature is not - * required and remove signature flag - * instead of silently dropping - * connection. - */ - if (locked == 0) - SCH_LOCK(sch); - sc->sc_flags &= ~SCF_SIGNATURE; - if (locked == 0) - SCH_UNLOCK(sch); - } - } +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (sc->sc_flags & SCF_SIGNATURE) + to.to_flags |= TOF_SIGNATURE; #endif - #ifdef TCP_RFC7413 if (sc->sc_tfo_cookie) { to.to_flags |= TOF_FASTOPEN; @@ -1701,18 +1735,25 @@ th->th_off = (sizeof(struct tcphdr) + optlen) >> 2; m->m_len += optlen; m->m_pkthdr.len += optlen; - -#ifdef TCP_SIGNATURE - if (sc->sc_flags & SCF_SIGNATURE) - tcp_signature_do_compute(m, 0, optlen, - to.to_signature, sav); -#endif #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen); else #endif ip->ip_len = htons(ntohs(ip->ip_len) + optlen); +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) + if (sc->sc_flags & SCF_SIGNATURE) { + KASSERT(to.to_flags & TOF_SIGNATURE, + ("tcp_addoptions() didn't set tcp_signature")); + + /* NOTE: to.to_signature is inside of mbuf */ + if (!TCPMD5_ENABLED() || + TCPMD5_OUTPUT(m, th, to.to_signature) != 0) { + m_freem(m); + return (EACCES); + } + } +#endif } else optlen = 0; Index: head/sys/netinet/tcp_usrreq.c =================================================================== --- head/sys/netinet/tcp_usrreq.c +++ head/sys/netinet/tcp_usrreq.c @@ -41,6 +41,7 @@ #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_ipsec.h" #include "opt_tcpdebug.h" #include @@ -101,6 +102,7 @@ #ifdef TCP_OFFLOAD #include #endif +#include /* * TCP protocol interface to socket abstraction. @@ -1552,21 +1554,17 @@ switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) case TCP_MD5SIG: - INP_WUNLOCK(inp); - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); + if (!TCPMD5_ENABLED()) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + error = TCPMD5_PCBCTL(inp, sopt); if (error) return (error); - - INP_WLOCK_RECHECK(inp); - if (optval > 0) - tp->t_flags |= TF_SIGNATURE; - else - tp->t_flags &= ~TF_SIGNATURE; goto unlock_and_done; -#endif /* TCP_SIGNATURE */ +#endif /* IPSEC */ case TCP_NODELAY: case TCP_NOOPT: @@ -1792,11 +1790,13 @@ case SOPT_GET: tp = intotcpcb(inp); switch (sopt->sopt_name) { -#ifdef TCP_SIGNATURE +#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) case TCP_MD5SIG: - optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; - INP_WUNLOCK(inp); - error = sooptcopyout(sopt, &optval, sizeof optval); + if (!TCPMD5_ENABLED()) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + error = TCPMD5_PCBCTL(inp, sopt); break; #endif Index: head/sys/netinet/tcp_var.h =================================================================== --- head/sys/netinet/tcp_var.h +++ head/sys/netinet/tcp_var.h @@ -363,22 +363,7 @@ #define TCPOOB_HAVEDATA 0x01 #define TCPOOB_HADDATA 0x02 -#ifdef TCP_SIGNATURE /* - * Defines which are needed by the xform_tcp module and tcp_[in|out]put - * for SADB verification and lookup. - */ -#define TCP_SIGLEN 16 /* length of computed digest in bytes */ -#define TCP_KEYLEN_MIN 1 /* minimum length of TCP-MD5 key */ -#define TCP_KEYLEN_MAX 80 /* maximum length of TCP-MD5 key */ -/* - * Only a single SA per host may be specified at this time. An SPI is - * needed in order for the KEY_ALLOCSA() lookup to work. - */ -#define TCP_SIG_SPI 0x1000 -#endif /* TCP_SIGNATURE */ - -/* * Flags for PLPMTU handling, t_flags2 */ #define TF2_PLPMTU_BLACKHOLE 0x00000001 /* Possible PLPMTUD Black Hole. */ @@ -614,7 +599,7 @@ /* TCP_SIGNATURE related stats */ uint64_t tcps_sig_rcvgoodsig; /* Total matching signature received */ uint64_t tcps_sig_rcvbadsig; /* Total bad signature received */ - uint64_t tcps_sig_err_buildsig; /* Mismatching signature received */ + uint64_t tcps_sig_err_buildsig; /* Failed to make signature */ uint64_t tcps_sig_err_sigopt; /* No signature expected by socket */ uint64_t tcps_sig_err_nosigopt; /* No signature provided by segment */ @@ -835,17 +820,6 @@ int tcp_twcheck(struct inpcb *, struct tcpopt *, struct tcphdr *, struct mbuf *, int); void tcp_setpersist(struct tcpcb *); -#ifdef TCP_SIGNATURE -struct secasvar; -struct secasvar *tcp_get_sav(struct mbuf *, u_int); -int tcp_signature_do_compute(struct mbuf *, int, int, u_char *, - struct secasvar *); -int tcp_signature_compute(struct mbuf *, int, int, int, u_char *, u_int); -int tcp_signature_verify(struct mbuf *, int, int, int, struct tcpopt *, - struct tcphdr *, u_int); -int tcp_signature_check(struct mbuf *m, int off0, int tlen, int optlen, - struct tcpopt *to, struct tcphdr *th, u_int tcpbflag); -#endif void tcp_slowtimo(void); struct tcptemp * tcpip_maketemplate(struct inpcb *); @@ -889,17 +863,6 @@ th->th_urp = ntohs(th->th_urp); } -#ifdef TCP_SIGNATURE -static inline void -tcp_fields_to_net(struct tcphdr *th) -{ - - th->th_seq = htonl(th->th_seq); - th->th_ack = htonl(th->th_ack); - th->th_win = htons(th->th_win); - th->th_urp = htons(th->th_urp); -} -#endif #endif /* _KERNEL */ #endif /* _NETINET_TCP_VAR_H_ */ Index: head/sys/netinet/udp.h =================================================================== --- head/sys/netinet/udp.h +++ head/sys/netinet/udp.h @@ -58,7 +58,7 @@ */ /* Encapsulation types. */ #define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ -#define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-02+ */ +#define UDP_ENCAP_ESPINUDP 2 /* RFC3948 */ /* Default ESP in UDP encapsulation port. */ #define UDP_ENCAP_ESPINUDP_PORT 500 Index: head/sys/netinet/udp_usrreq.c =================================================================== --- head/sys/netinet/udp_usrreq.c +++ head/sys/netinet/udp_usrreq.c @@ -92,10 +92,7 @@ #include #include -#ifdef IPSEC -#include -#include -#endif +#include #include @@ -172,15 +169,6 @@ struct mbuf *, struct thread *); #endif -#ifdef IPSEC -#ifdef IPSEC_NAT_T -#define UF_ESPINUDP_ALL (UF_ESPINUDP_NON_IKE|UF_ESPINUDP) -#ifdef INET -static struct mbuf *udp4_espdecap(struct inpcb *, struct mbuf *, int); -#endif -#endif /* IPSEC_NAT_T */ -#endif /* IPSEC */ - static void udp_zone_change(void *tag) { @@ -339,21 +327,18 @@ off += sizeof(struct udphdr); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* Check AH/ESP integrity. */ - if (ipsec4_in_reject(n, inp)) { + if (IPSEC_ENABLED(ipv4) && + IPSEC_CHECK_POLICY(ipv4, n, inp) != 0) { m_freem(n); return (0); } -#ifdef IPSEC_NAT_T - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: udpcb NULL", __func__)); - if (up->u_flags & UF_ESPINUDP_ALL) { /* IPSec UDP encaps. */ - n = udp4_espdecap(inp, n, off); - if (n == NULL) /* Consumed. */ - return (0); + if (up->u_flags & UF_ESPINUDP) {/* IPSec UDP encaps. */ + if (IPSEC_ENABLED(ipv4) && + UDPENCAP_INPUT(n, off, AF_INET) != 0) + return (0); /* Consumed. */ } -#endif /* IPSEC_NAT_T */ #endif /* IPSEC */ #ifdef MAC if (mac_inpcb_check_deliver(inp, n) != 0) { @@ -1021,42 +1006,17 @@ switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { +#if defined(IPSEC) || defined(IPSEC_SUPPORT) +#ifdef INET case UDP_ENCAP: - INP_WUNLOCK(inp); - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); - if (error) - break; - inp = sotoinpcb(so); - KASSERT(inp != NULL, ("%s: inp == NULL", __func__)); - INP_WLOCK(inp); -#ifdef IPSEC_NAT_T - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: up == NULL", __func__)); -#endif - switch (optval) { - case 0: - /* Clear all UDP encap. */ -#ifdef IPSEC_NAT_T - up->u_flags &= ~UF_ESPINUDP_ALL; -#endif - break; -#ifdef IPSEC_NAT_T - case UDP_ENCAP_ESPINUDP: - case UDP_ENCAP_ESPINUDP_NON_IKE: - up->u_flags &= ~UF_ESPINUDP_ALL; - if (optval == UDP_ENCAP_ESPINUDP) - up->u_flags |= UF_ESPINUDP; - else if (optval == UDP_ENCAP_ESPINUDP_NON_IKE) - up->u_flags |= UF_ESPINUDP_NON_IKE; - break; -#endif - default: - error = EINVAL; - break; + if (!IPSEC_ENABLED(ipv4)) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); } - INP_WUNLOCK(inp); + error = UDPENCAP_PCBCTL(inp, sopt); break; +#endif /* INET */ +#endif /* IPSEC */ case UDPLITE_SEND_CSCOV: case UDPLITE_RECV_CSCOV: if (!isudplite) { @@ -1093,15 +1053,17 @@ break; case SOPT_GET: switch (sopt->sopt_name) { -#ifdef IPSEC_NAT_T +#if defined(IPSEC) || defined(IPSEC_SUPPORT) +#ifdef INET case UDP_ENCAP: - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: up == NULL", __func__)); - optval = up->u_flags & UF_ESPINUDP_ALL; - INP_WUNLOCK(inp); - error = sooptcopyout(sopt, &optval, sizeof optval); + if (!IPSEC_ENABLED(ipv4)) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + error = UDPENCAP_PCBCTL(inp, sopt); break; -#endif +#endif /* INET */ +#endif /* IPSEC */ case UDPLITE_SEND_CSCOV: case UDPLITE_RECV_CSCOV: if (!isudplite) { @@ -1583,142 +1545,6 @@ m_freem(m); return (error); } - - -#if defined(IPSEC) && defined(IPSEC_NAT_T) -/* - * Potentially decap ESP in UDP frame. Check for an ESP header - * and optional marker; if present, strip the UDP header and - * push the result through IPSec. - * - * Returns mbuf to be processed (potentially re-allocated) or - * NULL if consumed and/or processed. - */ -static struct mbuf * -udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off) -{ - size_t minlen, payload, skip, iphlen; - caddr_t data; - struct udpcb *up; - struct m_tag *tag; - struct udphdr *udphdr; - struct ip *ip; - - INP_RLOCK_ASSERT(inp); - - /* - * Pull up data so the longest case is contiguous: - * IP/UDP hdr + non ESP marker + ESP hdr. - */ - minlen = off + sizeof(uint64_t) + sizeof(struct esp); - if (minlen > m->m_pkthdr.len) - minlen = m->m_pkthdr.len; - if ((m = m_pullup(m, minlen)) == NULL) { - IPSECSTAT_INC(ips_in_inval); - return (NULL); /* Bypass caller processing. */ - } - data = mtod(m, caddr_t); /* Points to ip header. */ - payload = m->m_len - off; /* Size of payload. */ - - if (payload == 1 && data[off] == '\xff') - return (m); /* NB: keepalive packet, no decap. */ - - up = intoudpcb(inp); - KASSERT(up != NULL, ("%s: udpcb NULL", __func__)); - KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0, - ("u_flags 0x%x", up->u_flags)); - - /* - * Check that the payload is large enough to hold an - * ESP header and compute the amount of data to remove. - * - * NB: the caller has already done a pullup for us. - * XXX can we assume alignment and eliminate bcopys? - */ - if (up->u_flags & UF_ESPINUDP_NON_IKE) { - /* - * draft-ietf-ipsec-nat-t-ike-0[01].txt and - * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring - * possible AH mode non-IKE marker+non-ESP marker - * from draft-ietf-ipsec-udp-encaps-00.txt. - */ - uint64_t marker; - - if (payload <= sizeof(uint64_t) + sizeof(struct esp)) - return (m); /* NB: no decap. */ - bcopy(data + off, &marker, sizeof(uint64_t)); - if (marker != 0) /* Non-IKE marker. */ - return (m); /* NB: no decap. */ - skip = sizeof(uint64_t) + sizeof(struct udphdr); - } else { - uint32_t spi; - - if (payload <= sizeof(struct esp)) { - IPSECSTAT_INC(ips_in_inval); - m_freem(m); - return (NULL); /* Discard. */ - } - bcopy(data + off, &spi, sizeof(uint32_t)); - if (spi == 0) /* Non-ESP marker. */ - return (m); /* NB: no decap. */ - skip = sizeof(struct udphdr); - } - - /* - * Setup a PACKET_TAG_IPSEC_NAT_T_PORT tag to remember - * the UDP ports. This is required if we want to select - * the right SPD for multiple hosts behind same NAT. - * - * NB: ports are maintained in network byte order everywhere - * in the NAT-T code. - */ - tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS, - 2 * sizeof(uint16_t), M_NOWAIT); - if (tag == NULL) { - IPSECSTAT_INC(ips_in_nomem); - m_freem(m); - return (NULL); /* Discard. */ - } - iphlen = off - sizeof(struct udphdr); - udphdr = (struct udphdr *)(data + iphlen); - ((uint16_t *)(tag + 1))[0] = udphdr->uh_sport; - ((uint16_t *)(tag + 1))[1] = udphdr->uh_dport; - m_tag_prepend(m, tag); - - /* - * Remove the UDP header (and possibly the non ESP marker) - * IP header length is iphlen - * Before: - * <--- off ---> - * +----+------+-----+ - * | IP | UDP | ESP | - * +----+------+-----+ - * <-skip-> - * After: - * +----+-----+ - * | IP | ESP | - * +----+-----+ - * <-skip-> - */ - ovbcopy(data, data + skip, iphlen); - m_adj(m, skip); - - ip = mtod(m, struct ip *); - ip->ip_len = htons(ntohs(ip->ip_len) - skip); - ip->ip_p = IPPROTO_ESP; - - /* - * We cannot yet update the cksums so clear any - * h/w cksum flags as they are no longer valid. - */ - if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) - m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR); - - (void) ipsec_common_input(m, iphlen, offsetof(struct ip, ip_p), - AF_INET, ip->ip_p); - return (NULL); /* NB: consumed, bypass processing. */ -} -#endif /* defined(IPSEC) && defined(IPSEC_NAT_T) */ static void udp_abort(struct socket *so) Index: head/sys/netinet6/in6.h =================================================================== --- head/sys/netinet6/in6.h +++ head/sys/netinet6/in6.h @@ -432,10 +432,7 @@ #define IPV6_BINDV6ONLY IPV6_V6ONLY #endif -#if 1 /* IPSEC */ #define IPV6_IPSEC_POLICY 28 /* struct; get/set security policy */ -#endif /* IPSEC */ - /* 29; unused; was IPV6_FAITH */ #if 1 /* IPV6FIREWALL */ #define IPV6_FW_ADD 30 /* add a firewall rule to chain */ Index: head/sys/netinet6/in6_proto.c =================================================================== --- head/sys/netinet6/in6_proto.c +++ head/sys/netinet6/in6_proto.c @@ -121,11 +121,6 @@ #include #endif /* SCTP */ -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ - #include /* @@ -276,33 +271,6 @@ .pr_input = frag6_input, .pr_usrreqs = &nousrreqs }, -#ifdef IPSEC -{ - .pr_type = SOCK_RAW, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_AH, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipsec6_common_input, - .pr_usrreqs = &nousrreqs, -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_ESP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipsec6_common_input, - .pr_ctlinput = esp6_ctlinput, - .pr_usrreqs = &nousrreqs, -}, -{ - .pr_type = SOCK_RAW, - .pr_domain = &inet6domain, - .pr_protocol = IPPROTO_IPCOMP, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = ipsec6_common_input, - .pr_usrreqs = &nousrreqs, -}, -#endif /* IPSEC */ #ifdef INET { .pr_type = SOCK_RAW, @@ -470,7 +438,7 @@ #ifdef SCTP SYSCTL_NODE(_net_inet6, IPPROTO_SCTP, sctp6, CTLFLAG_RW, 0, "SCTP6"); #endif -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) SYSCTL_NODE(_net_inet6, IPPROTO_ESP, ipsec6, CTLFLAG_RW, 0, "IPSEC6"); #endif /* IPSEC */ Index: head/sys/netinet6/ip6_forward.c =================================================================== --- head/sys/netinet6/ip6_forward.c +++ head/sys/netinet6/ip6_forward.c @@ -69,12 +69,7 @@ #include -#ifdef IPSEC -#include -#include -#include -#include -#endif /* IPSEC */ +#include /* * Forward a packet. If some error occurs return the sender @@ -100,9 +95,6 @@ struct ifnet *origifp; /* maybe unnecessary */ u_int32_t inzone, outzone; struct in6_addr src_in6, dst_in6, odst; -#ifdef IPSEC - struct secpolicy *sp = NULL; -#endif struct m_tag *fwd_tag; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; @@ -130,33 +122,18 @@ m_freem(m); return; } -#ifdef IPSEC - /* - * Check if this packet has an active SA and needs to be dropped - * instead of forwarded. - */ - if (ip6_ipsec_fwd(m) != 0) { - IP6STAT_INC(ip6s_cantforward); - m_freem(m); - return; - } -#endif /* IPSEC */ + if ( #ifdef IPSTEALTH - if (!V_ip6stealth) { + V_ip6stealth == 0 && #endif - if (ip6->ip6_hlim <= IPV6_HLIMDEC) { + ip6->ip6_hlim <= IPV6_HLIMDEC) { /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ icmp6_error(m, ICMP6_TIME_EXCEEDED, - ICMP6_TIME_EXCEED_TRANSIT, 0); + ICMP6_TIME_EXCEED_TRANSIT, 0); return; } - ip6->ip6_hlim -= IPV6_HLIMDEC; -#ifdef IPSTEALTH - } -#endif - /* * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU - * size of IPv6 + ICMPv6 headers) bytes of the packet in case @@ -168,168 +145,23 @@ */ mcopy = m_copym(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN), M_NOWAIT); - -#ifdef IPSEC - /* get a security policy for this packet */ - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, &error); - if (sp == NULL) { - IPSEC6STAT_INC(ips_out_inval); - IP6STAT_INC(ip6s_cantforward); - if (mcopy) { -#if 0 - /* XXX: what icmp ? */ -#else - m_freem(mcopy); +#ifdef IPSTEALTH + if (V_ip6stealth == 0) #endif - } - m_freem(m); - return; - } + ip6->ip6_hlim -= IPV6_HLIMDEC; - error = 0; - - /* check policy */ - switch (sp->policy) { - case IPSEC_POLICY_DISCARD: - /* - * This packet is just discarded. - */ - IPSEC6STAT_INC(ips_out_polvio); - IP6STAT_INC(ip6s_cantforward); - KEY_FREESP(&sp); - if (mcopy) { -#if 0 - /* XXX: what icmp ? */ -#else +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv6)) { + if ((error = IPSEC_FORWARD(ipv6, m)) != 0) { + /* mbuf consumed by IPsec */ m_freem(mcopy); -#endif - } - m_freem(m); - return; - - case IPSEC_POLICY_BYPASS: - case IPSEC_POLICY_NONE: - /* no need to do IPsec. */ - KEY_FREESP(&sp); - goto skip_ipsec; - - case IPSEC_POLICY_IPSEC: - if (sp->req == NULL) { - /* XXX should be panic ? */ - printf("ip6_forward: No IPsec request specified.\n"); - IP6STAT_INC(ip6s_cantforward); - KEY_FREESP(&sp); - if (mcopy) { -#if 0 - /* XXX: what icmp ? */ -#else - m_freem(mcopy); -#endif - } - m_freem(m); + if (error != EINPROGRESS) + IP6STAT_INC(ip6s_cantforward); return; } - /* do IPsec */ - break; - - case IPSEC_POLICY_ENTRUST: - default: - /* should be panic ?? */ - printf("ip6_forward: Invalid policy found. %d\n", sp->policy); - KEY_FREESP(&sp); - goto skip_ipsec; + /* No IPsec processing required */ } - - { - struct ipsecrequest *isr = NULL; - - /* - * when the kernel forwards a packet, it is not proper to apply - * IPsec transport mode to the packet. This check avoid from this. - * at present, if there is even a transport mode SA request in the - * security policy, the kernel does not apply IPsec to the packet. - * this check is not enough because the following case is valid. - * ipsec esp/tunnel/xxx-xxx/require esp/transport//require; - */ - for (isr = sp->req; isr; isr = isr->next) { - if (isr->saidx.mode == IPSEC_MODE_ANY) - goto doipsectunnel; - if (isr->saidx.mode == IPSEC_MODE_TUNNEL) - goto doipsectunnel; - } - - /* - * if there's no need for tunnel mode IPsec, skip. - */ - if (!isr) - goto skip_ipsec; - - doipsectunnel: - /* - * All the extension headers will become inaccessible - * (since they can be encrypted). - * Don't panic, we need no more updates to extension headers - * on inner IPv6 packet (since they are now encapsulated). - * - * IPv6 [ESP|AH] IPv6 [extension headers] payload - */ - - /* - * If we need to encapsulate the packet, do it here - * ipsec6_proces_packet will send the packet using ip6_output - */ - error = ipsec6_process_packet(m, sp->req); - /* Release SP if an error occurred */ - if (error != 0) - KEY_FREESP(&sp); - if (error == EJUSTRETURN) { - /* - * We had a SP with a level of 'use' and no SA. We - * will just continue to process the packet without - * IPsec processing. - */ - error = 0; - goto skip_ipsec; - } - - if (error) { - /* mbuf is already reclaimed in ipsec6_process_packet. */ - switch (error) { - case EHOSTUNREACH: - case ENETUNREACH: - case EMSGSIZE: - case ENOBUFS: - case ENOMEM: - break; - default: - printf("ip6_output (ipsec): error code %d\n", error); - /* FALLTHROUGH */ - case ENOENT: - /* don't show these error codes to the user */ - break; - } - IP6STAT_INC(ip6s_cantforward); - if (mcopy) { -#if 0 - /* XXX: what icmp ? */ -#else - m_freem(mcopy); #endif - } - return; - } else { - /* - * In the FAST IPSec case we have already - * re-injected the packet and it has been freed - * by the ipsec_done() function. So, just clean - * up after ourselves. - */ - m = NULL; - goto freecopy; - } - } -skip_ipsec: -#endif again: bzero(&rin6, sizeof(struct route_in6)); dst = (struct sockaddr_in6 *)&rin6.ro_dst; @@ -540,34 +372,9 @@ /* See if the size was changed by the packet filter. */ if (m->m_pkthdr.len > IN6_LINKMTU(rt->rt_ifp)) { in6_ifstat_inc(rt->rt_ifp, ifs6_in_toobig); - if (mcopy) { - u_long mtu; -#ifdef IPSEC - size_t ipsechdrsiz; -#endif /* IPSEC */ - - mtu = IN6_LINKMTU(rt->rt_ifp); -#ifdef IPSEC - /* - * When we do IPsec tunnel ingress, we need to play - * with the link value (decrement IPsec header size - * from mtu value). The code is much simpler than v4 - * case, as we have the outgoing interface for - * encapsulated packet as "rt->rt_ifp". - */ - ipsechdrsiz = ipsec_hdrsiz(mcopy, IPSEC_DIR_OUTBOUND, - NULL); - if (ipsechdrsiz < mtu) - mtu -= ipsechdrsiz; - /* - * if mtu becomes less than minimum MTU, - * tell minimum MTU (and I'll need to fragment it). - */ - if (mtu < IPV6_MMTU) - mtu = IPV6_MMTU; -#endif /* IPSEC */ - icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, mtu); - } + if (mcopy) + icmp6_error(mcopy, ICMP6_PACKET_TOO_BIG, 0, + IN6_LINKMTU(rt->rt_ifp)); goto bad; } Index: head/sys/netinet6/ip6_input.c =================================================================== --- head/sys/netinet6/ip6_input.c +++ head/sys/netinet6/ip6_input.c @@ -118,12 +118,7 @@ #include #include -#ifdef IPSEC -#include -#include -#include -#include -#endif /* IPSEC */ +#include #include @@ -525,14 +520,11 @@ goto bad; } -#ifdef IPSEC - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if (ip6_ipsec_input(m, nxt)) - goto bad; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv6)) { + if (IPSEC_INPUT(ipv6, m, off, nxt) != 0) + return; + } #endif /* IPSEC */ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); @@ -563,7 +555,7 @@ if ((ND_IFINFO(rcvif)->flags & ND6_IFF_IFDISABLED)) goto bad; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * should the inner packet be considered authentic? * see comment in ah4_input(). @@ -735,9 +727,9 @@ * ip6 pointer. */ if (V_ip6_forwarding != 0 -#ifdef IPSEC - && !key_havesp(IPSEC_DIR_INBOUND) - && !key_havesp(IPSEC_DIR_OUTBOUND) +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + && (!IPSEC_ENABLED(ipv6) || + IPSEC_CAPS(ipv6, m, IPSEC_CAP_OPERABLE) == 0) #endif ) { if ((m = ip6_tryforward(m)) == NULL) @@ -749,12 +741,13 @@ goto hbhcheck; } } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Bypass packet filtering for packets previously handled by IPsec. */ - if (ip6_ipsec_filtertunnel(m)) - goto passin; + if (IPSEC_ENABLED(ipv6) && + IPSEC_CAPS(ipv6, m, IPSEC_CAP_BYPASS_FILTER) != 0) + goto passin; #endif /* * Run through list of hooks for input packets. @@ -962,14 +955,11 @@ goto bad; } -#ifdef IPSEC - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if (ip6_ipsec_input(m, nxt)) - goto bad; +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + if (IPSEC_ENABLED(ipv6)) { + if (IPSEC_INPUT(ipv6, m, off, nxt) != 0) + return; + } #endif /* IPSEC */ nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); Index: head/sys/netinet6/ip6_ipsec.h =================================================================== --- head/sys/netinet6/ip6_ipsec.h +++ head/sys/netinet6/ip6_ipsec.h @@ -1,42 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _NETINET_IP6_IPSEC_H_ -#define _NETINET_IP6_IPSEC_H_ - -int ip6_ipsec_filtertunnel(struct mbuf *); -int ip6_ipsec_fwd(struct mbuf *); -int ip6_ipsec_input(struct mbuf *, int); -int ip6_ipsec_output(struct mbuf **, struct inpcb *, int *); -#if 0 -int ip6_ipsec_mtu(struct mbuf *); -#endif -#endif Index: head/sys/netinet6/ip6_ipsec.c =================================================================== --- head/sys/netinet6/ip6_ipsec.c +++ head/sys/netinet6/ip6_ipsec.c @@ -1,293 +0,0 @@ -/*- - * Copyright (c) 1982, 1986, 1988, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include "opt_inet.h" -#include "opt_sctp.h" -#include "opt_ipsec.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef SCTP -#include -#endif - -#include - -#include -#include -#include -#include -#ifdef IPSEC_DEBUG -#include -#else -#define KEYDEBUG(lev,arg) -#endif - -#include -#include - -extern struct protosw inet6sw[]; - -static VNET_DEFINE(int, ip6_ipsec6_filtertunnel) = 0; -#define V_ip6_ipsec6_filtertunnel VNET(ip6_ipsec6_filtertunnel) - -SYSCTL_DECL(_net_inet6_ipsec6); -SYSCTL_INT(_net_inet6_ipsec6, OID_AUTO, filtertunnel, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ipsec6_filtertunnel), 0, - "If set filter packets from an IPsec tunnel."); - -/* - * Check if we have to jump over firewall processing for this packet. - * Called from ip6_input(). - * 1 = jump over firewall, 0 = packet goes through firewall. - */ -int -ip6_ipsec_filtertunnel(struct mbuf *m) -{ - - /* - * Bypass packet filtering for packets previously handled by IPsec. - */ - if (!V_ip6_ipsec6_filtertunnel && - m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) - return (1); - return (0); -} - -/* - * Check if this packet has an active SA and needs to be dropped instead - * of forwarded. - * Called from ip6_forward(). - * 1 = drop packet, 0 = forward packet. - */ -int -ip6_ipsec_fwd(struct mbuf *m) -{ - - return (ipsec6_in_reject(m, NULL)); -} - -/* - * Check if protocol type doesn't have a further header and do IPSEC - * decryption or reject right now. Protocols with further headers get - * their IPSEC treatment within the protocol specific processing. - * Called from ip6_input(). - * 1 = drop packet, 0 = continue processing packet. - */ -int -ip6_ipsec_input(struct mbuf *m, int nxt) -{ - - /* - * enforce IPsec policy checking if we are seeing last header. - * note that we do not visit this with protocols with pcb layer - * code - like udp/tcp/raw ip. - */ - if ((inet6sw[ip6_protox[nxt]].pr_flags & PR_LASTHDR) != 0) - return (ipsec6_in_reject(m, NULL)); - return (0); -} - -/* - * Called from ip6_output(). - * 1 = drop packet, 0 = continue processing packet, - * -1 = packet was reinjected and stop processing packet - */ - -int -ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *error) -{ - struct secpolicy *sp; - - /* - * Check the security policy (SP) for the packet and, if - * required, do IPsec-related processing. There are two - * cases here; the first time a packet is sent through - * it will be untagged and handled by ipsec4_checkpolicy. - * If the packet is resubmitted to ip6_output (e.g. after - * AH, ESP, etc. processing), there will be a tag to bypass - * the lookup and related policy checking. - */ - if (m_tag_find(*m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { - *error = 0; - return (0); - } - sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, error, inp); - /* - * There are four return cases: - * sp != NULL apply IPsec policy - * sp == NULL, error == 0 no IPsec handling needed - * sp == NULL, error == -EINVAL discard packet w/o error - * sp == NULL, error != 0 discard packet, report error - */ - if (sp != NULL) { - /* - * Do delayed checksums now because we send before - * this is done in the normal processing path. - */ -#ifdef INET - if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(*m); - (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } -#endif - if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { - in6_delayed_cksum(*m, (*m)->m_pkthdr.len - sizeof(struct ip6_hdr), - sizeof(struct ip6_hdr)); - (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; - } -#ifdef SCTP - if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { - sctp_delayed_cksum(*m, sizeof(struct ip6_hdr)); - (*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; - } -#endif - - /* NB: callee frees mbuf */ - *error = ipsec6_process_packet(*m, sp->req); - KEY_FREESP(&sp); - if (*error == EJUSTRETURN) { - /* - * We had a SP with a level of 'use' and no SA. We - * will just continue to process the packet without - * IPsec processing. - */ - *error = 0; - goto done; - } - - /* - * Preserve KAME behaviour: ENOENT can be returned - * when an SA acquire is in progress. Don't propagate - * this to user-level; it confuses applications. - * - * XXX this will go away when the SADB is redone. - */ - if (*error == ENOENT) - *error = 0; - goto reinjected; - } else { /* sp == NULL */ - if (*error != 0) { - /* - * Hack: -EINVAL is used to signal that a packet - * should be silently discarded. This is typically - * because we asked key management for an SA and - * it was delayed (e.g. kicked up to IKE). - */ - if (*error == -EINVAL) - *error = 0; - goto bad; - } - /* No IPsec processing for this packet. */ - } -done: - return (0); -reinjected: - return (-1); -bad: - if (sp != NULL) - KEY_FREESP(&sp); - return (1); -} - -#if 0 -/* - * Compute the MTU for a forwarded packet that gets IPSEC encapsulated. - * Called from ip_forward(). - * Returns MTU suggestion for ICMP needfrag reply. - */ -int -ip6_ipsec_mtu(struct mbuf *m) -{ - int mtu = 0; - /* - * If the packet is routed over IPsec tunnel, tell the - * originator the tunnel MTU. - * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz - * XXX quickhack!!! - */ -#ifdef IPSEC - struct secpolicy *sp = NULL; - int ipsecerror; - int ipsechdr; - struct route *ro; - sp = ipsec_getpolicybyaddr(m, - IPSEC_DIR_OUTBOUND, - IP_FORWARDING, - &ipsecerror); - if (sp != NULL) { - /* count IPsec header size */ - ipsechdr = ipsec_hdrsiz(m, IPSEC_DIR_OUTBOUND, NULL); - - /* - * find the correct route for outer IPv4 - * header, compute tunnel MTU. - */ - if (sp->req != NULL && - sp->req->sav != NULL && - sp->req->sav->sah != NULL) { - ro = &sp->req->sav->sah->route_cache.sa_route; - if (ro->ro_rt && ro->ro_rt->rt_ifp) { - mtu = ro->ro_rt->rt_mtu ? ro->ro_rt->rt_mtu : - ro->ro_rt->rt_ifp->if_mtu; - mtu -= ipsechdr; - } - } - KEY_FREESP(&sp); - } -#endif /* IPSEC */ - /* XXX else case missing. */ - return mtu; -} -#endif Index: head/sys/netinet6/ip6_output.c =================================================================== --- head/sys/netinet6/ip6_output.c +++ head/sys/netinet6/ip6_output.c @@ -108,12 +108,7 @@ #include #include -#ifdef IPSEC -#include -#include -#include -#include -#endif /* IPSEC */ +#include #ifdef SCTP #include #include @@ -336,6 +331,21 @@ } } +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + /* + * IPSec checking which handles several cases. + * FAST IPSEC: We re-injected the packet. + * XXX: need scope argument. + */ + if (IPSEC_ENABLED(ipv6)) { + if ((error = IPSEC_OUTPUT(ipv6, m, inp)) != 0) { + if (error == EINPROGRESS) + error = 0; + goto done; + } + } +#endif /* IPSEC */ + bzero(&exthdrs, sizeof(exthdrs)); if (opt) { /* Hop-by-Hop options header */ @@ -360,25 +370,7 @@ MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2); } -#ifdef IPSEC /* - * IPSec checking which handles several cases. - * FAST IPSEC: We re-injected the packet. - * XXX: need scope argument. - */ - switch(ip6_ipsec_output(&m, inp, &error)) - { - case 1: /* Bad packet */ - goto freehdrs; - case -1: /* IPSec done */ - goto done; - case 0: /* No IPSec */ - default: - break; - } -#endif /* IPSEC */ - - /* * Calculate the total length of the extension header chain. * Keep the length of the unfragmentable part for fragmentation. */ @@ -1912,23 +1904,13 @@ INP_WUNLOCK(in6p); break; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) case IPV6_IPSEC_POLICY: - { - caddr_t req; - struct mbuf *m; - - if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */ + if (IPSEC_ENABLED(ipv6)) { + error = IPSEC_PCBCTL(ipv6, in6p, sopt); break; - if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */ - break; - req = mtod(m, caddr_t); - error = ipsec_set_policy(in6p, optname, req, - m->m_len, (sopt->sopt_td != NULL) ? - sopt->sopt_td->td_ucred : NULL); - m_freem(m); - break; - } + } + /* FALLTHROUGH */ #endif /* IPSEC */ default: @@ -2153,37 +2135,14 @@ error = ip6_getmoptions(in6p, sopt); break; -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) case IPV6_IPSEC_POLICY: - { - caddr_t req = NULL; - size_t len = 0; - struct mbuf *m = NULL; - struct mbuf **mp = &m; - size_t ovalsize = sopt->sopt_valsize; - caddr_t oval = (caddr_t)sopt->sopt_val; - - error = soopt_getm(sopt, &m); /* XXX */ - if (error != 0) + if (IPSEC_ENABLED(ipv6)) { + error = IPSEC_PCBCTL(ipv6, in6p, sopt); break; - error = soopt_mcopyin(sopt, m); /* XXX */ - if (error != 0) - break; - sopt->sopt_valsize = ovalsize; - sopt->sopt_val = oval; - if (m) { - req = mtod(m, caddr_t); - len = m->m_len; } - error = ipsec_get_policy(in6p, req, len, mp); - if (error == 0) - error = soopt_mcopyout(sopt, m); /* XXX */ - if (error == 0 && m) - m_freem(m); - break; - } + /* FALLTHROUGH */ #endif /* IPSEC */ - default: error = ENOPROTOOPT; break; Index: head/sys/netinet6/raw_ip6.c =================================================================== --- head/sys/netinet6/raw_ip6.c +++ head/sys/netinet6/raw_ip6.c @@ -104,10 +104,7 @@ #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ +#include #include @@ -258,14 +255,18 @@ if (last != NULL) { struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. */ - if (n && ipsec6_in_reject(n, last)) { - m_freem(n); - /* Do not inject data into pcb. */ - } else + if (IPSEC_ENABLED(ipv6)) { + if (n != NULL && + IPSEC_CHECK_POLICY(ipv6, n, last) != 0) { + m_freem(n); + /* Do not inject data into pcb. */ + n = NULL; + } + } #endif /* IPSEC */ if (n) { if (last->inp_flags & INP_CONTROLOPTS || @@ -289,11 +290,12 @@ last = in6p; } INP_INFO_RUNLOCK(&V_ripcbinfo); -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * Check AH/ESP integrity. */ - if ((last != NULL) && ipsec6_in_reject(m, last)) { + if (IPSEC_ENABLED(ipv6) && last != NULL && + IPSEC_CHECK_POLICY(ipv6, m, last) != 0) { m_freem(m); IP6STAT_DEC(ip6s_delivered); /* Do not inject data into pcb. */ Index: head/sys/netinet6/sctp6_usrreq.c =================================================================== --- head/sys/netinet6/sctp6_usrreq.c +++ head/sys/netinet6/sctp6_usrreq.c @@ -55,11 +55,6 @@ #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ - extern struct protosw inetsw[]; int Index: head/sys/netinet6/udp6_usrreq.c =================================================================== --- head/sys/netinet6/udp6_usrreq.c +++ head/sys/netinet6/udp6_usrreq.c @@ -120,10 +120,7 @@ #include #include -#ifdef IPSEC -#include -#include -#endif /* IPSEC */ +#include #include @@ -157,11 +154,13 @@ INP_RLOCK(inp); return (in_pcbrele_rlocked(inp)); } -#ifdef IPSEC +#if defined(IPSEC) || defined(IPSEC_SUPPORT) /* Check AH/ESP integrity. */ - if (ipsec6_in_reject(n, inp)) { - m_freem(n); - return (0); + if (IPSEC_ENABLED(ipv6)) { + if (IPSEC_CHECK_POLICY(ipv6, n, inp) != 0) { + m_freem(n); + return (0); + } } #endif /* IPSEC */ #ifdef MAC Index: head/sys/netipsec/ipsec.h =================================================================== --- head/sys/netipsec/ipsec.h +++ head/sys/netipsec/ipsec.h @@ -53,11 +53,6 @@ #define IPSEC_ASSERT(_c,_m) KASSERT(_c, _m) -#define IPSEC_IS_PRIVILEGED_SO(_so) \ - ((_so)->so_cred != NULL && \ - priv_check_cred((_so)->so_cred, PRIV_NETINET_IPSEC, 0) \ - == 0) - /* * Security Policy Index * Ensure that both address families in the "src" and "dst" are same. @@ -65,35 +60,41 @@ * specifies ICMPv6 type, and the port field in "dst" specifies ICMPv6 code. */ struct secpolicyindex { - u_int8_t dir; /* direction of packet flow, see below */ union sockaddr_union src; /* IP src address for SP */ union sockaddr_union dst; /* IP dst address for SP */ - u_int8_t prefs; /* prefix length in bits for src */ - u_int8_t prefd; /* prefix length in bits for dst */ - u_int16_t ul_proto; /* upper layer Protocol */ -#ifdef notyet - uid_t uids; - uid_t uidd; - gid_t gids; - gid_t gidd; -#endif + uint8_t ul_proto; /* upper layer Protocol */ + uint8_t dir; /* direction of packet flow */ + uint8_t prefs; /* prefix length in bits for src */ + uint8_t prefd; /* prefix length in bits for dst */ }; +/* Request for IPsec */ +struct ipsecrequest { + struct secasindex saidx;/* hint for search proper SA */ + /* if __ss_len == 0 then no address specified.*/ + u_int level; /* IPsec level defined below. */ +}; + /* Security Policy Data Base */ struct secpolicy { TAILQ_ENTRY(secpolicy) chain; + LIST_ENTRY(secpolicy) idhash; + LIST_ENTRY(secpolicy) drainq; struct secpolicyindex spidx; /* selector */ - struct ipsecrequest *req; - /* pointer to the ipsec request tree, */ - /* if policy == IPSEC else this value == NULL.*/ - u_int refcnt; /* reference count */ +#define IPSEC_MAXREQ 4 + struct ipsecrequest *req[IPSEC_MAXREQ]; + u_int tcount; /* IPsec transforms count */ + volatile u_int refcnt; /* reference count */ u_int policy; /* policy_type per pfkeyv2.h */ u_int state; #define IPSEC_SPSTATE_DEAD 0 -#define IPSEC_SPSTATE_ALIVE 1 - u_int32_t priority; /* priority of this policy */ - u_int32_t id; /* It's unique number on the system. */ +#define IPSEC_SPSTATE_LARVAL 1 +#define IPSEC_SPSTATE_ALIVE 2 +#define IPSEC_SPSTATE_PCB 3 +#define IPSEC_SPSTATE_IFNET 4 + uint32_t priority; /* priority of this policy */ + uint32_t id; /* It's unique number on the system. */ /* * lifetime handler. * the policy can be used without limitiation if both lifetime and @@ -107,41 +108,25 @@ long validtime; /* duration this policy is valid without use */ }; -/* Request for IPsec */ -struct ipsecrequest { - struct ipsecrequest *next; - /* pointer to next structure */ - /* If NULL, it means the end of chain. */ - struct secasindex saidx;/* hint for search proper SA */ - /* if __ss_len == 0 then no address specified.*/ - u_int level; /* IPsec level defined below. */ - - struct secasvar *sav; /* place holder of SA for use */ - struct secpolicy *sp; /* back pointer to SP */ - struct rwlock lock; /* to interlock updates */ -}; - /* - * Need recursion for when crypto callbacks happen directly, - * as in the case of software crypto. Need to look at how - * hard it is to remove this... + * PCB security policies. + * Application can setup private security policies for socket. + * Such policies can have IPSEC, BYPASS and ENTRUST type. + * By default, policies are set to NULL. This means that they have ENTRUST type. + * When application sets BYPASS or IPSEC type policy, the flags field + * is also updated. When flags is not set, the system could store + * used security policy into the sp_in/sp_out pointer to speed up further + * lookups. */ -#define IPSECREQUEST_LOCK_INIT(_isr) \ - rw_init_flags(&(_isr)->lock, "ipsec request", RW_RECURSE) -#define IPSECREQUEST_LOCK(_isr) rw_rlock(&(_isr)->lock) -#define IPSECREQUEST_UNLOCK(_isr) rw_runlock(&(_isr)->lock) -#define IPSECREQUEST_WLOCK(_isr) rw_wlock(&(_isr)->lock) -#define IPSECREQUEST_WUNLOCK(_isr) rw_wunlock(&(_isr)->lock) -#define IPSECREQUEST_UPGRADE(_isr) rw_try_upgrade(&(_isr)->lock) -#define IPSECREQUEST_DOWNGRADE(_isr) rw_downgrade(&(_isr)->lock) -#define IPSECREQUEST_LOCK_DESTROY(_isr) rw_destroy(&(_isr)->lock) -#define IPSECREQUEST_LOCK_ASSERT(_isr) rw_assert(&(_isr)->lock, RA_LOCKED) - -/* security policy in PCB */ struct inpcbpolicy { - struct secpolicy *sp_in; - struct secpolicy *sp_out; - int priv; /* privileged socket ? */ + struct secpolicy *sp_in; + struct secpolicy *sp_out; + + uint32_t genid; + uint16_t flags; +#define INP_INBOUND_POLICY 0x0001 +#define INP_OUTBOUND_POLICY 0x0002 + uint16_t hdrsz; }; /* SP acquiring list table. */ @@ -156,6 +141,9 @@ }; #endif /* _KERNEL */ +/* buffer size for formatted output of ipsec address */ +#define IPSEC_ADDRSTRLEN (INET6_ADDRSTRLEN + 11) + /* according to IANA assignment, port 0x0000 and proto 0xff are reserved. */ #define IPSEC_PORT_ANY 0 #define IPSEC_ULPROTO_ANY 255 @@ -288,6 +276,7 @@ VNET_DECLARE(int, ip4_ipsec_ecn); VNET_DECLARE(int, ip4_esp_randpad); VNET_DECLARE(int, crypto_support); +VNET_DECLARE(int, natt_cksum_policy); #define IPSECSTAT_INC(name) \ VNET_PCPUSTAT_ADD(struct ipsecstat, ipsec4stat, name, 1) @@ -300,59 +289,52 @@ #define V_ip4_ipsec_ecn VNET(ip4_ipsec_ecn) #define V_ip4_esp_randpad VNET(ip4_esp_randpad) #define V_crypto_support VNET(crypto_support) +#define V_natt_cksum_policy VNET(natt_cksum_policy) #define ipseclog(x) do { if (V_ipsec_debug) log x; } while (0) /* for openbsd compatibility */ #define DPRINTF(x) do { if (V_ipsec_debug) printf x; } while (0) -extern struct ipsecrequest *ipsec_newisr(void); -extern void ipsec_delisr(struct ipsecrequest *); - -struct tdb_ident; -extern struct secpolicy *ipsec_getpolicy(struct tdb_ident*, u_int); struct inpcb; -extern struct secpolicy *ipsec4_checkpolicy(const struct mbuf *, u_int, - int *, struct inpcb *); -extern struct secpolicy * ipsec_getpolicybyaddr(const struct mbuf *, u_int, - int *); +struct m_tag; +struct secasvar; +struct sockopt; +struct tcphdr; +union sockaddr_union; -struct inpcb; -extern int ipsec_init_policy(struct socket *so, struct inpcbpolicy **); -extern int ipsec_copy_policy(struct inpcbpolicy *, struct inpcbpolicy *); -extern u_int ipsec_get_reqlevel(struct ipsecrequest *); +int ipsec_if_input(struct mbuf *, struct secasvar *, uint32_t); -extern int ipsec_set_policy(struct inpcb *inp, int optname, - caddr_t request, size_t len, struct ucred *cred); -extern int ipsec_get_policy(struct inpcb *inpcb, caddr_t request, - size_t len, struct mbuf **mp); -extern int ipsec_delete_pcbpolicy(struct inpcb *); -extern int ipsec4_in_reject(const struct mbuf *, struct inpcb *); +struct ipsecrequest *ipsec_newisr(void); +void ipsec_delisr(struct ipsecrequest *); +struct secpolicy *ipsec4_checkpolicy(const struct mbuf *, struct inpcb *, + int *); -struct secas; -struct tcpcb; -extern int ipsec_chkreplay(u_int32_t, struct secasvar *); -extern int ipsec_updatereplay(u_int32_t, struct secasvar *); +u_int ipsec_get_reqlevel(struct secpolicy *, u_int); -extern size_t ipsec_hdrsiz(const struct mbuf *, u_int, struct inpcb *); -extern size_t ipsec_hdrsiz_tcp(struct tcpcb *); +void udp_ipsec_adjust_cksum(struct mbuf *, struct secasvar *, int, int); +int udp_ipsec_output(struct mbuf *, struct secasvar *); +int udp_ipsec_input(struct mbuf *, int, int); +int udp_ipsec_pcbctl(struct inpcb *, struct sockopt *); -union sockaddr_union; -extern char *ipsec_address(union sockaddr_union *, char *, socklen_t); -extern char *ipsec_logsastr(struct secasvar *, char *, size_t); +int ipsec_chkreplay(uint32_t, struct secasvar *); +int ipsec_updatereplay(uint32_t, struct secasvar *); +int ipsec_updateid(struct secasvar *, uint64_t *, uint64_t *); +int ipsec_initialized(void); -extern void ipsec_dumpmbuf(const struct mbuf *); +void ipsec_setspidx_inpcb(struct inpcb *, struct secpolicyindex *, u_int); -struct m_tag; -extern int ah4_input(struct mbuf **mp, int *offp, int proto); -extern void ah4_ctlinput(int cmd, struct sockaddr *sa, void *); -extern int esp4_input(struct mbuf **mp, int *offp, int proto); -extern void esp4_ctlinput(int cmd, struct sockaddr *sa, void *); -extern int ipcomp4_input(struct mbuf **mp, int *offp, int proto); -extern int ipsec_common_input(struct mbuf *m, int, int, int, int); -extern int ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, - int skip, int protoff); -extern int ipsec4_process_packet(struct mbuf *, struct ipsecrequest *); -extern int ipsec_process_done(struct mbuf *, struct ipsecrequest *); +void ipsec4_setsockaddrs(const struct mbuf *, union sockaddr_union *, + union sockaddr_union *); +int ipsec4_in_reject(const struct mbuf *, struct inpcb *); +int ipsec4_input(struct mbuf *, int, int); +int ipsec4_forward(struct mbuf *); +int ipsec4_pcbctl(struct inpcb *, struct sockopt *); +int ipsec4_output(struct mbuf *, struct inpcb *); +int ipsec4_capability(struct mbuf *, u_int); +int ipsec4_common_input_cb(struct mbuf *, struct secasvar *, int, int); +int ipsec4_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); +int ipsec_process_done(struct mbuf *, struct secpolicy *, struct secasvar *, + u_int); extern void m_checkalignment(const char* where, struct mbuf *m0, int off, int len); Index: head/sys/netipsec/ipsec.c =================================================================== --- head/sys/netipsec/ipsec.c +++ head/sys/netipsec/ipsec.c @@ -88,6 +88,7 @@ #include #include /*XXX*/ #include +#include #include #include @@ -99,12 +100,6 @@ #include -#ifdef IPSEC_DEBUG -VNET_DEFINE(int, ipsec_debug) = 1; -#else -VNET_DEFINE(int, ipsec_debug) = 0; -#endif - /* NB: name changed so netstat doesn't use it. */ VNET_PCPUSTAT_DEFINE(struct ipsecstat, ipsec4stat); VNET_PCPUSTAT_SYSINIT(ipsec4stat); @@ -124,8 +119,28 @@ VNET_DEFINE(int, ip4_ipsec_ecn) = 0; VNET_DEFINE(int, ip4_esp_randpad) = -1; -static VNET_DEFINE(struct secpolicy, def_policy); +static VNET_DEFINE(int, ip4_filtertunnel) = 0; +#define V_ip4_filtertunnel VNET(ip4_filtertunnel) +static VNET_DEFINE(int, check_policy_history) = 0; +#define V_check_policy_history VNET(check_policy_history) +static VNET_DEFINE(struct secpolicy *, def_policy) = NULL; #define V_def_policy VNET(def_policy) +static int +sysctl_def_policy(SYSCTL_HANDLER_ARGS) +{ + int error, value; + + value = V_def_policy->policy; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error == 0) { + if (value != IPSEC_POLICY_DISCARD && + value != IPSEC_POLICY_NONE) + return (EINVAL); + V_def_policy->policy = value; + } + return (error); +} + /* * Crypto support requirements: * @@ -134,17 +149,24 @@ * 0 take anything */ VNET_DEFINE(int, crypto_support) = CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE; +/* + * TCP/UDP checksum handling policy for transport mode NAT-T (RFC3948) + * + * 0 - auto: incrementally recompute, when checksum delta is known; + * if checksum delta isn't known, reset checksum to zero for UDP, + * and mark csum_flags as valid for TCP. + * 1 - fully recompute TCP/UDP checksum. + */ +VNET_DEFINE(int, natt_cksum_policy) = 0; FEATURE(ipsec, "Internet Protocol Security (IPsec)"); -#ifdef IPSEC_NAT_T FEATURE(ipsec_natt, "UDP Encapsulation of IPsec ESP Packets ('NAT-T')"); -#endif SYSCTL_DECL(_net_inet_ipsec); /* net.inet.ipsec */ -SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(def_policy).policy, 0, +SYSCTL_PROC(_net_inet_ipsec, IPSECCTL_DEF_POLICY, def_policy, + CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW, 0, 0, sysctl_def_policy, "I", "IPsec default policy."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_esp_trans_deflev), 0, @@ -160,22 +182,28 @@ "AH tunnel mode default level."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_CLEARTOS, ah_cleartos, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ah_cleartos), 0, - "If set clear type-of-service field when doing AH computation."); + "If set, clear type-of-service field when doing AH computation."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_AH_OFFSETMASK, ah_offsetmask, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ah_offsetmask), 0, - "If not set clear offset field mask when doing AH computation."); + "If not set, clear offset field mask when doing AH computation."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DFBIT, dfbit, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_dfbit), 0, "Do not fragment bit on encap."); SYSCTL_INT(_net_inet_ipsec, IPSECCTL_ECN, ecn, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_ipsec_ecn), 0, "Explicit Congestion Notification handling."); -SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEBUG, debug, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0, - "Enable IPsec debugging output when set."); SYSCTL_INT(_net_inet_ipsec, OID_AUTO, crypto_support, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(crypto_support), 0, "Crypto driver selection."); +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, check_policy_history, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(check_policy_history), 0, + "Use strict check of inbound packets to security policy compliance."); +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, natt_cksum_policy, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(natt_cksum_policy), 0, + "Method to fix TCP/UDP checksum for transport mode IPsec after NAT."); +SYSCTL_INT(_net_inet_ipsec, OID_AUTO, filtertunnel, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip4_filtertunnel), 0, + "If set, filter packets from an IPsec tunnel."); SYSCTL_VNET_PCPUSTAT(_net_inet_ipsec, OID_AUTO, ipsecstats, struct ipsecstat, ipsec4stat, "IPsec IPv4 statistics."); @@ -212,11 +240,14 @@ VNET_DEFINE(int, ip6_ah_net_deflev) = IPSEC_LEVEL_USE; VNET_DEFINE(int, ip6_ipsec_ecn) = 0; /* ECN ignore(-1)/forbidden(0)/allowed(1) */ +static VNET_DEFINE(int, ip6_filtertunnel) = 0; +#define V_ip6_filtertunnel VNET(ip6_filtertunnel) + SYSCTL_DECL(_net_inet6_ipsec6); /* net.inet6.ipsec6 */ -SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY, def_policy, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(def_policy).policy, 0, +SYSCTL_PROC(_net_inet6_ipsec6, IPSECCTL_DEF_POLICY, def_policy, + CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RW, 0, 0, sysctl_def_policy, "I", "IPsec default policy."); SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEF_ESP_TRANSLEV, esp_trans_deflev, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_esp_trans_deflev), 0, @@ -233,385 +264,228 @@ SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_ECN, ecn, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_ipsec_ecn), 0, "Explicit Congestion Notification handling."); -SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG, debug, - CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0, - "Enable IPsec debugging output when set."); +SYSCTL_INT(_net_inet6_ipsec6, OID_AUTO, filtertunnel, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_filtertunnel), 0, + "If set, filter packets from an IPsec tunnel."); SYSCTL_VNET_PCPUSTAT(_net_inet6_ipsec6, IPSECCTL_STATS, ipsecstats, struct ipsecstat, ipsec6stat, "IPsec IPv6 statistics."); #endif /* INET6 */ -static int ipsec_in_reject(struct secpolicy *, const struct mbuf *); -static int ipsec_setspidx_inpcb(const struct mbuf *, struct inpcb *, u_int); -static int ipsec_setspidx(const struct mbuf *, struct secpolicyindex *, int); -static void ipsec4_get_ulp(const struct mbuf *m, struct secpolicyindex *, int); -static int ipsec4_setspidx_ipaddr(const struct mbuf *, struct secpolicyindex *); +static int ipsec_in_reject(struct secpolicy *, struct inpcb *, + const struct mbuf *); + +#ifdef INET +static void ipsec4_get_ulp(const struct mbuf *, struct secpolicyindex *, int); +static void ipsec4_setspidx_ipaddr(const struct mbuf *, + struct secpolicyindex *); +#endif #ifdef INET6 static void ipsec6_get_ulp(const struct mbuf *m, struct secpolicyindex *, int); -static int ipsec6_setspidx_ipaddr(const struct mbuf *, struct secpolicyindex *); +static void ipsec6_setspidx_ipaddr(const struct mbuf *, + struct secpolicyindex *); #endif -static void ipsec_delpcbpolicy(struct inpcbpolicy *); -static struct secpolicy *ipsec_deepcopy_policy(struct secpolicy *src); -MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy"); - /* * Return a held reference to the default SP. */ static struct secpolicy * -key_allocsp_default(const char* where, int tag) +key_allocsp_default(void) { - struct secpolicy *sp; - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP key_allocsp_default from %s:%u\n", where, tag)); - - sp = &V_def_policy; - if (sp->policy != IPSEC_POLICY_DISCARD && - sp->policy != IPSEC_POLICY_NONE) { - ipseclog((LOG_INFO, "fixed system default policy: %d->%d\n", - sp->policy, IPSEC_POLICY_NONE)); - sp->policy = IPSEC_POLICY_NONE; - } - key_addref(sp); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP key_allocsp_default returns SP:%p (%u)\n", - sp, sp->refcnt)); - return (sp); + key_addref(V_def_policy); + return (V_def_policy); } -#define KEY_ALLOCSP_DEFAULT() \ - key_allocsp_default(__FILE__, __LINE__) -/* - * For OUTBOUND packet having a socket. Searching SPD for packet, - * and return a pointer to SP. - * OUT: NULL: no apropreate SP found, the following value is set to error. - * 0 : bypass - * EACCES : discard packet. - * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occurred. - * others: a pointer to SP - * - * NOTE: IPv6 mapped adddress concern is implemented here. - */ -struct secpolicy * -ipsec_getpolicy(struct tdb_ident *tdbi, u_int dir) +static void +ipsec_invalidate_cache(struct inpcb *inp, u_int dir) { struct secpolicy *sp; - IPSEC_ASSERT(tdbi != NULL, ("null tdbi")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); - - sp = KEY_ALLOCSP2(tdbi->spi, &tdbi->dst, tdbi->proto, dir); - if (sp == NULL) /*XXX????*/ - sp = KEY_ALLOCSP_DEFAULT(); - IPSEC_ASSERT(sp != NULL, ("null SP")); - return (sp); + INP_WLOCK_ASSERT(inp); + if (dir == IPSEC_DIR_OUTBOUND) { + if (inp->inp_sp->flags & INP_INBOUND_POLICY) + return; + sp = inp->inp_sp->sp_in; + inp->inp_sp->sp_in = NULL; + } else { + if (inp->inp_sp->flags & INP_OUTBOUND_POLICY) + return; + sp = inp->inp_sp->sp_out; + inp->inp_sp->sp_out = NULL; + } + if (sp != NULL) + key_freesp(&sp); /* release extra reference */ } -/* - * For OUTBOUND packet having a socket. Searching SPD for packet, - * and return a pointer to SP. - * OUT: NULL: no apropreate SP found, the following value is set to error. - * 0 : bypass - * EACCES : discard packet. - * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occurred. - * others: a pointer to SP - * - * NOTE: IPv6 mapped adddress concern is implemented here. - */ -static struct secpolicy * -ipsec_getpolicybysock(const struct mbuf *m, u_int dir, struct inpcb *inp, - int *error) +static void +ipsec_cachepolicy(struct inpcb *inp, struct secpolicy *sp, u_int dir) { - struct inpcbpolicy *pcbsp; - struct secpolicy *currsp = NULL; /* Policy on socket. */ - struct secpolicy *sp; + uint32_t genid; + int downgrade; - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(inp != NULL, ("null inpcb")); - IPSEC_ASSERT(error != NULL, ("null error")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); + INP_LOCK_ASSERT(inp); - if (!key_havesp(dir)) { - /* No SP found, use system default. */ - sp = KEY_ALLOCSP_DEFAULT(); - return (sp); + if (dir == IPSEC_DIR_OUTBOUND) { + /* Do we have configured PCB policy? */ + if (inp->inp_sp->flags & INP_OUTBOUND_POLICY) + return; + /* Another thread has already set cached policy */ + if (inp->inp_sp->sp_out != NULL) + return; + /* + * Do not cache OUTBOUND policy if PCB isn't connected, + * i.e. foreign address is INADDR_ANY/UNSPECIFIED. + */ +#ifdef INET + if ((inp->inp_vflag & INP_IPV4) != 0 && + inp->inp_faddr.s_addr == INADDR_ANY) + return; +#endif +#ifdef INET6 + if ((inp->inp_vflag & INP_IPV6) != 0 && + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + return; +#endif + } else { + /* Do we have configured PCB policy? */ + if (inp->inp_sp->flags & INP_INBOUND_POLICY) + return; + /* Another thread has already set cached policy */ + if (inp->inp_sp->sp_in != NULL) + return; + /* + * Do not cache INBOUND policy for listen socket, + * that is bound to INADDR_ANY/UNSPECIFIED address. + */ +#ifdef INET + if ((inp->inp_vflag & INP_IPV4) != 0 && + inp->inp_faddr.s_addr == INADDR_ANY) + return; +#endif +#ifdef INET6 + if ((inp->inp_vflag & INP_IPV6) != 0 && + IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) + return; +#endif } - - /* Set spidx in pcb. */ - *error = ipsec_setspidx_inpcb(m, inp, dir); - if (*error) - return (NULL); - - pcbsp = inp->inp_sp; - IPSEC_ASSERT(pcbsp != NULL, ("null pcbsp")); - switch (dir) { - case IPSEC_DIR_INBOUND: - currsp = pcbsp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - currsp = pcbsp->sp_out; - break; + downgrade = 0; + if (!INP_WLOCKED(inp)) { + if ((downgrade = INP_TRY_UPGRADE(inp)) == 0) + return; } - IPSEC_ASSERT(currsp != NULL, ("null currsp")); - - if (pcbsp->priv) { /* When privilieged socket. */ - switch (currsp->policy) { - case IPSEC_POLICY_BYPASS: - case IPSEC_POLICY_IPSEC: - key_addref(currsp); - sp = currsp; - break; - - case IPSEC_POLICY_ENTRUST: - /* Look for a policy in SPD. */ - sp = KEY_ALLOCSP(&currsp->spidx, dir); - if (sp == NULL) /* No SP found. */ - sp = KEY_ALLOCSP_DEFAULT(); - break; - - default: - ipseclog((LOG_ERR, "%s: Invalid policy for PCB %d\n", - __func__, currsp->policy)); - *error = EINVAL; - return (NULL); - } - } else { /* Unpriv, SPD has policy. */ - sp = KEY_ALLOCSP(&currsp->spidx, dir); - if (sp == NULL) { /* No SP found. */ - switch (currsp->policy) { - case IPSEC_POLICY_BYPASS: - ipseclog((LOG_ERR, "%s: Illegal policy for " - "non-priviliged defined %d\n", - __func__, currsp->policy)); - *error = EINVAL; - return (NULL); - - case IPSEC_POLICY_ENTRUST: - sp = KEY_ALLOCSP_DEFAULT(); - break; - - case IPSEC_POLICY_IPSEC: - key_addref(currsp); - sp = currsp; - break; - - default: - ipseclog((LOG_ERR, "%s: Invalid policy for " - "PCB %d\n", __func__, currsp->policy)); - *error = EINVAL; - return (NULL); - } - } + if (dir == IPSEC_DIR_OUTBOUND) + inp->inp_sp->sp_out = sp; + else + inp->inp_sp->sp_in = sp; + /* + * SP is already referenced by the lookup code. + * We take extra reference here to avoid race in the + * ipsec_getpcbpolicy() function - SP will not be freed in the + * time between we take SP pointer from the cache and key_addref() + * call. + */ + key_addref(sp); + genid = key_getspgen(); + if (genid != inp->inp_sp->genid) { + ipsec_invalidate_cache(inp, dir); + inp->inp_sp->genid = genid; } - IPSEC_ASSERT(sp != NULL, - ("null SP (priv %u policy %u", pcbsp->priv, currsp->policy)); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s (priv %u policy %u) allocate SP:%p (refcnt %u)\n", - __func__, pcbsp->priv, currsp->policy, sp, sp->refcnt)); - return (sp); + KEYDBG(IPSEC_STAMP, + printf("%s: PCB(%p): cached %s SP(%p)\n", + __func__, inp, dir == IPSEC_DIR_OUTBOUND ? "OUTBOUND": + "INBOUND", sp)); + if (downgrade != 0) + INP_DOWNGRADE(inp); } -/* - * For FORWADING packet or OUTBOUND without a socket. Searching SPD for packet, - * and return a pointer to SP. - * OUT: positive: a pointer to the entry for security policy leaf matched. - * NULL: no apropreate SP found, the following value is set to error. - * 0 : bypass - * EACCES : discard packet. - * ENOENT : ipsec_acquire() in progress, maybe. - * others : error occurred. - */ -struct secpolicy * -ipsec_getpolicybyaddr(const struct mbuf *m, u_int dir, int *error) +static struct secpolicy * +ipsec_checkpolicy(struct secpolicy *sp, struct inpcb *inp, int *error) { - struct secpolicyindex spidx; - struct secpolicy *sp; - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(error != NULL, ("null error")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); + /* Save found OUTBOUND policy into PCB SP cache. */ + if (inp != NULL && inp->inp_sp != NULL && inp->inp_sp->sp_out == NULL) + ipsec_cachepolicy(inp, sp, IPSEC_DIR_OUTBOUND); - sp = NULL; - *error = 0; - if (key_havesp(dir)) { - /* Make an index to look for a policy. */ - *error = ipsec_setspidx(m, &spidx, 0); - if (*error != 0) { - DPRINTF(("%s: setpidx failed, dir %u\n", - __func__, dir)); - return (NULL); - } - spidx.dir = dir; - sp = KEY_ALLOCSP(&spidx, dir); - } - if (sp == NULL) /* No SP found, use system default. */ - sp = KEY_ALLOCSP_DEFAULT(); - IPSEC_ASSERT(sp != NULL, ("null SP")); - return (sp); -} - -struct secpolicy * -ipsec4_checkpolicy(const struct mbuf *m, u_int dir, int *error, - struct inpcb *inp) -{ - struct secpolicy *sp; - - *error = 0; - if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, dir, error); - else - sp = ipsec_getpolicybysock(m, dir, inp, error); - if (sp == NULL) { - IPSEC_ASSERT(*error != 0, ("getpolicy failed w/o error")); - IPSECSTAT_INC(ips_out_inval); - return (NULL); - } - IPSEC_ASSERT(*error == 0, ("sp w/ error set to %u", *error)); switch (sp->policy) { - case IPSEC_POLICY_ENTRUST: default: printf("%s: invalid policy %u\n", __func__, sp->policy); /* FALLTHROUGH */ case IPSEC_POLICY_DISCARD: - IPSECSTAT_INC(ips_out_polvio); *error = -EINVAL; /* Packet is discarded by caller. */ - break; + /* FALLTHROUGH */ case IPSEC_POLICY_BYPASS: case IPSEC_POLICY_NONE: - KEY_FREESP(&sp); + key_freesp(&sp); sp = NULL; /* NB: force NULL result. */ break; case IPSEC_POLICY_IPSEC: - if (sp->req == NULL) /* Acquire a SA. */ - *error = key_spdacquire(sp); + /* XXXAE: handle LARVAL SP */ break; } - if (*error != 0) { - KEY_FREESP(&sp); - sp = NULL; - } + KEYDBG(IPSEC_DUMP, + printf("%s: get SP(%p), error %d\n", __func__, sp, *error)); return (sp); } -static int -ipsec_setspidx_inpcb(const struct mbuf *m, struct inpcb *inp, u_int dir) +static struct secpolicy * +ipsec_getpcbpolicy(struct inpcb *inp, u_int dir) { - struct secpolicyindex *spidx; - int error; + struct secpolicy *sp; + int flags, downgrade; - IPSEC_ASSERT(inp != NULL, ("null inp")); - IPSEC_ASSERT(inp->inp_sp != NULL, ("null inp_sp")); - IPSEC_ASSERT(inp->inp_sp->sp_out != NULL && inp->inp_sp->sp_in != NULL, - ("null sp_in || sp_out")); + if (inp == NULL || inp->inp_sp == NULL) + return (NULL); - if (dir == IPSEC_DIR_INBOUND) - spidx = &inp->inp_sp->sp_in->spidx; - else - spidx = &inp->inp_sp->sp_out->spidx; - error = ipsec_setspidx(m, spidx, 1); - if (error == 0) { - spidx->dir = dir; + INP_LOCK_ASSERT(inp); + + flags = inp->inp_sp->flags; + if (dir == IPSEC_DIR_OUTBOUND) { + sp = inp->inp_sp->sp_out; + flags &= INP_OUTBOUND_POLICY; } else { - bzero(&inp->inp_sp->sp_in->spidx, - sizeof (inp->inp_sp->sp_in->spidx)); - bzero(&inp->inp_sp->sp_out->spidx, - sizeof (inp->inp_sp->sp_in->spidx)); + sp = inp->inp_sp->sp_in; + flags &= INP_INBOUND_POLICY; } - return (error); -} - -/* - * Configure security policy index (src/dst/proto/sport/dport) - * by looking at the content of mbuf. - * The caller is responsible for error recovery (like clearing up spidx). - */ -static int -ipsec_setspidx(const struct mbuf *m, struct secpolicyindex *spidx, - int needport) -{ - struct ip ipbuf; - const struct ip *ip = NULL; - const struct mbuf *n; - u_int v; - int len; - int error; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - /* - * Validate m->m_pkthdr.len. We see incorrect length if we - * mistakenly call this function with inconsistent mbuf chain - * (like 4.4BSD tcp/udp processing). XXX Should we panic here? + * Check flags. If we have PCB SP, just return it. + * Otherwise we need to check that cached SP entry isn't stale. */ - len = 0; - for (n = m; n; n = n->m_next) - len += n->m_len; - if (m->m_pkthdr.len != len) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: pkthdr len(%d) mismatch (%d), ignored.\n", - __func__, len, m->m_pkthdr.len)); - return (EINVAL); - } - - if (m->m_pkthdr.len < sizeof(struct ip)) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: pkthdr len(%d) too small (v4), ignored.\n", - __func__, m->m_pkthdr.len)); - return (EINVAL); - } - - if (m->m_len >= sizeof(*ip)) - ip = mtod(m, const struct ip *); - else { - m_copydata(m, 0, sizeof(ipbuf), (caddr_t)&ipbuf); - ip = &ipbuf; - } - v = ip->ip_v; - switch (v) { - case 4: - error = ipsec4_setspidx_ipaddr(m, spidx); - if (error) - return (error); - ipsec4_get_ulp(m, spidx, needport); - return (0); -#ifdef INET6 - case 6: - if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: pkthdr len(%d) too small (v6), " - "ignored\n", __func__, m->m_pkthdr.len)); - return (EINVAL); + if (flags == 0) { + if (sp == NULL) + return (NULL); + if (inp->inp_sp->genid != key_getspgen()) { + /* Invalidate the cache. */ + downgrade = 0; + if (!INP_WLOCKED(inp)) { + if ((downgrade = INP_TRY_UPGRADE(inp)) == 0) + return (NULL); + } + ipsec_invalidate_cache(inp, IPSEC_DIR_OUTBOUND); + ipsec_invalidate_cache(inp, IPSEC_DIR_INBOUND); + if (downgrade != 0) + INP_DOWNGRADE(inp); + return (NULL); } - error = ipsec6_setspidx_ipaddr(m, spidx); - if (error) - return (error); - ipsec6_get_ulp(m, spidx, needport); - return (0); -#endif - default: - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: " "unknown IP version %u, ignored.\n", - __func__, v)); - return (EINVAL); + KEYDBG(IPSEC_STAMP, + printf("%s: PCB(%p): cache hit SP(%p)\n", + __func__, inp, sp)); + /* Return referenced cached policy */ } + key_addref(sp); + return (sp); } +#ifdef INET static void ipsec4_get_ulp(const struct mbuf *m, struct secpolicyindex *spidx, int needport) { - u_int8_t nxt; + uint8_t nxt; int off; /* Sanity check. */ - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip),("packet too short")); + IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip), + ("packet too short")); if (m->m_len >= sizeof (struct ip)) { const struct ip *ip = mtod(m, const struct ip *); @@ -675,61 +549,134 @@ done_proto: spidx->src.sin.sin_port = IPSEC_PORT_ANY; spidx->dst.sin.sin_port = IPSEC_PORT_ANY; + KEYDBG(IPSEC_DUMP, + printf("%s: ", __func__); kdebug_secpolicyindex(spidx, NULL)); } -/* Assumes that m is sane. */ -static int +static void ipsec4_setspidx_ipaddr(const struct mbuf *m, struct secpolicyindex *spidx) { - static const struct sockaddr_in template = { - sizeof (struct sockaddr_in), - AF_INET, - 0, { 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 } - }; - spidx->src.sin = template; - spidx->dst.sin = template; + ipsec4_setsockaddrs(m, &spidx->src, &spidx->dst); + spidx->prefs = sizeof(struct in_addr) << 3; + spidx->prefd = sizeof(struct in_addr) << 3; +} - if (m->m_len < sizeof (struct ip)) { - m_copydata(m, offsetof(struct ip, ip_src), - sizeof (struct in_addr), - (caddr_t) &spidx->src.sin.sin_addr); - m_copydata(m, offsetof(struct ip, ip_dst), - sizeof (struct in_addr), - (caddr_t) &spidx->dst.sin.sin_addr); - } else { - const struct ip *ip = mtod(m, const struct ip *); - spidx->src.sin.sin_addr = ip->ip_src; - spidx->dst.sin.sin_addr = ip->ip_dst; +static struct secpolicy * +ipsec4_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir) +{ + struct secpolicyindex spidx; + struct secpolicy *sp; + + sp = ipsec_getpcbpolicy(inp, dir); + if (sp == NULL && key_havesp(dir)) { + /* Make an index to look for a policy. */ + ipsec4_setspidx_ipaddr(m, &spidx); + /* Fill ports in spidx if we have inpcb. */ + ipsec4_get_ulp(m, &spidx, inp != NULL); + spidx.dir = dir; + sp = key_allocsp(&spidx, dir); } + if (sp == NULL) /* No SP found, use system default. */ + sp = key_allocsp_default(); + return (sp); +} - spidx->prefs = sizeof(struct in_addr) << 3; - spidx->prefd = sizeof(struct in_addr) << 3; +/* + * Check security policy for *OUTBOUND* IPv4 packet. + */ +struct secpolicy * +ipsec4_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error) +{ + struct secpolicy *sp; - return (0); + *error = 0; + sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_OUTBOUND); + if (sp != NULL) + sp = ipsec_checkpolicy(sp, inp, error); + if (sp == NULL) { + switch (*error) { + case 0: /* No IPsec required: BYPASS or NONE */ + break; + case -EINVAL: + IPSECSTAT_INC(ips_out_polvio); + break; + default: + IPSECSTAT_INC(ips_out_inval); + } + } + KEYDBG(IPSEC_STAMP, + printf("%s: using SP(%p), error %d\n", __func__, sp, *error)); + if (sp != NULL) + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); + return (sp); } +/* + * Check IPv4 packet against *INBOUND* security policy. + * This function is called from tcp_input(), udp_input(), + * rip_input() and sctp_input(). + */ +int +ipsec4_in_reject(const struct mbuf *m, struct inpcb *inp) +{ + struct secpolicy *sp; + int result; + + sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_INBOUND); + result = ipsec_in_reject(sp, inp, m); + key_freesp(&sp); + if (result != 0) + IPSECSTAT_INC(ips_in_polvio); + return (result); +} + +/* + * IPSEC_CAP() method implementation for IPv4. + */ +int +ipsec4_capability(struct mbuf *m, u_int cap) +{ + + switch (cap) { + case IPSEC_CAP_BYPASS_FILTER: + /* + * Bypass packet filtering for packets previously handled + * by IPsec. + */ + if (!V_ip4_filtertunnel && + m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) + return (1); + return (0); + case IPSEC_CAP_OPERABLE: + /* Do we have active security policies? */ + if (key_havesp(IPSEC_DIR_INBOUND) != 0 || + key_havesp(IPSEC_DIR_OUTBOUND) != 0) + return (1); + return (0); + }; + return (EOPNOTSUPP); +} + +#endif /* INET */ + #ifdef INET6 static void ipsec6_get_ulp(const struct mbuf *m, struct secpolicyindex *spidx, int needport) { - int off, nxt; struct tcphdr th; struct udphdr uh; struct icmp6_hdr ih; + int off, nxt; - /* Sanity check. */ - if (m == NULL) - panic("%s: NULL pointer was passed.\n", __func__); + IPSEC_ASSERT(m->m_pkthdr.len >= sizeof(struct ip6_hdr), + ("packet too short")); - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s:\n", __func__); kdebug_mbuf(m)); - /* Set default. */ spidx->ul_proto = IPSEC_ULPROTO_ANY; - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = IPSEC_PORT_ANY; - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = IPSEC_PORT_ANY; + spidx->src.sin6.sin6_port = IPSEC_PORT_ANY; + spidx->dst.sin6.sin6_port = IPSEC_PORT_ANY; nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); @@ -744,8 +691,8 @@ if (off + sizeof(struct tcphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(th), (caddr_t)&th); - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = th.th_sport; - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = th.th_dport; + spidx->src.sin6.sin6_port = th.th_sport; + spidx->dst.sin6.sin6_port = th.th_dport; break; case IPPROTO_UDP: spidx->ul_proto = nxt; @@ -754,68 +701,133 @@ if (off + sizeof(struct udphdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(uh), (caddr_t)&uh); - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = uh.uh_sport; - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = uh.uh_dport; + spidx->src.sin6.sin6_port = uh.uh_sport; + spidx->dst.sin6.sin6_port = uh.uh_dport; break; case IPPROTO_ICMPV6: spidx->ul_proto = nxt; if (off + sizeof(struct icmp6_hdr) > m->m_pkthdr.len) break; m_copydata(m, off, sizeof(ih), (caddr_t)&ih); - ((struct sockaddr_in6 *)&spidx->src)->sin6_port = - htons((uint16_t)ih.icmp6_type); - ((struct sockaddr_in6 *)&spidx->dst)->sin6_port = - htons((uint16_t)ih.icmp6_code); + spidx->src.sin6.sin6_port = htons((uint16_t)ih.icmp6_type); + spidx->dst.sin6.sin6_port = htons((uint16_t)ih.icmp6_code); break; default: /* XXX Intermediate headers??? */ spidx->ul_proto = nxt; break; } + KEYDBG(IPSEC_DUMP, + printf("%s: ", __func__); kdebug_secpolicyindex(spidx, NULL)); } -/* Assumes that m is sane. */ -static int +static void ipsec6_setspidx_ipaddr(const struct mbuf *m, struct secpolicyindex *spidx) { - struct ip6_hdr ip6buf; - const struct ip6_hdr *ip6 = NULL; - struct sockaddr_in6 *sin6; - if (m->m_len >= sizeof(*ip6)) - ip6 = mtod(m, const struct ip6_hdr *); - else { - m_copydata(m, 0, sizeof(ip6buf), (caddr_t)&ip6buf); - ip6 = &ip6buf; - } + ipsec6_setsockaddrs(m, &spidx->src, &spidx->dst); + spidx->prefs = sizeof(struct in6_addr) << 3; + spidx->prefd = sizeof(struct in6_addr) << 3; +} - sin6 = (struct sockaddr_in6 *)&spidx->src; - bzero(sin6, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(struct sockaddr_in6); - bcopy(&ip6->ip6_src, &sin6->sin6_addr, sizeof(ip6->ip6_src)); - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); +static struct secpolicy * +ipsec6_getpolicy(const struct mbuf *m, struct inpcb *inp, u_int dir) +{ + struct secpolicyindex spidx; + struct secpolicy *sp; + + sp = ipsec_getpcbpolicy(inp, dir); + if (sp == NULL && key_havesp(dir)) { + /* Make an index to look for a policy. */ + ipsec6_setspidx_ipaddr(m, &spidx); + /* Fill ports in spidx if we have inpcb. */ + ipsec6_get_ulp(m, &spidx, inp != NULL); + spidx.dir = dir; + sp = key_allocsp(&spidx, dir); } - spidx->prefs = sizeof(struct in6_addr) << 3; + if (sp == NULL) /* No SP found, use system default. */ + sp = key_allocsp_default(); + return (sp); +} - sin6 = (struct sockaddr_in6 *)&spidx->dst; - bzero(sin6, sizeof(*sin6)); - sin6->sin6_family = AF_INET6; - sin6->sin6_len = sizeof(struct sockaddr_in6); - bcopy(&ip6->ip6_dst, &sin6->sin6_addr, sizeof(ip6->ip6_dst)); - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); +/* + * Check security policy for *OUTBOUND* IPv6 packet. + */ +struct secpolicy * +ipsec6_checkpolicy(const struct mbuf *m, struct inpcb *inp, int *error) +{ + struct secpolicy *sp; + + *error = 0; + sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_OUTBOUND); + if (sp != NULL) + sp = ipsec_checkpolicy(sp, inp, error); + if (sp == NULL) { + switch (*error) { + case 0: /* No IPsec required: BYPASS or NONE */ + break; + case -EINVAL: + IPSEC6STAT_INC(ips_out_polvio); + break; + default: + IPSEC6STAT_INC(ips_out_inval); + } } - spidx->prefd = sizeof(struct in6_addr) << 3; + KEYDBG(IPSEC_STAMP, + printf("%s: using SP(%p), error %d\n", __func__, sp, *error)); + if (sp != NULL) + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); + return (sp); +} - return (0); +/* + * Check IPv6 packet against inbound security policy. + * This function is called from tcp6_input(), udp6_input(), + * rip6_input() and sctp_input(). + */ +int +ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) +{ + struct secpolicy *sp; + int result; + + sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_INBOUND); + result = ipsec_in_reject(sp, inp, m); + key_freesp(&sp); + if (result) + IPSEC6STAT_INC(ips_in_polvio); + return (result); } -#endif +/* + * IPSEC_CAP() method implementation for IPv6. + */ int +ipsec6_capability(struct mbuf *m, u_int cap) +{ + + switch (cap) { + case IPSEC_CAP_BYPASS_FILTER: + /* + * Bypass packet filtering for packets previously handled + * by IPsec. + */ + if (!V_ip6_filtertunnel && + m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) + return (1); + return (0); + case IPSEC_CAP_OPERABLE: + /* Do we have active security policies? */ + if (key_havesp(IPSEC_DIR_INBOUND) != 0 || + key_havesp(IPSEC_DIR_OUTBOUND) != 0) + return (1); + return (0); + }; + return (EOPNOTSUPP); +} +#endif /* INET6 */ + +int ipsec_run_hhooks(struct ipsec_ctx_data *ctx, int type) { int idx; @@ -843,322 +855,41 @@ return (0); } -static void -ipsec_delpcbpolicy(struct inpcbpolicy *p) -{ - - free(p, M_IPSEC_INPCB); -} - -/* Initialize policy in PCB. */ -int -ipsec_init_policy(struct socket *so, struct inpcbpolicy **pcb_sp) -{ - struct inpcbpolicy *new; - - /* Sanity check. */ - if (so == NULL || pcb_sp == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - - new = (struct inpcbpolicy *) malloc(sizeof(struct inpcbpolicy), - M_IPSEC_INPCB, M_NOWAIT|M_ZERO); - if (new == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - return (ENOBUFS); - } - - new->priv = IPSEC_IS_PRIVILEGED_SO(so); - - if ((new->sp_in = KEY_NEWSP()) == NULL) { - ipsec_delpcbpolicy(new); - return (ENOBUFS); - } - new->sp_in->policy = IPSEC_POLICY_ENTRUST; - if ((new->sp_out = KEY_NEWSP()) == NULL) { - KEY_FREESP(&new->sp_in); - ipsec_delpcbpolicy(new); - return (ENOBUFS); - } - new->sp_out->policy = IPSEC_POLICY_ENTRUST; - *pcb_sp = new; - - return (0); -} - -/* Copy old IPsec policy into new. */ -int -ipsec_copy_policy(struct inpcbpolicy *old, struct inpcbpolicy *new) -{ - struct secpolicy *sp; - - sp = ipsec_deepcopy_policy(old->sp_in); - if (sp) { - KEY_FREESP(&new->sp_in); - new->sp_in = sp; - } else - return (ENOBUFS); - - sp = ipsec_deepcopy_policy(old->sp_out); - if (sp) { - KEY_FREESP(&new->sp_out); - new->sp_out = sp; - } else - return (ENOBUFS); - - new->priv = old->priv; - - return (0); -} - -struct ipsecrequest * -ipsec_newisr(void) -{ - struct ipsecrequest *p; - - p = malloc(sizeof(struct ipsecrequest), M_IPSEC_SR, M_NOWAIT|M_ZERO); - if (p != NULL) - IPSECREQUEST_LOCK_INIT(p); - return (p); -} - -void -ipsec_delisr(struct ipsecrequest *p) -{ - - IPSECREQUEST_LOCK_DESTROY(p); - free(p, M_IPSEC_SR); -} - -/* Deep-copy a policy in PCB. */ -static struct secpolicy * -ipsec_deepcopy_policy(struct secpolicy *src) -{ - struct ipsecrequest *newchain = NULL; - struct ipsecrequest *p; - struct ipsecrequest **q; - struct ipsecrequest *r; - struct secpolicy *dst; - - if (src == NULL) - return (NULL); - dst = KEY_NEWSP(); - if (dst == NULL) - return (NULL); - - /* - * Deep-copy IPsec request chain. This is required since struct - * ipsecrequest is not reference counted. - */ - q = &newchain; - for (p = src->req; p; p = p->next) { - *q = ipsec_newisr(); - if (*q == NULL) - goto fail; - (*q)->saidx.proto = p->saidx.proto; - (*q)->saidx.mode = p->saidx.mode; - (*q)->level = p->level; - (*q)->saidx.reqid = p->saidx.reqid; - - bcopy(&p->saidx.src, &(*q)->saidx.src, sizeof((*q)->saidx.src)); - bcopy(&p->saidx.dst, &(*q)->saidx.dst, sizeof((*q)->saidx.dst)); - - (*q)->sp = dst; - - q = &((*q)->next); - } - - dst->req = newchain; - dst->policy = src->policy; - /* Do not touch the refcnt fields. */ - - return (dst); - -fail: - for (p = newchain; p; p = r) { - r = p->next; - ipsec_delisr(p); - p = NULL; - } - KEY_FREESP(&dst); - return (NULL); -} - -/* Set policy and IPsec request if present. */ -static int -ipsec_set_policy_internal(struct secpolicy **pcb_sp, int optname, - caddr_t request, size_t len, struct ucred *cred) -{ - struct sadb_x_policy *xpl; - struct secpolicy *newsp = NULL; - int error; - - /* Sanity check. */ - if (pcb_sp == NULL || *pcb_sp == NULL || request == NULL) - return (EINVAL); - if (len < sizeof(*xpl)) - return (EINVAL); - xpl = (struct sadb_x_policy *)request; - - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: passed policy\n", __func__); - kdebug_sadb_x_policy((struct sadb_ext *)xpl)); - - /* Check policy type. */ - /* ipsec_set_policy_internal() accepts IPSEC, ENTRUST and BYPASS. */ - if (xpl->sadb_x_policy_type == IPSEC_POLICY_DISCARD - || xpl->sadb_x_policy_type == IPSEC_POLICY_NONE) - return (EINVAL); - - /* Check privileged socket. */ - if (cred != NULL && xpl->sadb_x_policy_type == IPSEC_POLICY_BYPASS) { - error = priv_check_cred(cred, PRIV_NETINET_IPSEC, 0); - if (error) - return (EACCES); - } - - /* Allocating new SP entry. */ - if ((newsp = key_msg2sp(xpl, len, &error)) == NULL) - return (error); - - /* Clear old SP and set new SP. */ - KEY_FREESP(pcb_sp); - *pcb_sp = newsp; - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: new policy\n", __func__); - kdebug_secpolicy(newsp)); - - return (0); -} - -int -ipsec_set_policy(struct inpcb *inp, int optname, caddr_t request, - size_t len, struct ucred *cred) -{ - struct sadb_x_policy *xpl; - struct secpolicy **pcb_sp; - - /* Sanity check. */ - if (inp == NULL || request == NULL) - return (EINVAL); - if (len < sizeof(*xpl)) - return (EINVAL); - xpl = (struct sadb_x_policy *)request; - - /* Select direction. */ - switch (xpl->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - pcb_sp = &inp->inp_sp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - pcb_sp = &inp->inp_sp->sp_out; - break; - default: - ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, - xpl->sadb_x_policy_dir)); - return (EINVAL); - } - - return (ipsec_set_policy_internal(pcb_sp, optname, request, len, cred)); -} - -int -ipsec_get_policy(struct inpcb *inp, caddr_t request, size_t len, - struct mbuf **mp) -{ - struct sadb_x_policy *xpl; - struct secpolicy *pcb_sp; - - /* Sanity check. */ - if (inp == NULL || request == NULL || mp == NULL) - return (EINVAL); - IPSEC_ASSERT(inp->inp_sp != NULL, ("null inp_sp")); - if (len < sizeof(*xpl)) - return (EINVAL); - xpl = (struct sadb_x_policy *)request; - - /* Select direction. */ - switch (xpl->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - pcb_sp = inp->inp_sp->sp_in; - break; - case IPSEC_DIR_OUTBOUND: - pcb_sp = inp->inp_sp->sp_out; - break; - default: - ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, - xpl->sadb_x_policy_dir)); - return (EINVAL); - } - - /* Sanity check. Should be an IPSEC_ASSERT. */ - if (pcb_sp == NULL) - return (EINVAL); - - *mp = key_sp2msg(pcb_sp); - if (!*mp) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - return (ENOBUFS); - } - - (*mp)->m_type = MT_DATA; - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s:\n", __func__); kdebug_mbuf(*mp)); - - return (0); -} - -/* Delete policy in PCB. */ -int -ipsec_delete_pcbpolicy(struct inpcb *inp) -{ - IPSEC_ASSERT(inp != NULL, ("null inp")); - - if (inp->inp_sp == NULL) - return (0); - - if (inp->inp_sp->sp_in != NULL) - KEY_FREESP(&inp->inp_sp->sp_in); - - if (inp->inp_sp->sp_out != NULL) - KEY_FREESP(&inp->inp_sp->sp_out); - - ipsec_delpcbpolicy(inp->inp_sp); - inp->inp_sp = NULL; - - return (0); -} - /* * Return current level. * Either IPSEC_LEVEL_USE or IPSEC_LEVEL_REQUIRE are always returned. */ u_int -ipsec_get_reqlevel(struct ipsecrequest *isr) +ipsec_get_reqlevel(struct secpolicy *sp, u_int idx) { - u_int level = 0; + struct ipsecrequest *isr; u_int esp_trans_deflev, esp_net_deflev; u_int ah_trans_deflev, ah_net_deflev; + u_int level = 0; - IPSEC_ASSERT(isr != NULL && isr->sp != NULL, ("null argument")); - IPSEC_ASSERT(isr->sp->spidx.src.sa.sa_family == isr->sp->spidx.dst.sa.sa_family, - ("af family mismatch, src %u, dst %u", - isr->sp->spidx.src.sa.sa_family, - isr->sp->spidx.dst.sa.sa_family)); - + IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); /* XXX Note that we have ipseclog() expanded here - code sync issue. */ #define IPSEC_CHECK_DEFAULT(lev) \ - (((lev) != IPSEC_LEVEL_USE && (lev) != IPSEC_LEVEL_REQUIRE \ - && (lev) != IPSEC_LEVEL_UNIQUE) \ - ? (V_ipsec_debug \ - ? log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\ - (lev), IPSEC_LEVEL_REQUIRE) \ - : 0), \ - (lev) = IPSEC_LEVEL_REQUIRE, \ - (lev) \ - : (lev)) + (((lev) != IPSEC_LEVEL_USE && (lev) != IPSEC_LEVEL_REQUIRE && \ + (lev) != IPSEC_LEVEL_UNIQUE) \ + ? (V_ipsec_debug ? \ + log(LOG_INFO, "fixed system default level " #lev ":%d->%d\n",\ + (lev), IPSEC_LEVEL_REQUIRE) : 0), \ + (lev) = IPSEC_LEVEL_REQUIRE, (lev) : (lev)) + /* + * IPsec VTI uses unique security policy with fake spidx filled + * with zeroes. Just return IPSEC_LEVEL_REQUIRE instead of doing + * full level lookup for such policies. + */ + if (sp->state == IPSEC_SPSTATE_IFNET) { + IPSEC_ASSERT(sp->req[idx]->level == IPSEC_LEVEL_UNIQUE, + ("Wrong IPsec request level %d", sp->req[idx]->level)); + return (IPSEC_LEVEL_REQUIRE); + } + /* Set default level. */ - switch (((struct sockaddr *)&isr->sp->spidx.src)->sa_family) { + switch (sp->spidx.src.sa.sa_family) { #ifdef INET case AF_INET: esp_trans_deflev = IPSEC_CHECK_DEFAULT(V_ip4_esp_trans_deflev); @@ -1177,11 +908,12 @@ #endif /* INET6 */ default: panic("%s: unknown af %u", - __func__, isr->sp->spidx.src.sa.sa_family); + __func__, sp->spidx.src.sa.sa_family); } #undef IPSEC_CHECK_DEFAULT + isr = sp->req[idx]; /* Set level. */ switch (isr->level) { case IPSEC_LEVEL_DEFAULT: @@ -1226,6 +958,45 @@ return (level); } +static int +ipsec_check_history(const struct mbuf *m, struct secpolicy *sp, u_int idx) +{ + struct xform_history *xh; + struct m_tag *mtag; + + mtag = NULL; + while ((mtag = m_tag_find(__DECONST(struct mbuf *, m), + PACKET_TAG_IPSEC_IN_DONE, mtag)) != NULL) { + xh = (struct xform_history *)(mtag + 1); + KEYDBG(IPSEC_DATA, + char buf[IPSEC_ADDRSTRLEN]; + printf("%s: mode %s proto %u dst %s\n", __func__, + kdebug_secasindex_mode(xh->mode), xh->proto, + ipsec_address(&xh->dst, buf, sizeof(buf)))); + if (xh->proto != sp->req[idx]->saidx.proto) + continue; + /* If SA had IPSEC_MODE_ANY, consider this as match. */ + if (xh->mode != sp->req[idx]->saidx.mode && + xh->mode != IPSEC_MODE_ANY) + continue; + /* + * For transport mode IPsec request doesn't contain + * addresses. We need to use address from spidx. + */ + if (sp->req[idx]->saidx.mode == IPSEC_MODE_TRANSPORT) { + if (key_sockaddrcmp_withmask(&xh->dst.sa, + &sp->spidx.dst.sa, sp->spidx.prefd) != 0) + continue; + } else { + if (key_sockaddrcmp(&xh->dst.sa, + &sp->req[idx]->saidx.dst.sa, 0) != 0) + continue; + } + return (0); /* matched */ + } + return (1); +} + /* * Check security policy requirements against the actual * packet contents. Return one if the packet should be @@ -1237,14 +1008,17 @@ * 1: invalid */ static int -ipsec_in_reject(struct secpolicy *sp, const struct mbuf *m) +ipsec_in_reject(struct secpolicy *sp, struct inpcb *inp, const struct mbuf *m) { - struct ipsecrequest *isr; - int need_auth; + int i; - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("%s: using SP\n", __func__); kdebug_secpolicy(sp)); + KEYDBG(IPSEC_STAMP, + printf("%s: PCB(%p): using SP(%p)\n", __func__, inp, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); + if (inp != NULL && inp->inp_sp != NULL && inp->inp_sp->sp_in == NULL) + ipsec_cachepolicy(inp, sp, IPSEC_DIR_INBOUND); + /* Check policy. */ switch (sp->policy) { case IPSEC_POLICY_DISCARD: @@ -1257,132 +1031,59 @@ IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC, ("invalid policy %u", sp->policy)); - /* XXX Should compare policy against IPsec header history. */ - - need_auth = 0; - for (isr = sp->req; isr != NULL; isr = isr->next) { - if (ipsec_get_reqlevel(isr) != IPSEC_LEVEL_REQUIRE) + /* + * ipsec[46]_common_input_cb after each transform adds + * PACKET_TAG_IPSEC_IN_DONE mbuf tag. It contains SPI, proto, mode + * and destination address from saidx. We can compare info from + * these tags with requirements in SP. + */ + for (i = 0; i < sp->tcount; i++) { + /* + * Do not check IPcomp, since IPcomp document + * says that we shouldn't compress small packets. + * IPComp policy should always be treated as being + * in "use" level. + */ + if (sp->req[i]->saidx.proto == IPPROTO_IPCOMP || + ipsec_get_reqlevel(sp, i) != IPSEC_LEVEL_REQUIRE) continue; - switch (isr->saidx.proto) { + if (V_check_policy_history != 0 && + ipsec_check_history(m, sp, i) != 0) + return (1); + else switch (sp->req[i]->saidx.proto) { case IPPROTO_ESP: if ((m->m_flags & M_DECRYPTED) == 0) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, + KEYDBG(IPSEC_DUMP, printf("%s: ESP m_flags:%x\n", __func__, m->m_flags)); return (1); } - - if (!need_auth && - isr->sav != NULL && - isr->sav->tdb_authalgxform != NULL && - (m->m_flags & M_AUTHIPDGM) == 0) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, - printf("%s: ESP/AH m_flags:%x\n", __func__, - m->m_flags)); - return (1); - } break; case IPPROTO_AH: - need_auth = 1; if ((m->m_flags & M_AUTHIPHDR) == 0) { - KEYDEBUG(KEYDEBUG_IPSEC_DUMP, + KEYDBG(IPSEC_DUMP, printf("%s: AH m_flags:%x\n", __func__, m->m_flags)); return (1); } break; - case IPPROTO_IPCOMP: - /* - * We don't really care, as IPcomp document - * says that we shouldn't compress small - * packets. IPComp policy should always be - * treated as being in "use" level. - */ - break; } } return (0); /* Valid. */ } /* - * Non zero return value means security policy DISCARD or policy violation. - */ -static int -ipsec46_in_reject(const struct mbuf *m, struct inpcb *inp) -{ - struct secpolicy *sp; - int error; - int result; - - if (!key_havesp(IPSEC_DIR_INBOUND)) - return 0; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - - /* Get SP for this packet. */ - if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND, &error); - else - sp = ipsec_getpolicybysock(m, IPSEC_DIR_INBOUND, inp, &error); - - if (sp != NULL) { - result = ipsec_in_reject(sp, m); - KEY_FREESP(&sp); - } else { - result = 1; /* treat errors as policy violation */ - } - return (result); -} - -/* - * Check AH/ESP integrity. - * This function is called from tcp_input(), udp_input(), - * and {ah,esp}4_input for tunnel mode. - */ -int -ipsec4_in_reject(const struct mbuf *m, struct inpcb *inp) -{ - int result; - - result = ipsec46_in_reject(m, inp); - if (result) - IPSECSTAT_INC(ips_in_polvio); - - return (result); -} - -#ifdef INET6 -/* - * Check AH/ESP integrity. - * This function is called from tcp6_input(), udp6_input(), - * and {ah,esp}6_input for tunnel mode. - */ -int -ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) -{ - int result; - - result = ipsec46_in_reject(m, inp); - if (result) - IPSEC6STAT_INC(ips_in_polvio); - - return (result); -} -#endif - -/* * Compute the byte size to be occupied by IPsec header. * In case it is tunnelled, it includes the size of outer IP header. - * NOTE: SP passed is freed in this function. */ static size_t ipsec_hdrsiz_internal(struct secpolicy *sp) { - struct ipsecrequest *isr; size_t size; + int i; - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("%s: using SP\n", __func__); kdebug_secpolicy(sp)); + KEYDBG(IPSEC_STAMP, printf("%s: using SP(%p)\n", __func__, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); switch (sp->policy) { case IPSEC_POLICY_DISCARD: @@ -1394,80 +1095,69 @@ IPSEC_ASSERT(sp->policy == IPSEC_POLICY_IPSEC, ("invalid policy %u", sp->policy)); + /* + * XXX: for each transform we need to lookup suitable SA + * and use info from SA to calculate headers size. + * XXX: for NAT-T we need to cosider UDP header size. + */ size = 0; - for (isr = sp->req; isr != NULL; isr = isr->next) { - size_t clen = 0; - - switch (isr->saidx.proto) { + for (i = 0; i < sp->tcount; i++) { + switch (sp->req[i]->saidx.proto) { case IPPROTO_ESP: - clen = esp_hdrsiz(isr->sav); + size += esp_hdrsiz(NULL); break; case IPPROTO_AH: - clen = ah_hdrsiz(isr->sav); + size += ah_hdrsiz(NULL); break; case IPPROTO_IPCOMP: - clen = sizeof(struct ipcomp); + size += sizeof(struct ipcomp); break; } - if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { - switch (isr->saidx.dst.sa.sa_family) { + if (sp->req[i]->saidx.mode == IPSEC_MODE_TUNNEL) { + switch (sp->req[i]->saidx.dst.sa.sa_family) { +#ifdef INET case AF_INET: - clen += sizeof(struct ip); + size += sizeof(struct ip); break; +#endif #ifdef INET6 case AF_INET6: - clen += sizeof(struct ip6_hdr); + size += sizeof(struct ip6_hdr); break; #endif default: ipseclog((LOG_ERR, "%s: unknown AF %d in " "IPsec tunnel SA\n", __func__, - ((struct sockaddr *)&isr->saidx.dst)->sa_family)); + sp->req[i]->saidx.dst.sa.sa_family)); break; } } - size += clen; } - return (size); } -/* - * This function is called from ipsec_hdrsiz_tcp(), ip_ipsec_mtu(), - * disabled ip6_ipsec_mtu() and ip6_forward(). +/* + * Compute ESP/AH header size for protocols with PCB, including + * outer IP header. Currently only tcp_output() uses it. */ size_t -ipsec_hdrsiz(const struct mbuf *m, u_int dir, struct inpcb *inp) +ipsec_hdrsiz_inpcb(struct inpcb *inp) { + struct secpolicyindex spidx; struct secpolicy *sp; - int error; - size_t size; + size_t sz; - if (!key_havesp(dir)) - return 0; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - - /* Get SP for this packet. */ - if (inp == NULL) - sp = ipsec_getpolicybyaddr(m, dir, &error); - else - sp = ipsec_getpolicybysock(m, dir, inp, &error); - - if (sp != NULL) { - size = ipsec_hdrsiz_internal(sp); - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("%s: size:%lu.\n", __func__, - (unsigned long)size)); - - KEY_FREESP(&sp); - } else { - size = 0; /* XXX Should be panic? - * -> No, we are called w/o knowing if - * IPsec processing is needed. */ + sp = ipsec_getpcbpolicy(inp, IPSEC_DIR_OUTBOUND); + if (sp == NULL && key_havesp(IPSEC_DIR_OUTBOUND)) { + ipsec_setspidx_inpcb(inp, &spidx, IPSEC_DIR_OUTBOUND); + sp = key_allocsp(&spidx, IPSEC_DIR_OUTBOUND); } - return (size); + if (sp == NULL) + sp = key_allocsp_default(); + sz = ipsec_hdrsiz_internal(sp); + key_freesp(&sp); + return (sz); } /* @@ -1487,42 +1177,39 @@ #define IPSEC_BITMAP_LOC_MASK (IPSEC_REDUNDANT_BITS - 1) int -ipsec_chkreplay(u_int32_t seq, struct secasvar *sav) +ipsec_chkreplay(uint32_t seq, struct secasvar *sav) { const struct secreplay *replay; - u_int32_t wsizeb; /* Constant: window size. */ - int ret, index, bit_location; + uint32_t wsizeb; /* Constant: window size. */ + int index, bit_location; IPSEC_ASSERT(sav != NULL, ("Null SA")); IPSEC_ASSERT(sav->replay != NULL, ("Null replay state")); - SECASVAR_LOCK(sav); - - ret = 0; replay = sav->replay; /* No need to check replay if disabled. */ if (replay->wsize == 0) - goto allowed; + return (1); /* Constant. */ wsizeb = replay->wsize << 3; /* Sequence number of 0 is invalid. */ if (seq == 0) - goto end; + return (0); /* First time is always okay. */ if (replay->count == 0) - goto allowed; + return (1); /* Larger sequences are okay. */ if (seq > replay->lastseq) - goto allowed; + return (1); /* Over range to check, i.e. too old or wrapped. */ if (replay->lastseq - seq >= wsizeb) - goto end; + return (0); /* The sequence is inside the sliding window * now check the bit in the bitmap @@ -1534,14 +1221,8 @@ /* This packet already seen? */ if ((replay->bitmap)[index] & (1 << bit_location)) - goto end; - -allowed: - ret = 1; -end: - SECASVAR_UNLOCK(sav); - - return (ret); + return (0); + return (1); } /* @@ -1550,19 +1231,16 @@ * 1: NG */ int -ipsec_updatereplay(u_int32_t seq, struct secasvar *sav) +ipsec_updatereplay(uint32_t seq, struct secasvar *sav) { char buf[128]; struct secreplay *replay; - u_int32_t wsizeb; /* Constant: window size. */ - int ret, diff, index, bit_location; + uint32_t wsizeb; /* Constant: window size. */ + int diff, index, bit_location; IPSEC_ASSERT(sav != NULL, ("Null SA")); IPSEC_ASSERT(sav->replay != NULL, ("Null replay state")); - SECASVAR_LOCK(sav); - - ret = 1; replay = sav->replay; if (replay->wsize == 0) @@ -1573,7 +1251,7 @@ /* Sequence number of 0 is invalid. */ if (seq == 0) - goto end; + return (1); /* The packet is too old, no need to update */ if (wsizeb + seq < replay->lastseq) @@ -1606,7 +1284,7 @@ /* this packet has already been received */ if (replay->bitmap[index] & (1 << bit_location)) - goto end; + return (1); replay->bitmap[index] |= (1 << bit_location); @@ -1617,118 +1295,99 @@ replay->overflow++; /* Don't increment, no more packets accepted. */ - if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0) - goto end; + if ((sav->flags & SADB_X_EXT_CYCSEQ) == 0) { + if (sav->sah->saidx.proto == IPPROTO_AH) + AHSTAT_INC(ahs_wrap); + else if (sav->sah->saidx.proto == IPPROTO_ESP) + ESPSTAT_INC(esps_wrap); + return (1); + } ipseclog((LOG_WARNING, "%s: replay counter made %d cycle. %s\n", __func__, replay->overflow, - ipsec_logsastr(sav, buf, sizeof(buf)))); + ipsec_sa2str(sav, buf, sizeof(buf)))); } - - ret = 0; - -end: - SECASVAR_UNLOCK(sav); - return (ret); + return (0); } -/* Return a printable string for the address. */ -char* -ipsec_address(union sockaddr_union* sa, char *buf, socklen_t size) +int +ipsec_updateid(struct secasvar *sav, uint64_t *new, uint64_t *old) { + uint64_t tmp; - switch (sa->sa.sa_family) { -#ifdef INET - case AF_INET: - return (inet_ntop(AF_INET, &sa->sin.sin_addr, buf, size)); -#endif /* INET */ -#ifdef INET6 - case AF_INET6: - return (inet_ntop(AF_INET6, &sa->sin6.sin6_addr, buf, size)); -#endif /* INET6 */ - default: - return ("(unknown address family)"); + /* + * tdb_cryptoid is initialized by xform_init(). + * Then it can be changed only when some crypto error occurred or + * when SA is deleted. We stored used cryptoid in the xform_data + * structure. In case when crypto error occurred and crypto + * subsystem has reinited the session, it returns new cryptoid + * and EAGAIN error code. + * + * This function will be called when we got EAGAIN from crypto + * subsystem. + * *new is cryptoid that was returned by crypto subsystem in + * the crp_sid. + * *old is the original cryptoid that we stored in xform_data. + * + * For first failed request *old == sav->tdb_cryptoid, then + * we update sav->tdb_cryptoid and redo crypto_dispatch(). + * For next failed request *old != sav->tdb_cryptoid, then + * we store cryptoid from first request into the *new variable + * and crp_sid from this second session will be returned via + * *old pointer, so caller can release second session. + * + * XXXAE: check this more carefully. + */ + KEYDBG(IPSEC_STAMP, + printf("%s: SA(%p) moves cryptoid %jd -> %jd\n", + __func__, sav, (uintmax_t)(*old), (uintmax_t)(*new))); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + SECASVAR_LOCK(sav); + if (sav->tdb_cryptoid != *old) { + /* cryptoid was already updated */ + tmp = *new; + *new = sav->tdb_cryptoid; + *old = tmp; + SECASVAR_UNLOCK(sav); + return (1); } + sav->tdb_cryptoid = *new; + SECASVAR_UNLOCK(sav); + return (0); } -char * -ipsec_logsastr(struct secasvar *sav, char *buf, size_t size) +int +ipsec_initialized(void) { - char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; - IPSEC_ASSERT(sav->sah->saidx.src.sa.sa_family == - sav->sah->saidx.dst.sa.sa_family, ("address family mismatch")); - - snprintf(buf, size, "SA(SPI=%08lx src=%s dst=%s)", - (u_long)ntohl(sav->spi), - ipsec_address(&sav->sah->saidx.src, sbuf, sizeof(sbuf)), - ipsec_address(&sav->sah->saidx.dst, dbuf, sizeof(dbuf))); - return (buf); + return (V_def_policy != NULL); } -void -ipsec_dumpmbuf(const struct mbuf *m) -{ - const u_char *p; - int totlen; - int i; - - totlen = 0; - printf("---\n"); - while (m) { - p = mtod(m, const u_char *); - for (i = 0; i < m->m_len; i++) { - printf("%02x ", p[i]); - totlen++; - if (totlen % 16 == 0) - printf("\n"); - } - m = m->m_next; - } - if (totlen % 16 != 0) - printf("\n"); - printf("---\n"); -} - static void def_policy_init(const void *unused __unused) { - bzero(&V_def_policy, sizeof(struct secpolicy)); - V_def_policy.policy = IPSEC_POLICY_NONE; - V_def_policy.refcnt = 1; + V_def_policy = key_newsp(); + if (V_def_policy != NULL) { + V_def_policy->policy = IPSEC_POLICY_NONE; + /* Force INPCB SP cache invalidation */ + key_bumpspgen(); + } else + printf("%s: failed to initialize default policy\n", __func__); } -VNET_SYSINIT(def_policy_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, - def_policy_init, NULL); -/* XXX This stuff doesn't belong here... */ - -static struct xformsw* xforms = NULL; - -/* - * Register a transform; typically at system startup. - */ -void -xform_register(struct xformsw* xsp) +static void +def_policy_uninit(const void *unused __unused) { - xsp->xf_next = xforms; - xforms = xsp; + if (V_def_policy != NULL) { + key_freesp(&V_def_policy); + key_bumpspgen(); + } } -/* - * Initialize transform support in an sav. - */ -int -xform_init(struct secasvar *sav, int xftype) -{ - struct xformsw *xsp; - - if (sav->tdb_xform != NULL) /* Previously initialized. */ - return (0); - for (xsp = xforms; xsp; xsp = xsp->xf_next) - if (xsp->xf_type == xftype) - return ((*xsp->xf_init)(sav, xsp)); - return (EINVAL); -} +VNET_SYSINIT(def_policy_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, + def_policy_init, NULL); +VNET_SYSUNINIT(def_policy_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, + def_policy_uninit, NULL); Index: head/sys/netipsec/ipsec6.h =================================================================== --- head/sys/netipsec/ipsec6.h +++ head/sys/netipsec/ipsec6.h @@ -59,14 +59,22 @@ #define V_ip6_ipsec_ecn VNET(ip6_ipsec_ecn) struct inpcb; -extern int ipsec6_in_reject(const struct mbuf *, struct inpcb *); +struct secpolicy *ipsec6_checkpolicy(const struct mbuf *, + struct inpcb *, int *); -struct m_tag; -extern int ipsec6_common_input(struct mbuf **mp, int *offp, int proto); -extern int ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, - int skip, int protoff); -extern void esp6_ctlinput(int, struct sockaddr *, void *); -extern int ipsec6_process_packet(struct mbuf *, struct ipsecrequest *); +void ipsec6_setsockaddrs(const struct mbuf *, union sockaddr_union *, + union sockaddr_union *); +int ipsec6_input(struct mbuf *, int, int); +int ipsec6_in_reject(const struct mbuf *, struct inpcb *); +int ipsec6_forward(struct mbuf *); +int ipsec6_pcbctl(struct inpcb *, struct sockopt *); +int ipsec6_output(struct mbuf *, struct inpcb *); +int ipsec6_capability(struct mbuf *, u_int); +int ipsec6_common_input_cb(struct mbuf *, struct secasvar *, int, int); +int ipsec6_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); + +int ip6_ipsec_filtertunnel(struct mbuf *); +int ip6_ipsec_pcbctl(struct inpcb *, struct sockopt *); #endif /*_KERNEL*/ #endif /*_NETIPSEC_IPSEC6_H_*/ Index: head/sys/netipsec/ipsec_input.c =================================================================== --- head/sys/netipsec/ipsec_input.c +++ head/sys/netipsec/ipsec_input.c @@ -1,4 +1,3 @@ -/* $FreeBSD$ */ /* $OpenBSD: ipsec_input.c,v 1.63 2003/02/20 18:35:43 deraadt Exp $ */ /*- * The authors of this code are John Ioannidis (ji@tla.org), @@ -19,6 +18,7 @@ * Copyright (C) 1995, 1996, 1997, 1998, 1999 by John Ioannidis, * Angelos D. Keromytis and Niels Provos. * Copyright (c) 2001, Angelos D. Keromytis. + * Copyright (c) 2016 Andrey V. Elsukov * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in @@ -40,6 +40,9 @@ * IPsec input processing. */ +#include +__FBSDID("$FreeBSD$"); + #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" @@ -87,6 +90,7 @@ #include #include +#include #include #include @@ -104,29 +108,20 @@ IPCOMPSTAT_INC(ipcomps_##name); \ } while (0) -#ifdef INET -static void ipsec4_common_ctlinput(int, struct sockaddr *, void *, int); -#endif - /* * ipsec_common_input gets called when an IPsec-protected packet * is received by IPv4 or IPv6. Its job is to find the right SA * and call the appropriate transform. The transform callback * takes care of further processing (like ingress filtering). */ -int +static int ipsec_common_input(struct mbuf *m, int skip, int protoff, int af, int sproto) { - char buf[INET6_ADDRSTRLEN]; + char buf[IPSEC_ADDRSTRLEN]; union sockaddr_union dst_address; struct secasvar *sav; - u_int32_t spi; + uint32_t spi; int error; -#ifdef INET -#ifdef IPSEC_NAT_T - struct m_tag *tag; -#endif -#endif IPSEC_ISTAT(sproto, input); @@ -178,12 +173,6 @@ m_copydata(m, offsetof(struct ip, ip_dst), sizeof(struct in_addr), (caddr_t) &dst_address.sin.sin_addr); -#ifdef IPSEC_NAT_T - /* Find the source port for NAT-T; see udp*_espdecap. */ - tag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL); - if (tag != NULL) - dst_address.sin.sin_port = ((u_int16_t *)(tag + 1))[1]; -#endif /* IPSEC_NAT_T */ break; #endif /* INET */ #ifdef INET6 @@ -209,7 +198,7 @@ } /* NB: only pass dst since key_allocsa follows RFC2401 */ - sav = KEY_ALLOCSA(&dst_address, sproto, spi); + sav = key_allocsa(&dst_address, sproto, spi); if (sav == NULL) { DPRINTF(("%s: no key association found for SA %s/%08lx/%u\n", __func__, ipsec_address(&dst_address, buf, sizeof(buf)), @@ -224,7 +213,7 @@ __func__, ipsec_address(&dst_address, buf, sizeof(buf)), (u_long) ntohl(spi), sproto)); IPSEC_ISTAT(sproto, noxform); - KEY_FREESAV(&sav); + key_freesav(&sav); m_freem(m); return ENXIO; } @@ -234,71 +223,52 @@ * everything else. */ error = (*sav->tdb_xform->xf_input)(m, sav, skip, protoff); - KEY_FREESAV(&sav); - return error; + if (error != 0) + key_freesav(&sav); + return (error); } #ifdef INET -int -ah4_input(struct mbuf **mp, int *offp, int proto) -{ - struct mbuf *m; - int off; +extern struct protosw inetsw[]; - m = *mp; - off = *offp; - *mp = NULL; - - ipsec_common_input(m, off, offsetof(struct ip, ip_p), - AF_INET, IPPROTO_AH); - return (IPPROTO_DONE); -} -void -ah4_ctlinput(int cmd, struct sockaddr *sa, void *v) -{ - if (sa->sa_family == AF_INET && - sa->sa_len == sizeof(struct sockaddr_in)) - ipsec4_common_ctlinput(cmd, sa, v, IPPROTO_AH); -} - +/* + * IPSEC_INPUT() method implementation for IPv4. + * 0 - Permitted by inbound security policy for further processing. + * EACCES - Forbidden by inbound security policy. + * EINPROGRESS - consumed by IPsec. + */ int -esp4_input(struct mbuf **mp, int *offp, int proto) +ipsec4_input(struct mbuf *m, int offset, int proto) { - struct mbuf *m; - int off; - m = *mp; - off = *offp; - mp = NULL; - - ipsec_common_input(m, off, offsetof(struct ip, ip_p), - AF_INET, IPPROTO_ESP); - return (IPPROTO_DONE); + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_IPCOMP: + /* Do inbound IPsec processing for AH/ESP/IPCOMP */ + ipsec_common_input(m, offset, + offsetof(struct ip, ip_p), AF_INET, proto); + return (EINPROGRESS); /* mbuf consumed by IPsec */ + default: + /* + * Protocols with further headers get their IPsec treatment + * within the protocol specific processing. + */ + if ((inetsw[ip_protox[proto]].pr_flags & PR_LASTHDR) == 0) + return (0); + /* FALLTHROUGH */ + }; + /* + * Enforce IPsec policy checking if we are seeing last header. + */ + if (ipsec4_in_reject(m, NULL) != 0) { + /* Forbidden by inbound security policy */ + m_freem(m); + return (EACCES); + } + return (0); } -void -esp4_ctlinput(int cmd, struct sockaddr *sa, void *v) -{ - if (sa->sa_family == AF_INET && - sa->sa_len == sizeof(struct sockaddr_in)) - ipsec4_common_ctlinput(cmd, sa, v, IPPROTO_ESP); -} - -int -ipcomp4_input(struct mbuf **mp, int *offp, int proto) -{ - struct mbuf *m; - int off; - - m = *mp; - off = *offp; - mp = NULL; - - ipsec_common_input(m, off, offsetof(struct ip, ip_p), - AF_INET, IPPROTO_IPCOMP); - return (IPPROTO_DONE); -} - /* * IPsec input callback for INET protocols. * This routine is called as the transform callback. @@ -309,21 +279,14 @@ ipsec4_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { - char buf[INET6_ADDRSTRLEN]; + char buf[IPSEC_ADDRSTRLEN]; struct ipsec_ctx_data ctx; - int prot, af, sproto, isr_prot; - struct ip *ip; - struct m_tag *mtag; - struct tdb_ident *tdbi; + struct xform_history *xh; struct secasindex *saidx; - int error; -#ifdef INET6 -#ifdef notyet - char ip6buf[INET6_ADDRSTRLEN]; -#endif -#endif + struct m_tag *mtag; + struct ip *ip; + int error, prot, af, sproto, isr_prot; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); saidx = &sav->sah->saidx; @@ -337,7 +300,6 @@ if (skip != 0) { /* * Fix IPv4 header - * XXXGL: do we need this entire block? */ if (m->m_len < skip && (m = m_pullup(m, skip)) == NULL) { DPRINTF(("%s: processing failed for SA %s/%08lx\n", @@ -356,16 +318,23 @@ ip = mtod(m, struct ip *); } prot = ip->ip_p; + /* + * Check that we have NAT-T enabled and apply transport mode + * decapsulation NAT procedure (RFC3948). + * Do this before invoking into the PFIL. + */ + if (sav->natt != NULL && + (prot == IPPROTO_UDP || prot == IPPROTO_TCP)) + udp_ipsec_adjust_cksum(m, sav, prot, skip); IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET, IPSEC_ENC_BEFORE); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; - ip = mtod(m, struct ip *); + ip = mtod(m, struct ip *); /* update pointer */ /* IP-in-IP encapsulation */ if (prot == IPPROTO_IPIP && saidx->mode != IPSEC_MODE_TRANSPORT) { - if (m->m_pkthdr.len - skip < sizeof(struct ip)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; @@ -373,40 +342,11 @@ } /* enc0: strip outer IPv4 header */ m_striphdr(m, 0, ip->ip_hl << 2); - -#ifdef notyet - /* XXX PROXY address isn't recorded in SAH */ - /* - * Check that the inner source address is the same as - * the proxy address, if available. - */ - if ((saidx->proxy.sa.sa_family == AF_INET && - saidx->proxy.sin.sin_addr.s_addr != - INADDR_ANY && - ipn.ip_src.s_addr != - saidx->proxy.sin.sin_addr.s_addr) || - (saidx->proxy.sa.sa_family != AF_INET && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - inet_ntoa4(ipn.ip_src), - ipsp_address(saidx->proxy), - ipsp_address(saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } -#endif /* notyet */ } #ifdef INET6 /* IPv6-in-IP encapsulation. */ else if (prot == IPPROTO_IPV6 && saidx->mode != IPSEC_MODE_TRANSPORT) { - if (m->m_pkthdr.len - skip < sizeof(struct ip6_hdr)) { IPSEC_ISTAT(sproto, hdrops); error = EINVAL; @@ -414,39 +354,14 @@ } /* enc0: strip IPv4 header, keep IPv6 header only */ m_striphdr(m, 0, ip->ip_hl << 2); -#ifdef notyet - /* - * Check that the inner source address is the same as - * the proxy address, if available. - */ - if ((saidx->proxy.sa.sa_family == AF_INET6 && - !IN6_IS_ADDR_UNSPECIFIED(&saidx->proxy.sin6.sin6_addr) && - !IN6_ARE_ADDR_EQUAL(&ip6n.ip6_src, - &saidx->proxy.sin6.sin6_addr)) || - (saidx->proxy.sa.sa_family != AF_INET6 && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - ip6_sprintf(ip6buf, &ip6n.ip6_src), - ipsec_address(&saidx->proxy), - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } -#endif /* notyet */ } #endif /* INET6 */ else if (prot != IPPROTO_IPV6 && saidx->mode == IPSEC_MODE_ANY) { /* * When mode is wildcard, inner protocol is IPv6 and * we have no INET6 support - drop this packet a bit later. - * In other cases we assume transport mode and outer - * header was already stripped in xform_xxx_cb. + * In other cases we assume transport mode. Set prot to + * correctly choose netisr. */ prot = IPPROTO_IPIP; } @@ -457,7 +372,7 @@ */ if (sproto != IPPROTO_IPCOMP) { mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE, - sizeof(struct tdb_ident), M_NOWAIT); + sizeof(struct xform_history), M_NOWAIT); if (mtag == NULL) { DPRINTF(("%s: failed to get tag\n", __func__)); IPSEC_ISTAT(sproto, hdrops); @@ -465,14 +380,11 @@ goto bad; } - tdbi = (struct tdb_ident *)(mtag + 1); - bcopy(&saidx->dst, &tdbi->dst, saidx->dst.sa.sa_len); - tdbi->proto = sproto; - tdbi->spi = sav->spi; - /* Cache those two for enc(4) in xform_ipip. */ - tdbi->alg_auth = sav->alg_auth; - tdbi->alg_enc = sav->alg_enc; - + xh = (struct xform_history *)(mtag + 1); + bcopy(&saidx->dst, &xh->dst, saidx->dst.sa.sa_len); + xh->spi = sav->spi; + xh->proto = sproto; + xh->mode = saidx->mode; m_tag_prepend(m, mtag); } @@ -509,69 +421,65 @@ IPSEC_INIT_CTX(&ctx, &m, sav, af, IPSEC_ENC_AFTER); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; - error = netisr_queue_src(isr_prot, (uintptr_t)sav->spi, m); - if (error) { - IPSEC_ISTAT(sproto, qfull); - DPRINTF(("%s: queue full; proto %u packet dropped\n", - __func__, sproto)); - return error; + + /* Handle virtual tunneling interfaces */ + if (saidx->mode == IPSEC_MODE_TUNNEL) + error = ipsec_if_input(m, sav, af); + if (error == 0) { + error = netisr_queue_src(isr_prot, (uintptr_t)sav->spi, m); + if (error) { + IPSEC_ISTAT(sproto, qfull); + DPRINTF(("%s: queue full; proto %u packet dropped\n", + __func__, sproto)); + } } - return 0; + key_freesav(&sav); + return (error); bad: - m_freem(m); - return error; + key_freesav(&sav); + if (m != NULL) + m_freem(m); + return (error); } - -void -ipsec4_common_ctlinput(int cmd, struct sockaddr *sa, void *v, int proto) -{ - /* XXX nothing just yet */ -} #endif /* INET */ #ifdef INET6 -/* IPv6 AH wrapper. */ +/* + * IPSEC_INPUT() method implementation for IPv6. + * 0 - Permitted by inbound security policy for further processing. + * EACCES - Forbidden by inbound security policy. + * EINPROGRESS - consumed by IPsec. + */ int -ipsec6_common_input(struct mbuf **mp, int *offp, int proto) +ipsec6_input(struct mbuf *m, int offset, int proto) { - int l = 0; - int protoff; - struct ip6_ext ip6e; - if (*offp < sizeof(struct ip6_hdr)) { - DPRINTF(("%s: bad offset %u\n", __func__, *offp)); - return IPPROTO_DONE; - } else if (*offp == sizeof(struct ip6_hdr)) { - protoff = offsetof(struct ip6_hdr, ip6_nxt); - } else { - /* Chase down the header chain... */ - protoff = sizeof(struct ip6_hdr); - - do { - protoff += l; - m_copydata(*mp, protoff, sizeof(ip6e), - (caddr_t) &ip6e); - - if (ip6e.ip6e_nxt == IPPROTO_AH) - l = (ip6e.ip6e_len + 2) << 2; - else - l = (ip6e.ip6e_len + 1) << 3; - IPSEC_ASSERT(l > 0, ("l went zero or negative")); - } while (protoff + l < *offp); - - /* Malformed packet check */ - if (protoff + l != *offp) { - DPRINTF(("%s: bad packet header chain, protoff %u, " - "l %u, off %u\n", __func__, protoff, l, *offp)); - IPSEC_ISTAT(proto, hdrops); - m_freem(*mp); - *mp = NULL; - return IPPROTO_DONE; - } - protoff += offsetof(struct ip6_ext, ip6e_nxt); + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_IPCOMP: + /* Do inbound IPsec processing for AH/ESP/IPCOMP */ + ipsec_common_input(m, offset, + offsetof(struct ip6_hdr, ip6_nxt), AF_INET6, proto); + return (EINPROGRESS); /* mbuf consumed by IPsec */ + default: + /* + * Protocols with further headers get their IPsec treatment + * within the protocol specific processing. + */ + if ((inet6sw[ip6_protox[proto]].pr_flags & PR_LASTHDR) == 0) + return (0); + /* FALLTHROUGH */ + }; + /* + * Enforce IPsec policy checking if we are seeing last header. + */ + if (ipsec6_in_reject(m, NULL) != 0) { + /* Forbidden by inbound security policy */ + m_freem(m); + return (EACCES); } - (void) ipsec_common_input(*mp, *offp, protoff, AF_INET6, proto); - return IPPROTO_DONE; + return (0); } /* @@ -582,21 +490,17 @@ ipsec6_common_input_cb(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { - char buf[INET6_ADDRSTRLEN]; + char buf[IPSEC_ADDRSTRLEN]; struct ipsec_ctx_data ctx; - int prot, af, sproto; + struct xform_history *xh; + struct secasindex *saidx; struct ip6_hdr *ip6; struct m_tag *mtag; - struct tdb_ident *tdbi; - struct secasindex *saidx; + int prot, af, sproto; int nxt, isr_prot; - u_int8_t nxt8; int error, nest; -#ifdef notyet - char ip6buf[INET6_ADDRSTRLEN]; -#endif + uint8_t nxt8; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); saidx = &sav->sah->saidx; @@ -620,12 +524,13 @@ goto bad; } - ip6 = mtod(m, struct ip6_hdr *); - ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); - IPSEC_INIT_CTX(&ctx, &m, sav, af, IPSEC_ENC_BEFORE); if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_IN)) != 0) goto bad; + + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); + /* Save protocol */ m_copydata(m, protoff, 1, &nxt8); prot = nxt8; @@ -641,31 +546,6 @@ /* ip6n will now contain the inner IPv6 header. */ m_striphdr(m, 0, skip); skip = 0; -#ifdef notyet - /* - * Check that the inner source address is the same as - * the proxy address, if available. - */ - if ((saidx->proxy.sa.sa_family == AF_INET6 && - !IN6_IS_ADDR_UNSPECIFIED(&saidx->proxy.sin6.sin6_addr) && - !IN6_ARE_ADDR_EQUAL(&ip6n.ip6_src, - &saidx->proxy.sin6.sin6_addr)) || - (saidx->proxy.sa.sa_family != AF_INET6 && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - ip6_sprintf(ip6buf, &ip6n.ip6_src), - ipsec_address(&saidx->proxy), - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } -#endif /* notyet */ } #ifdef INET /* IP-in-IP encapsulation */ @@ -677,32 +557,8 @@ goto bad; } /* ipn will now contain the inner IPv4 header */ - m_striphdr(m, 0, skip); + m_striphdr(m, 0, skip); skip = 0; -#ifdef notyet - /* - * Check that the inner source address is the same as - * the proxy address, if available. - */ - if ((saidx->proxy.sa.sa_family == AF_INET && - saidx->proxy.sin.sin_addr.s_addr != INADDR_ANY && - ipn.ip_src.s_addr != saidx->proxy.sin.sin_addr.s_addr) || - (saidx->proxy.sa.sa_family != AF_INET && - saidx->proxy.sa.sa_family != 0)) { - - DPRINTF(("%s: inner source address %s doesn't " - "correspond to expected proxy source %s, " - "SA %s/%08lx\n", __func__, - inet_ntoa4(ipn.ip_src), - ipsec_address(&saidx->proxy), - ipsec_address(&saidx->dst), - (u_long) ntohl(sav->spi))); - - IPSEC_ISTAT(sproto, pdrops); - error = EACCES; - goto bad; - } -#endif /* notyet */ } #endif /* INET */ else { @@ -715,7 +571,7 @@ */ if (sproto != IPPROTO_IPCOMP) { mtag = m_tag_get(PACKET_TAG_IPSEC_IN_DONE, - sizeof(struct tdb_ident), M_NOWAIT); + sizeof(struct xform_history), M_NOWAIT); if (mtag == NULL) { DPRINTF(("%s: failed to get tag\n", __func__)); IPSEC_ISTAT(sproto, hdrops); @@ -723,20 +579,16 @@ goto bad; } - tdbi = (struct tdb_ident *)(mtag + 1); - bcopy(&saidx->dst, &tdbi->dst, sizeof(union sockaddr_union)); - tdbi->proto = sproto; - tdbi->spi = sav->spi; - /* Cache those two for enc(4) in xform_ipip. */ - tdbi->alg_auth = sav->alg_auth; - tdbi->alg_enc = sav->alg_enc; - + xh = (struct xform_history *)(mtag + 1); + bcopy(&saidx->dst, &xh->dst, saidx->dst.sa.sa_len); + xh->spi = sav->spi; + xh->proto = sproto; + xh->mode = saidx->mode; m_tag_prepend(m, mtag); } key_sa_recordxfer(sav, m); - #ifdef INET if (prot == IPPROTO_IPIP) af = AF_INET; @@ -767,12 +619,19 @@ error = EPFNOSUPPORT; goto bad; } - error = netisr_queue_src(isr_prot, (uintptr_t)sav->spi, m); - if (error) { - IPSEC_ISTAT(sproto, qfull); - DPRINTF(("%s: queue full; proto %u packet dropped\n", - __func__, sproto)); + /* Handle virtual tunneling interfaces */ + if (saidx->mode == IPSEC_MODE_TUNNEL) + error = ipsec_if_input(m, sav, af); + if (error == 0) { + error = netisr_queue_src(isr_prot, + (uintptr_t)sav->spi, m); + if (error) { + IPSEC_ISTAT(sproto, qfull); + DPRINTF(("%s: queue full; proto %u packet" + " dropped\n", __func__, sproto)); + } } + key_freesav(&sav); return (error); } /* @@ -810,99 +669,12 @@ } nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &skip, nxt); } - return 0; + key_freesav(&sav); + return (0); bad: + key_freesav(&sav); if (m) m_freem(m); - return error; -} - -void -esp6_ctlinput(int cmd, struct sockaddr *sa, void *d) -{ - struct ip6ctlparam *ip6cp = NULL; - struct mbuf *m = NULL; - struct ip6_hdr *ip6; - int off; - - if (sa->sa_family != AF_INET6 || - sa->sa_len != sizeof(struct sockaddr_in6)) - return; - if ((unsigned)cmd >= PRC_NCMDS) - return; - - /* if the parameter is from icmp6, decode it. */ - if (d != NULL) { - ip6cp = (struct ip6ctlparam *)d; - m = ip6cp->ip6c_m; - ip6 = ip6cp->ip6c_ip6; - off = ip6cp->ip6c_off; - } else { - m = NULL; - ip6 = NULL; - off = 0; /* calm gcc */ - } - - if (ip6 != NULL) { - - struct ip6ctlparam ip6cp1; - - /* - * Notify the error to all possible sockets via pfctlinput2. - * Since the upper layer information (such as protocol type, - * source and destination ports) is embedded in the encrypted - * data and might have been cut, we can't directly call - * an upper layer ctlinput function. However, the pcbnotify - * function will consider source and destination addresses - * as well as the flow info value, and may be able to find - * some PCB that should be notified. - * Although pfctlinput2 will call esp6_ctlinput(), there is - * no possibility of an infinite loop of function calls, - * because we don't pass the inner IPv6 header. - */ - bzero(&ip6cp1, sizeof(ip6cp1)); - ip6cp1.ip6c_src = ip6cp->ip6c_src; - pfctlinput2(cmd, sa, (void *)&ip6cp1); - - /* - * Then go to special cases that need ESP header information. - * XXX: We assume that when ip6 is non NULL, - * M and OFF are valid. - */ - - if (cmd == PRC_MSGSIZE) { - struct secasvar *sav; - u_int32_t spi; - int valid; - - /* check header length before using m_copydata */ - if (m->m_pkthdr.len < off + sizeof (struct esp)) - return; - m_copydata(m, off + offsetof(struct esp, esp_spi), - sizeof(u_int32_t), (caddr_t) &spi); - /* - * Check to see if we have a valid SA corresponding to - * the address in the ICMP message payload. - */ - sav = KEY_ALLOCSA((union sockaddr_union *)sa, - IPPROTO_ESP, spi); - valid = (sav != NULL); - if (sav) - KEY_FREESAV(&sav); - - /* XXX Further validation? */ - - /* - * Depending on whether the SA is "valid" and - * routing table size (mtudisc_{hi,lo}wat), we will: - * - recalcurate the new MTU and create the - * corresponding routing entry, or - * - ignore the MTU change notification. - */ - icmp6_mtudisc_update(ip6cp, valid); - } - } else { - /* we normally notify any pcb here */ - } + return (error); } #endif /* INET6 */ Index: head/sys/netipsec/ipsec_mbuf.c =================================================================== --- head/sys/netipsec/ipsec_mbuf.c +++ head/sys/netipsec/ipsec_mbuf.c @@ -30,8 +30,6 @@ * IPsec-specific mbuf routines. */ -#include "opt_param.h" - #include #include #include @@ -72,7 +70,21 @@ * the contents of m as needed. */ remain = m->m_len - skip; /* data to move */ - if (hlen > M_TRAILINGSPACE(m)) { + if (remain > skip && + hlen + max_linkhdr < M_LEADINGSPACE(m)) { + /* + * mbuf has enough free space at the beginning. + * XXX: which operation is the most heavy - copying of + * possible several hundred of bytes or allocation + * of new mbuf? We can remove max_linkhdr check + * here, but it is possible that this will lead + * to allocation of new mbuf in Layer 2 code. + */ + m->m_data -= hlen; + bcopy(mtodo(m, hlen), mtod(m, caddr_t), skip); + m->m_len += hlen; + *off = skip; + } else if (hlen > M_TRAILINGSPACE(m)) { struct mbuf *n0, *n, **np; int todo, len, done, alloc; @@ -140,7 +152,7 @@ * so there's space to write the new header. */ bcopy(mtod(m, caddr_t) + skip, - mtod(m, caddr_t) + skip + hlen, remain); + mtod(m, caddr_t) + skip + hlen, remain); m->m_len += hlen; *off = skip; } Index: head/sys/netipsec/ipsec_mod.c =================================================================== --- head/sys/netipsec/ipsec_mod.c +++ head/sys/netipsec/ipsec_mod.c @@ -0,0 +1,148 @@ +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#ifdef INET +static const struct ipsec_methods ipv4_methods = { + .input = ipsec4_input, + .forward = ipsec4_forward, + .output = ipsec4_output, + .pcbctl = ipsec4_pcbctl, + .capability = ipsec4_capability, + .check_policy = ipsec4_in_reject, + .hdrsize = ipsec_hdrsiz_inpcb, + .udp_input = udp_ipsec_input, + .udp_pcbctl = udp_ipsec_pcbctl, +}; +#ifndef KLD_MODULE +static const struct ipsec_support ipv4_ipsec = { + .enabled = IPSEC_MODULE_ENABLED, + .methods = &ipv4_methods +}; +const struct ipsec_support * const ipv4_ipsec_support = &ipv4_ipsec; +#endif /* !KLD_MODULE */ +#endif /* INET */ + +#ifdef INET6 +static const struct ipsec_methods ipv6_methods = { + .input = ipsec6_input, + .forward = ipsec6_forward, + .output = ipsec6_output, + .pcbctl = ipsec6_pcbctl, + .capability = ipsec6_capability, + .check_policy = ipsec6_in_reject, + .hdrsize = ipsec_hdrsiz_inpcb, +}; +#ifndef KLD_MODULE +static const struct ipsec_support ipv6_ipsec = { + .enabled = IPSEC_MODULE_ENABLED, + .methods = &ipv6_methods +}; +const struct ipsec_support * const ipv6_ipsec_support = &ipv6_ipsec; +#endif /* !KLD_MODULE */ +#endif /* INET6 */ + +/* + * Always register ipsec module. + * Even when IPsec is build in the kernel, we need to have + * module registered. This will prevent to load ipsec.ko. + */ +static int +ipsec_modevent(module_t mod, int type, void *data) +{ + + switch (type) { + case MOD_LOAD: + /* All xforms are registered via SYSINIT */ + if (!ipsec_initialized()) + return (ENOMEM); +#ifdef KLD_MODULE +#ifdef INET + ipsec_support_enable(ipv4_ipsec_support, &ipv4_methods); +#endif +#ifdef INET6 + ipsec_support_enable(ipv6_ipsec_support, &ipv6_methods); +#endif +#endif /* KLD_MODULE */ + break; + case MOD_UNLOAD: + /* All xforms are unregistered via SYSUNINIT */ +#ifdef KLD_MODULE +#ifdef INET + ipsec_support_disable(ipv4_ipsec_support); +#endif +#ifdef INET6 + ipsec_support_disable(ipv6_ipsec_support); +#endif +#endif /* KLD_MODULE */ + break; + default: + return (EOPNOTSUPP); + } + return (0); +} + +static moduledata_t ipsec_mod = { + "ipsec", + ipsec_modevent, + 0 +}; + +DECLARE_MODULE(ipsec, ipsec_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); +MODULE_VERSION(ipsec, 1); +#ifdef KLD_MODULE +MODULE_DEPEND(ipsec, ipsec_support, 1, 1, 1); +#endif Index: head/sys/netipsec/ipsec_output.c =================================================================== --- head/sys/netipsec/ipsec_output.c +++ head/sys/netipsec/ipsec_output.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting + * Copyright (c) 2016 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,6 +33,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_sctp.h" #include #include @@ -67,7 +69,13 @@ #ifdef INET6 #include #endif +#ifdef SCTP +#include +#endif +#include +#include +#include #include #ifdef INET6 #include @@ -84,26 +92,617 @@ #include -#ifdef IPSEC_NAT_T -#include +#define IPSEC_OSTAT_INC(proto, name) do { \ + if ((proto) == IPPROTO_ESP) \ + ESPSTAT_INC(esps_##name); \ + else if ((proto) == IPPROTO_AH)\ + AHSTAT_INC(ahs_##name); \ + else \ + IPCOMPSTAT_INC(ipcomps_##name); \ +} while (0) + +static int ipsec_encap(struct mbuf **mp, struct secasindex *saidx); + +#ifdef INET +static struct secasvar * +ipsec4_allocsa(struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) +{ + struct secasindex *saidx, tmpsaidx; + struct ipsecrequest *isr; + struct sockaddr_in *sin; + struct secasvar *sav; + struct ip *ip; + + /* + * Check system global policy controls. + */ +next: + isr = sp->req[*pidx]; + if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) || + (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) || + (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) { + DPRINTF(("%s: IPsec outbound packet dropped due" + " to policy (check your sysctls)\n", __func__)); + IPSEC_OSTAT_INC(isr->saidx.proto, pdrops); + *error = EHOSTUNREACH; + return (NULL); + } + /* + * Craft SA index to search for proper SA. Note that + * we only initialize unspecified SA peers for transport + * mode; for tunnel mode they must already be filled in. + */ + if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { + saidx = &tmpsaidx; + *saidx = isr->saidx; + ip = mtod(m, struct ip *); + if (saidx->src.sa.sa_len == 0) { + sin = &saidx->src.sin; + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_port = IPSEC_PORT_ANY; + sin->sin_addr = ip->ip_src; + } + if (saidx->dst.sa.sa_len == 0) { + sin = &saidx->dst.sin; + sin->sin_len = sizeof(*sin); + sin->sin_family = AF_INET; + sin->sin_port = IPSEC_PORT_ANY; + sin->sin_addr = ip->ip_dst; + } + } else + saidx = &sp->req[*pidx]->saidx; + /* + * Lookup SA and validate it. + */ + sav = key_allocsa_policy(sp, saidx, error); + if (sav == NULL) { + IPSECSTAT_INC(ips_out_nosa); + if (*error != 0) + return (NULL); + if (ipsec_get_reqlevel(sp, *pidx) != IPSEC_LEVEL_REQUIRE) { + /* + * We have no SA and policy that doesn't require + * this IPsec transform, thus we can continue w/o + * IPsec processing, i.e. return EJUSTRETURN. + * But first check if there is some bundled transform. + */ + if (sp->tcount > ++(*pidx)) + goto next; + *error = EJUSTRETURN; + } + return (NULL); + } + IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform")); + return (sav); +} + +/* + * IPsec output logic for IPv4. + */ +static int +ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) +{ + char sbuf[IPSEC_ADDRSTRLEN], dbuf[IPSEC_ADDRSTRLEN]; + struct ipsec_ctx_data ctx; + union sockaddr_union *dst; + struct secasvar *sav; + struct ip *ip; + int error, i, off; + + IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); + + /* + * We hold the reference to SP. Content of SP couldn't be changed. + * Craft secasindex and do lookup for suitable SA. + * Then do encapsulation if needed and call xform's output. + * We need to store SP in the xform callback parameters. + * In xform callback we will extract SP and it can be used to + * determine next transform. At the end of transform we can + * release reference to SP. + */ + sav = ipsec4_allocsa(m, sp, &idx, &error); + if (sav == NULL) { + if (error == EJUSTRETURN) { /* No IPsec required */ + key_freesp(&sp); + return (error); + } + goto bad; + } + /* + * XXXAE: most likely ip_sum at this point is wrong. + */ + IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET, IPSEC_ENC_BEFORE); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) + goto bad; + + ip = mtod(m, struct ip *); + dst = &sav->sah->saidx.dst; + /* Do the appropriate encapsulation, if necessary */ + if (sp->req[idx]->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ + dst->sa.sa_family != AF_INET || /* PF mismatch */ + (dst->sa.sa_family == AF_INET && /* Proxy */ + dst->sin.sin_addr.s_addr != INADDR_ANY && + dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) { + /* Fix IPv4 header checksum and length */ + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, ip->ip_hl << 2); + error = ipsec_encap(&m, &sav->sah->saidx); + if (error != 0) { + DPRINTF(("%s: encapsulation for SA %s->%s " + "SPI 0x%08x failed with error %d\n", __func__, + ipsec_address(&sav->sah->saidx.src, sbuf, + sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, + sizeof(dbuf)), ntohl(sav->spi), error)); + /* XXXAE: IPSEC_OSTAT_INC(tunnel); */ + goto bad; + } + } + + IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) + goto bad; + + /* + * Dispatch to the appropriate IPsec transform logic. The + * packet will be returned for transmission after crypto + * processing, etc. are completed. + * + * NB: m & sav are ``passed to caller'' who's responsible for + * reclaiming their resources. + */ + switch(dst->sa.sa_family) { + case AF_INET: + ip = mtod(m, struct ip *); + i = ip->ip_hl << 2; + off = offsetof(struct ip, ip_p); + break; +#ifdef INET6 + case AF_INET6: + i = sizeof(struct ip6_hdr); + off = offsetof(struct ip6_hdr, ip6_nxt); + break; +#endif /* INET6 */ + default: + DPRINTF(("%s: unsupported protocol family %u\n", + __func__, dst->sa.sa_family)); + error = EPFNOSUPPORT; + IPSEC_OSTAT_INC(sav->sah->saidx.proto, nopf); + goto bad; + } + error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off); + if (error != 0) { + key_freesav(&sav); + key_freesp(&sp); + } + return (error); +bad: + IPSECSTAT_INC(ips_out_inval); + if (m != NULL) + m_freem(m); + if (sav != NULL) + key_freesav(&sav); + key_freesp(&sp); + return (error); +} + +int +ipsec4_process_packet(struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp) +{ + + return (ipsec4_perform_request(m, sp, 0)); +} + +static int +ipsec4_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) +{ + struct secpolicy *sp; + int error; + + /* Lookup for the corresponding outbound security policy */ + sp = ipsec4_checkpolicy(m, inp, &error); + if (sp == NULL) { + if (error == -EINVAL) { + /* Discarded by policy. */ + m_freem(m); + return (EACCES); + } + return (0); /* No IPsec required. */ + } + + /* + * Usually we have to have tunnel mode IPsec security policy + * when we are forwarding a packet. Otherwise we could not handle + * encrypted replies, because they are not destined for us. But + * some users are doing source address translation for forwarded + * packets, and thus, even if they are forwarded, the replies will + * return back to us. + */ + if (!forwarding) { + /* + * Do delayed checksums now because we send before + * this is done in the normal processing path. + */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +#ifdef SCTP + if (m->m_pkthdr.csum_flags & CSUM_SCTP) { + struct ip *ip = mtod(m, struct ip *); + + sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); + m->m_pkthdr.csum_flags &= ~CSUM_SCTP; + } #endif + } + /* NB: callee frees mbuf and releases reference to SP */ + error = ipsec4_process_packet(m, sp, inp); + if (error == EJUSTRETURN) { + /* + * We had a SP with a level of 'use' and no SA. We + * will just continue to process the packet without + * IPsec processing and return without error. + */ + return (0); + } + if (error == 0) + return (EINPROGRESS); /* consumed by IPsec */ + return (error); +} +/* + * IPSEC_OUTPUT() method implementation for IPv4. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. + */ int -ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr) +ipsec4_output(struct mbuf *m, struct inpcb *inp) { - struct tdb_ident *tdbi; - struct m_tag *mtag; + + /* + * If the packet is resubmitted to ip_output (e.g. after + * AH, ESP, etc. processing), there will be a tag to bypass + * the lookup and related policy checking. + */ + if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) + return (0); + + return (ipsec4_common_output(m, inp, 0)); +} + +/* + * IPSEC_FORWARD() method implementation for IPv4. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. + */ +int +ipsec4_forward(struct mbuf *m) +{ + + /* + * Check if this packet has an active inbound SP and needs to be + * dropped instead of forwarded. + */ + if (ipsec4_in_reject(m, NULL) != 0) { + m_freem(m); + return (EACCES); + } + return (ipsec4_common_output(m, NULL, 1)); +} +#endif + +#ifdef INET6 +static int +in6_sa_equal_addrwithscope(const struct sockaddr_in6 *sa, + const struct in6_addr *ia) +{ + struct in6_addr ia2; + + if (IN6_IS_SCOPE_LINKLOCAL(&sa->sin6_addr)) { + memcpy(&ia2, &sa->sin6_addr, sizeof(ia2)); + ia2.s6_addr16[1] = htons(sa->sin6_scope_id); + return (IN6_ARE_ADDR_EQUAL(ia, &ia2)); + } + return (IN6_ARE_ADDR_EQUAL(&sa->sin6_addr, ia)); +} + +static struct secasvar * +ipsec6_allocsa(struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) +{ + struct secasindex *saidx, tmpsaidx; + struct ipsecrequest *isr; + struct sockaddr_in6 *sin6; struct secasvar *sav; - struct secasindex *saidx; + struct ip6_hdr *ip6; + + /* + * Check system global policy controls. + */ +next: + isr = sp->req[*pidx]; + if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) || + (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) || + (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) { + DPRINTF(("%s: IPsec outbound packet dropped due" + " to policy (check your sysctls)\n", __func__)); + IPSEC_OSTAT_INC(isr->saidx.proto, pdrops); + *error = EHOSTUNREACH; + return (NULL); + } + /* + * Craft SA index to search for proper SA. Note that + * we only fillin unspecified SA peers for transport + * mode; for tunnel mode they must already be filled in. + */ + if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { + saidx = &tmpsaidx; + *saidx = isr->saidx; + ip6 = mtod(m, struct ip6_hdr *); + if (saidx->src.sin6.sin6_len == 0) { + sin6 = (struct sockaddr_in6 *)&saidx->src; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = IPSEC_PORT_ANY; + sin6->sin6_addr = ip6->ip6_src; + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { + /* fix scope id for comparing SPD */ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = + ntohs(ip6->ip6_src.s6_addr16[1]); + } + } + if (saidx->dst.sin6.sin6_len == 0) { + sin6 = (struct sockaddr_in6 *)&saidx->dst; + sin6->sin6_len = sizeof(*sin6); + sin6->sin6_family = AF_INET6; + sin6->sin6_port = IPSEC_PORT_ANY; + sin6->sin6_addr = ip6->ip6_dst; + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { + /* fix scope id for comparing SPD */ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = + ntohs(ip6->ip6_dst.s6_addr16[1]); + } + } + } else + saidx = &sp->req[*pidx]->saidx; + /* + * Lookup SA and validate it. + */ + sav = key_allocsa_policy(sp, saidx, error); + if (sav == NULL) { + IPSEC6STAT_INC(ips_out_nosa); + if (*error != 0) + return (NULL); + if (ipsec_get_reqlevel(sp, *pidx) != IPSEC_LEVEL_REQUIRE) { + /* + * We have no SA and policy that doesn't require + * this IPsec transform, thus we can continue w/o + * IPsec processing, i.e. return EJUSTRETURN. + * But first check if there is some bundled transform. + */ + if (sp->tcount > ++(*pidx)) + goto next; + *error = EJUSTRETURN; + } + return (NULL); + } + IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform")); + return (sav); +} + +/* + * IPsec output logic for IPv6. + */ +static int +ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, u_int idx) +{ + char sbuf[IPSEC_ADDRSTRLEN], dbuf[IPSEC_ADDRSTRLEN]; + struct ipsec_ctx_data ctx; + union sockaddr_union *dst; + struct secasvar *sav; + struct ip6_hdr *ip6; + int error, i, off; + + IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); + + sav = ipsec6_allocsa(m, sp, &idx, &error); + if (sav == NULL) { + if (error == EJUSTRETURN) { /* No IPsec required */ + key_freesp(&sp); + return (error); + } + goto bad; + } + + /* Fix IP length in case if it is not set yet. */ + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); + + IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET6, IPSEC_ENC_BEFORE); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) + goto bad; + + ip6 = mtod(m, struct ip6_hdr *); /* pfil can change mbuf */ + dst = &sav->sah->saidx.dst; + + /* Do the appropriate encapsulation, if necessary */ + if (sp->req[idx]->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ + dst->sa.sa_family != AF_INET6 || /* PF mismatch */ + ((dst->sa.sa_family == AF_INET6) && + (!IN6_IS_ADDR_UNSPECIFIED(&dst->sin6.sin6_addr)) && + (!in6_sa_equal_addrwithscope(&dst->sin6, &ip6->ip6_dst)))) { + if (m->m_pkthdr.len - sizeof(*ip6) > IPV6_MAXPACKET) { + /* No jumbogram support. */ + error = ENXIO; /*XXX*/ + goto bad; + } + error = ipsec_encap(&m, &sav->sah->saidx); + if (error != 0) { + DPRINTF(("%s: encapsulation for SA %s->%s " + "SPI 0x%08x failed with error %d\n", __func__, + ipsec_address(&sav->sah->saidx.src, sbuf, + sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, + sizeof(dbuf)), ntohl(sav->spi), error)); + /* XXXAE: IPSEC_OSTAT_INC(tunnel); */ + goto bad; + } + } + + IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); + if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) + goto bad; + + switch(dst->sa.sa_family) { +#ifdef INET + case AF_INET: + { + struct ip *ip; + ip = mtod(m, struct ip *); + i = ip->ip_hl << 2; + off = offsetof(struct ip, ip_p); + } + break; +#endif /* AF_INET */ + case AF_INET6: + i = sizeof(struct ip6_hdr); + off = offsetof(struct ip6_hdr, ip6_nxt); + break; + default: + DPRINTF(("%s: unsupported protocol family %u\n", + __func__, dst->sa.sa_family)); + error = EPFNOSUPPORT; + IPSEC_OSTAT_INC(sav->sah->saidx.proto, nopf); + goto bad; + } + error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off); + if (error != 0) { + key_freesav(&sav); + key_freesp(&sp); + } + return (error); +bad: + IPSEC6STAT_INC(ips_out_inval); + if (m != NULL) + m_freem(m); + if (sav != NULL) + key_freesav(&sav); + key_freesp(&sp); + return (error); +} + +int +ipsec6_process_packet(struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp) +{ + + return (ipsec6_perform_request(m, sp, 0)); +} + +static int +ipsec6_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) +{ + struct secpolicy *sp; int error; - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(isr != NULL, ("null ISR")); - IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); - sav = isr->sav; - IPSEC_ASSERT(sav != NULL, ("null SA")); - IPSEC_ASSERT(sav->sah != NULL, ("null SAH")); + /* Lookup for the corresponding outbound security policy */ + sp = ipsec6_checkpolicy(m, inp, &error); + if (sp == NULL) { + if (error == -EINVAL) { + /* Discarded by policy. */ + m_freem(m); + return (EACCES); + } + return (0); /* No IPsec required. */ + } + if (!forwarding) { + /* + * Do delayed checksums now because we send before + * this is done in the normal processing path. + */ + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { + in6_delayed_cksum(m, m->m_pkthdr.len - + sizeof(struct ip6_hdr), sizeof(struct ip6_hdr)); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; + } +#ifdef SCTP + if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { + sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); + m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; + } +#endif + } + /* NB: callee frees mbuf and releases reference to SP */ + error = ipsec6_process_packet(m, sp, inp); + if (error == EJUSTRETURN) { + /* + * We had a SP with a level of 'use' and no SA. We + * will just continue to process the packet without + * IPsec processing and return without error. + */ + return (0); + } + if (error == 0) + return (EINPROGRESS); /* consumed by IPsec */ + return (error); +} + +/* + * IPSEC_OUTPUT() method implementation for IPv6. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. + */ +int +ipsec6_output(struct mbuf *m, struct inpcb *inp) +{ + + /* + * If the packet is resubmitted to ip_output (e.g. after + * AH, ESP, etc. processing), there will be a tag to bypass + * the lookup and related policy checking. + */ + if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) + return (0); + + return (ipsec6_common_output(m, inp, 0)); +} + +/* + * IPSEC_FORWARD() method implementation for IPv6. + * 0 - no IPsec handling needed + * other values - mbuf consumed by IPsec. + */ +int +ipsec6_forward(struct mbuf *m) +{ + + /* + * Check if this packet has an active inbound SP and needs to be + * dropped instead of forwarded. + */ + if (ipsec6_in_reject(m, NULL) != 0) { + m_freem(m); + return (EACCES); + } + return (ipsec6_common_output(m, NULL, 1)); +} +#endif /* INET6 */ + +int +ipsec_process_done(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx) +{ + struct xform_history *xh; + struct secasindex *saidx; + struct m_tag *mtag; + int error; + saidx = &sav->sah->saidx; switch (saidx->dst.sa.sa_family) { #ifdef INET @@ -136,21 +735,20 @@ } /* - * Add a record of what we've done or what needs to be done to the - * packet. + * Add a record of what we've done to the packet. */ - mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, - sizeof(struct tdb_ident), M_NOWAIT); + mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, sizeof(*xh), M_NOWAIT); if (mtag == NULL) { DPRINTF(("%s: could not get packet tag\n", __func__)); error = ENOMEM; goto bad; } - tdbi = (struct tdb_ident *)(mtag + 1); - tdbi->dst = saidx->dst; - tdbi->proto = saidx->proto; - tdbi->spi = sav->spi; + xh = (struct xform_history *)(mtag + 1); + xh->dst = saidx->dst; + xh->proto = saidx->proto; + xh->mode = saidx->mode; + xh->spi = sav->spi; m_tag_prepend(m, mtag); key_sa_recordxfer(sav, m); /* record data transfer */ @@ -162,258 +760,110 @@ * to set the packet on so we can unwind the stack before * doing further processing. */ - if (isr->next) { - /* XXX-BZ currently only support same AF bundles. */ + if (++idx < sp->tcount) { switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: + key_freesav(&sav); IPSECSTAT_INC(ips_out_bundlesa); - return (ipsec4_process_packet(m, isr->next)); + return (ipsec4_perform_request(m, sp, idx)); /* NOTREACHED */ #endif -#ifdef notyet #ifdef INET6 case AF_INET6: - /* XXX */ + key_freesav(&sav); IPSEC6STAT_INC(ips_out_bundlesa); - return (ipsec6_process_packet(m, isr->next)); + return (ipsec6_perform_request(m, sp, idx)); /* NOTREACHED */ #endif /* INET6 */ -#endif default: DPRINTF(("%s: unknown protocol family %u\n", __func__, saidx->dst.sa.sa_family)); - error = ENXIO; + error = EPFNOSUPPORT; goto bad; } } + key_freesp(&sp), sp = NULL; /* Release reference to SP */ +#ifdef INET /* + * Do UDP encapsulation if SA requires it. + */ + if (sav->natt != NULL) { + error = udp_ipsec_output(m, sav); + if (error != 0) + goto bad; + } +#endif /* INET */ + /* * We're done with IPsec processing, transmit the packet using the * appropriate network protocol (IP or IPv6). */ switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: -#ifdef IPSEC_NAT_T - /* - * If NAT-T is enabled, now that all IPsec processing is done - * insert UDP encapsulation header after IP header. - */ - if (sav->natt_type) { - struct ip *ip = mtod(m, struct ip *); - const int hlen = (ip->ip_hl << 2); - int size, off; - struct mbuf *mi; - struct udphdr *udp; - - size = sizeof(struct udphdr); - if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) { - /* - * draft-ietf-ipsec-nat-t-ike-0[01].txt and - * draft-ietf-ipsec-udp-encaps-(00/)01.txt, - * ignoring possible AH mode - * non-IKE marker + non-ESP marker - * from draft-ietf-ipsec-udp-encaps-00.txt. - */ - size += sizeof(u_int64_t); - } - mi = m_makespace(m, hlen, size, &off); - if (mi == NULL) { - DPRINTF(("%s: m_makespace for udphdr failed\n", - __func__)); - error = ENOBUFS; - goto bad; - } - - udp = (struct udphdr *)(mtod(mi, caddr_t) + off); - if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) - udp->uh_sport = htons(UDP_ENCAP_ESPINUDP_PORT); - else - udp->uh_sport = - KEY_PORTFROMSADDR(&sav->sah->saidx.src); - udp->uh_dport = KEY_PORTFROMSADDR(&sav->sah->saidx.dst); - udp->uh_sum = 0; - udp->uh_ulen = htons(m->m_pkthdr.len - hlen); - ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_p = IPPROTO_UDP; - - if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) - *(u_int64_t *)(udp + 1) = 0; - } -#endif /* IPSEC_NAT_T */ - + key_freesav(&sav); return ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL); #endif /* INET */ #ifdef INET6 case AF_INET6: - /* - * We don't need massage, IPv6 header fields are always in - * net endian. - */ + key_freesav(&sav); return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); #endif /* INET6 */ } panic("ipsec_process_done"); bad: m_freem(m); + key_freesav(&sav); + if (sp != NULL) + key_freesp(&sp); return (error); } -static struct ipsecrequest * -ipsec_nextisr( - struct mbuf *m, - struct ipsecrequest *isr, - int af, - struct secasindex *saidx, - int *error -) +/* + * ipsec_prepend() is optimized version of M_PREPEND(). + * ipsec_encap() is called by IPsec output routine for tunnel mode SA. + * It is expected that after IP encapsulation some IPsec transform will + * be performed. Each IPsec transform inserts its variable length header + * just after outer IP header using m_makespace(). If given mbuf has not + * enough free space at the beginning, we allocate new mbuf and reserve + * some space at the beginning and at the end. + * This helps avoid allocating of new mbuf and data copying in m_makespace(), + * we place outer header in the middle of mbuf's data with reserved leading + * and trailing space: + * [ LEADINGSPACE ][ Outer IP header ][ TRAILINGSPACE ] + * LEADINGSPACE will be used to add ethernet header, TRAILINGSPACE will + * be used to inject AH/ESP/IPCOMP header. + */ +#define IPSEC_TRAILINGSPACE (sizeof(struct udphdr) +/* NAT-T */ \ + max(sizeof(struct newesp) + EALG_MAX_BLOCK_LEN, /* ESP + IV */ \ + sizeof(struct newah) + HASH_MAX_LEN /* AH + ICV */)) +static struct mbuf * +ipsec_prepend(struct mbuf *m, int len, int how) { -#define IPSEC_OSTAT(name) do { \ - if (isr->saidx.proto == IPPROTO_ESP) \ - ESPSTAT_INC(esps_##name); \ - else if (isr->saidx.proto == IPPROTO_AH)\ - AHSTAT_INC(ahs_##name); \ - else \ - IPCOMPSTAT_INC(ipcomps_##name); \ -} while (0) - struct secasvar *sav; + struct mbuf *n; - IPSECREQUEST_LOCK_ASSERT(isr); - - IPSEC_ASSERT(af == AF_INET || af == AF_INET6, - ("invalid address family %u", af)); -again: - /* - * Craft SA index to search for proper SA. Note that - * we only fillin unspecified SA peers for transport - * mode; for tunnel mode they must already be filled in. - */ - *saidx = isr->saidx; - if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { - /* Fillin unspecified SA peers only for transport mode */ - if (af == AF_INET) { - struct sockaddr_in *sin; - struct ip *ip = mtod(m, struct ip *); - - if (saidx->src.sa.sa_len == 0) { - sin = &saidx->src.sin; - sin->sin_len = sizeof(*sin); - sin->sin_family = AF_INET; - sin->sin_port = IPSEC_PORT_ANY; - sin->sin_addr = ip->ip_src; - } - if (saidx->dst.sa.sa_len == 0) { - sin = &saidx->dst.sin; - sin->sin_len = sizeof(*sin); - sin->sin_family = AF_INET; - sin->sin_port = IPSEC_PORT_ANY; - sin->sin_addr = ip->ip_dst; - } - } else { - struct sockaddr_in6 *sin6; - struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); - - if (saidx->src.sin6.sin6_len == 0) { - sin6 = (struct sockaddr_in6 *)&saidx->src; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = IPSEC_PORT_ANY; - sin6->sin6_addr = ip6->ip6_src; - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { - /* fix scope id for comparing SPD */ - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = - ntohs(ip6->ip6_src.s6_addr16[1]); - } - } - if (saidx->dst.sin6.sin6_len == 0) { - sin6 = (struct sockaddr_in6 *)&saidx->dst; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = IPSEC_PORT_ANY; - sin6->sin6_addr = ip6->ip6_dst; - if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { - /* fix scope id for comparing SPD */ - sin6->sin6_addr.s6_addr16[1] = 0; - sin6->sin6_scope_id = - ntohs(ip6->ip6_dst.s6_addr16[1]); - } - } - } + M_ASSERTPKTHDR(m); + IPSEC_ASSERT(len < MHLEN, ("wrong length")); + if (M_LEADINGSPACE(m) >= len) { + /* No need to allocate new mbuf. */ + m->m_data -= len; + m->m_len += len; + m->m_pkthdr.len += len; + return (m); } - - /* - * Lookup SA and validate it. - */ - *error = key_checkrequest(isr, saidx); - if (*error != 0) { - /* - * IPsec processing is required, but no SA found. - * I assume that key_acquire() had been called - * to get/establish the SA. Here I discard - * this packet because it is responsibility for - * upper layer to retransmit the packet. - */ - switch(af) { - case AF_INET: - IPSECSTAT_INC(ips_out_nosa); - break; -#ifdef INET6 - case AF_INET6: - IPSEC6STAT_INC(ips_out_nosa); - break; -#endif - } - goto bad; + n = m_gethdr(how, m->m_type); + if (n == NULL) { + m_freem(m); + return (NULL); } - sav = isr->sav; - if (sav == NULL) { - IPSEC_ASSERT(ipsec_get_reqlevel(isr) == IPSEC_LEVEL_USE, - ("no SA found, but required; level %u", - ipsec_get_reqlevel(isr))); - IPSECREQUEST_UNLOCK(isr); - isr = isr->next; - /* - * If isr is NULL, we found a 'use' policy w/o SA. - * Return w/o error and w/o isr so we can drop out - * and continue w/o IPsec processing. - */ - if (isr == NULL) - return isr; - IPSECREQUEST_LOCK(isr); - goto again; - } - - /* - * Check system global policy controls. - */ - if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) || - (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) || - (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) { - DPRINTF(("%s: IPsec outbound packet dropped due" - " to policy (check your sysctls)\n", __func__)); - IPSEC_OSTAT(pdrops); - *error = EHOSTUNREACH; - goto bad; - } - - /* - * Sanity check the SA contents for the caller - * before they invoke the xform output method. - */ - if (sav->tdb_xform == NULL) { - DPRINTF(("%s: no transform for SA\n", __func__)); - IPSEC_OSTAT(noxform); - *error = EHOSTUNREACH; - goto bad; - } - return isr; -bad: - IPSEC_ASSERT(*error != 0, ("error return w/ no error code")); - IPSECREQUEST_UNLOCK(isr); - return NULL; -#undef IPSEC_OSTAT + m_move_pkthdr(n, m); + n->m_next = m; + if (len + IPSEC_TRAILINGSPACE < M_SIZE(n)) + m_align(n, len + IPSEC_TRAILINGSPACE); + n->m_len = len; + n->m_pkthdr.len += len; + return (n); } static int @@ -467,7 +917,7 @@ saidx->src.sin.sin_addr.s_addr == INADDR_ANY || saidx->dst.sin.sin_addr.s_addr == INADDR_ANY) return (EINVAL); - M_PREPEND(*mp, sizeof(struct ip), M_NOWAIT); + *mp = ipsec_prepend(*mp, sizeof(struct ip), M_NOWAIT); if (*mp == NULL) return (ENOBUFS); ip = mtod(*mp, struct ip *); @@ -490,7 +940,7 @@ IN6_IS_ADDR_UNSPECIFIED(&saidx->src.sin6.sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&saidx->dst.sin6.sin6_addr)) return (EINVAL); - M_PREPEND(*mp, sizeof(struct ip6_hdr), M_NOWAIT); + *mp = ipsec_prepend(*mp, sizeof(struct ip6_hdr), M_NOWAIT); if (*mp == NULL) return (ENOBUFS); ip6 = mtod(*mp, struct ip6_hdr *); @@ -515,221 +965,7 @@ default: return (EAFNOSUPPORT); } + (*mp)->m_flags &= ~(M_BCAST | M_MCAST); return (0); } -#ifdef INET -/* - * IPsec output logic for IPv4. - */ -int -ipsec4_process_packet(struct mbuf *m, struct ipsecrequest *isr) -{ - char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; - struct ipsec_ctx_data ctx; - union sockaddr_union *dst; - struct secasindex saidx; - struct secasvar *sav; - struct ip *ip; - int error, i, off; - - IPSEC_ASSERT(m != NULL, ("null mbuf")); - IPSEC_ASSERT(isr != NULL, ("null isr")); - - IPSECREQUEST_LOCK(isr); /* insure SA contents don't change */ - - isr = ipsec_nextisr(m, isr, AF_INET, &saidx, &error); - if (isr == NULL) { - if (error != 0) - goto bad; - return EJUSTRETURN; - } - - sav = isr->sav; - if (m->m_len < sizeof(struct ip) && - (m = m_pullup(m, sizeof (struct ip))) == NULL) { - error = ENOBUFS; - goto bad; - } - - IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET, IPSEC_ENC_BEFORE); - if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) - goto bad; - - ip = mtod(m, struct ip *); - dst = &sav->sah->saidx.dst; - /* Do the appropriate encapsulation, if necessary */ - if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ - dst->sa.sa_family != AF_INET || /* PF mismatch */ - (dst->sa.sa_family == AF_INET && /* Proxy */ - dst->sin.sin_addr.s_addr != INADDR_ANY && - dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) { - /* Fix IPv4 header checksum and length */ - ip->ip_len = htons(m->m_pkthdr.len); - ip->ip_sum = 0; - ip->ip_sum = in_cksum(m, ip->ip_hl << 2); - error = ipsec_encap(&m, &sav->sah->saidx); - if (error != 0) { - DPRINTF(("%s: encapsulation for SA %s->%s " - "SPI 0x%08x failed with error %d\n", __func__, - ipsec_address(&sav->sah->saidx.src, sbuf, - sizeof(sbuf)), - ipsec_address(&sav->sah->saidx.dst, dbuf, - sizeof(dbuf)), ntohl(sav->spi), error)); - goto bad; - } - } - - IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); - if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) - goto bad; - - /* - * Dispatch to the appropriate IPsec transform logic. The - * packet will be returned for transmission after crypto - * processing, etc. are completed. - * - * NB: m & sav are ``passed to caller'' who's responsible for - * for reclaiming their resources. - */ - switch(dst->sa.sa_family) { - case AF_INET: - ip = mtod(m, struct ip *); - i = ip->ip_hl << 2; - off = offsetof(struct ip, ip_p); - break; -#ifdef INET6 - case AF_INET6: - i = sizeof(struct ip6_hdr); - off = offsetof(struct ip6_hdr, ip6_nxt); - break; -#endif /* INET6 */ - default: - DPRINTF(("%s: unsupported protocol family %u\n", - __func__, dst->sa.sa_family)); - error = EPFNOSUPPORT; - IPSECSTAT_INC(ips_out_inval); - goto bad; - } - error = (*sav->tdb_xform->xf_output)(m, isr, NULL, i, off); - IPSECREQUEST_UNLOCK(isr); - return (error); -bad: - if (isr) - IPSECREQUEST_UNLOCK(isr); - if (m) - m_freem(m); - return error; -} -#endif - - -#ifdef INET6 -static int -in6_sa_equal_addrwithscope(const struct sockaddr_in6 *sa, const struct in6_addr *ia) -{ - struct in6_addr ia2; - - memcpy(&ia2, &sa->sin6_addr, sizeof(ia2)); - if (IN6_IS_SCOPE_LINKLOCAL(&sa->sin6_addr)) - ia2.s6_addr16[1] = htons(sa->sin6_scope_id); - - return IN6_ARE_ADDR_EQUAL(ia, &ia2); -} - -/* - * IPsec output logic for IPv6. - */ -int -ipsec6_process_packet(struct mbuf *m, struct ipsecrequest *isr) -{ - char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; - struct ipsec_ctx_data ctx; - struct secasindex saidx; - struct secasvar *sav; - struct ip6_hdr *ip6; - int error, i, off; - union sockaddr_union *dst; - - IPSEC_ASSERT(m != NULL, ("ipsec6_process_packet: null mbuf")); - IPSEC_ASSERT(isr != NULL, ("ipsec6_process_packet: null isr")); - - IPSECREQUEST_LOCK(isr); /* insure SA contents don't change */ - - isr = ipsec_nextisr(m, isr, AF_INET6, &saidx, &error); - if (isr == NULL) { - if (error != 0) - goto bad; - return EJUSTRETURN; - } - sav = isr->sav; - dst = &sav->sah->saidx.dst; - - IPSEC_INIT_CTX(&ctx, &m, sav, AF_INET6, IPSEC_ENC_BEFORE); - if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) - goto bad; - - ip6 = mtod(m, struct ip6_hdr *); - ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6)); - - /* Do the appropriate encapsulation, if necessary */ - if (isr->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */ - dst->sa.sa_family != AF_INET6 || /* PF mismatch */ - ((dst->sa.sa_family == AF_INET6) && - (!IN6_IS_ADDR_UNSPECIFIED(&dst->sin6.sin6_addr)) && - (!in6_sa_equal_addrwithscope(&dst->sin6, - &ip6->ip6_dst)))) { - if (m->m_pkthdr.len - sizeof(*ip6) > IPV6_MAXPACKET) { - /* No jumbogram support. */ - error = ENXIO; /*XXX*/ - goto bad; - } - error = ipsec_encap(&m, &sav->sah->saidx); - if (error != 0) { - DPRINTF(("%s: encapsulation for SA %s->%s " - "SPI 0x%08x failed with error %d\n", __func__, - ipsec_address(&sav->sah->saidx.src, sbuf, - sizeof(sbuf)), - ipsec_address(&sav->sah->saidx.dst, dbuf, - sizeof(dbuf)), ntohl(sav->spi), error)); - goto bad; - } - } - - IPSEC_INIT_CTX(&ctx, &m, sav, dst->sa.sa_family, IPSEC_ENC_AFTER); - if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) - goto bad; - - switch(dst->sa.sa_family) { -#ifdef INET - case AF_INET: - { - struct ip *ip; - ip = mtod(m, struct ip *); - i = ip->ip_hl << 2; - off = offsetof(struct ip, ip_p); - } - break; -#endif /* AF_INET */ - case AF_INET6: - i = sizeof(struct ip6_hdr); - off = offsetof(struct ip6_hdr, ip6_nxt); - break; - default: - DPRINTF(("%s: unsupported protocol family %u\n", - __func__, dst->sa.sa_family)); - error = EPFNOSUPPORT; - goto bad; - } - error = (*sav->tdb_xform->xf_output)(m, isr, NULL, i, off); - IPSECREQUEST_UNLOCK(isr); - return error; -bad: - IPSEC6STAT_INC(ips_out_inval); - if (isr) - IPSECREQUEST_UNLOCK(isr); - if (m) - m_freem(m); - return error; -} -#endif /*INET6*/ Index: head/sys/netipsec/ipsec_pcb.c =================================================================== --- head/sys/netipsec/ipsec_pcb.c +++ head/sys/netipsec/ipsec_pcb.c @@ -0,0 +1,479 @@ +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy"); + +static void +ipsec_setsockaddrs_inpcb(struct inpcb *inp, union sockaddr_union *src, + union sockaddr_union *dst, u_int dir) +{ + +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + struct sockaddr_in6 *sin6; + + bzero(&src->sin6, sizeof(src->sin6)); + bzero(&dst->sin6, sizeof(dst->sin6)); + src->sin6.sin6_family = AF_INET6; + src->sin6.sin6_len = sizeof(struct sockaddr_in6); + dst->sin6.sin6_family = AF_INET6; + dst->sin6.sin6_len = sizeof(struct sockaddr_in6); + + if (dir == IPSEC_DIR_OUTBOUND) + sin6 = &src->sin6; + else + sin6 = &dst->sin6; + sin6->sin6_addr = inp->in6p_laddr; + sin6->sin6_port = inp->inp_lport; + if (IN6_IS_SCOPE_LINKLOCAL(&inp->in6p_laddr)) { + /* XXXAE: use in6p_zoneid */ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = ntohs( + inp->in6p_laddr.s6_addr16[1]); + } + + if (dir == IPSEC_DIR_OUTBOUND) + sin6 = &dst->sin6; + else + sin6 = &src->sin6; + sin6->sin6_addr = inp->in6p_faddr; + sin6->sin6_port = inp->inp_fport; + if (IN6_IS_SCOPE_LINKLOCAL(&inp->in6p_faddr)) { + /* XXXAE: use in6p_zoneid */ + sin6->sin6_addr.s6_addr16[1] = 0; + sin6->sin6_scope_id = ntohs( + inp->in6p_faddr.s6_addr16[1]); + } + } +#endif +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + struct sockaddr_in *sin; + + bzero(&src->sin, sizeof(src->sin)); + bzero(&dst->sin, sizeof(dst->sin)); + src->sin.sin_family = AF_INET; + src->sin.sin_len = sizeof(struct sockaddr_in); + dst->sin.sin_family = AF_INET; + dst->sin.sin_len = sizeof(struct sockaddr_in); + + if (dir == IPSEC_DIR_OUTBOUND) + sin = &src->sin; + else + sin = &dst->sin; + sin->sin_addr = inp->inp_laddr; + sin->sin_port = inp->inp_lport; + + if (dir == IPSEC_DIR_OUTBOUND) + sin = &dst->sin; + else + sin = &src->sin; + sin->sin_addr = inp->inp_faddr; + sin->sin_port = inp->inp_fport; + } +#endif +} + +void +ipsec_setspidx_inpcb(struct inpcb *inp, struct secpolicyindex *spidx, + u_int dir) +{ + + ipsec_setsockaddrs_inpcb(inp, &spidx->src, &spidx->dst, dir); +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + spidx->prefs = sizeof(struct in6_addr) << 3; + spidx->prefd = sizeof(struct in6_addr) << 3; + } +#endif +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + spidx->prefs = sizeof(struct in_addr) << 3; + spidx->prefd = sizeof(struct in_addr) << 3; + } +#endif + spidx->ul_proto = IPPROTO_TCP; /* XXX: currently only TCP uses this */ + spidx->dir = dir; + KEYDBG(IPSEC_DUMP, + printf("%s: ", __func__); kdebug_secpolicyindex(spidx, NULL)); +} + +/* Initialize PCB policy. */ +int +ipsec_init_pcbpolicy(struct inpcb *inp) +{ + + IPSEC_ASSERT(inp != NULL, ("null inp")); + IPSEC_ASSERT(inp->inp_sp == NULL, ("inp_sp already initialized")); + + inp->inp_sp = malloc(sizeof(struct inpcbpolicy), M_IPSEC_INPCB, + M_NOWAIT | M_ZERO); + if (inp->inp_sp == NULL) + return (ENOBUFS); + return (0); +} + +/* Delete PCB policy. */ +int +ipsec_delete_pcbpolicy(struct inpcb *inp) +{ + + if (inp->inp_sp == NULL) + return (0); + + if (inp->inp_sp->flags & INP_INBOUND_POLICY) + key_freesp(&inp->inp_sp->sp_in); + + if (inp->inp_sp->flags & INP_OUTBOUND_POLICY) + key_freesp(&inp->inp_sp->sp_out); + + free(inp->inp_sp, M_IPSEC_INPCB); + inp->inp_sp = NULL; + return (0); +} + +/* Deep-copy a policy in PCB. */ +static struct secpolicy * +ipsec_deepcopy_pcbpolicy(struct secpolicy *src) +{ + struct secpolicy *dst; + int i; + + if (src == NULL) + return (NULL); + + IPSEC_ASSERT(src->state == IPSEC_SPSTATE_PCB, ("SP isn't PCB")); + + dst = key_newsp(); + if (dst == NULL) + return (NULL); + + /* spidx is not copied here */ + dst->policy = src->policy; + dst->state = src->state; + dst->priority = src->priority; + /* Do not touch the refcnt field. */ + + /* Copy IPsec request chain. */ + for (i = 0; i < src->tcount; i++) { + dst->req[i] = ipsec_newisr(); + if (dst->req[i] == NULL) { + key_freesp(&dst); + return (NULL); + } + bcopy(src->req[i], dst->req[i], sizeof(struct ipsecrequest)); + dst->tcount++; + } + KEYDBG(IPSEC_DUMP, + printf("%s: copied SP(%p) -> SP(%p)\n", __func__, src, dst); + kdebug_secpolicy(dst)); + return (dst); +} + +/* + * Copy IPsec policy from old INPCB into new. + * It is expected that new INPCB has not configured policies. + */ +int +ipsec_copy_pcbpolicy(struct inpcb *old, struct inpcb *new) +{ + struct secpolicy *sp; + + /* + * old->inp_sp can be NULL if PCB was created when an IPsec + * support was unavailable. This is not an error, we don't have + * policies in this PCB, so nothing to copy. + */ + if (old->inp_sp == NULL) + return (0); + + IPSEC_ASSERT(new->inp_sp != NULL, ("new inp_sp is NULL")); + IPSEC_ASSERT((new->inp_sp->flags & ( + INP_INBOUND_POLICY | INP_OUTBOUND_POLICY)) == 0, + ("new PCB already has configured policies")); + INP_WLOCK_ASSERT(new); + INP_LOCK_ASSERT(old); + + if (old->inp_sp->flags & INP_INBOUND_POLICY) { + sp = ipsec_deepcopy_pcbpolicy(old->inp_sp->sp_in); + if (sp == NULL) + return (ENOBUFS); + ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_INBOUND); + new->inp_sp->sp_in = sp; + new->inp_sp->flags |= INP_INBOUND_POLICY; + } + if (old->inp_sp->flags & INP_OUTBOUND_POLICY) { + sp = ipsec_deepcopy_pcbpolicy(old->inp_sp->sp_out); + if (sp == NULL) + return (ENOBUFS); + ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_OUTBOUND); + new->inp_sp->sp_out = sp; + new->inp_sp->flags |= INP_OUTBOUND_POLICY; + } + return (0); +} + +static int +ipsec_set_pcbpolicy(struct inpcb *inp, struct ucred *cred, + void *request, size_t len) +{ + struct sadb_x_policy *xpl; + struct secpolicy **spp, *newsp; + int error, flags; + + xpl = (struct sadb_x_policy *)request; + /* Select direction. */ + switch (xpl->sadb_x_policy_dir) { + case IPSEC_DIR_INBOUND: + case IPSEC_DIR_OUTBOUND: + break; + default: + ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, + xpl->sadb_x_policy_dir)); + return (EINVAL); + } + /* + * Privileged sockets are allowed to set own security policy + * and configure IPsec bypass. Unprivileged sockets only can + * have ENTRUST policy. + */ + switch (xpl->sadb_x_policy_type) { + case IPSEC_POLICY_IPSEC: + case IPSEC_POLICY_BYPASS: + if (cred != NULL && + priv_check_cred(cred, PRIV_NETINET_IPSEC, 0) != 0) + return (EACCES); + /* Allocate new SP entry. */ + newsp = key_msg2sp(xpl, len, &error); + if (newsp == NULL) + return (error); + newsp->state = IPSEC_SPSTATE_PCB; + newsp->spidx.ul_proto = IPSEC_ULPROTO_ANY; +#ifdef INET + if (inp->inp_vflag & INP_IPV4) { + newsp->spidx.src.sin.sin_family = + newsp->spidx.dst.sin.sin_family = AF_INET; + newsp->spidx.src.sin.sin_len = + newsp->spidx.dst.sin.sin_len = + sizeof(struct sockaddr_in); + } +#endif +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + newsp->spidx.src.sin6.sin6_family = + newsp->spidx.dst.sin6.sin6_family = AF_INET6; + newsp->spidx.src.sin6.sin6_len = + newsp->spidx.dst.sin6.sin6_len = + sizeof(struct sockaddr_in6); + } +#endif + break; + case IPSEC_POLICY_ENTRUST: + /* We just use NULL pointer for ENTRUST policy */ + newsp = NULL; + break; + default: + /* Other security policy types aren't allowed for PCB */ + return (EINVAL); + } + + INP_WLOCK(inp); + if (xpl->sadb_x_policy_dir == IPSEC_DIR_INBOUND) { + spp = &inp->inp_sp->sp_in; + flags = INP_INBOUND_POLICY; + } else { + spp = &inp->inp_sp->sp_out; + flags = INP_OUTBOUND_POLICY; + } + /* Clear old SP and set new SP. */ + if (*spp != NULL) + key_freesp(spp); + *spp = newsp; + KEYDBG(IPSEC_DUMP, + printf("%s: new SP(%p)\n", __func__, newsp)); + if (newsp == NULL) + inp->inp_sp->flags &= ~flags; + else { + inp->inp_sp->flags |= flags; + KEYDBG(IPSEC_DUMP, kdebug_secpolicy(newsp)); + } + INP_WUNLOCK(inp); + return (0); +} + +static int +ipsec_get_pcbpolicy(struct inpcb *inp, void *request, size_t *len) +{ + struct sadb_x_policy *xpl; + struct secpolicy *sp; + int error, flags; + + xpl = (struct sadb_x_policy *)request; + + INP_RLOCK(inp); + flags = inp->inp_sp->flags; + /* Select direction. */ + switch (xpl->sadb_x_policy_dir) { + case IPSEC_DIR_INBOUND: + sp = inp->inp_sp->sp_in; + flags &= INP_INBOUND_POLICY; + break; + case IPSEC_DIR_OUTBOUND: + sp = inp->inp_sp->sp_out; + flags &= INP_OUTBOUND_POLICY; + break; + default: + INP_RUNLOCK(inp); + ipseclog((LOG_ERR, "%s: invalid direction=%u\n", __func__, + xpl->sadb_x_policy_dir)); + return (EINVAL); + } + + if (flags == 0) { + /* Return ENTRUST policy */ + INP_RUNLOCK(inp); + xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; + xpl->sadb_x_policy_type = IPSEC_POLICY_ENTRUST; + xpl->sadb_x_policy_id = 0; + xpl->sadb_x_policy_priority = 0; + xpl->sadb_x_policy_len = PFKEY_UNIT64(sizeof(*xpl)); + *len = sizeof(*xpl); + return (0); + } + + IPSEC_ASSERT(sp != NULL, + ("sp is NULL, but flags is 0x%04x", inp->inp_sp->flags)); + + key_addref(sp); + INP_RUNLOCK(inp); + error = key_sp2msg(sp, request, len); + key_freesp(&sp); + if (error == EINVAL) + return (error); + /* + * We return "success", but user should check *len. + * *len will be set to size of valid data and + * sadb_x_policy_len will contain needed size. + */ + return (0); +} + +/* Handle socket option control request for PCB */ +static int +ipsec_control_pcbpolicy(struct inpcb *inp, struct sockopt *sopt) +{ + void *optdata; + size_t optlen; + int error; + + if (inp->inp_sp == NULL) + return (ENOPROTOOPT); + + /* Limit maximum request size to PAGE_SIZE */ + optlen = sopt->sopt_valsize; + if (optlen < sizeof(struct sadb_x_policy) || optlen > PAGE_SIZE) + return (EINVAL); + + optdata = malloc(optlen, M_TEMP, sopt->sopt_td ? M_WAITOK: M_NOWAIT); + if (optdata == NULL) + return (ENOBUFS); + /* + * We need a hint from the user, what policy is requested - input + * or output? User should specify it in the buffer, even for + * setsockopt(). + */ + error = sooptcopyin(sopt, optdata, optlen, optlen); + if (error == 0) { + if (sopt->sopt_dir == SOPT_SET) + error = ipsec_set_pcbpolicy(inp, + sopt->sopt_td ? sopt->sopt_td->td_ucred: NULL, + optdata, optlen); + else { + error = ipsec_get_pcbpolicy(inp, optdata, &optlen); + if (error == 0) + error = sooptcopyout(sopt, optdata, optlen); + } + } + free(optdata, M_TEMP); + return (error); +} + +#ifdef INET +/* + * IPSEC_PCBCTL() method implementation for IPv4. + */ +int +ipsec4_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + + if (sopt->sopt_name != IP_IPSEC_POLICY) + return (ENOPROTOOPT); + return (ipsec_control_pcbpolicy(inp, sopt)); +} +#endif + +#ifdef INET6 +/* + * IPSEC_PCBCTL() method implementation for IPv6. + */ +int +ipsec6_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + + if (sopt->sopt_name != IPV6_IPSEC_POLICY) + return (ENOPROTOOPT); + return (ipsec_control_pcbpolicy(inp, sopt)); +} +#endif + Index: head/sys/netipsec/ipsec_support.h =================================================================== --- head/sys/netipsec/ipsec_support.h +++ head/sys/netipsec/ipsec_support.h @@ -0,0 +1,190 @@ +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NETIPSEC_IPSEC_SUPPORT_H_ +#define _NETIPSEC_IPSEC_SUPPORT_H_ + +#ifdef _KERNEL +#if defined(IPSEC) || defined(IPSEC_SUPPORT) +struct mbuf; +struct inpcb; +struct tcphdr; +struct sockopt; +struct sockaddr; +struct ipsec_support; +struct tcpmd5_support; + +size_t ipsec_hdrsiz_inpcb(struct inpcb *); +int ipsec_init_pcbpolicy(struct inpcb *); +int ipsec_delete_pcbpolicy(struct inpcb *); +int ipsec_copy_pcbpolicy(struct inpcb *, struct inpcb *); + +struct ipsec_methods { + int (*input)(struct mbuf *, int, int); + int (*check_policy)(const struct mbuf *, struct inpcb *); + int (*forward)(struct mbuf *); + int (*output)(struct mbuf *, struct inpcb *); + int (*pcbctl)(struct inpcb *, struct sockopt *); + size_t (*hdrsize)(struct inpcb *); + int (*capability)(struct mbuf *, u_int); + int (*ctlinput)(int, struct sockaddr *, void *); + + int (*udp_input)(struct mbuf *, int, int); + int (*udp_pcbctl)(struct inpcb *, struct sockopt *); +}; +#define IPSEC_CAP_OPERABLE 1 +#define IPSEC_CAP_BYPASS_FILTER 2 + +struct tcpmd5_methods { + int (*input)(struct mbuf *, struct tcphdr *, u_char *); + int (*output)(struct mbuf *, struct tcphdr *, u_char *); + int (*pcbctl)(struct inpcb *, struct sockopt *); +}; + +#define IPSEC_MODULE_ENABLED 0x0001 +#define IPSEC_ENABLED(proto) \ + ((proto ## _ipsec_support)->enabled & IPSEC_MODULE_ENABLED) +#define TCPMD5_ENABLED() IPSEC_ENABLED(tcp) + +#ifdef TCP_SIGNATURE +/* TCP-MD5 build in the kernel */ +struct tcpmd5_support { + const u_int enabled; + const struct tcpmd5_methods * const methods; +}; +extern const struct tcpmd5_support * const tcp_ipsec_support; + +#define TCPMD5_INPUT(m, ...) \ + (*tcp_ipsec_support->methods->input)(m, __VA_ARGS__) +#define TCPMD5_OUTPUT(m, ...) \ + (*tcp_ipsec_support->methods->output)(m, __VA_ARGS__) +#define TCPMD5_PCBCTL(inp, sopt) \ + (*tcp_ipsec_support->methods->pcbctl)(inp, sopt) +#elif defined(IPSEC_SUPPORT) +/* TCP-MD5 build as module */ +struct tcpmd5_support { + volatile u_int enabled; + const struct tcpmd5_methods * volatile methods; +}; +extern struct tcpmd5_support * const tcp_ipsec_support; + +void tcpmd5_support_enable(const struct tcpmd5_methods * const); +void tcpmd5_support_disable(void); + +int tcpmd5_kmod_pcbctl(struct tcpmd5_support * const, struct inpcb *, + struct sockopt *); +int tcpmd5_kmod_input(struct tcpmd5_support * const, struct mbuf *, + struct tcphdr *, u_char *); +int tcpmd5_kmod_output(struct tcpmd5_support * const, struct mbuf *, + struct tcphdr *, u_char *); +#define TCPMD5_INPUT(m, ...) \ + tcpmd5_kmod_input(tcp_ipsec_support, m, __VA_ARGS__) +#define TCPMD5_OUTPUT(m, ...) \ + tcpmd5_kmod_output(tcp_ipsec_support, m, __VA_ARGS__) +#define TCPMD5_PCBCTL(inp, sopt) \ + tcpmd5_kmod_pcbctl(tcp_ipsec_support, inp, sopt) +#endif + +#endif /* IPSEC || IPSEC_SUPPORT */ + +#if defined(IPSEC) +struct ipsec_support { + const u_int enabled; + const struct ipsec_methods * const methods; +}; +extern const struct ipsec_support * const ipv4_ipsec_support; +extern const struct ipsec_support * const ipv6_ipsec_support; + +#define IPSEC_INPUT(proto, m, ...) \ + (*(proto ## _ipsec_support)->methods->input)(m, __VA_ARGS__) +#define IPSEC_CHECK_POLICY(proto, m, ...) \ + (*(proto ## _ipsec_support)->methods->check_policy)(m, __VA_ARGS__) +#define IPSEC_FORWARD(proto, m) \ + (*(proto ## _ipsec_support)->methods->forward)(m) +#define IPSEC_OUTPUT(proto, m, ...) \ + (*(proto ## _ipsec_support)->methods->output)(m, __VA_ARGS__) +#define IPSEC_PCBCTL(proto, inp, sopt) \ + (*(proto ## _ipsec_support)->methods->pcbctl)(inp, sopt) +#define IPSEC_CAPS(proto, m, ...) \ + (*(proto ## _ipsec_support)->methods->capability)(m, __VA_ARGS__) +#define IPSEC_HDRSIZE(proto, inp) \ + (*(proto ## _ipsec_support)->methods->hdrsize)(inp) + +#define UDPENCAP_INPUT(m, ...) \ + (*ipv4_ipsec_support->methods->udp_input)(m, __VA_ARGS__) +#define UDPENCAP_PCBCTL(inp, sopt) \ + (*ipv4_ipsec_support->methods->udp_pcbctl)(inp, sopt) + +#elif defined(IPSEC_SUPPORT) +struct ipsec_support { + volatile u_int enabled; + const struct ipsec_methods * volatile methods; +}; +extern struct ipsec_support * const ipv4_ipsec_support; +extern struct ipsec_support * const ipv6_ipsec_support; + +void ipsec_support_enable(struct ipsec_support * const, + const struct ipsec_methods * const); +void ipsec_support_disable(struct ipsec_support * const); + +int ipsec_kmod_input(struct ipsec_support * const, struct mbuf *, int, int); +int ipsec_kmod_check_policy(struct ipsec_support * const, struct mbuf *, + struct inpcb *); +int ipsec_kmod_forward(struct ipsec_support * const, struct mbuf *); +int ipsec_kmod_output(struct ipsec_support * const, struct mbuf *, + struct inpcb *); +int ipsec_kmod_pcbctl(struct ipsec_support * const, struct inpcb *, + struct sockopt *); +int ipsec_kmod_capability(struct ipsec_support * const, struct mbuf *, u_int); +size_t ipsec_kmod_hdrsize(struct ipsec_support * const, struct inpcb *); +int ipsec_kmod_udp_input(struct ipsec_support * const, struct mbuf *, int, int); +int ipsec_kmod_udp_pcbctl(struct ipsec_support * const, struct inpcb *, + struct sockopt *); + +#define UDPENCAP_INPUT(m, ...) \ + ipsec_kmod_udp_input(ipv4_ipsec_support, m, __VA_ARGS__) +#define UDPENCAP_PCBCTL(inp, sopt) \ + ipsec_kmod_udp_pcbctl(ipv4_ipsec_support, inp, sopt) + +#define IPSEC_INPUT(proto, ...) \ + ipsec_kmod_input(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_CHECK_POLICY(proto, ...) \ + ipsec_kmod_check_policy(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_FORWARD(proto, ...) \ + ipsec_kmod_forward(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_OUTPUT(proto, ...) \ + ipsec_kmod_output(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_PCBCTL(proto, ...) \ + ipsec_kmod_pcbctl(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_CAPS(proto, ...) \ + ipsec_kmod_capability(proto ## _ipsec_support, __VA_ARGS__) +#define IPSEC_HDRSIZE(proto, ...) \ + ipsec_kmod_hdrsize(proto ## _ipsec_support, __VA_ARGS__) +#endif /* IPSEC_SUPPORT */ +#endif /* _KERNEL */ +#endif /* _NETIPSEC_IPSEC_SUPPORT_H_ */ Index: head/sys/netipsec/key.h =================================================================== --- head/sys/netipsec/key.h +++ head/sys/netipsec/key.h @@ -37,7 +37,6 @@ struct secpolicy; struct secpolicyindex; -struct ipsecrequest; struct secasvar; struct sockaddr; struct socket; @@ -46,60 +45,33 @@ struct secasindex; union sockaddr_union; -extern void key_addref(struct secpolicy *sp); -extern int key_havesp(u_int dir); -extern struct secpolicy *key_allocsp(struct secpolicyindex *, u_int, - const char*, int); -extern struct secpolicy *key_allocsp2(u_int32_t spi, union sockaddr_union *dst, - u_int8_t proto, u_int dir, const char*, int); -extern struct secpolicy *key_newsp(const char*, int); -#if 0 -extern struct secpolicy *key_gettunnel(const struct sockaddr *, - const struct sockaddr *, const struct sockaddr *, - const struct sockaddr *, const char*, int); -#endif -/* NB: prepend with _ for KAME IPv6 compatbility */ -extern void _key_freesp(struct secpolicy **, const char*, int); +struct secpolicy *key_newsp(void); +struct secpolicy *key_allocsp(struct secpolicyindex *, u_int); +struct secpolicy *key_msg2sp(struct sadb_x_policy *, size_t, int *); +int key_sp2msg(struct secpolicy *, void *, size_t *); +void key_addref(struct secpolicy *); +void key_freesp(struct secpolicy **); +int key_spdacquire(struct secpolicy *); +int key_havesp(u_int); +void key_bumpspgen(void); +uint32_t key_getspgen(void); +uint32_t key_newreqid(void); -#define KEY_ALLOCSP(spidx, dir) \ - key_allocsp(spidx, dir, __FILE__, __LINE__) -#define KEY_ALLOCSP2(spi, dst, proto, dir) \ - key_allocsp2(spi, dst, proto, dir, __FILE__, __LINE__) -#define KEY_NEWSP() \ - key_newsp(__FILE__, __LINE__) -#if 0 -#define KEY_GETTUNNEL(osrc, odst, isrc, idst) \ - key_gettunnel(osrc, odst, isrc, idst, __FILE__, __LINE__) -#endif -#define KEY_FREESP(spp) \ - _key_freesp(spp, __FILE__, __LINE__) +struct secasvar *key_allocsa(union sockaddr_union *, uint8_t, uint32_t); +struct secasvar *key_allocsa_tunnel(union sockaddr_union *, + union sockaddr_union *, uint8_t); +struct secasvar *key_allocsa_policy(struct secpolicy *, + const struct secasindex *, int *); +struct secasvar *key_allocsa_tcpmd5(struct secasindex *); +void key_freesav(struct secasvar **); -extern struct secasvar *key_allocsa(union sockaddr_union *, u_int, u_int32_t, - const char*, int); -extern struct secasvar *key_allocsa_tunnel(union sockaddr_union *, - union sockaddr_union *, u_int, const char*, int); -extern void key_addrefsa(struct secasvar *, const char*, int); -extern void key_freesav(struct secasvar **, const char*, int); +int key_sockaddrcmp(const struct sockaddr *, const struct sockaddr *, int); +int key_sockaddrcmp_withmask(const struct sockaddr *, const struct sockaddr *, + size_t); -#define KEY_ALLOCSA(dst, proto, spi) \ - key_allocsa(dst, proto, spi, __FILE__, __LINE__) -#define KEY_ALLOCSA_TUNNEL(src, dst, proto) \ - key_allocsa_tunnel(src, dst, proto, __FILE__, __LINE__) -#define KEY_ADDREFSA(sav) \ - key_addrefsa(sav, __FILE__, __LINE__) -#define KEY_FREESAV(psav) \ - key_freesav(psav, __FILE__, __LINE__) +int key_register_ifnet(struct secpolicy **, u_int); +void key_unregister_ifnet(struct secpolicy **, u_int); -extern void key_freeso(struct socket *); -extern int key_checktunnelsanity(struct secasvar *, u_int, - caddr_t, caddr_t); -extern int key_checkrequest(struct ipsecrequest *isr, - const struct secasindex *); -extern struct secpolicy *key_msg2sp(struct sadb_x_policy *, - size_t, int *); -extern struct mbuf *key_sp2msg(struct secpolicy *); -extern int key_ismyaddr(struct sockaddr *); -extern int key_spdacquire(struct secpolicy *); extern u_long key_random(void); extern void key_randomfill(void *, size_t); extern void key_freereg(struct socket *); @@ -109,11 +81,8 @@ extern void key_destroy(void); #endif extern void key_sa_recordxfer(struct secasvar *, struct mbuf *); -#ifdef IPSEC_NAT_T -u_int16_t key_portfromsaddr(struct sockaddr *); -#define KEY_PORTFROMSADDR(saddr) \ - key_portfromsaddr((struct sockaddr *)(saddr)) -#endif +uint16_t key_portfromsaddr(struct sockaddr *); +void key_porttosaddr(struct sockaddr *, uint16_t port); #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IPSEC_SA); Index: head/sys/netipsec/key.c =================================================================== --- head/sys/netipsec/key.c +++ head/sys/netipsec/key.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,8 @@ #include #include +#include + #include #include #include @@ -67,6 +70,7 @@ #include #include #include +#include #ifdef INET6 #include @@ -74,13 +78,6 @@ #include #endif /* INET6 */ -#if defined(INET) || defined(INET6) -#include -#endif -#ifdef INET6 -#include -#endif /* INET6 */ - #include #include #include @@ -93,7 +90,7 @@ #endif #include - +#include #include /* randomness */ @@ -141,10 +138,17 @@ static VNET_DEFINE(u_int32_t, acq_seq) = 0; #define V_acq_seq VNET(acq_seq) - /* SPD */ -static VNET_DEFINE(TAILQ_HEAD(_sptree, secpolicy), sptree[IPSEC_DIR_MAX]); +static VNET_DEFINE(uint32_t, sp_genid) = 0; +#define V_sp_genid VNET(sp_genid) + +/* SPD */ +TAILQ_HEAD(secpolicy_queue, secpolicy); +LIST_HEAD(secpolicy_list, secpolicy); +static VNET_DEFINE(struct secpolicy_queue, sptree[IPSEC_DIR_MAX]); +static VNET_DEFINE(struct secpolicy_queue, sptree_ifnet[IPSEC_DIR_MAX]); static struct rmlock sptree_lock; #define V_sptree VNET(sptree) +#define V_sptree_ifnet VNET(sptree_ifnet) #define SPTREE_LOCK_INIT() rm_init(&sptree_lock, "sptree") #define SPTREE_LOCK_DESTROY() rm_destroy(&sptree_lock) #define SPTREE_RLOCK_TRACKER struct rm_priotracker sptree_tracker @@ -156,17 +160,97 @@ #define SPTREE_WLOCK_ASSERT() rm_assert(&sptree_lock, RA_WLOCKED) #define SPTREE_UNLOCK_ASSERT() rm_assert(&sptree_lock, RA_UNLOCKED) -static VNET_DEFINE(LIST_HEAD(_sahtree, secashead), sahtree); /* SAD */ +/* Hash table for lookup SP using unique id */ +static VNET_DEFINE(struct secpolicy_list *, sphashtbl); +static VNET_DEFINE(u_long, sphash_mask); +#define V_sphashtbl VNET(sphashtbl) +#define V_sphash_mask VNET(sphash_mask) + +#define SPHASH_NHASH_LOG2 7 +#define SPHASH_NHASH (1 << SPHASH_NHASH_LOG2) +#define SPHASH_HASHVAL(id) (key_u32hash(id) & V_sphash_mask) +#define SPHASH_HASH(id) &V_sphashtbl[SPHASH_HASHVAL(id)] + +/* SAD */ +TAILQ_HEAD(secashead_queue, secashead); +LIST_HEAD(secashead_list, secashead); +static VNET_DEFINE(struct secashead_queue, sahtree); +static struct rmlock sahtree_lock; #define V_sahtree VNET(sahtree) -static struct mtx sahtree_lock; -#define SAHTREE_LOCK_INIT() \ - mtx_init(&sahtree_lock, "sahtree", \ - "fast ipsec security association database", MTX_DEF) -#define SAHTREE_LOCK_DESTROY() mtx_destroy(&sahtree_lock) -#define SAHTREE_LOCK() mtx_lock(&sahtree_lock) -#define SAHTREE_UNLOCK() mtx_unlock(&sahtree_lock) -#define SAHTREE_LOCK_ASSERT() mtx_assert(&sahtree_lock, MA_OWNED) +#define SAHTREE_LOCK_INIT() rm_init(&sahtree_lock, "sahtree") +#define SAHTREE_LOCK_DESTROY() rm_destroy(&sahtree_lock) +#define SAHTREE_RLOCK_TRACKER struct rm_priotracker sahtree_tracker +#define SAHTREE_RLOCK() rm_rlock(&sahtree_lock, &sahtree_tracker) +#define SAHTREE_RUNLOCK() rm_runlock(&sahtree_lock, &sahtree_tracker) +#define SAHTREE_RLOCK_ASSERT() rm_assert(&sahtree_lock, RA_RLOCKED) +#define SAHTREE_WLOCK() rm_wlock(&sahtree_lock) +#define SAHTREE_WUNLOCK() rm_wunlock(&sahtree_lock) +#define SAHTREE_WLOCK_ASSERT() rm_assert(&sahtree_lock, RA_WLOCKED) +#define SAHTREE_UNLOCK_ASSERT() rm_assert(&sahtree_lock, RA_UNLOCKED) +/* Hash table for lookup in SAD using SA addresses */ +static VNET_DEFINE(struct secashead_list *, sahaddrhashtbl); +static VNET_DEFINE(u_long, sahaddrhash_mask); +#define V_sahaddrhashtbl VNET(sahaddrhashtbl) +#define V_sahaddrhash_mask VNET(sahaddrhash_mask) + +#define SAHHASH_NHASH_LOG2 7 +#define SAHHASH_NHASH (1 << SAHHASH_NHASH_LOG2) +#define SAHADDRHASH_HASHVAL(saidx) \ + (key_saidxhash(saidx) & V_sahaddrhash_mask) +#define SAHADDRHASH_HASH(saidx) \ + &V_sahaddrhashtbl[SAHADDRHASH_HASHVAL(saidx)] + +/* Hash table for lookup in SAD using SPI */ +LIST_HEAD(secasvar_list, secasvar); +static VNET_DEFINE(struct secasvar_list *, savhashtbl); +static VNET_DEFINE(u_long, savhash_mask); +#define V_savhashtbl VNET(savhashtbl) +#define V_savhash_mask VNET(savhash_mask) +#define SAVHASH_NHASH_LOG2 7 +#define SAVHASH_NHASH (1 << SAVHASH_NHASH_LOG2) +#define SAVHASH_HASHVAL(spi) (key_u32hash(spi) & V_savhash_mask) +#define SAVHASH_HASH(spi) &V_savhashtbl[SAVHASH_HASHVAL(spi)] + +static uint32_t +key_saidxhash(const struct secasindex *saidx) +{ + uint32_t hval; + + hval = fnv_32_buf(&saidx->proto, sizeof(saidx->proto), + FNV1_32_INIT); + switch (saidx->dst.sa.sa_family) { +#ifdef INET + case AF_INET: + hval = fnv_32_buf(&saidx->src.sin.sin_addr, + sizeof(in_addr_t), hval); + hval = fnv_32_buf(&saidx->dst.sin.sin_addr, + sizeof(in_addr_t), hval); + break; +#endif +#ifdef INET6 + case AF_INET6: + hval = fnv_32_buf(&saidx->src.sin6.sin6_addr, + sizeof(struct in6_addr), hval); + hval = fnv_32_buf(&saidx->dst.sin6.sin6_addr, + sizeof(struct in6_addr), hval); + break; +#endif + default: + hval = 0; + ipseclog((LOG_DEBUG, "%s: unknown address family %d", + __func__, saidx->dst.sa.sa_family)); + } + return (hval); +} + +static uint32_t +key_u32hash(uint32_t val) +{ + + return (fnv_32_buf(&val, sizeof(val), FNV1_32_INIT)); +} + /* registed list */ static VNET_DEFINE(LIST_HEAD(_regtree, secreg), regtree[SADB_SATYPE_MAX + 1]); #define V_regtree VNET(regtree) @@ -178,16 +262,40 @@ #define REGTREE_UNLOCK() mtx_unlock(®tree_lock) #define REGTREE_LOCK_ASSERT() mtx_assert(®tree_lock, MA_OWNED) -static VNET_DEFINE(LIST_HEAD(_acqtree, secacq), acqtree); /* acquiring list */ +/* Acquiring list */ +LIST_HEAD(secacq_list, secacq); +static VNET_DEFINE(struct secacq_list, acqtree); #define V_acqtree VNET(acqtree) static struct mtx acq_lock; #define ACQ_LOCK_INIT() \ - mtx_init(&acq_lock, "acqtree", "fast ipsec acquire list", MTX_DEF) + mtx_init(&acq_lock, "acqtree", "ipsec SA acquiring list", MTX_DEF) #define ACQ_LOCK_DESTROY() mtx_destroy(&acq_lock) #define ACQ_LOCK() mtx_lock(&acq_lock) #define ACQ_UNLOCK() mtx_unlock(&acq_lock) #define ACQ_LOCK_ASSERT() mtx_assert(&acq_lock, MA_OWNED) +/* Hash table for lookup in ACQ list using SA addresses */ +static VNET_DEFINE(struct secacq_list *, acqaddrhashtbl); +static VNET_DEFINE(u_long, acqaddrhash_mask); +#define V_acqaddrhashtbl VNET(acqaddrhashtbl) +#define V_acqaddrhash_mask VNET(acqaddrhash_mask) + +/* Hash table for lookup in ACQ list using SEQ number */ +static VNET_DEFINE(struct secacq_list *, acqseqhashtbl); +static VNET_DEFINE(u_long, acqseqhash_mask); +#define V_acqseqhashtbl VNET(acqseqhashtbl) +#define V_acqseqhash_mask VNET(acqseqhash_mask) + +#define ACQHASH_NHASH_LOG2 7 +#define ACQHASH_NHASH (1 << ACQHASH_NHASH_LOG2) +#define ACQADDRHASH_HASHVAL(saidx) \ + (key_saidxhash(saidx) & V_acqaddrhash_mask) +#define ACQSEQHASH_HASHVAL(seq) \ + (key_u32hash(seq) & V_acqseqhash_mask) +#define ACQADDRHASH_HASH(saidx) \ + &V_acqaddrhashtbl[ACQADDRHASH_HASHVAL(saidx)] +#define ACQSEQHASH_HASH(seq) \ + &V_acqseqhashtbl[ACQSEQHASH_HASHVAL(seq)] /* SP acquiring list */ static VNET_DEFINE(LIST_HEAD(_spacqtree, secspacq), spacqtree); #define V_spacqtree VNET(spacqtree) @@ -200,22 +308,6 @@ #define SPACQ_UNLOCK() mtx_unlock(&spacq_lock) #define SPACQ_LOCK_ASSERT() mtx_assert(&spacq_lock, MA_OWNED) -/* search order for SAs */ -static const u_int saorder_state_valid_prefer_old[] = { - SADB_SASTATE_DYING, SADB_SASTATE_MATURE, -}; -static const u_int saorder_state_valid_prefer_new[] = { - SADB_SASTATE_MATURE, SADB_SASTATE_DYING, -}; -static const u_int saorder_state_alive[] = { - /* except DEAD */ - SADB_SASTATE_MATURE, SADB_SASTATE_DYING, SADB_SASTATE_LARVAL -}; -static const u_int saorder_state_any[] = { - SADB_SASTATE_MATURE, SADB_SASTATE_DYING, - SADB_SASTATE_LARVAL, SADB_SASTATE_DEAD -}; - static const int minsize[] = { sizeof(struct sadb_msg), /* SADB_EXT_RESERVED */ sizeof(struct sadb_sa), /* SADB_EXT_SA */ @@ -244,6 +336,8 @@ sizeof(struct sadb_address), /* SADB_X_EXT_NAT_T_OAR */ sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */ sizeof(struct sadb_x_sa_replay), /* SADB_X_EXT_SA_REPLAY */ + sizeof(struct sadb_address), /* SADB_X_EXT_NEW_ADDRESS_SRC */ + sizeof(struct sadb_address), /* SADB_X_EXT_NEW_ADDRESS_DST */ }; _Static_assert(sizeof(minsize)/sizeof(int) == SADB_EXT_MAX + 1, "minsize size mismatch"); @@ -275,9 +369,23 @@ 0, /* SADB_X_EXT_NAT_T_OAR */ sizeof(struct sadb_x_nat_t_frag),/* SADB_X_EXT_NAT_T_FRAG */ sizeof(struct sadb_x_sa_replay), /* SADB_X_EXT_SA_REPLAY */ + 0, /* SADB_X_EXT_NEW_ADDRESS_SRC */ + 0, /* SADB_X_EXT_NEW_ADDRESS_DST */ }; _Static_assert(sizeof(maxsize)/sizeof(int) == SADB_EXT_MAX + 1, "minsize size mismatch"); +/* + * Internal values for SA flags: + * SADB_X_EXT_F_CLONED means that SA was cloned by key_updateaddresses, + * thus we will not free the most of SA content in key_delsav(). + */ +#define SADB_X_EXT_F_CLONED 0x80000000 + +#define SADB_CHECKLEN(_mhp, _ext) \ + ((_mhp)->extlen[(_ext)] < minsize[(_ext)] || (maxsize[(_ext)] != 0 && \ + ((_mhp)->extlen[(_ext)] > maxsize[(_ext)]))) +#define SADB_CHECKHDR(_mhp, _ext) ((_mhp)->ext[(_ext)] == NULL) + static VNET_DEFINE(int, ipsec_esp_keymin) = 256; static VNET_DEFINE(int, ipsec_esp_auth) = 0; static VNET_DEFINE(int, ipsec_ah_keymin) = 128; @@ -286,10 +394,26 @@ #define V_ipsec_esp_auth VNET(ipsec_esp_auth) #define V_ipsec_ah_keymin VNET(ipsec_ah_keymin) -#ifdef SYSCTL_DECL -SYSCTL_DECL(_net_key); +#ifdef IPSEC_DEBUG +VNET_DEFINE(int, ipsec_debug) = 1; +#else +VNET_DEFINE(int, ipsec_debug) = 0; #endif +#ifdef INET +SYSCTL_DECL(_net_inet_ipsec); +SYSCTL_INT(_net_inet_ipsec, IPSECCTL_DEBUG, debug, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0, + "Enable IPsec debugging output when set."); +#endif +#ifdef INET6 +SYSCTL_DECL(_net_inet6_ipsec6); +SYSCTL_INT(_net_inet6_ipsec6, IPSECCTL_DEBUG, debug, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ipsec_debug), 0, + "Enable IPsec debugging output when set."); +#endif + +SYSCTL_DECL(_net_key); SYSCTL_INT(_net_key, KEYCTL_DEBUG_LEVEL, debug, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_debug_level), 0, ""); @@ -339,36 +463,7 @@ #define __LIST_CHAINED(elm) \ (!((elm)->chain.le_next == NULL && (elm)->chain.le_prev == NULL)) -#define LIST_INSERT_TAIL(head, elm, type, field) \ -do {\ - struct type *curelm = LIST_FIRST(head); \ - if (curelm == NULL) {\ - LIST_INSERT_HEAD(head, elm, field); \ - } else { \ - while (LIST_NEXT(curelm, field)) \ - curelm = LIST_NEXT(curelm, field);\ - LIST_INSERT_AFTER(curelm, elm, field);\ - }\ -} while (0) -#define KEY_CHKSASTATE(head, sav, name) \ -do { \ - if ((head) != (sav)) { \ - ipseclog((LOG_DEBUG, "%s: state mismatched (TREE=%d SA=%d)\n", \ - (name), (head), (sav))); \ - break; \ - } \ -} while (0) - -#define KEY_CHKSPDIR(head, sp, name) \ -do { \ - if ((head) != (sp)) { \ - ipseclog((LOG_DEBUG, "%s: direction mismatched (TREE=%d SP=%d), " \ - "anyway continue.\n", \ - (name), (head), (sp))); \ - } \ -} while (0) - MALLOC_DEFINE(M_IPSEC_SA, "secasvar", "ipsec security association"); MALLOC_DEFINE(M_IPSEC_SAH, "sahead", "ipsec sa head"); MALLOC_DEFINE(M_IPSEC_SP, "ipsecpolicy", "ipsec security policy"); @@ -377,6 +472,17 @@ MALLOC_DEFINE(M_IPSEC_SAQ, "ipsec-saq", "ipsec sa acquire"); MALLOC_DEFINE(M_IPSEC_SAR, "ipsec-reg", "ipsec sa acquire"); +static VNET_DEFINE(uma_zone_t, key_lft_zone); +#define V_key_lft_zone VNET(key_lft_zone) + +static LIST_HEAD(xforms_list, xformsw) xforms = LIST_HEAD_INITIALIZER(); +static struct mtx xforms_lock; +#define XFORMS_LOCK_INIT() \ + mtx_init(&xforms_lock, "xforms_list", "IPsec transforms list", MTX_DEF) +#define XFORMS_LOCK_DESTROY() mtx_destroy(&xforms_lock) +#define XFORMS_LOCK() mtx_lock(&xforms_lock) +#define XFORMS_UNLOCK() mtx_unlock(&xforms_lock) + /* * set parameters into secpolicyindex buffer. * Must allocate secpolicyindex buffer passed to this function. @@ -404,6 +510,8 @@ (idx)->reqid = (r); \ bcopy((s), &(idx)->src, ((const struct sockaddr *)(s))->sa_len); \ bcopy((d), &(idx)->dst, ((const struct sockaddr *)(d))->sa_len); \ + key_porttosaddr(&(idx)->src.sa, 0); \ + key_porttosaddr(&(idx)->dst.sa, 0); \ } while (0) /* key statistics */ @@ -418,22 +526,59 @@ int extlen[SADB_EXT_MAX + 1]; }; +static struct supported_ealgs { + int sadb_alg; + const struct enc_xform *xform; +} supported_ealgs[] = { + { SADB_EALG_DESCBC, &enc_xform_des }, + { SADB_EALG_3DESCBC, &enc_xform_3des }, + { SADB_X_EALG_AES, &enc_xform_rijndael128 }, + { SADB_X_EALG_BLOWFISHCBC, &enc_xform_blf }, + { SADB_X_EALG_CAST128CBC, &enc_xform_cast5 }, + { SADB_EALG_NULL, &enc_xform_null }, + { SADB_X_EALG_CAMELLIACBC, &enc_xform_camellia }, + { SADB_X_EALG_AESCTR, &enc_xform_aes_icm }, + { SADB_X_EALG_AESGCM16, &enc_xform_aes_nist_gcm }, + { SADB_X_EALG_AESGMAC, &enc_xform_aes_nist_gmac }, +}; + +static struct supported_aalgs { + int sadb_alg; + const struct auth_hash *xform; +} supported_aalgs[] = { + { SADB_X_AALG_NULL, &auth_hash_null }, + { SADB_AALG_MD5HMAC, &auth_hash_hmac_md5 }, + { SADB_AALG_SHA1HMAC, &auth_hash_hmac_sha1 }, + { SADB_X_AALG_RIPEMD160HMAC, &auth_hash_hmac_ripemd_160 }, + { SADB_X_AALG_MD5, &auth_hash_key_md5 }, + { SADB_X_AALG_SHA, &auth_hash_key_sha1 }, + { SADB_X_AALG_SHA2_256, &auth_hash_hmac_sha2_256 }, + { SADB_X_AALG_SHA2_384, &auth_hash_hmac_sha2_384 }, + { SADB_X_AALG_SHA2_512, &auth_hash_hmac_sha2_512 }, + { SADB_X_AALG_AES128GMAC, &auth_hash_nist_gmac_aes_128 }, + { SADB_X_AALG_AES192GMAC, &auth_hash_nist_gmac_aes_192 }, + { SADB_X_AALG_AES256GMAC, &auth_hash_nist_gmac_aes_256 }, +}; + +static struct supported_calgs { + int sadb_alg; + const struct comp_algo *xform; +} supported_calgs[] = { + { SADB_X_CALG_DEFLATE, &comp_algo_deflate }, +}; + #ifndef IPSEC_DEBUG2 static struct callout key_timer; #endif -static struct secasvar *key_allocsa_policy(const struct secasindex *); -static void key_freesp_so(struct secpolicy **); -static struct secasvar *key_do_allocsa_policy(struct secashead *, u_int); static void key_unlink(struct secpolicy *); static struct secpolicy *key_getsp(struct secpolicyindex *); static struct secpolicy *key_getspbyid(u_int32_t); -static u_int32_t key_newreqid(void); static struct mbuf *key_gather_mbuf(struct mbuf *, const struct sadb_msghdr *, int, int, ...); static int key_spdadd(struct socket *, struct mbuf *, const struct sadb_msghdr *); -static u_int32_t key_getnewspid(void); +static uint32_t key_getnewspid(void); static int key_spddelete(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_spddelete2(struct socket *, struct mbuf *, @@ -446,22 +591,25 @@ const struct sadb_msghdr *); static struct mbuf *key_setdumpsp(struct secpolicy *, u_int8_t, u_int32_t, u_int32_t); -static u_int key_getspreqmsglen(struct secpolicy *); +static struct mbuf *key_sp2mbuf(struct secpolicy *); +static size_t key_getspreqmsglen(struct secpolicy *); static int key_spdexpire(struct secpolicy *); static struct secashead *key_newsah(struct secasindex *); +static void key_freesah(struct secashead **); static void key_delsah(struct secashead *); -static struct secasvar *key_newsav(struct mbuf *, - const struct sadb_msghdr *, struct secashead *, int *, - const char*, int); -#define KEY_NEWSAV(m, sadb, sah, e) \ - key_newsav(m, sadb, sah, e, __FILE__, __LINE__) +static struct secasvar *key_newsav(const struct sadb_msghdr *, + struct secasindex *, uint32_t, int *); static void key_delsav(struct secasvar *); +static void key_unlinksav(struct secasvar *); static struct secashead *key_getsah(struct secasindex *); -static struct secasvar *key_checkspidup(struct secasindex *, u_int32_t); -static struct secasvar *key_getsavbyspi(struct secashead *, u_int32_t); -static int key_setsaval(struct secasvar *, struct mbuf *, - const struct sadb_msghdr *); -static int key_mature(struct secasvar *); +static int key_checkspidup(uint32_t); +static struct secasvar *key_getsavbyspi(uint32_t); +static int key_setnatt(struct secasvar *, const struct sadb_msghdr *); +static int key_setsaval(struct secasvar *, const struct sadb_msghdr *); +static int key_updatelifetimes(struct secasvar *, const struct sadb_msghdr *); +static int key_updateaddresses(struct socket *, struct mbuf *, + const struct sadb_msghdr *, struct secasvar *, struct secasindex *); + static struct mbuf *key_setdumpsa(struct secasvar *, u_int8_t, u_int8_t, u_int32_t, u_int32_t); static struct mbuf *key_setsadbmsg(u_int8_t, u_int16_t, u_int8_t, @@ -469,24 +617,16 @@ static struct mbuf *key_setsadbsa(struct secasvar *); static struct mbuf *key_setsadbaddr(u_int16_t, const struct sockaddr *, u_int8_t, u_int16_t); -#ifdef IPSEC_NAT_T static struct mbuf *key_setsadbxport(u_int16_t, u_int16_t); static struct mbuf *key_setsadbxtype(u_int16_t); -#endif -static void key_porttosaddr(struct sockaddr *, u_int16_t); -#define KEY_PORTTOSADDR(saddr, port) \ - key_porttosaddr((struct sockaddr *)(saddr), (port)) static struct mbuf *key_setsadbxsa2(u_int8_t, u_int32_t, u_int32_t); static struct mbuf *key_setsadbxsareplay(u_int32_t); static struct mbuf *key_setsadbxpolicy(u_int16_t, u_int8_t, u_int32_t, u_int32_t); -static struct seckey *key_dup_keymsg(const struct sadb_key *, u_int, - struct malloc_type *); +static struct seckey *key_dup_keymsg(const struct sadb_key *, size_t, + struct malloc_type *); static struct seclifetime *key_dup_lifemsg(const struct sadb_lifetime *src, - struct malloc_type *type); -#ifdef INET6 -static int key_ismyaddr6(struct sockaddr_in6 *); -#endif + struct malloc_type *); /* flags for key_cmpsaidx() */ #define CMP_HEAD 1 /* protocol, addresses. */ @@ -499,44 +639,39 @@ struct secpolicyindex *); static int key_cmpspidx_withmask(struct secpolicyindex *, struct secpolicyindex *); -static int key_sockaddrcmp(const struct sockaddr *, - const struct sockaddr *, int); static int key_bbcmp(const void *, const void *, u_int); -static u_int16_t key_satype2proto(u_int8_t); -static u_int8_t key_proto2satype(u_int16_t); +static uint8_t key_satype2proto(uint8_t); +static uint8_t key_proto2satype(uint8_t); static int key_getspi(struct socket *, struct mbuf *, const struct sadb_msghdr *); -static u_int32_t key_do_getnewspi(struct sadb_spirange *, - struct secasindex *); +static uint32_t key_do_getnewspi(struct sadb_spirange *, struct secasindex *); static int key_update(struct socket *, struct mbuf *, const struct sadb_msghdr *); -#ifdef IPSEC_DOSEQCHECK -static struct secasvar *key_getsavbyseq(struct secashead *, u_int32_t); -#endif static int key_add(struct socket *, struct mbuf *, const struct sadb_msghdr *); -static int key_setident(struct secashead *, struct mbuf *, - const struct sadb_msghdr *); +static int key_setident(struct secashead *, const struct sadb_msghdr *); static struct mbuf *key_getmsgbuf_x1(struct mbuf *, const struct sadb_msghdr *); static int key_delete(struct socket *, struct mbuf *, const struct sadb_msghdr *); static int key_delete_all(struct socket *, struct mbuf *, - const struct sadb_msghdr *, u_int16_t); + const struct sadb_msghdr *, struct secasindex *); +static void key_delete_xform(const struct xformsw *); static int key_get(struct socket *, struct mbuf *, const struct sadb_msghdr *); static void key_getcomb_setlifetime(struct sadb_comb *); -static struct mbuf *key_getcomb_esp(void); +static struct mbuf *key_getcomb_ealg(void); static struct mbuf *key_getcomb_ah(void); static struct mbuf *key_getcomb_ipcomp(void); static struct mbuf *key_getprop(const struct secasindex *); static int key_acquire(const struct secasindex *, struct secpolicy *); -static struct secacq *key_newacq(const struct secasindex *); -static struct secacq *key_getacq(const struct secasindex *); -static struct secacq *key_getacqbyseq(u_int32_t); +static uint32_t key_newacq(const struct secasindex *, int *); +static uint32_t key_getacq(const struct secasindex *, int *); +static int key_acqdone(const struct secasindex *, uint32_t); +static int key_acqreset(uint32_t); static struct secspacq *key_newspacq(struct secpolicyindex *); static struct secspacq *key_getspacq(struct secpolicyindex *); static int key_acquire2(struct socket *, struct mbuf *, @@ -553,40 +688,45 @@ static int key_senderror(struct socket *, struct mbuf *, int); static int key_validate_ext(const struct sadb_ext *, int); static int key_align(struct mbuf *, struct sadb_msghdr *); -static struct mbuf *key_setlifetime(struct seclifetime *src, - u_int16_t exttype); -static struct mbuf *key_setkey(struct seckey *src, u_int16_t exttype); +static struct mbuf *key_setlifetime(struct seclifetime *, uint16_t); +static struct mbuf *key_setkey(struct seckey *, uint16_t); +static int xform_init(struct secasvar *, u_short); -#if 0 -static const char *key_getfqdn(void); -static const char *key_getuserfqdn(void); -#endif -static void key_sa_chgstate(struct secasvar *, u_int8_t); +#define DBG_IPSEC_INITREF(t, p) do { \ + refcount_init(&(p)->refcnt, 1); \ + KEYDBG(KEY_STAMP, \ + printf("%s: Initialize refcnt %s(%p) = %u\n", \ + __func__, #t, (p), (p)->refcnt)); \ +} while (0) +#define DBG_IPSEC_ADDREF(t, p) do { \ + refcount_acquire(&(p)->refcnt); \ + KEYDBG(KEY_STAMP, \ + printf("%s: Acquire refcnt %s(%p) -> %u\n", \ + __func__, #t, (p), (p)->refcnt)); \ +} while (0) +#define DBG_IPSEC_DELREF(t, p) do { \ + KEYDBG(KEY_STAMP, \ + printf("%s: Release refcnt %s(%p) -> %u\n", \ + __func__, #t, (p), (p)->refcnt - 1)); \ + refcount_release(&(p)->refcnt); \ +} while (0) -static __inline void -sa_initref(struct secasvar *sav) -{ +#define IPSEC_INITREF(t, p) refcount_init(&(p)->refcnt, 1) +#define IPSEC_ADDREF(t, p) refcount_acquire(&(p)->refcnt) +#define IPSEC_DELREF(t, p) refcount_release(&(p)->refcnt) - refcount_init(&sav->refcnt, 1); -} -static __inline void -sa_addref(struct secasvar *sav) -{ +#define SP_INITREF(p) IPSEC_INITREF(SP, p) +#define SP_ADDREF(p) IPSEC_ADDREF(SP, p) +#define SP_DELREF(p) IPSEC_DELREF(SP, p) - refcount_acquire(&sav->refcnt); - IPSEC_ASSERT(sav->refcnt != 0, ("SA refcnt overflow")); -} -static __inline int -sa_delref(struct secasvar *sav) -{ +#define SAH_INITREF(p) IPSEC_INITREF(SAH, p) +#define SAH_ADDREF(p) IPSEC_ADDREF(SAH, p) +#define SAH_DELREF(p) IPSEC_DELREF(SAH, p) - IPSEC_ASSERT(sav->refcnt > 0, ("SA refcnt underflow")); - return (refcount_release(&sav->refcnt)); -} +#define SAV_INITREF(p) IPSEC_INITREF(SAV, p) +#define SAV_ADDREF(p) IPSEC_ADDREF(SAV, p) +#define SAV_DELREF(p) IPSEC_DELREF(SAV, p) -#define SP_ADDREF(p) refcount_acquire(&(p)->refcnt) -#define SP_DELREF(p) refcount_release(&(p)->refcnt) - /* * Update the refcnt while holding the SPTREE lock. */ @@ -612,54 +752,49 @@ /* %%% IPsec policy management */ /* - * allocating a SP for OUTBOUND or INBOUND packet. - * Must call key_freesp() later. - * OUT: NULL: not found - * others: found and return the pointer. + * Return current SPDB generation. */ -struct secpolicy * -key_allocsp(struct secpolicyindex *spidx, u_int dir, const char* where, - int tag) +uint32_t +key_getspgen(void) { - SPTREE_RLOCK_TRACKER; - struct secpolicy *sp; - IPSEC_ASSERT(spidx != NULL, ("null spidx")); - IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, - ("invalid direction %u", dir)); + return (V_sp_genid); +} - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); +void +key_bumpspgen(void) +{ - /* get a SP entry */ - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** objects\n"); - kdebug_secpolicyindex(spidx)); + V_sp_genid++; +} - SPTREE_RLOCK(); - TAILQ_FOREACH(sp, &V_sptree[dir], chain) { - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** in SPD\n"); - kdebug_secpolicyindex(&sp->spidx)); - if (key_cmpspidx_withmask(&sp->spidx, spidx)) - goto found; - } - sp = NULL; -found: - if (sp) { - /* sanity check */ - KEY_CHKSPDIR(sp->spidx.dir, dir, __func__); +static int +key_checksockaddrs(struct sockaddr *src, struct sockaddr *dst) +{ - /* found a SPD entry */ - sp->lastused = time_second; - SP_ADDREF(sp); + /* family match */ + if (src->sa_family != dst->sa_family) + return (EINVAL); + /* sa_len match */ + if (src->sa_len != dst->sa_len) + return (EINVAL); + switch (src->sa_family) { +#ifdef INET + case AF_INET: + if (src->sa_len != sizeof(struct sockaddr_in)) + return (EINVAL); + break; +#endif +#ifdef INET6 + case AF_INET6: + if (src->sa_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + break; +#endif + default: + return (EAFNOSUPPORT); } - SPTREE_RUNLOCK(); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, - sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); - return sp; + return (0); } /* @@ -669,395 +804,161 @@ * others: found and return the pointer. */ struct secpolicy * -key_allocsp2(u_int32_t spi, union sockaddr_union *dst, u_int8_t proto, - u_int dir, const char* where, int tag) +key_allocsp(struct secpolicyindex *spidx, u_int dir) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; - IPSEC_ASSERT(dst != NULL, ("null dst")); + IPSEC_ASSERT(spidx != NULL, ("null spidx")); IPSEC_ASSERT(dir == IPSEC_DIR_INBOUND || dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", dir)); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); - - /* get a SP entry */ - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** objects\n"); - printf("spi %u proto %u dir %u\n", spi, proto, dir); - kdebug_sockaddr(&dst->sa)); - SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[dir], chain) { - KEYDEBUG(KEYDEBUG_IPSEC_DATA, - printf("*** in SPD\n"); - kdebug_secpolicyindex(&sp->spidx)); - /* compare simple values, then dst address */ - if (sp->spidx.ul_proto != proto) - continue; - /* NB: spi's must exist and match */ - if (!sp->req || !sp->req->sav || sp->req->sav->spi != spi) - continue; - if (key_sockaddrcmp(&sp->spidx.dst.sa, &dst->sa, 1) == 0) - goto found; + if (key_cmpspidx_withmask(&sp->spidx, spidx)) { + SP_ADDREF(sp); + break; + } } - sp = NULL; -found: - if (sp) { - /* sanity check */ - KEY_CHKSPDIR(sp->spidx.dir, dir, __func__); + SPTREE_RUNLOCK(); - /* found a SPD entry */ + if (sp != NULL) { /* found a SPD entry */ sp->lastused = time_second; - SP_ADDREF(sp); + KEYDBG(IPSEC_STAMP, + printf("%s: return SP(%p)\n", __func__, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); + } else { + KEYDBG(IPSEC_DATA, + printf("%s: lookup failed for ", __func__); + kdebug_secpolicyindex(spidx, NULL)); } - SPTREE_RUNLOCK(); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, - sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); - return sp; + return (sp); } -#if 0 /* - * return a policy that matches this particular inbound packet. - * XXX slow + * Allocating an SA entry for an *INBOUND* or *OUTBOUND* TCP packet, signed + * or should be signed by MD5 signature. + * We don't use key_allocsa() for such lookups, because we don't know SPI. + * Unlike ESP and AH protocols, SPI isn't transmitted in the TCP header with + * signed packet. We use SADB only as storage for password. + * OUT: positive: corresponding SA for given saidx found. + * NULL: SA not found */ -struct secpolicy * -key_gettunnel(const struct sockaddr *osrc, - const struct sockaddr *odst, - const struct sockaddr *isrc, - const struct sockaddr *idst, - const char* where, int tag) +struct secasvar * +key_allocsa_tcpmd5(struct secasindex *saidx) { - struct secpolicy *sp; - const int dir = IPSEC_DIR_INBOUND; - struct ipsecrequest *r1, *r2, *p; - struct secpolicyindex spidx; + SAHTREE_RLOCK_TRACKER; + struct secashead *sah; + struct secasvar *sav; - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); + IPSEC_ASSERT(saidx->proto == IPPROTO_TCP, + ("unexpected security protocol %u", saidx->proto)); + IPSEC_ASSERT(saidx->mode == IPSEC_MODE_TCPMD5, + ("unexpected mode %u", saidx->mode)); - if (isrc->sa_family != idst->sa_family) { - ipseclog((LOG_ERR, "%s: protocol family mismatched %d != %d\n.", - __func__, isrc->sa_family, idst->sa_family)); - sp = NULL; - goto done; + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + KEYDBG(IPSEC_DUMP, + printf("%s: checking SAH\n", __func__); + kdebug_secash(sah, " ")); + if (sah->saidx.proto != IPPROTO_TCP) + continue; + if (!key_sockaddrcmp(&saidx->dst.sa, &sah->saidx.dst.sa, 0)) + break; } + if (sah != NULL) { + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) + SAV_ADDREF(sav); + } else + sav = NULL; + SAHTREE_RUNLOCK(); - SPTREE_LOCK(); - LIST_FOREACH(sp, &V_sptree[dir], chain) { - if (sp->state == IPSEC_SPSTATE_DEAD) - continue; - - r1 = r2 = NULL; - for (p = sp->req; p; p = p->next) { - if (p->saidx.mode != IPSEC_MODE_TUNNEL) - continue; - - r1 = r2; - r2 = p; - - if (!r1) { - /* here we look at address matches only */ - spidx = sp->spidx; - if (isrc->sa_len > sizeof(spidx.src) || - idst->sa_len > sizeof(spidx.dst)) - continue; - bcopy(isrc, &spidx.src, isrc->sa_len); - bcopy(idst, &spidx.dst, idst->sa_len); - if (!key_cmpspidx_withmask(&sp->spidx, &spidx)) - continue; - } else { - if (key_sockaddrcmp(&r1->saidx.src.sa, isrc, 0) || - key_sockaddrcmp(&r1->saidx.dst.sa, idst, 0)) - continue; - } - - if (key_sockaddrcmp(&r2->saidx.src.sa, osrc, 0) || - key_sockaddrcmp(&r2->saidx.dst.sa, odst, 0)) - continue; - - goto found; - } + if (sav != NULL) { + KEYDBG(IPSEC_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + } else { + KEYDBG(IPSEC_STAMP, + printf("%s: SA not found\n", __func__)); + KEYDBG(IPSEC_DATA, kdebug_secasindex(saidx, NULL)); } - sp = NULL; -found: - if (sp) { - sp->lastused = time_second; - SP_ADDREF(sp); - } - SPTREE_UNLOCK(); -done: - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SP:%p (ID=%u) refcnt %u\n", __func__, - sp, sp ? sp->id : 0, sp ? sp->refcnt : 0)); - return sp; + return (sav); } -#endif /* - * allocating an SA entry for an *OUTBOUND* packet. - * checking each request entries in SP, and acquire an SA if need. - * OUT: 0: there are valid requests. - * ENOENT: policy may be valid, but SA with REQUIRE is on acquiring. + * Allocating an SA entry for an *OUTBOUND* packet. + * OUT: positive: corresponding SA for given saidx found. + * NULL: SA not found, but will be acquired, check *error + * for acquiring status. */ -int -key_checkrequest(struct ipsecrequest *isr, const struct secasindex *saidx) +struct secasvar * +key_allocsa_policy(struct secpolicy *sp, const struct secasindex *saidx, + int *error) { - u_int level; - int error; + SAHTREE_RLOCK_TRACKER; + struct secashead *sah; struct secasvar *sav; - IPSEC_ASSERT(isr != NULL, ("null isr")); IPSEC_ASSERT(saidx != NULL, ("null saidx")); IPSEC_ASSERT(saidx->mode == IPSEC_MODE_TRANSPORT || saidx->mode == IPSEC_MODE_TUNNEL, ("unexpected policy %u", saidx->mode)); /* - * XXX guard against protocol callbacks from the crypto - * thread as they reference ipsecrequest.sav which we - * temporarily null out below. Need to rethink how we - * handle bundled SA's in the callback thread. - */ - IPSECREQUEST_LOCK_ASSERT(isr); - - /* get current level */ - level = ipsec_get_reqlevel(isr); - - /* * We check new SA in the IPsec request because a different * SA may be involved each time this request is checked, either * because new SAs are being configured, or this request is * associated with an unconnected datagram socket, or this request * is associated with a system default policy. - * - * key_allocsa_policy should allocate the oldest SA available. - * See key_do_allocsa_policy(), and draft-jenkins-ipsec-rekeying-03.txt. */ - sav = key_allocsa_policy(saidx); - if (sav != isr->sav) { - /* SA need to be updated. */ - if (!IPSECREQUEST_UPGRADE(isr)) { - /* Kick everyone off. */ - IPSECREQUEST_UNLOCK(isr); - IPSECREQUEST_WLOCK(isr); - } - if (isr->sav != NULL) - KEY_FREESAV(&isr->sav); - isr->sav = sav; - IPSECREQUEST_DOWNGRADE(isr); - } else if (sav != NULL) - KEY_FREESAV(&sav); - - /* When there is SA. */ - if (isr->sav != NULL) { - if (isr->sav->state != SADB_SASTATE_MATURE && - isr->sav->state != SADB_SASTATE_DYING) - return EINVAL; - return 0; - } - - /* there is no SA */ - error = key_acquire(saidx, isr->sp); - if (error != 0) { - /* XXX What should I do ? */ - ipseclog((LOG_DEBUG, "%s: error %d returned from key_acquire\n", - __func__, error)); - return error; - } - - if (level != IPSEC_LEVEL_REQUIRE) { - /* XXX sigh, the interface to this routine is botched */ - IPSEC_ASSERT(isr->sav == NULL, ("unexpected SA")); - return 0; - } else { - return ENOENT; - } -} - -/* - * allocating a SA for policy entry from SAD. - * NOTE: searching SAD of aliving state. - * OUT: NULL: not found. - * others: found and return the pointer. - */ -static struct secasvar * -key_allocsa_policy(const struct secasindex *saidx) -{ -#define N(a) _ARRAYLEN(a) - struct secashead *sah; - struct secasvar *sav; - u_int stateidx, arraysize; - const u_int *state_valid; - - state_valid = NULL; /* silence gcc */ - arraysize = 0; /* silence gcc */ - - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE_REQID)) { - if (V_key_preferred_oldsa) { - state_valid = saorder_state_valid_prefer_old; - arraysize = N(saorder_state_valid_prefer_old); - } else { - state_valid = saorder_state_valid_prefer_new; - arraysize = N(saorder_state_valid_prefer_new); - } + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + KEYDBG(IPSEC_DUMP, + printf("%s: checking SAH\n", __func__); + kdebug_secash(sah, " ")); + if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE_REQID)) break; - } - } - SAHTREE_UNLOCK(); - if (sah == NULL) - return NULL; - /* search valid state */ - for (stateidx = 0; stateidx < arraysize; stateidx++) { - sav = key_do_allocsa_policy(sah, state_valid[stateidx]); - if (sav != NULL) - return sav; } - - return NULL; -#undef N -} - -/* - * searching SAD with direction, protocol, mode and state. - * called by key_allocsa_policy(). - * OUT: - * NULL : not found - * others : found, pointer to a SA. - */ -static struct secasvar * -key_do_allocsa_policy(struct secashead *sah, u_int state) -{ - struct secasvar *sav, *nextsav, *candidate, *d; - - /* initialize */ - candidate = NULL; - - SAHTREE_LOCK(); - for (sav = LIST_FIRST(&sah->savtree[state]); - sav != NULL; - sav = nextsav) { - - nextsav = LIST_NEXT(sav, chain); - - /* sanity check */ - KEY_CHKSASTATE(sav->state, state, __func__); - - /* initialize */ - if (candidate == NULL) { - candidate = sav; - continue; - } - - /* Which SA is the better ? */ - - IPSEC_ASSERT(candidate->lft_c != NULL, - ("null candidate lifetime")); - IPSEC_ASSERT(sav->lft_c != NULL, ("null sav lifetime")); - - /* What the best method is to compare ? */ - if (V_key_preferred_oldsa) { - if (candidate->lft_c->addtime > - sav->lft_c->addtime) { - candidate = sav; - } - continue; - /*NOTREACHED*/ - } - - /* preferred new sa rather than old sa */ - if (candidate->lft_c->addtime < - sav->lft_c->addtime) { - d = candidate; - candidate = sav; - } else - d = sav; - + if (sah != NULL) { /* - * prepared to delete the SA when there is more - * suitable candidate and the lifetime of the SA is not - * permanent. + * Allocate the oldest SA available according to + * draft-jenkins-ipsec-rekeying-03. */ - if (d->lft_h->addtime != 0) { - struct mbuf *m, *result; - u_int8_t satype; + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) + SAV_ADDREF(sav); + } else + sav = NULL; + SAHTREE_RUNLOCK(); - key_sa_chgstate(d, SADB_SASTATE_DEAD); - - IPSEC_ASSERT(d->refcnt > 0, ("bogus ref count")); - - satype = key_proto2satype(d->sah->saidx.proto); - if (satype == 0) - goto msgfail; - - m = key_setsadbmsg(SADB_DELETE, 0, - satype, 0, 0, d->refcnt - 1); - if (!m) - goto msgfail; - result = m; - - /* set sadb_address for saidx's. */ - m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, - &d->sah->saidx.src.sa, - d->sah->saidx.src.sa.sa_len << 3, - IPSEC_ULPROTO_ANY); - if (!m) - goto msgfail; - m_cat(result, m); - - /* set sadb_address for saidx's. */ - m = key_setsadbaddr(SADB_EXT_ADDRESS_DST, - &d->sah->saidx.dst.sa, - d->sah->saidx.dst.sa.sa_len << 3, - IPSEC_ULPROTO_ANY); - if (!m) - goto msgfail; - m_cat(result, m); - - /* create SA extension */ - m = key_setsadbsa(d); - if (!m) - goto msgfail; - m_cat(result, m); - - if (result->m_len < sizeof(struct sadb_msg)) { - result = m_pullup(result, - sizeof(struct sadb_msg)); - if (result == NULL) - goto msgfail; - } - - result->m_pkthdr.len = 0; - for (m = result; m; m = m->m_next) - result->m_pkthdr.len += m->m_len; - mtod(result, struct sadb_msg *)->sadb_msg_len = - PFKEY_UNIT64(result->m_pkthdr.len); - - if (key_sendup_mbuf(NULL, result, - KEY_SENDUP_REGISTERED)) - goto msgfail; - msgfail: - KEY_FREESAV(&d); - } + if (sav != NULL) { + *error = 0; + KEYDBG(IPSEC_STAMP, + printf("%s: chosen SA(%p) for SP(%p)\n", __func__, + sav, sp)); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); + return (sav); /* return referenced SA */ } - if (candidate) { - sa_addref(candidate); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s cause refcnt++:%d SA:%p\n", - __func__, candidate->refcnt, candidate)); - } - SAHTREE_UNLOCK(); - return candidate; + /* there is no SA */ + *error = key_acquire(saidx, sp); + if ((*error) != 0) + ipseclog((LOG_DEBUG, + "%s: error %d returned from key_acquire()\n", + __func__, *error)); + KEYDBG(IPSEC_STAMP, + printf("%s: acquire SA for SP(%p), error %d\n", + __func__, sp, *error)); + KEYDBG(IPSEC_DATA, kdebug_secasindex(saidx, NULL)); + return (NULL); } /* @@ -1066,190 +967,121 @@ * OUT: positive: pointer to a usable sav (i.e. MATURE or DYING state). * NULL: not found, or error occurred. * - * In the comparison, no source address is used--for RFC2401 conformance. - * To quote, from section 4.1: - * A security association is uniquely identified by a triple consisting - * of a Security Parameter Index (SPI), an IP Destination Address, and a - * security protocol (AH or ESP) identifier. + * According to RFC 2401 SA is uniquely identified by a triple SPI, + * destination address, and security protocol. But according to RFC 4301, + * SPI by itself suffices to specify an SA. + * * Note that, however, we do need to keep source address in IPsec SA. * IKE specification and PF_KEY specification do assume that we * keep source address in IPsec SA. We see a tricky situation here. */ struct secasvar * -key_allocsa(union sockaddr_union *dst, u_int proto, u_int32_t spi, - const char* where, int tag) +key_allocsa(union sockaddr_union *dst, uint8_t proto, uint32_t spi) { - struct secashead *sah; + SAHTREE_RLOCK_TRACKER; struct secasvar *sav; - u_int stateidx, arraysize, state; - const u_int *saorder_state_valid; -#ifdef IPSEC_NAT_T - int natt_chkport; -#endif - IPSEC_ASSERT(dst != NULL, ("null dst address")); + IPSEC_ASSERT(proto == IPPROTO_ESP || proto == IPPROTO_AH || + proto == IPPROTO_IPCOMP, ("unexpected security protocol %u", + proto)); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); - -#ifdef IPSEC_NAT_T - natt_chkport = (dst->sa.sa_family == AF_INET && - dst->sa.sa_len == sizeof(struct sockaddr_in) && - dst->sin.sin_port != 0); -#endif - + SAHTREE_RLOCK(); + LIST_FOREACH(sav, SAVHASH_HASH(spi), spihash) { + if (sav->spi == spi) + break; + } /* - * searching SAD. - * XXX: to be checked internal IP header somewhere. Also when - * IPsec tunnel packet is received. But ESP tunnel mode is - * encrypted so we can't check internal IP header. + * We use single SPI namespace for all protocols, so it is + * impossible to have SPI duplicates in the SAVHASH. */ - SAHTREE_LOCK(); - if (V_key_preferred_oldsa) { - saorder_state_valid = saorder_state_valid_prefer_old; - arraysize = _ARRAYLEN(saorder_state_valid_prefer_old); - } else { - saorder_state_valid = saorder_state_valid_prefer_new; - arraysize = _ARRAYLEN(saorder_state_valid_prefer_new); + if (sav != NULL) { + if (sav->state != SADB_SASTATE_LARVAL && + sav->sah->saidx.proto == proto && + key_sockaddrcmp(&dst->sa, + &sav->sah->saidx.dst.sa, 0) == 0) + SAV_ADDREF(sav); + else + sav = NULL; } - LIST_FOREACH(sah, &V_sahtree, chain) { - int checkport; + SAHTREE_RUNLOCK(); - /* search valid state */ - for (stateidx = 0; stateidx < arraysize; stateidx++) { - state = saorder_state_valid[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - /* sanity check */ - KEY_CHKSASTATE(sav->state, state, __func__); - /* do not return entries w/ unusable state */ - if (sav->state != SADB_SASTATE_MATURE && - sav->state != SADB_SASTATE_DYING) - continue; - if (proto != sav->sah->saidx.proto) - continue; - if (spi != sav->spi) - continue; - checkport = 0; -#ifdef IPSEC_NAT_T - /* - * Really only check ports when this is a NAT-T - * SA. Otherwise other lookups providing ports - * might suffer. - */ - if (sav->natt_type && natt_chkport) - checkport = 1; -#endif -#if 0 /* don't check src */ - /* check src address */ - if (key_sockaddrcmp(&src->sa, - &sav->sah->saidx.src.sa, checkport) != 0) - continue; -#endif - /* check dst address */ - if (key_sockaddrcmp(&dst->sa, - &sav->sah->saidx.dst.sa, checkport) != 0) - continue; - sa_addref(sav); - goto done; - } - } + if (sav == NULL) { + KEYDBG(IPSEC_STAMP, + char buf[IPSEC_ADDRSTRLEN]; + printf("%s: SA not found for spi %u proto %u dst %s\n", + __func__, ntohl(spi), proto, ipsec_address(dst, buf, + sizeof(buf)))); + } else { + KEYDBG(IPSEC_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); } - sav = NULL; -done: - SAHTREE_UNLOCK(); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SA:%p; refcnt %u\n", __func__, - sav, sav ? sav->refcnt : 0)); - return sav; + return (sav); } struct secasvar * key_allocsa_tunnel(union sockaddr_union *src, union sockaddr_union *dst, - u_int proto, const char* where, int tag) + uint8_t proto) { + SAHTREE_RLOCK_TRACKER; + struct secasindex saidx; struct secashead *sah; struct secasvar *sav; - u_int stateidx, arraysize, state; - const u_int *saorder_state_valid; IPSEC_ASSERT(src != NULL, ("null src address")); IPSEC_ASSERT(dst != NULL, ("null dst address")); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u\n", __func__, where, tag)); - SAHTREE_LOCK(); - if (V_key_preferred_oldsa) { - saorder_state_valid = saorder_state_valid_prefer_old; - arraysize = _ARRAYLEN(saorder_state_valid_prefer_old); - } else { - saorder_state_valid = saorder_state_valid_prefer_new; - arraysize = _ARRAYLEN(saorder_state_valid_prefer_new); - } - LIST_FOREACH(sah, &V_sahtree, chain) { - /* search valid state */ - for (stateidx = 0; stateidx < arraysize; stateidx++) { - state = saorder_state_valid[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - /* sanity check */ - KEY_CHKSASTATE(sav->state, state, __func__); - /* do not return entries w/ unusable state */ - if (sav->state != SADB_SASTATE_MATURE && - sav->state != SADB_SASTATE_DYING) - continue; - if (IPSEC_MODE_TUNNEL != sav->sah->saidx.mode) - continue; - if (proto != sav->sah->saidx.proto) - continue; - /* check src address */ - if (key_sockaddrcmp(&src->sa, - &sav->sah->saidx.src.sa, 0) != 0) - continue; - /* check dst address */ - if (key_sockaddrcmp(&dst->sa, - &sav->sah->saidx.dst.sa, 0) != 0) - continue; - sa_addref(sav); - goto done; - } + KEY_SETSECASIDX(proto, IPSEC_MODE_TUNNEL, 0, &src->sa, + &dst->sa, &saidx); + + sav = NULL; + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(&saidx), addrhash) { + if (IPSEC_MODE_TUNNEL != sah->saidx.mode) + continue; + if (proto != sah->saidx.proto) + continue; + if (key_sockaddrcmp(&src->sa, &sav->sah->saidx.src.sa, 0) != 0) + continue; + if (key_sockaddrcmp(&dst->sa, &sav->sah->saidx.dst.sa, 0) != 0) + continue; + /* XXXAE: is key_preferred_oldsa reasonably?*/ + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) { + SAV_ADDREF(sav); + break; } } - sav = NULL; -done: - SAHTREE_UNLOCK(); - - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s return SA:%p; refcnt %u\n", __func__, - sav, sav ? sav->refcnt : 0)); + SAHTREE_RUNLOCK(); + KEYDBG(IPSEC_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + if (sav != NULL) + KEYDBG(IPSEC_DATA, kdebug_secasv(sav)); return (sav); } /* * Must be called after calling key_allocsp(). - * For both the packet without socket and key_freeso(). */ void -_key_freesp(struct secpolicy **spp, const char* where, int tag) +key_freesp(struct secpolicy **spp) { - struct ipsecrequest *isr, *nextisr; struct secpolicy *sp = *spp; IPSEC_ASSERT(sp != NULL, ("null sp")); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s SP:%p (ID=%u) from %s:%u; refcnt now %u\n", - __func__, sp, sp->id, where, tag, sp->refcnt)); - if (SP_DELREF(sp) == 0) return; + + KEYDBG(IPSEC_STAMP, + printf("%s: last reference to SP(%p)\n", __func__, sp)); + KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); + *spp = NULL; - for (isr = sp->req; isr != NULL; isr = nextisr) { - if (isr->sav != NULL) { - KEY_FREESAV(&isr->sav); - isr->sav = NULL; - } - nextisr = isr->next; - ipsec_delisr(isr); - } + while (sp->tcount > 0) + ipsec_delisr(sp->req[--sp->tcount]); free(sp, M_IPSEC_SP); } @@ -1257,21 +1089,25 @@ key_unlink(struct secpolicy *sp) { - IPSEC_ASSERT(sp != NULL, ("null sp")); IPSEC_ASSERT(sp->spidx.dir == IPSEC_DIR_INBOUND || sp->spidx.dir == IPSEC_DIR_OUTBOUND, ("invalid direction %u", sp->spidx.dir)); SPTREE_UNLOCK_ASSERT(); + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); SPTREE_WLOCK(); - if (sp->state == IPSEC_SPSTATE_DEAD) { + if (sp->state != IPSEC_SPSTATE_ALIVE) { + /* SP is already unlinked */ SPTREE_WUNLOCK(); return; } sp->state = IPSEC_SPSTATE_DEAD; TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); + LIST_REMOVE(sp, idhash); + V_sp_genid++; SPTREE_WUNLOCK(); - KEY_FREESP(&sp); + key_freesp(&sp); } /* @@ -1282,78 +1118,101 @@ { struct secpolicy *sp; - SPTREE_WLOCK(); + SPTREE_WLOCK_ASSERT(); TAILQ_FOREACH(sp, &V_sptree[newsp->spidx.dir], chain) { if (newsp->priority < sp->priority) { TAILQ_INSERT_BEFORE(sp, newsp, chain); goto done; } } - TAILQ_INSERT_TAIL(&V_sptree[newsp->spidx.dir], newsp, chain); - done: + LIST_INSERT_HEAD(SPHASH_HASH(newsp->id), newsp, idhash); newsp->state = IPSEC_SPSTATE_ALIVE; - SPTREE_WUNLOCK(); + V_sp_genid++; } /* - * Must be called after calling key_allocsp(). - * For the packet with socket. + * Insert a bunch of VTI secpolicies into the SPDB. + * We keep VTI policies in the separate list due to following reasons: + * 1) they should be immutable to user's or some deamon's attempts to + * delete. The only way delete such policies - destroy or unconfigure + * corresponding virtual inteface. + * 2) such policies have traffic selector that matches all traffic per + * address family. + * Since all VTI policies have the same priority, we don't care about + * policies order. */ -void -key_freeso(struct socket *so) +int +key_register_ifnet(struct secpolicy **spp, u_int count) { - IPSEC_ASSERT(so != NULL, ("null so")); + struct mbuf *m; + u_int i; - switch (so->so_proto->pr_domain->dom_family) { -#if defined(INET) || defined(INET6) -#ifdef INET - case PF_INET: -#endif -#ifdef INET6 - case PF_INET6: -#endif - { - struct inpcb *pcb = sotoinpcb(so); + SPTREE_WLOCK(); + /* + * First of try to acquire id for each SP. + */ + for (i = 0; i < count; i++) { + IPSEC_ASSERT(spp[i]->spidx.dir == IPSEC_DIR_INBOUND || + spp[i]->spidx.dir == IPSEC_DIR_OUTBOUND, + ("invalid direction %u", spp[i]->spidx.dir)); - /* Does it have a PCB ? */ - if (pcb == NULL) - return; - key_freesp_so(&pcb->inp_sp->sp_in); - key_freesp_so(&pcb->inp_sp->sp_out); - } - break; -#endif /* INET || INET6 */ - default: - ipseclog((LOG_DEBUG, "%s: unknown address family=%d.\n", - __func__, so->so_proto->pr_domain->dom_family)); - return; + if ((spp[i]->id = key_getnewspid()) == 0) { + SPTREE_WUNLOCK(); + return (EAGAIN); + } } + for (i = 0; i < count; i++) { + TAILQ_INSERT_TAIL(&V_sptree_ifnet[spp[i]->spidx.dir], + spp[i], chain); + /* + * NOTE: despite the fact that we keep VTI SP in the + * separate list, SPHASH contains policies from both + * sources. Thus SADB_X_SPDGET will correctly return + * SP by id, because it uses SPHASH for lookups. + */ + LIST_INSERT_HEAD(SPHASH_HASH(spp[i]->id), spp[i], idhash); + spp[i]->state = IPSEC_SPSTATE_IFNET; + } + SPTREE_WUNLOCK(); + /* + * Notify user processes about new SP. + */ + for (i = 0; i < count; i++) { + m = key_setdumpsp(spp[i], SADB_X_SPDADD, 0, 0); + if (m != NULL) + key_sendup_mbuf(NULL, m, KEY_SENDUP_ALL); + } + return (0); } -static void -key_freesp_so(struct secpolicy **sp) -{ - IPSEC_ASSERT(sp != NULL && *sp != NULL, ("null sp")); - - if ((*sp)->policy == IPSEC_POLICY_ENTRUST || - (*sp)->policy == IPSEC_POLICY_BYPASS) - return; - - IPSEC_ASSERT((*sp)->policy == IPSEC_POLICY_IPSEC, - ("invalid policy %u", (*sp)->policy)); - KEY_FREESP(sp); -} - void -key_addrefsa(struct secasvar *sav, const char* where, int tag) +key_unregister_ifnet(struct secpolicy **spp, u_int count) { + struct mbuf *m; + u_int i; - IPSEC_ASSERT(sav != NULL, ("null sav")); - IPSEC_ASSERT(sav->refcnt > 0, ("refcount must exist")); + SPTREE_WLOCK(); + for (i = 0; i < count; i++) { + IPSEC_ASSERT(spp[i]->spidx.dir == IPSEC_DIR_INBOUND || + spp[i]->spidx.dir == IPSEC_DIR_OUTBOUND, + ("invalid direction %u", spp[i]->spidx.dir)); - sa_addref(sav); + if (spp[i]->state != IPSEC_SPSTATE_IFNET) + continue; + spp[i]->state = IPSEC_SPSTATE_DEAD; + TAILQ_REMOVE(&V_sptree_ifnet[spp[i]->spidx.dir], + spp[i], chain); + LIST_REMOVE(spp[i], idhash); + } + SPTREE_WUNLOCK(); + + for (i = 0; i < count; i++) { + m = key_setdumpsp(spp[i], SADB_X_SPDDELETE, 0, 0); + if (m != NULL) + key_sendup_mbuf(NULL, m, KEY_SENDUP_ALL); + } } /* @@ -1362,23 +1221,54 @@ * for a policy. */ void -key_freesav(struct secasvar **psav, const char* where, int tag) +key_freesav(struct secasvar **psav) { struct secasvar *sav = *psav; IPSEC_ASSERT(sav != NULL, ("null sav")); + if (SAV_DELREF(sav) == 0) + return; - if (sa_delref(sav)) { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s SA:%p (SPI %u) from %s:%u; refcnt now %u\n", - __func__, sav, ntohl(sav->spi), where, tag, sav->refcnt)); - *psav = NULL; - key_delsav(sav); - } else { - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s SA:%p (SPI %u) from %s:%u; refcnt now %u\n", - __func__, sav, ntohl(sav->spi), where, tag, sav->refcnt)); + KEYDBG(IPSEC_STAMP, + printf("%s: last reference to SA(%p)\n", __func__, sav)); + + *psav = NULL; + key_delsav(sav); +} + +/* + * Unlink SA from SAH and SPI hash under SAHTREE_WLOCK. + * Expect that SA has extra reference due to lookup. + * Release this references, also release SAH reference after unlink. + */ +static void +key_unlinksav(struct secasvar *sav) +{ + struct secashead *sah; + + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + + SAHTREE_UNLOCK_ASSERT(); + SAHTREE_WLOCK(); + if (sav->state == SADB_SASTATE_DEAD) { + /* SA is already unlinked */ + SAHTREE_WUNLOCK(); + return; } + /* Unlink from SAH */ + if (sav->state == SADB_SASTATE_LARVAL) + TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); + else + TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); + /* Unlink from SPI hash */ + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + sah = sav->sah; + SAHTREE_WUNLOCK(); + key_freesav(&sav); + /* Since we are unlinked, release reference to SAH */ + key_freesah(&sah); } /* %%% SPD management */ @@ -1410,54 +1300,55 @@ /* * get SP by index. * OUT: NULL : not found - * others : found, pointer to a SP. + * others : found, pointer to referenced SP. */ static struct secpolicy * -key_getspbyid(u_int32_t id) +key_getspbyid(uint32_t id) { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; SPTREE_RLOCK(); - TAILQ_FOREACH(sp, &V_sptree[IPSEC_DIR_INBOUND], chain) { + LIST_FOREACH(sp, SPHASH_HASH(id), idhash) { if (sp->id == id) { SP_ADDREF(sp); - goto done; + break; } } - - TAILQ_FOREACH(sp, &V_sptree[IPSEC_DIR_OUTBOUND], chain) { - if (sp->id == id) { - SP_ADDREF(sp); - goto done; - } - } -done: SPTREE_RUNLOCK(); - - return sp; + return (sp); } struct secpolicy * -key_newsp(const char* where, int tag) +key_newsp(void) { - struct secpolicy *newsp = NULL; + struct secpolicy *sp; - newsp = (struct secpolicy *) - malloc(sizeof(struct secpolicy), M_IPSEC_SP, M_NOWAIT|M_ZERO); - if (newsp) - refcount_init(&newsp->refcnt, 1); + sp = malloc(sizeof(*sp), M_IPSEC_SP, M_NOWAIT | M_ZERO); + if (sp != NULL) + SP_INITREF(sp); + return (sp); +} - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u return SP:%p\n", __func__, - where, tag, newsp)); - return newsp; +struct ipsecrequest * +ipsec_newisr(void) +{ + + return (malloc(sizeof(struct ipsecrequest), M_IPSEC_SR, + M_NOWAIT | M_ZERO)); } +void +ipsec_delisr(struct ipsecrequest *p) +{ + + free(p, M_IPSEC_SR); +} + /* * create secpolicy structure from sadb_x_policy structure. - * NOTE: `state', `secpolicyindex' in secpolicy structure are not set, - * so must be set properly later. + * NOTE: `state', `secpolicyindex' and 'id' in secpolicy structure + * are not set, so must be set properly later. */ struct secpolicy * key_msg2sp(struct sadb_x_policy *xpl0, size_t len, int *error) @@ -1473,7 +1364,7 @@ return NULL; } - if ((newsp = KEY_NEWSP()) == NULL) { + if ((newsp = key_newsp()) == NULL) { *error = ENOBUFS; return NULL; } @@ -1481,6 +1372,7 @@ newsp->spidx.dir = xpl0->sadb_x_policy_dir; newsp->policy = xpl0->sadb_x_policy_type; newsp->priority = xpl0->sadb_x_policy_priority; + newsp->tcount = 0; /* check policy */ switch (xpl0->sadb_x_policy_type) { @@ -1488,20 +1380,19 @@ case IPSEC_POLICY_NONE: case IPSEC_POLICY_ENTRUST: case IPSEC_POLICY_BYPASS: - newsp->req = NULL; break; case IPSEC_POLICY_IPSEC: { - int tlen; struct sadb_x_ipsecrequest *xisr; - struct ipsecrequest **p_isr = &newsp->req; + struct ipsecrequest *isr; + int tlen; /* validity check */ if (PFKEY_EXTLEN(xpl0) < sizeof(*xpl0)) { ipseclog((LOG_DEBUG, "%s: Invalid msg length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } @@ -1514,22 +1405,33 @@ if (xisr->sadb_x_ipsecrequest_len < sizeof(*xisr)) { ipseclog((LOG_DEBUG, "%s: invalid ipsecrequest " "length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } + if (newsp->tcount >= IPSEC_MAXREQ) { + ipseclog((LOG_DEBUG, + "%s: too many ipsecrequests.\n", + __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } + /* allocate request buffer */ /* NB: data structure is zero'd */ - *p_isr = ipsec_newisr(); - if ((*p_isr) == NULL) { + isr = ipsec_newisr(); + if (isr == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = ENOBUFS; return NULL; } + newsp->req[newsp->tcount++] = isr; + /* set values */ switch (xisr->sadb_x_ipsecrequest_proto) { case IPPROTO_ESP: @@ -1540,11 +1442,12 @@ ipseclog((LOG_DEBUG, "%s: invalid proto type=%u\n", __func__, xisr->sadb_x_ipsecrequest_proto)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EPROTONOSUPPORT; return NULL; } - (*p_isr)->saidx.proto = xisr->sadb_x_ipsecrequest_proto; + isr->saidx.proto = + (uint8_t)xisr->sadb_x_ipsecrequest_proto; switch (xisr->sadb_x_ipsecrequest_mode) { case IPSEC_MODE_TRANSPORT: @@ -1555,11 +1458,11 @@ ipseclog((LOG_DEBUG, "%s: invalid mode=%u\n", __func__, xisr->sadb_x_ipsecrequest_mode)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - (*p_isr)->saidx.mode = xisr->sadb_x_ipsecrequest_mode; + isr->saidx.mode = xisr->sadb_x_ipsecrequest_mode; switch (xisr->sadb_x_ipsecrequest_level) { case IPSEC_LEVEL_DEFAULT: @@ -1586,16 +1489,16 @@ if (xisr->sadb_x_ipsecrequest_reqid == 0) { u_int32_t reqid; if ((reqid = key_newreqid()) == 0) { - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = ENOBUFS; return NULL; } - (*p_isr)->saidx.reqid = reqid; + isr->saidx.reqid = reqid; xisr->sadb_x_ipsecrequest_reqid = reqid; } else { /* set it for manual keying. */ - (*p_isr)->saidx.reqid = - xisr->sadb_x_ipsecrequest_reqid; + isr->saidx.reqid = + xisr->sadb_x_ipsecrequest_reqid; } break; @@ -1603,59 +1506,72 @@ ipseclog((LOG_DEBUG, "%s: invalid level=%u\n", __func__, xisr->sadb_x_ipsecrequest_level)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - (*p_isr)->level = xisr->sadb_x_ipsecrequest_level; + isr->level = xisr->sadb_x_ipsecrequest_level; /* set IP addresses if there */ if (xisr->sadb_x_ipsecrequest_len > sizeof(*xisr)) { struct sockaddr *paddr; paddr = (struct sockaddr *)(xisr + 1); - /* validity check */ if (paddr->sa_len - > sizeof((*p_isr)->saidx.src)) { + > sizeof(isr->saidx.src)) { ipseclog((LOG_DEBUG, "%s: invalid " "request address length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - bcopy(paddr, &(*p_isr)->saidx.src, - paddr->sa_len); + bcopy(paddr, &isr->saidx.src, paddr->sa_len); + paddr = (struct sockaddr *)((caddr_t)paddr + + paddr->sa_len); - paddr = (struct sockaddr *)((caddr_t)paddr - + paddr->sa_len); - /* validity check */ if (paddr->sa_len - > sizeof((*p_isr)->saidx.dst)) { + > sizeof(isr->saidx.dst)) { ipseclog((LOG_DEBUG, "%s: invalid " "request address length.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } - bcopy(paddr, &(*p_isr)->saidx.dst, - paddr->sa_len); + /* AF family should match */ + if (paddr->sa_family != + isr->saidx.src.sa.sa_family) { + ipseclog((LOG_DEBUG, "%s: address " + "family doesn't match.\n", + __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } + bcopy(paddr, &isr->saidx.dst, paddr->sa_len); + } else { + /* + * Addresses for TUNNEL mode requests are + * mandatory. + */ + if (isr->saidx.mode == IPSEC_MODE_TUNNEL) { + ipseclog((LOG_DEBUG, "%s: missing " + "request addresses.\n", __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } } - - (*p_isr)->sp = newsp; - - /* initialization for the next. */ - p_isr = &(*p_isr)->next; tlen -= xisr->sadb_x_ipsecrequest_len; /* validity check */ if (tlen < 0) { ipseclog((LOG_DEBUG, "%s: becoming tlen < 0.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } @@ -1663,72 +1579,104 @@ xisr = (struct sadb_x_ipsecrequest *)((caddr_t)xisr + xisr->sadb_x_ipsecrequest_len); } + /* XXXAE: LARVAL SP */ + if (newsp->tcount < 1) { + ipseclog((LOG_DEBUG, "%s: valid IPSEC transforms " + "not found.\n", __func__)); + key_freesp(&newsp); + *error = EINVAL; + return (NULL); + } } break; default: ipseclog((LOG_DEBUG, "%s: invalid policy type.\n", __func__)); - KEY_FREESP(&newsp); + key_freesp(&newsp); *error = EINVAL; return NULL; } *error = 0; - return newsp; + return (newsp); } -static u_int32_t -key_newreqid() +uint32_t +key_newreqid(void) { - static u_int32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; + static uint32_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; - auto_reqid = (auto_reqid == ~0 - ? IPSEC_MANUAL_REQID_MAX + 1 : auto_reqid + 1); + if (auto_reqid == ~0) + auto_reqid = IPSEC_MANUAL_REQID_MAX + 1; + else + auto_reqid++; /* XXX should be unique check */ - - return auto_reqid; + return (auto_reqid); } /* * copy secpolicy struct to sadb_x_policy structure indicated. */ -struct mbuf * -key_sp2msg(struct secpolicy *sp) +static struct mbuf * +key_sp2mbuf(struct secpolicy *sp) { - struct sadb_x_policy *xpl; - int tlen; - caddr_t p; struct mbuf *m; + size_t tlen; - IPSEC_ASSERT(sp != NULL, ("null policy")); - tlen = key_getspreqmsglen(sp); - m = m_get2(tlen, M_NOWAIT, MT_DATA, 0); if (m == NULL) return (NULL); m_align(m, tlen); m->m_len = tlen; - xpl = mtod(m, struct sadb_x_policy *); - bzero(xpl, tlen); + if (key_sp2msg(sp, m->m_data, &tlen) != 0) { + m_freem(m); + return (NULL); + } + return (m); +} - xpl->sadb_x_policy_len = PFKEY_UNIT64(tlen); +int +key_sp2msg(struct secpolicy *sp, void *request, size_t *len) +{ + struct sadb_x_ipsecrequest *xisr; + struct sadb_x_policy *xpl; + struct ipsecrequest *isr; + size_t xlen, ilen; + caddr_t p; + int error, i; + + IPSEC_ASSERT(sp != NULL, ("null policy")); + + xlen = sizeof(*xpl); + if (*len < xlen) + return (EINVAL); + + error = 0; + bzero(request, *len); + xpl = (struct sadb_x_policy *)request; xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY; xpl->sadb_x_policy_type = sp->policy; xpl->sadb_x_policy_dir = sp->spidx.dir; xpl->sadb_x_policy_id = sp->id; xpl->sadb_x_policy_priority = sp->priority; - p = (caddr_t)xpl + sizeof(*xpl); /* if is the policy for ipsec ? */ if (sp->policy == IPSEC_POLICY_IPSEC) { - struct sadb_x_ipsecrequest *xisr; - struct ipsecrequest *isr; - - for (isr = sp->req; isr != NULL; isr = isr->next) { - + p = (caddr_t)xpl + sizeof(*xpl); + for (i = 0; i < sp->tcount; i++) { + isr = sp->req[i]; + ilen = PFKEY_ALIGN8(sizeof(*xisr) + + isr->saidx.src.sa.sa_len + + isr->saidx.dst.sa.sa_len); + xlen += ilen; + if (xlen > *len) { + error = ENOBUFS; + /* Calculate needed size */ + continue; + } xisr = (struct sadb_x_ipsecrequest *)p; - + xisr->sadb_x_ipsecrequest_len = ilen; xisr->sadb_x_ipsecrequest_proto = isr->saidx.proto; xisr->sadb_x_ipsecrequest_mode = isr->saidx.mode; xisr->sadb_x_ipsecrequest_level = isr->level; @@ -1738,16 +1686,15 @@ bcopy(&isr->saidx.src, p, isr->saidx.src.sa.sa_len); p += isr->saidx.src.sa.sa_len; bcopy(&isr->saidx.dst, p, isr->saidx.dst.sa.sa_len); - p += isr->saidx.src.sa.sa_len; - - xisr->sadb_x_ipsecrequest_len = - PFKEY_ALIGN8(sizeof(*xisr) - + isr->saidx.src.sa.sa_len - + isr->saidx.dst.sa.sa_len); + p += isr->saidx.dst.sa.sa_len; } } - - return m; + xpl->sadb_x_policy_len = PFKEY_UNIT64(xlen); + if (error == 0) + *len = xlen; + else + *len = sizeof(*xpl); + return (error); } /* m will not be freed nor modified */ @@ -1839,15 +1786,16 @@ * SPDSETIDX like SPDADD without a part of policy requests. * SPDUPDATE replace a unique policy entry. * + * XXXAE: serialize this in PF_KEY to avoid races. * m will always be freed. */ static int key_spdadd(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + struct secpolicyindex spidx; struct sadb_address *src0, *dst0; struct sadb_x_policy *xpl0, *xpl; struct sadb_lifetime *lft = NULL; - struct secpolicyindex spidx; struct secpolicy *newsp; int error; @@ -1856,24 +1804,26 @@ IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - mhp->ext[SADB_X_EXT_POLICY] == NULL) { - ipseclog((LOG_DEBUG, "key_spdadd: invalid message is passed.\n")); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL) { - if (mhp->extlen[SADB_EXT_LIFETIME_HARD] - < sizeof(struct sadb_lifetime)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (!SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_LIFETIME_HARD)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); return key_senderror(so, m, EINVAL); } lft = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD]; @@ -1883,141 +1833,93 @@ dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY]; - /* - * Note: do not parse SADB_X_EXT_NAT_T_* here: - * we are processing traffic endpoints. - */ - - /* make secindex */ - /* XXX boundary check against sa_len */ - KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, - src0 + 1, - dst0 + 1, - src0->sadb_address_prefixlen, - dst0->sadb_address_prefixlen, - src0->sadb_address_proto, - &spidx); - - /* checking the direciton. */ + /* check the direciton */ switch (xpl0->sadb_x_policy_dir) { case IPSEC_DIR_INBOUND: case IPSEC_DIR_OUTBOUND: break; default: - ipseclog((LOG_DEBUG, "%s: Invalid SP direction.\n", __func__)); - mhp->msg->sadb_msg_errno = EINVAL; - return 0; + ipseclog((LOG_DEBUG, "%s: invalid SP direction.\n", __func__)); + return key_senderror(so, m, EINVAL); } - - /* check policy */ /* key_spdadd() accepts DISCARD, NONE and IPSEC. */ - if (xpl0->sadb_x_policy_type == IPSEC_POLICY_ENTRUST - || xpl0->sadb_x_policy_type == IPSEC_POLICY_BYPASS) { - ipseclog((LOG_DEBUG, "%s: Invalid policy type.\n", __func__)); + if (xpl0->sadb_x_policy_type != IPSEC_POLICY_DISCARD && + xpl0->sadb_x_policy_type != IPSEC_POLICY_NONE && + xpl0->sadb_x_policy_type != IPSEC_POLICY_IPSEC) { + ipseclog((LOG_DEBUG, "%s: invalid policy type.\n", __func__)); return key_senderror(so, m, EINVAL); } /* policy requests are mandatory when action is ipsec. */ - if (mhp->msg->sadb_msg_type != SADB_X_SPDSETIDX - && xpl0->sadb_x_policy_type == IPSEC_POLICY_IPSEC - && mhp->extlen[SADB_X_EXT_POLICY] <= sizeof(*xpl0)) { - ipseclog((LOG_DEBUG, "%s: some policy requests part required\n", - __func__)); + if (xpl0->sadb_x_policy_type == IPSEC_POLICY_IPSEC && + mhp->extlen[SADB_X_EXT_POLICY] <= sizeof(*xpl0)) { + ipseclog((LOG_DEBUG, + "%s: policy requests required.\n", __func__)); return key_senderror(so, m, EINVAL); } - /* - * checking there is SP already or not. - * SPDUPDATE doesn't depend on whether there is a SP or not. - * If the type is either SPDADD or SPDSETIDX AND a SP is found, - * then error. - */ - newsp = key_getsp(&spidx); - if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { - if (newsp) { - key_unlink(newsp); - KEY_FREESP(&newsp); - } - } else { - if (newsp != NULL) { - KEY_FREESP(&newsp); - ipseclog((LOG_DEBUG, "%s: a SP entry exists already.\n", - __func__)); - return key_senderror(so, m, EEXIST); - } - } - - /* XXX: there is race between key_getsp and key_msg2sp. */ - - /* allocation new SP entry */ - if ((newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error)) == NULL) { + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0 || + src0->sadb_address_proto != dst0->sadb_address_proto) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); return key_senderror(so, m, error); } - - if ((newsp->id = key_getnewspid()) == 0) { - KEY_FREESP(&newsp); - return key_senderror(so, m, ENOBUFS); - } - - /* XXX boundary check against sa_len */ + /* make secindex */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, dst0 + 1, src0->sadb_address_prefixlen, dst0->sadb_address_prefixlen, src0->sadb_address_proto, - &newsp->spidx); - - /* sanity check on addr pair */ - if (((struct sockaddr *)(src0 + 1))->sa_family != - ((struct sockaddr *)(dst0+ 1))->sa_family) { - KEY_FREESP(&newsp); - return key_senderror(so, m, EINVAL); - } - if (((struct sockaddr *)(src0 + 1))->sa_len != - ((struct sockaddr *)(dst0+ 1))->sa_len) { - KEY_FREESP(&newsp); - return key_senderror(so, m, EINVAL); - } -#if 1 - if (newsp->req && newsp->req->saidx.src.sa.sa_family && - newsp->req->saidx.dst.sa.sa_family) { - if (newsp->req->saidx.src.sa.sa_family != - newsp->req->saidx.dst.sa.sa_family) { - KEY_FREESP(&newsp); - return key_senderror(so, m, EINVAL); + &spidx); + /* Checking there is SP already or not. */ + newsp = key_getsp(&spidx); + if (newsp != NULL) { + if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { + KEYDBG(KEY_STAMP, + printf("%s: unlink SP(%p) for SPDUPDATE\n", + __func__, newsp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(newsp)); + key_unlink(newsp); + key_freesp(&newsp); + } else { + key_freesp(&newsp); + ipseclog((LOG_DEBUG, "%s: a SP entry exists already.", + __func__)); + return (key_senderror(so, m, EEXIST)); } } -#endif - newsp->created = time_second; - newsp->lastused = newsp->created; + /* allocate new SP entry */ + if ((newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error)) == NULL) { + return key_senderror(so, m, error); + } + + newsp->lastused = newsp->created = time_second; newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; newsp->validtime = lft ? lft->sadb_lifetime_usetime : 0; + bcopy(&spidx, &newsp->spidx, sizeof(spidx)); + /* XXXAE: there is race between key_getsp() and key_insertsp() */ + SPTREE_WLOCK(); + if ((newsp->id = key_getnewspid()) == 0) { + SPTREE_WUNLOCK(); + key_freesp(&newsp); + return key_senderror(so, m, ENOBUFS); + } key_insertsp(newsp); + SPTREE_WUNLOCK(); - /* delete the entry in spacqtree */ - if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) { - struct secspacq *spacq = key_getspacq(&spidx); - if (spacq != NULL) { - /* reset counter in order to deletion by timehandler. */ - spacq->created = time_second; - spacq->count = 0; - SPACQ_UNLOCK(); - } - } + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, newsp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(newsp)); { struct mbuf *n, *mpolicy; struct sadb_msg *newmsg; int off; - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ - /* create new sadb_msg to reply. */ if (lft) { n = key_gather_mbuf(m, mhp, 2, 5, SADB_EXT_RESERVED, @@ -2065,30 +1967,32 @@ * 0: failure. * others: success. */ -static u_int32_t -key_getnewspid() +static uint32_t +key_getnewspid(void) { - u_int32_t newid = 0; - int count = V_key_spi_trycnt; /* XXX */ struct secpolicy *sp; + uint32_t newid = 0; + int count = V_key_spi_trycnt; /* XXX */ - /* when requesting to allocate spi ranged */ + SPTREE_WLOCK_ASSERT(); while (count--) { - newid = (V_policy_id = (V_policy_id == ~0 ? 1 : V_policy_id + 1)); - - if ((sp = key_getspbyid(newid)) == NULL) + if (V_policy_id == ~0) /* overflowed */ + newid = V_policy_id = 1; + else + newid = ++V_policy_id; + LIST_FOREACH(sp, SPHASH_HASH(newid), idhash) { + if (sp->id == newid) + break; + } + if (sp == NULL) break; - - KEY_FREESP(&sp); } - if (count == 0 || newid == 0) { - ipseclog((LOG_DEBUG, "%s: to allocate policy id is failed.\n", - __func__)); - return 0; + ipseclog((LOG_DEBUG, "%s: failed to allocate policy id.\n", + __func__)); + return (0); } - - return newid; + return (newid); } /* @@ -2107,9 +2011,9 @@ key_spddelete(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + struct secpolicyindex spidx; struct sadb_address *src0, *dst0; struct sadb_x_policy *xpl0; - struct secpolicyindex spidx; struct secpolicy *sp; IPSEC_ASSERT(so != NULL, ("null so")); @@ -2117,18 +2021,19 @@ IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - mhp->ext[SADB_X_EXT_POLICY] == NULL) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } @@ -2136,13 +2041,29 @@ dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; xpl0 = (struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY]; - /* - * Note: do not parse SADB_X_EXT_NAT_T_* here: - * we are processing traffic endpoints. - */ - + /* check the direciton */ + switch (xpl0->sadb_x_policy_dir) { + case IPSEC_DIR_INBOUND: + case IPSEC_DIR_OUTBOUND: + break; + default: + ipseclog((LOG_DEBUG, "%s: invalid SP direction.\n", __func__)); + return key_senderror(so, m, EINVAL); + } + /* Only DISCARD, NONE and IPSEC are allowed */ + if (xpl0->sadb_x_policy_type != IPSEC_POLICY_DISCARD && + xpl0->sadb_x_policy_type != IPSEC_POLICY_NONE && + xpl0->sadb_x_policy_type != IPSEC_POLICY_IPSEC) { + ipseclog((LOG_DEBUG, "%s: invalid policy type.\n", __func__)); + return key_senderror(so, m, EINVAL); + } + if (key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)) != 0 || + src0->sadb_address_proto != dst0->sadb_address_proto) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, EINVAL); + } /* make secindex */ - /* XXX boundary check against sa_len */ KEY_SETSECSPIDX(xpl0->sadb_x_policy_dir, src0 + 1, dst0 + 1, @@ -2151,16 +2072,6 @@ src0->sadb_address_proto, &spidx); - /* checking the direciton. */ - switch (xpl0->sadb_x_policy_dir) { - case IPSEC_DIR_INBOUND: - case IPSEC_DIR_OUTBOUND: - break; - default: - ipseclog((LOG_DEBUG, "%s: Invalid SP direction.\n", __func__)); - return key_senderror(so, m, EINVAL); - } - /* Is there SP in SPD ? */ if ((sp = key_getsp(&spidx)) == NULL) { ipseclog((LOG_DEBUG, "%s: no SP found.\n", __func__)); @@ -2170,18 +2081,16 @@ /* save policy id to buffer to be returned. */ xpl0->sadb_x_policy_id = sp->id; + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); key_unlink(sp); - KEY_FREESP(&sp); + key_freesp(&sp); { struct mbuf *n; struct sadb_msg *newmsg; - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ - /* create new sadb_msg to reply. */ n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED, SADB_X_EXT_POLICY, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); @@ -2213,30 +2122,42 @@ key_spddelete2(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - u_int32_t id; struct secpolicy *sp; + uint32_t id; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_X_EXT_POLICY] == NULL || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", __func__)); + if (SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { + ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", + __func__)); return key_senderror(so, m, EINVAL); } - id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; + id = ((struct sadb_x_policy *) + mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; /* Is there SP in SPD ? */ if ((sp = key_getspbyid(id)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no SP found id:%u.\n", __func__, id)); + ipseclog((LOG_DEBUG, "%s: no SP found for id %u.\n", + __func__, id)); return key_senderror(so, m, EINVAL); } + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); key_unlink(sp); - KEY_FREESP(&sp); + if (sp->state != IPSEC_SPSTATE_DEAD) { + ipseclog((LOG_DEBUG, "%s: failed to delete SP with id %u.\n", + __func__, id)); + key_freesp(&sp); + return (key_senderror(so, m, EACCES)); + } + key_freesp(&sp); { struct mbuf *n, *nn; @@ -2301,33 +2222,35 @@ static int key_spdget(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - u_int32_t id; struct secpolicy *sp; struct mbuf *n; + uint32_t id; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_X_EXT_POLICY] == NULL || - mhp->extlen[SADB_X_EXT_POLICY] < sizeof(struct sadb_x_policy)) { + if (SADB_CHECKHDR(mhp, SADB_X_EXT_POLICY) || + SADB_CHECKLEN(mhp, SADB_X_EXT_POLICY)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - id = ((struct sadb_x_policy *)mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; + id = ((struct sadb_x_policy *) + mhp->ext[SADB_X_EXT_POLICY])->sadb_x_policy_id; /* Is there SP in SPD ? */ if ((sp = key_getspbyid(id)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no SP found id:%u.\n", __func__, id)); + ipseclog((LOG_DEBUG, "%s: no SP found for id %u.\n", + __func__, id)); return key_senderror(so, m, ENOENT); } n = key_setdumpsp(sp, SADB_X_SPDGET, mhp->msg->sadb_msg_seq, mhp->msg->sadb_msg_pid); - KEY_FREESP(&sp); + key_freesp(&sp); if (n != NULL) { m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); @@ -2413,7 +2336,7 @@ static int key_spdflush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - TAILQ_HEAD(, secpolicy) drainq; + struct secpolicy_queue drainq; struct sadb_msg *newmsg; struct secpolicy *sp, *nextsp; u_int dir; @@ -2434,14 +2357,18 @@ /* * We need to set state to DEAD for each policy to be sure, * that another thread won't try to unlink it. + * Also remove SP from sphash. */ - TAILQ_FOREACH(sp, &drainq, chain) + TAILQ_FOREACH(sp, &drainq, chain) { sp->state = IPSEC_SPSTATE_DEAD; + LIST_REMOVE(sp, idhash); + } + V_sp_genid++; SPTREE_WUNLOCK(); sp = TAILQ_FIRST(&drainq); while (sp != NULL) { nextsp = TAILQ_NEXT(sp, chain); - KEY_FREESP(&sp); + key_freesp(&sp); sp = nextsp; } @@ -2477,9 +2404,9 @@ { SPTREE_RLOCK_TRACKER; struct secpolicy *sp; + struct mbuf *n; int cnt; u_int dir; - struct mbuf *n; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -2493,6 +2420,9 @@ TAILQ_FOREACH(sp, &V_sptree[dir], chain) { cnt++; } + TAILQ_FOREACH(sp, &V_sptree_ifnet[dir], chain) { + cnt++; + } } if (cnt == 0) { @@ -2509,11 +2439,19 @@ if (n) key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } + TAILQ_FOREACH(sp, &V_sptree_ifnet[dir], chain) { + --cnt; + n = key_setdumpsp(sp, SADB_X_SPDDUMP, cnt, + mhp->msg->sadb_msg_pid); + + if (n) + key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + } } SPTREE_RUNLOCK(); m_freem(m); - return 0; + return (0); } static struct mbuf * @@ -2528,10 +2466,6 @@ goto fail; result = m; - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sp->spidx.src.sa, sp->spidx.prefs, sp->spidx.ul_proto); @@ -2546,7 +2480,7 @@ goto fail; m_cat(result, m); - m = key_sp2msg(sp); + m = key_sp2mbuf(sp); if (!m) goto fail; m_cat(result, m); @@ -2589,36 +2523,29 @@ m_freem(result); return NULL; } - /* * get PFKEY message length for security policy and request. */ -static u_int +static size_t key_getspreqmsglen(struct secpolicy *sp) { - u_int tlen; + size_t tlen, len; + int i; tlen = sizeof(struct sadb_x_policy); - /* if is the policy for ipsec ? */ if (sp->policy != IPSEC_POLICY_IPSEC) - return tlen; + return (tlen); /* get length of ipsec requests */ - { - struct ipsecrequest *isr; - int len; - - for (isr = sp->req; isr != NULL; isr = isr->next) { + for (i = 0; i < sp->tcount; i++) { len = sizeof(struct sadb_x_ipsecrequest) - + isr->saidx.src.sa.sa_len - + isr->saidx.dst.sa.sa_len; + + sp->req[i]->saidx.src.sa.sa_len + + sp->req[i]->saidx.dst.sa.sa_len; tlen += PFKEY_ALIGN8(len); } - } - - return tlen; + return (tlen); } /* @@ -2633,15 +2560,16 @@ static int key_spdexpire(struct secpolicy *sp) { - struct mbuf *result = NULL, *m; - int len; - int error = -1; struct sadb_lifetime *lt; + struct mbuf *result = NULL, *m; + int len, error = -1; - /* XXX: Why do we lock ? */ - IPSEC_ASSERT(sp != NULL, ("null secpolicy")); + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); + /* set msg header */ m = key_setsadbmsg(SADB_X_SPDEXPIRE, 0, 0, 0, 0, 0); if (!m) { @@ -2676,11 +2604,6 @@ lt->sadb_lifetime_usetime = sp->validtime; m_cat(result, m); - /* - * Note: do not send SADB_X_EXT_NAT_T_* here: - * we are sending traffic endpoints. - */ - /* set sadb_address for source */ m = key_setsadbaddr(SADB_EXT_ADDRESS_SRC, &sp->spidx.src.sa, @@ -2702,7 +2625,7 @@ m_cat(result, m); /* set secpolicy */ - m = key_sp2msg(sp); + m = key_sp2mbuf(sp); if (!m) { error = ENOBUFS; goto fail; @@ -2739,174 +2662,220 @@ /* %%% SAD management */ /* - * allocating a memory for new SA head, and copy from the values of mhp. + * allocating and initialize new SA head. * OUT: NULL : failure due to the lack of memory. * others : pointer to new SA head. */ static struct secashead * key_newsah(struct secasindex *saidx) { - struct secashead *newsah; + struct secashead *sah; - IPSEC_ASSERT(saidx != NULL, ("null saidx")); + sah = malloc(sizeof(struct secashead), M_IPSEC_SAH, + M_NOWAIT | M_ZERO); + if (sah == NULL) { + PFKEYSTAT_INC(in_nomem); + return (NULL); + } + TAILQ_INIT(&sah->savtree_larval); + TAILQ_INIT(&sah->savtree_alive); + sah->saidx = *saidx; + sah->state = SADB_SASTATE_DEAD; + SAH_INITREF(sah); - newsah = malloc(sizeof(struct secashead), M_IPSEC_SAH, M_NOWAIT|M_ZERO); - if (newsah != NULL) { - int i; - for (i = 0; i < sizeof(newsah->savtree)/sizeof(newsah->savtree[0]); i++) - LIST_INIT(&newsah->savtree[i]); - newsah->saidx = *saidx; + KEYDBG(KEY_STAMP, + printf("%s: SAH(%p)\n", __func__, sah)); + KEYDBG(KEY_DATA, kdebug_secash(sah, NULL)); + return (sah); +} - /* add to saidxtree */ - newsah->state = SADB_SASTATE_MATURE; +static void +key_freesah(struct secashead **psah) +{ + struct secashead *sah = *psah; - SAHTREE_LOCK(); - LIST_INSERT_HEAD(&V_sahtree, newsah, chain); - SAHTREE_UNLOCK(); - } - return(newsah); + if (SAH_DELREF(sah) == 0) + return; + + KEYDBG(KEY_STAMP, + printf("%s: last reference to SAH(%p)\n", __func__, sah)); + KEYDBG(KEY_DATA, kdebug_secash(sah, NULL)); + + *psah = NULL; + key_delsah(sah); } -/* - * delete SA index and all SA registerd. - */ static void key_delsah(struct secashead *sah) { - struct secasvar *sav, *nextsav; - u_int stateidx; - int zombie = 0; - IPSEC_ASSERT(sah != NULL, ("NULL sah")); - SAHTREE_LOCK_ASSERT(); + IPSEC_ASSERT(sah->state == SADB_SASTATE_DEAD, + ("Attempt to free non DEAD SAH %p", sah)); + IPSEC_ASSERT(TAILQ_EMPTY(&sah->savtree_larval), + ("Attempt to free SAH %p with LARVAL SA", sah)); + IPSEC_ASSERT(TAILQ_EMPTY(&sah->savtree_alive), + ("Attempt to free SAH %p with ALIVE SA", sah)); - /* searching all SA registerd in the secindex. */ - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_any); - stateidx++) { - u_int state = saorder_state_any[stateidx]; - LIST_FOREACH_SAFE(sav, &sah->savtree[state], chain, nextsav) { - if (sav->refcnt == 0) { - /* sanity check */ - KEY_CHKSASTATE(state, sav->state, __func__); - /* - * do NOT call KEY_FREESAV here: - * it will only delete the sav if refcnt == 1, - * where we already know that refcnt == 0 - */ - key_delsav(sav); - } else { - /* give up to delete this sa */ - zombie++; - } - } - } - if (!zombie) { /* delete only if there are savs */ - /* remove from tree of SA index */ - if (__LIST_CHAINED(sah)) - LIST_REMOVE(sah, chain); - free(sah, M_IPSEC_SAH); - } + free(sah, M_IPSEC_SAH); } /* - * allocating a new SA with LARVAL state. key_add() and key_getspi() call, + * allocating a new SA for key_add() and key_getspi() call, * and copy the values of mhp into new buffer. - * When SAD message type is GETSPI: - * to set sequence number from acq_seq++, - * to set zero to SPI. - * not to call key_setsava(). + * When SAD message type is SADB_GETSPI set SA state to LARVAL. + * For SADB_ADD create and initialize SA with MATURE state. * OUT: NULL : fail * others : pointer to new secasvar. - * - * does not modify mbuf. does not free mbuf on error. */ static struct secasvar * -key_newsav(struct mbuf *m, const struct sadb_msghdr *mhp, - struct secashead *sah, int *errp, const char *where, int tag) +key_newsav(const struct sadb_msghdr *mhp, struct secasindex *saidx, + uint32_t spi, int *errp) { - struct secasvar *newsav; - const struct sadb_sa *xsa; + struct secashead *sah; + struct secasvar *sav; + int isnew; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - IPSEC_ASSERT(sah != NULL, ("null secashead")); + IPSEC_ASSERT(mhp->msg->sadb_msg_type == SADB_GETSPI || + mhp->msg->sadb_msg_type == SADB_ADD, ("wrong message type")); - newsav = malloc(sizeof(struct secasvar), M_IPSEC_SA, M_NOWAIT|M_ZERO); - if (newsav == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - *errp = ENOBUFS; - goto done; - } - - switch (mhp->msg->sadb_msg_type) { - case SADB_GETSPI: - newsav->spi = 0; - -#ifdef IPSEC_DOSEQCHECK - /* sync sequence number */ - if (mhp->msg->sadb_msg_seq == 0) - newsav->seq = - (V_acq_seq = (V_acq_seq == ~0 ? 1 : ++V_acq_seq)); - else -#endif - newsav->seq = mhp->msg->sadb_msg_seq; - break; - - case SADB_ADD: - /* sanity check */ - if (mhp->ext[SADB_EXT_SA] == NULL) { - free(newsav, M_IPSEC_SA); - newsav = NULL; - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + sav = NULL; + sah = NULL; + /* check SPI value */ + switch (saidx->proto) { + case IPPROTO_ESP: + case IPPROTO_AH: + /* + * RFC 4302, 2.4. Security Parameters Index (SPI), SPI values + * 1-255 reserved by IANA for future use, + * 0 for implementation specific, local use. + */ + if (ntohl(spi) <= 255) { + ipseclog((LOG_DEBUG, "%s: illegal range of SPI %u.\n", + __func__, ntohl(spi))); *errp = EINVAL; goto done; } - xsa = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; - newsav->spi = xsa->sadb_sa_spi; - newsav->seq = mhp->msg->sadb_msg_seq; break; - default: - free(newsav, M_IPSEC_SA); - newsav = NULL; - *errp = EINVAL; + } + + sav = malloc(sizeof(struct secasvar), M_IPSEC_SA, M_NOWAIT | M_ZERO); + if (sav == NULL) { + *errp = ENOBUFS; goto done; } + sav->lock = malloc(sizeof(struct mtx), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); + if (sav->lock == NULL) { + *errp = ENOBUFS; + goto done; + } + mtx_init(sav->lock, "ipsec association", NULL, MTX_DEF); + sav->lft_c = uma_zalloc(V_key_lft_zone, M_NOWAIT); + if (sav->lft_c == NULL) { + *errp = ENOBUFS; + goto done; + } + counter_u64_zero(sav->lft_c_allocations); + counter_u64_zero(sav->lft_c_bytes); - - /* copy sav values */ - if (mhp->msg->sadb_msg_type != SADB_GETSPI) { - *errp = key_setsaval(newsav, m, mhp); - if (*errp) { - free(newsav, M_IPSEC_SA); - newsav = NULL; + sav->spi = spi; + sav->seq = mhp->msg->sadb_msg_seq; + sav->state = SADB_SASTATE_LARVAL; + sav->pid = (pid_t)mhp->msg->sadb_msg_pid; + SAV_INITREF(sav); +again: + sah = key_getsah(saidx); + if (sah == NULL) { + /* create a new SA index */ + sah = key_newsah(saidx); + if (sah == NULL) { + ipseclog((LOG_DEBUG, + "%s: No more memory.\n", __func__)); + *errp = ENOBUFS; goto done; } + isnew = 1; + } else + isnew = 0; + + sav->sah = sah; + if (mhp->msg->sadb_msg_type == SADB_GETSPI) { + sav->created = time_second; + } else if (sav->state == SADB_SASTATE_LARVAL) { + /* + * Do not call key_setsaval() second time in case + * of `goto again`. We will have MATURE state. + */ + *errp = key_setsaval(sav, mhp); + if (*errp != 0) + goto done; + sav->state = SADB_SASTATE_MATURE; } - SECASVAR_LOCK_INIT(newsav); - - /* reset created */ - newsav->created = time_second; - newsav->pid = mhp->msg->sadb_msg_pid; - - /* add to satree */ - newsav->sah = sah; - sa_initref(newsav); - newsav->state = SADB_SASTATE_LARVAL; - - SAHTREE_LOCK(); - LIST_INSERT_TAIL(&sah->savtree[SADB_SASTATE_LARVAL], newsav, - secasvar, chain); - SAHTREE_UNLOCK(); + SAHTREE_WLOCK(); + /* + * Check that existing SAH wasn't unlinked. + * Since we didn't hold the SAHTREE lock, it is possible, + * that callout handler or key_flush() or key_delete() could + * unlink this SAH. + */ + if (isnew == 0 && sah->state == SADB_SASTATE_DEAD) { + SAHTREE_WUNLOCK(); + key_freesah(&sah); /* reference from key_getsah() */ + goto again; + } + if (isnew != 0) { + /* + * Add new SAH into SADB. + * + * XXXAE: we can serialize key_add and key_getspi calls, so + * several threads will not fight in the race. + * Otherwise we should check under SAHTREE lock, that this + * SAH would not added twice. + */ + TAILQ_INSERT_HEAD(&V_sahtree, sah, chain); + /* Add new SAH into hash by addresses */ + LIST_INSERT_HEAD(SAHADDRHASH_HASH(saidx), sah, addrhash); + /* Now we are linked in the chain */ + sah->state = SADB_SASTATE_MATURE; + /* + * SAV references this new SAH. + * In case of existing SAH we reuse reference + * from key_getsah(). + */ + SAH_ADDREF(sah); + } + /* Link SAV with SAH */ + if (sav->state == SADB_SASTATE_MATURE) + TAILQ_INSERT_HEAD(&sah->savtree_alive, sav, chain); + else + TAILQ_INSERT_HEAD(&sah->savtree_larval, sav, chain); + /* Add SAV into SPI hash */ + LIST_INSERT_HEAD(SAVHASH_HASH(sav->spi), sav, spihash); + SAHTREE_WUNLOCK(); + *errp = 0; /* success */ done: - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s from %s:%u return SP:%p\n", __func__, - where, tag, newsav)); - - return newsav; + if (*errp != 0) { + if (sav != NULL) { + if (sav->lock != NULL) { + mtx_destroy(sav->lock); + free(sav->lock, M_IPSEC_MISC); + } + if (sav->lft_c != NULL) + uma_zfree(V_key_lft_zone, sav->lft_c); + free(sav, M_IPSEC_SA), sav = NULL; + } + if (sah != NULL) + key_freesah(&sah); + if (*errp == ENOBUFS) { + ipseclog((LOG_DEBUG, "%s: No more memory.\n", + __func__)); + PFKEYSTAT_INC(in_nomem); + } + } + return (sav); } /* @@ -2915,6 +2884,13 @@ static void key_cleansav(struct secasvar *sav) { + + if (sav->natt != NULL) { + free(sav->natt, M_IPSEC_MISC); + sav->natt = NULL; + } + if (sav->flags & SADB_X_EXT_F_CLONED) + return; /* * Cleanup xform state. Note that zeroize'ing causes the * keys to be cleared; otherwise we must do it ourself. @@ -2940,21 +2916,12 @@ free(sav->key_enc, M_IPSEC_MISC); sav->key_enc = NULL; } - if (sav->sched) { - bzero(sav->sched, sav->schedlen); - free(sav->sched, M_IPSEC_MISC); - sav->sched = NULL; - } if (sav->replay != NULL) { if (sav->replay->bitmap != NULL) free(sav->replay->bitmap, M_IPSEC_MISC); free(sav->replay, M_IPSEC_MISC); sav->replay = NULL; } - if (sav->lft_c != NULL) { - free(sav->lft_c, M_IPSEC_MISC); - sav->lft_c = NULL; - } if (sav->lft_h != NULL) { free(sav->lft_h, M_IPSEC_MISC); sav->lft_h = NULL; @@ -2972,173 +2939,230 @@ key_delsav(struct secasvar *sav) { IPSEC_ASSERT(sav != NULL, ("null sav")); - IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", sav->refcnt)); + IPSEC_ASSERT(sav->state == SADB_SASTATE_DEAD, + ("attempt to free non DEAD SA %p", sav)); + IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", + sav->refcnt)); - /* remove from SA header */ - if (__LIST_CHAINED(sav)) - LIST_REMOVE(sav, chain); + /* + * SA must be unlinked from the chain and hashtbl. + * If SA was cloned, we leave all fields untouched, + * except NAT-T config. + */ key_cleansav(sav); - SECASVAR_LOCK_DESTROY(sav); + if ((sav->flags & SADB_X_EXT_F_CLONED) == 0) { + mtx_destroy(sav->lock); + free(sav->lock, M_IPSEC_MISC); + uma_zfree(V_key_lft_zone, sav->lft_c); + } free(sav, M_IPSEC_SA); } /* - * search SAD. + * search SAH. * OUT: * NULL : not found - * others : found, pointer to a SA. + * others : found, referenced pointer to a SAH. */ static struct secashead * key_getsah(struct secasindex *saidx) { + SAHTREE_RLOCK_TRACKER; struct secashead *sah; - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, saidx, CMP_REQID)) - break; + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + if (key_cmpsaidx(&sah->saidx, saidx, CMP_MODE_REQID) != 0) { + SAH_ADDREF(sah); + break; + } } - SAHTREE_UNLOCK(); - - return sah; + SAHTREE_RUNLOCK(); + return (sah); } /* - * check not to be duplicated SPI. - * NOTE: this function is too slow due to searching all SAD. + * Check not to be duplicated SPI. * OUT: - * NULL : not found - * others : found, pointer to a SA. + * 0 : not found + * 1 : found SA with given SPI. */ -static struct secasvar * -key_checkspidup(struct secasindex *saidx, u_int32_t spi) +static int +key_checkspidup(uint32_t spi) { - struct secashead *sah; + SAHTREE_RLOCK_TRACKER; struct secasvar *sav; - /* check address family */ - if (saidx->src.sa.sa_family != saidx->dst.sa.sa_family) { - ipseclog((LOG_DEBUG, "%s: address family mismatched.\n", - __func__)); - return NULL; - } - - sav = NULL; - /* check all SAD */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (!key_ismyaddr((struct sockaddr *)&sah->saidx.dst)) - continue; - sav = key_getsavbyspi(sah, spi); - if (sav != NULL) + /* Assume SPI is in network byte order */ + SAHTREE_RLOCK(); + LIST_FOREACH(sav, SAVHASH_HASH(spi), spihash) { + if (sav->spi == spi) break; } - SAHTREE_UNLOCK(); - - return sav; + SAHTREE_RUNLOCK(); + return (sav != NULL); } /* - * search SAD litmited alive SA, protocol, SPI. + * Search SA by SPI. * OUT: * NULL : not found - * others : found, pointer to a SA. + * others : found, referenced pointer to a SA. */ static struct secasvar * -key_getsavbyspi(struct secashead *sah, u_int32_t spi) +key_getsavbyspi(uint32_t spi) { + SAHTREE_RLOCK_TRACKER; struct secasvar *sav; - u_int stateidx, state; - sav = NULL; - SAHTREE_LOCK_ASSERT(); - /* search all status */ - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_alive); - stateidx++) { + /* Assume SPI is in network byte order */ + SAHTREE_RLOCK(); + LIST_FOREACH(sav, SAVHASH_HASH(spi), spihash) { + if (sav->spi != spi) + continue; + SAV_ADDREF(sav); + break; + } + SAHTREE_RUNLOCK(); + return (sav); +} - state = saorder_state_alive[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { +static int +key_updatelifetimes(struct secasvar *sav, const struct sadb_msghdr *mhp) +{ + struct seclifetime *lft_h, *lft_s, *tmp; - /* sanity check */ - if (sav->state != state) { - ipseclog((LOG_DEBUG, "%s: " - "invalid sav->state (queue: %d SA: %d)\n", - __func__, state, sav->state)); - continue; - } - - if (sav->spi == spi) - return sav; + /* Lifetime extension is optional, check that it is present. */ + if (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) && + SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) { + /* + * In case of SADB_UPDATE we may need to change + * existing lifetimes. + */ + if (sav->state == SADB_SASTATE_MATURE) { + lft_h = lft_s = NULL; + goto reset; } + return (0); } + /* Both HARD and SOFT extensions must present */ + if ((SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); + return (EINVAL); + } + if (SADB_CHECKLEN(mhp, SADB_EXT_LIFETIME_HARD) || + SADB_CHECKLEN(mhp, SADB_EXT_LIFETIME_SOFT)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); + return (EINVAL); + } + lft_h = key_dup_lifemsg((const struct sadb_lifetime *) + mhp->ext[SADB_EXT_LIFETIME_HARD], M_IPSEC_MISC); + if (lft_h == NULL) { + PFKEYSTAT_INC(in_nomem); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } + lft_s = key_dup_lifemsg((const struct sadb_lifetime *) + mhp->ext[SADB_EXT_LIFETIME_SOFT], M_IPSEC_MISC); + if (lft_s == NULL) { + PFKEYSTAT_INC(in_nomem); + free(lft_h, M_IPSEC_MISC); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } +reset: + if (sav->state != SADB_SASTATE_LARVAL) { + /* + * key_update() holds reference to this SA, + * so it won't be deleted in meanwhile. + */ + SECASVAR_LOCK(sav); + tmp = sav->lft_h; + sav->lft_h = lft_h; + lft_h = tmp; - return NULL; + tmp = sav->lft_s; + sav->lft_s = lft_s; + lft_s = tmp; + SECASVAR_UNLOCK(sav); + if (lft_h != NULL) + free(lft_h, M_IPSEC_MISC); + if (lft_s != NULL) + free(lft_s, M_IPSEC_MISC); + return (0); + } + /* We can update lifetime without holding a lock */ + IPSEC_ASSERT(sav->lft_h == NULL, ("lft_h is already initialized\n")); + IPSEC_ASSERT(sav->lft_s == NULL, ("lft_s is already initialized\n")); + sav->lft_h = lft_h; + sav->lft_s = lft_s; + return (0); } /* - * copy SA values from PF_KEY message except *SPI, SEQ, PID, STATE and TYPE*. - * You must update these if need. + * copy SA values from PF_KEY message except *SPI, SEQ, PID and TYPE*. + * You must update these if need. Expects only LARVAL SAs. * OUT: 0: success. * !0: failure. - * - * does not modify mbuf. does not free mbuf on error. */ static int -key_setsaval(struct secasvar *sav, struct mbuf *m, - const struct sadb_msghdr *mhp) +key_setsaval(struct secasvar *sav, const struct sadb_msghdr *mhp) { - int error = 0; + const struct sadb_sa *sa0; + const struct sadb_key *key0; + uint32_t replay; + size_t len; + int error; - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); + IPSEC_ASSERT(sav->state == SADB_SASTATE_LARVAL, + ("Attempt to update non LARVAL SA")); - /* initialization */ - sav->replay = NULL; - sav->key_auth = NULL; - sav->key_enc = NULL; - sav->sched = NULL; - sav->schedlen = 0; - sav->lft_c = NULL; - sav->lft_h = NULL; - sav->lft_s = NULL; - sav->tdb_xform = NULL; /* transform */ - sav->tdb_encalgxform = NULL; /* encoding algorithm */ - sav->tdb_authalgxform = NULL; /* authentication algorithm */ - sav->tdb_compalgxform = NULL; /* compression algorithm */ - /* Initialize even if NAT-T not compiled in: */ - sav->natt_type = 0; - sav->natt_esp_frag_len = 0; + /* XXX rewrite */ + error = key_setident(sav->sah, mhp); + if (error != 0) + goto fail; /* SA */ - if (mhp->ext[SADB_EXT_SA] != NULL) { - const struct sadb_sa *sa0; - u_int32_t replay; - - sa0 = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; - if (mhp->extlen[SADB_EXT_SA] < sizeof(*sa0)) { + if (!SADB_CHECKHDR(mhp, SADB_EXT_SA)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_SA)) { error = EINVAL; goto fail; } - + sa0 = (const struct sadb_sa *)mhp->ext[SADB_EXT_SA]; sav->alg_auth = sa0->sadb_sa_auth; sav->alg_enc = sa0->sadb_sa_encrypt; sav->flags = sa0->sadb_sa_flags; + if ((sav->flags & SADB_KEY_FLAGS_MAX) != sav->flags) { + ipseclog((LOG_DEBUG, + "%s: invalid sa_flags 0x%08x.\n", __func__, + sav->flags)); + error = EINVAL; + goto fail; + } /* Optional replay window */ replay = 0; if ((sa0->sadb_sa_flags & SADB_X_EXT_OLD) == 0) replay = sa0->sadb_sa_replay; - if ((mhp->ext[SADB_X_EXT_SA_REPLAY]) != NULL) { + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_SA_REPLAY)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA_REPLAY)) { + error = EINVAL; + goto fail; + } replay = ((const struct sadb_x_sa_replay *) - mhp->ext[SADB_X_EXT_SA_REPLAY])->sadb_x_sa_replay_replay; + mhp->ext[SADB_X_EXT_SA_REPLAY])->sadb_x_sa_replay_replay; if (replay > UINT32_MAX - 32) { - ipseclog((LOG_DEBUG, "%s: replay window too big.\n", - __func__)); + ipseclog((LOG_DEBUG, + "%s: replay window too big.\n", __func__)); error = EINVAL; goto fail; } @@ -3146,32 +3170,35 @@ replay = (replay + 7) >> 3; } - sav->replay = (struct secreplay *) - malloc(sizeof(struct secreplay), - M_IPSEC_MISC, M_NOWAIT|M_ZERO); + sav->replay = malloc(sizeof(struct secreplay), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); if (sav->replay == NULL) { + PFKEYSTAT_INC(in_nomem); ipseclog((LOG_DEBUG, "%s: No more memory.\n", - __func__)); + __func__)); error = ENOBUFS; goto fail; } if (replay != 0) { /* number of 32b blocks to be allocated */ - u_int32_t bitmap_size; + uint32_t bitmap_size; /* RFC 6479: - * - the allocated replay window size must be a power of two - * - use an extra 32b block as a redundant window + * - the allocated replay window size must be + * a power of two. + * - use an extra 32b block as a redundant window. */ bitmap_size = 1; while (replay + 4 > bitmap_size) bitmap_size <<= 1; bitmap_size = bitmap_size / 4; - sav->replay->bitmap = malloc(bitmap_size*sizeof(u_int32_t), - M_IPSEC_MISC, M_NOWAIT|M_ZERO); + sav->replay->bitmap = malloc( + bitmap_size * sizeof(uint32_t), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); if (sav->replay->bitmap == NULL) { + PFKEYSTAT_INC(in_nomem); ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); error = ENOBUFS; @@ -3183,18 +3210,14 @@ } /* Authentication keys */ - if (mhp->ext[SADB_EXT_KEY_AUTH] != NULL) { - const struct sadb_key *key0; - int len; - - key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_AUTH]; - len = mhp->extlen[SADB_EXT_KEY_AUTH]; - - error = 0; - if (len < sizeof(*key0)) { + if (!SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_KEY_AUTH)) { error = EINVAL; goto fail; } + error = 0; + key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_AUTH]; + len = mhp->extlen[SADB_EXT_KEY_AUTH]; switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_AH: case SADB_SATYPE_ESP: @@ -3214,29 +3237,25 @@ goto fail; } - sav->key_auth = (struct seckey *)key_dup_keymsg(key0, len, - M_IPSEC_MISC); + sav->key_auth = key_dup_keymsg(key0, len, M_IPSEC_MISC); if (sav->key_auth == NULL ) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + PFKEYSTAT_INC(in_nomem); error = ENOBUFS; goto fail; } } /* Encryption key */ - if (mhp->ext[SADB_EXT_KEY_ENCRYPT] != NULL) { - const struct sadb_key *key0; - int len; - - key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_ENCRYPT]; - len = mhp->extlen[SADB_EXT_KEY_ENCRYPT]; - - error = 0; - if (len < sizeof(*key0)) { + if (!SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT)) { + if (SADB_CHECKLEN(mhp, SADB_EXT_KEY_ENCRYPT)) { error = EINVAL; goto fail; } + error = 0; + key0 = (const struct sadb_key *)mhp->ext[SADB_EXT_KEY_ENCRYPT]; + len = mhp->extlen[SADB_EXT_KEY_ENCRYPT]; switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_ESP: if (len == PFKEY_ALIGN8(sizeof(struct sadb_key)) && @@ -3244,12 +3263,11 @@ error = EINVAL; break; } - sav->key_enc = (struct seckey *)key_dup_keymsg(key0, - len, - M_IPSEC_MISC); + sav->key_enc = key_dup_keymsg(key0, len, M_IPSEC_MISC); if (sav->key_enc == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + PFKEYSTAT_INC(in_nomem); error = ENOBUFS; goto fail; } @@ -3274,172 +3292,83 @@ /* set iv */ sav->ivlen = 0; - switch (mhp->msg->sadb_msg_satype) { case SADB_SATYPE_AH: - error = xform_init(sav, XF_AH); - break; - case SADB_SATYPE_ESP: - error = xform_init(sav, XF_ESP); - break; - case SADB_X_SATYPE_IPCOMP: - error = xform_init(sav, XF_IPCOMP); - break; - case SADB_X_SATYPE_TCPSIGNATURE: - error = xform_init(sav, XF_TCPSIGNATURE); - break; - } - if (error) { - ipseclog((LOG_DEBUG, "%s: unable to initialize SA type %u.\n", - __func__, mhp->msg->sadb_msg_satype)); - goto fail; - } - - /* reset created */ - sav->created = time_second; - - /* make lifetime for CURRENT */ - sav->lft_c = malloc(sizeof(struct seclifetime), M_IPSEC_MISC, M_NOWAIT); - if (sav->lft_c == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - error = ENOBUFS; - goto fail; - } - - sav->lft_c->allocations = 0; - sav->lft_c->bytes = 0; - sav->lft_c->addtime = time_second; - sav->lft_c->usetime = 0; - - /* lifetimes for HARD and SOFT */ - { - const struct sadb_lifetime *lft0; - - lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_HARD]; - if (lft0 != NULL) { - if (mhp->extlen[SADB_EXT_LIFETIME_HARD] < sizeof(*lft0)) { + if (sav->flags & SADB_X_EXT_DERIV) { + ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " + "given to AH SA.\n", __func__)); error = EINVAL; goto fail; } - sav->lft_h = key_dup_lifemsg(lft0, M_IPSEC_MISC); - if (sav->lft_h == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - error = ENOBUFS; - goto fail; - } - /* to be initialize ? */ - } - - lft0 = (struct sadb_lifetime *)mhp->ext[SADB_EXT_LIFETIME_SOFT]; - if (lft0 != NULL) { - if (mhp->extlen[SADB_EXT_LIFETIME_SOFT] < sizeof(*lft0)) { + if (sav->alg_enc != SADB_EALG_NONE) { + ipseclog((LOG_DEBUG, "%s: protocol and algorithm " + "mismated.\n", __func__)); error = EINVAL; goto fail; } - sav->lft_s = key_dup_lifemsg(lft0, M_IPSEC_MISC); - if (sav->lft_s == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - error = ENOBUFS; - goto fail; - } - /* to be initialize ? */ - } - } - - return 0; - - fail: - /* initialization */ - key_cleansav(sav); - - return error; -} - -/* - * validation with a secasvar entry, and set SADB_SATYPE_MATURE. - * OUT: 0: valid - * other: errno - */ -static int -key_mature(struct secasvar *sav) -{ - int error; - - /* check SPI value */ - switch (sav->sah->saidx.proto) { - case IPPROTO_ESP: - case IPPROTO_AH: - /* - * RFC 4302, 2.4. Security Parameters Index (SPI), SPI values - * 1-255 reserved by IANA for future use, - * 0 for implementation specific, local use. - */ - if (ntohl(sav->spi) <= 255) { - ipseclog((LOG_DEBUG, "%s: illegal range of SPI %u.\n", - __func__, (u_int32_t)ntohl(sav->spi))); - return EINVAL; - } + error = xform_init(sav, XF_AH); break; - } - - /* check satype */ - switch (sav->sah->saidx.proto) { - case IPPROTO_ESP: - /* check flags */ - if ((sav->flags & (SADB_X_EXT_OLD|SADB_X_EXT_DERIV)) == - (SADB_X_EXT_OLD|SADB_X_EXT_DERIV)) { + case SADB_SATYPE_ESP: + if ((sav->flags & (SADB_X_EXT_OLD | SADB_X_EXT_DERIV)) == + (SADB_X_EXT_OLD | SADB_X_EXT_DERIV)) { ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " - "given to old-esp.\n", __func__)); - return EINVAL; + "given to old-esp.\n", __func__)); + error = EINVAL; + goto fail; } error = xform_init(sav, XF_ESP); break; - case IPPROTO_AH: - /* check flags */ - if (sav->flags & SADB_X_EXT_DERIV) { - ipseclog((LOG_DEBUG, "%s: invalid flag (derived) " - "given to AH SA.\n", __func__)); - return EINVAL; - } - if (sav->alg_enc != SADB_EALG_NONE) { - ipseclog((LOG_DEBUG, "%s: protocol and algorithm " - "mismated.\n", __func__)); - return(EINVAL); - } - error = xform_init(sav, XF_AH); - break; - case IPPROTO_IPCOMP: + case SADB_X_SATYPE_IPCOMP: if (sav->alg_auth != SADB_AALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " - "mismated.\n", __func__)); - return(EINVAL); + "mismated.\n", __func__)); + error = EINVAL; + goto fail; } - if ((sav->flags & SADB_X_EXT_RAWCPI) == 0 - && ntohl(sav->spi) >= 0x10000) { + if ((sav->flags & SADB_X_EXT_RAWCPI) == 0 && + ntohl(sav->spi) >= 0x10000) { ipseclog((LOG_DEBUG, "%s: invalid cpi for IPComp.\n", - __func__)); - return(EINVAL); + __func__)); + error = EINVAL; + goto fail; } error = xform_init(sav, XF_IPCOMP); break; - case IPPROTO_TCP: + case SADB_X_SATYPE_TCPSIGNATURE: if (sav->alg_enc != SADB_EALG_NONE) { ipseclog((LOG_DEBUG, "%s: protocol and algorithm " - "mismated.\n", __func__)); - return(EINVAL); + "mismated.\n", __func__)); + error = EINVAL; + goto fail; } error = xform_init(sav, XF_TCPSIGNATURE); break; default: ipseclog((LOG_DEBUG, "%s: Invalid satype.\n", __func__)); error = EPROTONOSUPPORT; - break; + goto fail; } - if (error == 0) { - SAHTREE_LOCK(); - key_sa_chgstate(sav, SADB_SASTATE_MATURE); - SAHTREE_UNLOCK(); + if (error) { + ipseclog((LOG_DEBUG, "%s: unable to initialize SA type %u.\n", + __func__, mhp->msg->sadb_msg_satype)); + goto fail; } + + /* Handle NAT-T headers */ + error = key_setnatt(sav, mhp); + if (error != 0) + goto fail; + + /* Initialize lifetime for CURRENT */ + sav->firstused = 0; + sav->created = time_second; + + /* lifetimes for HARD and SOFT */ + error = key_updatelifetimes(sav, mhp); + if (error == 0) + return (0); +fail: + key_cleansav(sav); return (error); } @@ -3447,26 +3376,25 @@ * subroutine for SADB_GET and SADB_DUMP. */ static struct mbuf * -key_setdumpsa(struct secasvar *sav, u_int8_t type, u_int8_t satype, - u_int32_t seq, u_int32_t pid) +key_setdumpsa(struct secasvar *sav, uint8_t type, uint8_t satype, + uint32_t seq, uint32_t pid) { + struct seclifetime lft_c; struct mbuf *result = NULL, *tres = NULL, *m; - int i; - int dumporder[] = { + int i, dumporder[] = { SADB_EXT_SA, SADB_X_EXT_SA2, SADB_X_EXT_SA_REPLAY, SADB_EXT_LIFETIME_HARD, SADB_EXT_LIFETIME_SOFT, SADB_EXT_LIFETIME_CURRENT, SADB_EXT_ADDRESS_SRC, - SADB_EXT_ADDRESS_DST, SADB_EXT_ADDRESS_PROXY, SADB_EXT_KEY_AUTH, - SADB_EXT_KEY_ENCRYPT, SADB_EXT_IDENTITY_SRC, - SADB_EXT_IDENTITY_DST, SADB_EXT_SENSITIVITY, -#ifdef IPSEC_NAT_T + SADB_EXT_ADDRESS_DST, SADB_EXT_ADDRESS_PROXY, + SADB_EXT_KEY_AUTH, SADB_EXT_KEY_ENCRYPT, + SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST, + SADB_EXT_SENSITIVITY, SADB_X_EXT_NAT_T_TYPE, SADB_X_EXT_NAT_T_SPORT, SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NAT_T_FRAG, -#endif }; - u_int32_t replay_count; + uint32_t replay_count; m = key_setsadbmsg(type, 0, satype, seq, pid, sav->refcnt); if (m == NULL) @@ -3494,7 +3422,7 @@ case SADB_X_EXT_SA_REPLAY: if (sav->replay == NULL || - sav->replay->wsize <= UINT8_MAX) + sav->replay->wsize <= UINT8_MAX) continue; m = key_setsadbxsareplay(sav->replay->wsize); @@ -3535,10 +3463,12 @@ break; case SADB_EXT_LIFETIME_CURRENT: - if (!sav->lft_c) - continue; - m = key_setlifetime(sav->lft_c, - SADB_EXT_LIFETIME_CURRENT); + lft_c.addtime = sav->created; + lft_c.allocations = (uint32_t)counter_u64_fetch( + sav->lft_c_allocations); + lft_c.bytes = counter_u64_fetch(sav->lft_c_bytes); + lft_c.usetime = sav->firstused; + m = key_setlifetime(&lft_c, SADB_EXT_LIFETIME_CURRENT); if (!m) goto fail; break; @@ -3562,35 +3492,53 @@ goto fail; break; -#ifdef IPSEC_NAT_T case SADB_X_EXT_NAT_T_TYPE: - m = key_setsadbxtype(sav->natt_type); + if (sav->natt == NULL) + continue; + m = key_setsadbxtype(UDP_ENCAP_ESPINUDP); if (!m) goto fail; break; - + case SADB_X_EXT_NAT_T_DPORT: - m = key_setsadbxport( - KEY_PORTFROMSADDR(&sav->sah->saidx.dst), + if (sav->natt == NULL) + continue; + m = key_setsadbxport(sav->natt->dport, SADB_X_EXT_NAT_T_DPORT); if (!m) goto fail; break; case SADB_X_EXT_NAT_T_SPORT: - m = key_setsadbxport( - KEY_PORTFROMSADDR(&sav->sah->saidx.src), + if (sav->natt == NULL) + continue; + m = key_setsadbxport(sav->natt->sport, SADB_X_EXT_NAT_T_SPORT); if (!m) goto fail; break; case SADB_X_EXT_NAT_T_OAI: + if (sav->natt == NULL || + (sav->natt->flags & IPSEC_NATT_F_OAI) == 0) + continue; + m = key_setsadbaddr(SADB_X_EXT_NAT_T_OAI, + &sav->natt->oai.sa, FULLMASK, IPSEC_ULPROTO_ANY); + if (!m) + goto fail; + break; case SADB_X_EXT_NAT_T_OAR: + if (sav->natt == NULL || + (sav->natt->flags & IPSEC_NATT_F_OAR) == 0) + continue; + m = key_setsadbaddr(SADB_X_EXT_NAT_T_OAR, + &sav->natt->oar.sa, FULLMASK, IPSEC_ULPROTO_ANY); + if (!m) + goto fail; + break; case SADB_X_EXT_NAT_T_FRAG: /* We do not (yet) support those. */ continue; -#endif case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_IDENTITY_SRC: @@ -3606,7 +3554,6 @@ if (tres) m_cat(m, tres); tres = m; - } m_cat(result, tres); @@ -3695,14 +3642,13 @@ p->sadb_sa_exttype = SADB_EXT_SA; p->sadb_sa_spi = sav->spi; p->sadb_sa_replay = sav->replay ? - (sav->replay->wsize > UINT8_MAX ? - UINT8_MAX : sav->replay->wsize) : 0; + (sav->replay->wsize > UINT8_MAX ? UINT8_MAX : + sav->replay->wsize): 0; p->sadb_sa_state = sav->state; p->sadb_sa_auth = sav->alg_auth; p->sadb_sa_encrypt = sav->alg_enc; - p->sadb_sa_flags = sav->flags; - - return m; + p->sadb_sa_flags = sav->flags & SADB_KEY_FLAGS_MAX; + return (m); } /* @@ -3807,7 +3753,6 @@ return m; } -#ifdef IPSEC_NAT_T /* * Set a type in sadb_x_nat_t_type. */ @@ -3862,10 +3807,10 @@ return (m); } -/* +/* * Get port from sockaddr. Port is in network byte order. */ -u_int16_t +uint16_t key_portfromsaddr(struct sockaddr *sa) { @@ -3879,18 +3824,14 @@ return ((struct sockaddr_in6 *)sa)->sin6_port; #endif } - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s unexpected address family %d\n", - __func__, sa->sa_family)); return (0); } -#endif /* IPSEC_NAT_T */ /* * Set port in struct sockaddr. Port is in network byte order. */ -static void -key_porttosaddr(struct sockaddr *sa, u_int16_t port) +void +key_porttosaddr(struct sockaddr *sa, uint16_t port) { switch (sa->sa_family) { @@ -3948,29 +3889,29 @@ * OUT: NULL no more memory */ struct seckey * -key_dup_keymsg(const struct sadb_key *src, u_int len, +key_dup_keymsg(const struct sadb_key *src, size_t len, struct malloc_type *type) { struct seckey *dst; - dst = (struct seckey *)malloc(sizeof(struct seckey), type, M_NOWAIT); + + dst = malloc(sizeof(*dst), type, M_NOWAIT); if (dst != NULL) { dst->bits = src->sadb_key_bits; - dst->key_data = (char *)malloc(len, type, M_NOWAIT); + dst->key_data = malloc(len, type, M_NOWAIT); if (dst->key_data != NULL) { - bcopy((const char *)src + sizeof(struct sadb_key), - dst->key_data, len); + bcopy((const char *)(src + 1), dst->key_data, len); } else { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", - __func__)); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", + __func__)); free(dst, type); dst = NULL; } } else { - ipseclog((LOG_DEBUG, "%s: No more memory.\n", - __func__)); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", + __func__)); } - return dst; + return (dst); } /* Take a lifetime message (sadb_lifetime) passed in on a socket and @@ -3983,67 +3924,21 @@ static struct seclifetime * key_dup_lifemsg(const struct sadb_lifetime *src, struct malloc_type *type) { - struct seclifetime *dst = NULL; + struct seclifetime *dst; - dst = (struct seclifetime *)malloc(sizeof(struct seclifetime), - type, M_NOWAIT); + dst = malloc(sizeof(*dst), type, M_NOWAIT); if (dst == NULL) { - /* XXX counter */ ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - } else { - dst->allocations = src->sadb_lifetime_allocations; - dst->bytes = src->sadb_lifetime_bytes; - dst->addtime = src->sadb_lifetime_addtime; - dst->usetime = src->sadb_lifetime_usetime; + return (NULL); } - return dst; + dst->allocations = src->sadb_lifetime_allocations; + dst->bytes = src->sadb_lifetime_bytes; + dst->addtime = src->sadb_lifetime_addtime; + dst->usetime = src->sadb_lifetime_usetime; + return (dst); } -/* compare my own address - * OUT: 1: true, i.e. my address. - * 0: false - */ -int -key_ismyaddr(struct sockaddr *sa) -{ - - IPSEC_ASSERT(sa != NULL, ("null sockaddr")); - switch (sa->sa_family) { -#ifdef INET - case AF_INET: - return (in_localip(satosin(sa)->sin_addr)); -#endif -#ifdef INET6 - case AF_INET6: - return key_ismyaddr6((struct sockaddr_in6 *)sa); -#endif - } - - return 0; -} - -#ifdef INET6 /* - * compare my own address for IPv6. - * 1: ours - * 0: other - */ -static int -key_ismyaddr6(struct sockaddr_in6 *sin6) -{ - struct in6_addr in6; - - if (!IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) - return (in6_localip(&sin6->sin6_addr)); - - /* Convert address into kernel-internal form */ - in6 = sin6->sin6_addr; - in6.s6_addr16[1] = htons(sin6->sin6_scope_id & 0xffff); - return (in6_localip(&in6)); -} -#endif /*INET6*/ - -/* * compare two secasindex structure. * flag can specify to compare 2 saidxes. * compare two secasindex structure without both mode and reqid. @@ -4059,7 +3954,6 @@ key_cmpsaidx(const struct secasindex *saidx0, const struct secasindex *saidx1, int flag) { - int chkport = 0; /* sanity */ if (saidx0 == NULL && saidx1 == NULL) @@ -4076,19 +3970,21 @@ return 0; if (saidx0->reqid != saidx1->reqid) return 0; - if (bcmp(&saidx0->src, &saidx1->src, saidx0->src.sa.sa_len) != 0 || - bcmp(&saidx0->dst, &saidx1->dst, saidx0->dst.sa.sa_len) != 0) + if (bcmp(&saidx0->src, &saidx1->src, + saidx0->src.sa.sa_len) != 0 || + bcmp(&saidx0->dst, &saidx1->dst, + saidx0->dst.sa.sa_len) != 0) return 0; } else { /* CMP_MODE_REQID, CMP_REQID, CMP_HEAD */ - if (flag == CMP_MODE_REQID - ||flag == CMP_REQID) { + if (flag == CMP_MODE_REQID || flag == CMP_REQID) { /* * If reqid of SPD is non-zero, unique SA is required. * The result must be of same reqid in this case. */ - if (saidx1->reqid != 0 && saidx0->reqid != saidx1->reqid) + if (saidx1->reqid != 0 && + saidx0->reqid != saidx1->reqid) return 0; } @@ -4098,29 +3994,10 @@ return 0; } -#ifdef IPSEC_NAT_T - /* - * If NAT-T is enabled, check ports for tunnel mode. - * Do not check ports if they are set to zero in the SPD. - * Also do not do it for native transport mode, as there - * is no port information available in the SP. - */ - if ((saidx1->mode == IPSEC_MODE_TUNNEL || - (saidx1->mode == IPSEC_MODE_TRANSPORT && - saidx1->proto == IPPROTO_ESP)) && - saidx1->src.sa.sa_family == AF_INET && - saidx1->dst.sa.sa_family == AF_INET && - ((const struct sockaddr_in *)(&saidx1->src))->sin_port && - ((const struct sockaddr_in *)(&saidx1->dst))->sin_port) - chkport = 1; -#endif /* IPSEC_NAT_T */ - - if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, chkport) != 0) { + if (key_sockaddrcmp(&saidx0->src.sa, &saidx1->src.sa, 0) != 0) return 0; - } - if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, chkport) != 0) { + if (key_sockaddrcmp(&saidx0->dst.sa, &saidx1->dst.sa, 0) != 0) return 0; - } } return 1; @@ -4255,11 +4132,6 @@ return 1; } -/* returns 0 on match */ -static int -key_sockaddrcmp(const struct sockaddr *sa1, const struct sockaddr *sa2, - int port) -{ #ifdef satosin #undef satosin #endif @@ -4268,10 +4140,16 @@ #undef satosin6 #endif #define satosin6(s) ((const struct sockaddr_in6 *)s) +/* returns 0 on match */ +int +key_sockaddrcmp(const struct sockaddr *sa1, const struct sockaddr *sa2, + int port) +{ if (sa1->sa_family != sa2->sa_family || sa1->sa_len != sa2->sa_len) return 1; switch (sa1->sa_family) { +#ifdef INET case AF_INET: if (sa1->sa_len != sizeof(struct sockaddr_in)) return 1; @@ -4282,6 +4160,8 @@ if (port && satosin(sa1)->sin_port != satosin(sa2)->sin_port) return 1; break; +#endif +#ifdef INET6 case AF_INET6: if (sa1->sa_len != sizeof(struct sockaddr_in6)) return 1; /*EINVAL*/ @@ -4298,6 +4178,7 @@ return 1; } break; +#endif default: if (bcmp(sa1, sa2, sa1->sa_len) != 0) return 1; @@ -4305,9 +4186,35 @@ } return 0; +} + +/* returns 0 on match */ +int +key_sockaddrcmp_withmask(const struct sockaddr *sa1, + const struct sockaddr *sa2, size_t mask) +{ + if (sa1->sa_family != sa2->sa_family || sa1->sa_len != sa2->sa_len) + return (1); + + switch (sa1->sa_family) { +#ifdef INET + case AF_INET: + return (!key_bbcmp(&satosin(sa1)->sin_addr, + &satosin(sa2)->sin_addr, mask)); +#endif +#ifdef INET6 + case AF_INET6: + if (satosin6(sa1)->sin6_scope_id != + satosin6(sa2)->sin6_scope_id) + return (1); + return (!key_bbcmp(&satosin6(sa1)->sin6_addr, + &satosin6(sa2)->sin6_addr, mask)); +#endif + } + return (1); +} #undef satosin #undef satosin6 -} /* * compare two buffers with mask. @@ -4350,13 +4257,13 @@ key_flush_spd(time_t now) { SPTREE_RLOCK_TRACKER; - struct secpolicy *sp; + struct secpolicy_list drainq; + struct secpolicy *sp, *nextsp; u_int dir; - /* SPD */ + LIST_INIT(&drainq); + SPTREE_RLOCK(); for (dir = 0; dir < IPSEC_DIR_MAX; dir++) { -restart: - SPTREE_RLOCK(); TAILQ_FOREACH(sp, &V_sptree[dir], chain) { if (sp->lifetime == 0 && sp->validtime == 0) continue; @@ -4364,53 +4271,86 @@ now - sp->created > sp->lifetime) || (sp->validtime && now - sp->lastused > sp->validtime)) { + /* Hold extra reference to send SPDEXPIRE */ SP_ADDREF(sp); - SPTREE_RUNLOCK(); - key_spdexpire(sp); - key_unlink(sp); - KEY_FREESP(&sp); - goto restart; + LIST_INSERT_HEAD(&drainq, sp, drainq); } } - SPTREE_RUNLOCK(); } + SPTREE_RUNLOCK(); + if (LIST_EMPTY(&drainq)) + return; + + SPTREE_WLOCK(); + sp = LIST_FIRST(&drainq); + while (sp != NULL) { + nextsp = LIST_NEXT(sp, drainq); + /* Check that SP is still linked */ + if (sp->state != IPSEC_SPSTATE_ALIVE) { + LIST_REMOVE(sp, drainq); + key_freesp(&sp); /* release extra reference */ + sp = nextsp; + continue; + } + TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); + LIST_REMOVE(sp, idhash); + sp->state = IPSEC_SPSTATE_DEAD; + sp = nextsp; + } + V_sp_genid++; + SPTREE_WUNLOCK(); + + sp = LIST_FIRST(&drainq); + while (sp != NULL) { + nextsp = LIST_NEXT(sp, drainq); + key_spdexpire(sp); + key_freesp(&sp); /* release extra reference */ + key_freesp(&sp); /* release last reference */ + sp = nextsp; + } } static void key_flush_sad(time_t now) { + SAHTREE_RLOCK_TRACKER; + struct secashead_list emptyq; + struct secasvar_list drainq, hexpireq, sexpireq, freeq; struct secashead *sah, *nextsah; struct secasvar *sav, *nextsav; - /* SAD */ - SAHTREE_LOCK(); - LIST_FOREACH_SAFE(sah, &V_sahtree, chain, nextsah) { - /* if sah has been dead, then delete it and process next sah. */ - if (sah->state == SADB_SASTATE_DEAD) { - key_delsah(sah); + LIST_INIT(&drainq); + LIST_INIT(&hexpireq); + LIST_INIT(&sexpireq); + LIST_INIT(&emptyq); + + SAHTREE_RLOCK(); + TAILQ_FOREACH(sah, &V_sahtree, chain) { + /* Check for empty SAH */ + if (TAILQ_EMPTY(&sah->savtree_larval) && + TAILQ_EMPTY(&sah->savtree_alive)) { + SAH_ADDREF(sah); + LIST_INSERT_HEAD(&emptyq, sah, drainq); continue; } - - /* if LARVAL entry doesn't become MATURE, delete it. */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_LARVAL], chain, nextsav) { - /* Need to also check refcnt for a larval SA ??? */ - if (now - sav->created > V_key_larval_lifetime) - KEY_FREESAV(&sav); + /* Add all stale LARVAL SAs into drainq */ + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + if (now - sav->created < V_key_larval_lifetime) + continue; + SAV_ADDREF(sav); + LIST_INSERT_HEAD(&drainq, sav, drainq); } - - /* - * check MATURE entry to start to send expire message - * whether or not. - */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_MATURE], chain, nextsav) { - /* we don't need to check. */ - if (sav->lft_s == NULL) + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + /* lifetimes aren't specified */ + if (sav->lft_h == NULL) continue; - - /* sanity check */ - if (sav->lft_c == NULL) { - ipseclog((LOG_DEBUG,"%s: there is no CURRENT " - "time, why?\n", __func__)); + SECASVAR_LOCK(sav); + /* + * Check again with lock held, because it may + * be updated by SADB_UPDATE. + */ + if (sav->lft_h == NULL) { + SECASVAR_UNLOCK(sav); continue; } /* @@ -4423,84 +4363,149 @@ /* check HARD lifetime */ if ((sav->lft_h->addtime != 0 && now - sav->created > sav->lft_h->addtime) || - (sav->lft_h->bytes != 0 && - sav->lft_h->bytes < sav->lft_c->bytes)) { - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - key_expire(sav, 1); - KEY_FREESAV(&sav); + (sav->lft_h->usetime != 0 && sav->firstused && + now - sav->firstused > sav->lft_h->usetime) || + (sav->lft_h->bytes != 0 && counter_u64_fetch( + sav->lft_c_bytes) > sav->lft_h->bytes)) { + SECASVAR_UNLOCK(sav); + SAV_ADDREF(sav); + LIST_INSERT_HEAD(&hexpireq, sav, drainq); + continue; } - /* check SOFT lifetime */ - else if ((sav->lft_s->addtime != 0 && + /* check SOFT lifetime (only for MATURE SAs) */ + if (sav->state == SADB_SASTATE_MATURE && ( + (sav->lft_s->addtime != 0 && now - sav->created > sav->lft_s->addtime) || - (sav->lft_s->bytes != 0 && - sav->lft_s->bytes < sav->lft_c->bytes)) { - key_sa_chgstate(sav, SADB_SASTATE_DYING); - key_expire(sav, 0); + (sav->lft_s->usetime != 0 && sav->firstused && + now - sav->firstused > sav->lft_s->usetime) || + (sav->lft_s->bytes != 0 && counter_u64_fetch( + sav->lft_c_bytes) > sav->lft_s->bytes))) { + SECASVAR_UNLOCK(sav); + SAV_ADDREF(sav); + LIST_INSERT_HEAD(&sexpireq, sav, drainq); + continue; } + SECASVAR_UNLOCK(sav); } + } + SAHTREE_RUNLOCK(); - /* check DYING entry to change status to DEAD. */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_DYING], chain, nextsav) { - /* we don't need to check. */ - if (sav->lft_h == NULL) - continue; + if (LIST_EMPTY(&emptyq) && LIST_EMPTY(&drainq) && + LIST_EMPTY(&hexpireq) && LIST_EMPTY(&sexpireq)) + return; - /* sanity check */ - if (sav->lft_c == NULL) { - ipseclog((LOG_DEBUG, "%s: there is no CURRENT " - "time, why?\n", __func__)); - continue; - } - - if (sav->lft_h->addtime != 0 && - now - sav->created > sav->lft_h->addtime) { - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - key_expire(sav, 1); - KEY_FREESAV(&sav); - } -#if 0 /* XXX Should we keep to send expire message until HARD lifetime ? */ - else if (sav->lft_s != NULL - && sav->lft_s->addtime != 0 - && now - sav->created > sav->lft_s->addtime) { - /* - * XXX: should be checked to be - * installed the valid SA. - */ - - /* - * If there is no SA then sending - * expire message. - */ - key_expire(sav, 0); - } -#endif - /* check HARD lifetime by bytes */ - else if (sav->lft_h->bytes != 0 && - sav->lft_h->bytes < sav->lft_c->bytes) { - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - key_expire(sav, 1); - KEY_FREESAV(&sav); - } + LIST_INIT(&freeq); + SAHTREE_WLOCK(); + /* Unlink stale LARVAL SAs */ + sav = LIST_FIRST(&drainq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + /* Check that SA is still LARVAL */ + if (sav->state != SADB_SASTATE_LARVAL) { + LIST_REMOVE(sav, drainq); + LIST_INSERT_HEAD(&freeq, sav, drainq); + sav = nextsav; + continue; } - - /* delete entry in DEAD */ - LIST_FOREACH_SAFE(sav, &sah->savtree[SADB_SASTATE_DEAD], chain, nextsav) { - /* sanity check */ - if (sav->state != SADB_SASTATE_DEAD) { - ipseclog((LOG_DEBUG, "%s: invalid sav->state " - "(queue: %d SA: %d): kill it anyway\n", - __func__, - SADB_SASTATE_DEAD, sav->state)); - } - /* - * do not call key_freesav() here. - * sav should already be freed, and sav->refcnt - * shows other references to sav - * (such as from SPD). - */ + TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + sav = nextsav; + } + /* Unlink all SAs with expired HARD lifetime */ + sav = LIST_FIRST(&hexpireq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + /* Check that SA is not unlinked */ + if (sav->state == SADB_SASTATE_DEAD) { + LIST_REMOVE(sav, drainq); + LIST_INSERT_HEAD(&freeq, sav, drainq); + sav = nextsav; + continue; } + TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + sav = nextsav; } - SAHTREE_UNLOCK(); + /* Mark all SAs with expired SOFT lifetime as DYING */ + sav = LIST_FIRST(&sexpireq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + /* Check that SA is not unlinked */ + if (sav->state == SADB_SASTATE_DEAD) { + LIST_REMOVE(sav, drainq); + LIST_INSERT_HEAD(&freeq, sav, drainq); + sav = nextsav; + continue; + } + /* + * NOTE: this doesn't change SA order in the chain. + */ + sav->state = SADB_SASTATE_DYING; + sav = nextsav; + } + /* Unlink empty SAHs */ + sah = LIST_FIRST(&emptyq); + while (sah != NULL) { + nextsah = LIST_NEXT(sah, drainq); + /* Check that SAH is still empty and not unlinked */ + if (sah->state == SADB_SASTATE_DEAD || + !TAILQ_EMPTY(&sah->savtree_larval) || + !TAILQ_EMPTY(&sah->savtree_alive)) { + LIST_REMOVE(sah, drainq); + key_freesah(&sah); /* release extra reference */ + sah = nextsah; + continue; + } + TAILQ_REMOVE(&V_sahtree, sah, chain); + LIST_REMOVE(sah, addrhash); + sah->state = SADB_SASTATE_DEAD; + sah = nextsah; + } + SAHTREE_WUNLOCK(); + + /* Send SPDEXPIRE messages */ + sav = LIST_FIRST(&hexpireq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + key_expire(sav, 1); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release extra reference */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; + } + sav = LIST_FIRST(&sexpireq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + key_expire(sav, 0); + key_freesav(&sav); /* release extra reference */ + sav = nextsav; + } + /* Free stale LARVAL SAs */ + sav = LIST_FIRST(&drainq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release extra reference */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; + } + /* Free SAs that were unlinked/changed by someone else */ + sav = LIST_FIRST(&freeq); + while (sav != NULL) { + nextsav = LIST_NEXT(sav, drainq); + key_freesav(&sav); /* release extra reference */ + sav = nextsav; + } + /* Free empty SAH */ + sah = LIST_FIRST(&emptyq); + while (sah != NULL) { + nextsah = LIST_NEXT(sah, drainq); + key_freesah(&sah); /* release extra reference */ + key_freesah(&sah); /* release last reference */ + sah = nextsah; + } } static void @@ -4510,13 +4515,16 @@ /* ACQ tree */ ACQ_LOCK(); - for (acq = LIST_FIRST(&V_acqtree); acq != NULL; acq = nextacq) { + acq = LIST_FIRST(&V_acqtree); + while (acq != NULL) { nextacq = LIST_NEXT(acq, chain); - if (now - acq->created > V_key_blockacq_lifetime - && __LIST_CHAINED(acq)) { + if (now - acq->created > V_key_blockacq_lifetime) { LIST_REMOVE(acq, chain); + LIST_REMOVE(acq, addrhash); + LIST_REMOVE(acq, seqhash); free(acq, M_IPSEC_SAQ); } + acq = nextacq; } ACQ_UNLOCK(); } @@ -4607,8 +4615,8 @@ * OUT: * 0: invalid satype. */ -static u_int16_t -key_satype2proto(u_int8_t satype) +static uint8_t +key_satype2proto(uint8_t satype) { switch (satype) { case SADB_SATYPE_UNSPEC: @@ -4632,8 +4640,8 @@ * OUT: * 0: invalid protocol type. */ -static u_int8_t -key_proto2satype(u_int16_t proto) +static uint8_t +key_proto2satype(uint8_t proto) { switch (proto) { case IPPROTO_AH: @@ -4666,39 +4674,56 @@ static int key_getspi(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct sadb_address *src0, *dst0; struct secasindex saidx; - struct secashead *newsah; - struct secasvar *newsav; - u_int8_t proto; - u_int32_t spi; - u_int8_t mode; - u_int32_t reqid; + struct sadb_address *src0, *dst0; + struct secasvar *sav; + uint32_t reqid, spi; int error; + uint8_t mode, proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) +#ifdef PFKEY_STRICT_CHECKS + || SADB_CHECKHDR(mhp, SADB_EXT_SPIRANGE) +#endif + ) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); + error = EINVAL; + goto fail; } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) +#ifdef PFKEY_STRICT_CHECKS + || SADB_CHECKLEN(mhp, SADB_EXT_SPIRANGE) +#endif + ) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); + error = EINVAL; + goto fail; } - if (mhp->ext[SADB_X_EXT_SA2] != NULL) { - mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; - reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; - } else { + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { mode = IPSEC_MODE_ANY; reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + error = EINVAL; + goto fail; + } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); @@ -4708,122 +4733,56 @@ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", __func__)); - return key_senderror(so, m, EINVAL); + error = EINVAL; + goto fail; } - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - switch (((struct sockaddr *)(src0 + 1))->sa_family) { - case AF_INET: - if (((struct sockaddr *)(src0 + 1))->sa_len != - sizeof(struct sockaddr_in)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in *)(src0 + 1))->sin_port = 0; - break; - case AF_INET6: - if (((struct sockaddr *)(src0 + 1))->sa_len != - sizeof(struct sockaddr_in6)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in6 *)(src0 + 1))->sin6_port = 0; - break; - default: - ; /*???*/ + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + error = EINVAL; + goto fail; } - switch (((struct sockaddr *)(dst0 + 1))->sa_family) { - case AF_INET: - if (((struct sockaddr *)(dst0 + 1))->sa_len != - sizeof(struct sockaddr_in)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in *)(dst0 + 1))->sin_port = 0; - break; - case AF_INET6: - if (((struct sockaddr *)(dst0 + 1))->sa_len != - sizeof(struct sockaddr_in6)) - return key_senderror(so, m, EINVAL); - ((struct sockaddr_in6 *)(dst0 + 1))->sin6_port = 0; - break; - default: - ; /*???*/ - } - - /* XXX boundary check against sa_len */ KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - * We made sure the port numbers are zero above, so we do - * not have to worry in case we do not update them. - */ - if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL) - ipseclog((LOG_DEBUG, "%s: NAT-T OAi present\n", __func__)); - if (mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) - ipseclog((LOG_DEBUG, "%s: NAT-T OAr present\n", __func__)); - - if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_type *type; - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || - mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid nat-t message " - "passed.\n", __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, dport->sadb_x_nat_t_port_port); - } -#endif - /* SPI allocation */ - spi = key_do_getnewspi((struct sadb_spirange *)mhp->ext[SADB_EXT_SPIRANGE], - &saidx); - if (spi == 0) - return key_senderror(so, m, EINVAL); - - /* get a SA index */ - if ((newsah = key_getsah(&saidx)) == NULL) { - /* create a new SA index */ - if ((newsah = key_newsah(&saidx)) == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - return key_senderror(so, m, ENOBUFS); - } + spi = key_do_getnewspi( + (struct sadb_spirange *)mhp->ext[SADB_EXT_SPIRANGE], &saidx); + if (spi == 0) { + /* + * Requested SPI or SPI range is not available or + * already used. + */ + error = EEXIST; + goto fail; } + sav = key_newsav(mhp, &saidx, spi, &error); + if (sav == NULL) + goto fail; - /* get a new SA */ - /* XXX rewrite */ - newsav = KEY_NEWSAV(m, mhp, newsah, &error); - if (newsav == NULL) { - /* XXX don't free new SA index allocated in above. */ - return key_senderror(so, m, error); + if (sav->seq != 0) { + /* + * RFC2367: + * If the SADB_GETSPI message is in response to a + * kernel-generated SADB_ACQUIRE, the sadb_msg_seq + * MUST be the same as the SADB_ACQUIRE message. + * + * XXXAE: However it doesn't definethe behaviour how to + * check this and what to do if it doesn't match. + * Also what we should do if it matches? + * + * We can compare saidx used in SADB_ACQUIRE with saidx + * used in SADB_GETSPI, but this probably can break + * existing software. For now just warn if it doesn't match. + * + * XXXAE: anyway it looks useless. + */ + key_acqdone(&saidx, sav->seq); } + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); - /* set spi */ - newsav->spi = htonl(spi); - - /* delete the entry in acqtree */ - if (mhp->msg->sadb_msg_seq != 0) { - struct secacq *acq; - if ((acq = key_getacqbyseq(mhp->msg->sadb_msg_seq)) != NULL) { - /* reset counter in order to deletion by timehandler. */ - acq->created = time_second; - acq->count = 0; - } - } - { struct mbuf *n, *nn; struct sadb_sa *m_sa; @@ -4841,8 +4800,10 @@ n = NULL; } } - if (!n) - return key_senderror(so, m, ENOBUFS); + if (!n) { + error = ENOBUFS; + goto fail; + } n->m_len = len; n->m_next = NULL; @@ -4854,7 +4815,7 @@ m_sa = (struct sadb_sa *)(mtod(n, caddr_t) + off); m_sa->sadb_sa_len = PFKEY_UNIT64(sizeof(struct sadb_sa)); m_sa->sadb_sa_exttype = SADB_EXT_SA; - m_sa->sadb_sa_spi = htonl(spi); + m_sa->sadb_sa_spi = spi; /* SPI is already in network byte order */ off += PFKEY_ALIGN8(sizeof(struct sadb_sa)); IPSEC_ASSERT(off == len, @@ -4864,7 +4825,8 @@ SADB_EXT_ADDRESS_DST); if (!n->m_next) { m_freem(n); - return key_senderror(so, m, ENOBUFS); + error = ENOBUFS; + goto fail; } if (n->m_len < sizeof(struct sadb_msg)) { @@ -4878,13 +4840,16 @@ n->m_pkthdr.len += nn->m_len; newmsg = mtod(n, struct sadb_msg *); - newmsg->sadb_msg_seq = newsav->seq; + newmsg->sadb_msg_seq = sav->seq; newmsg->sadb_msg_errno = 0; newmsg->sadb_msg_len = PFKEY_UNIT64(n->m_pkthdr.len); m_freem(m); return key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } + +fail: + return (key_senderror(so, m, error)); } /* @@ -4892,13 +4857,12 @@ * called by key_getspi(). * OUT: * 0: failure. - * others: success. + * others: success, SPI in network byte order. */ -static u_int32_t +static uint32_t key_do_getnewspi(struct sadb_spirange *spirange, struct secasindex *saidx) { - u_int32_t newspi; - u_int32_t min, max; + uint32_t min, max, newspi, t; int count = V_key_spi_trycnt; /* set spi range to allocate */ @@ -4911,7 +4875,6 @@ } /* IPCOMP needs 2-byte SPI */ if (saidx->proto == IPPROTO_IPCOMP) { - u_int32_t t; if (min >= 0x10000) min = 0xffff; if (max >= 0x10000) @@ -4922,15 +4885,14 @@ } if (min == max) { - if (key_checkspidup(saidx, min) != NULL) { + if (!key_checkspidup(htonl(min))) { ipseclog((LOG_DEBUG, "%s: SPI %u exists already.\n", - __func__, min)); + __func__, min)); return 0; } count--; /* taking one cost. */ newspi = min; - } else { /* init SPI */ @@ -4940,26 +4902,247 @@ while (count--) { /* generate pseudo-random SPI value ranged. */ newspi = min + (key_random() % (max - min + 1)); - - if (key_checkspidup(saidx, newspi) == NULL) + if (!key_checkspidup(htonl(newspi))) break; } if (count == 0 || newspi == 0) { - ipseclog((LOG_DEBUG, "%s: to allocate spi is failed.\n", - __func__)); + ipseclog((LOG_DEBUG, + "%s: failed to allocate SPI.\n", __func__)); return 0; } } /* statistics */ keystat.getspi_count = - (keystat.getspi_count + V_key_spi_trycnt - count) / 2; + (keystat.getspi_count + V_key_spi_trycnt - count) / 2; - return newspi; + return (htonl(newspi)); } /* + * Find TCP-MD5 SA with corresponding secasindex. + * If not found, return NULL and fill SPI with usable value if needed. + */ +static struct secasvar * +key_getsav_tcpmd5(struct secasindex *saidx, uint32_t *spi) +{ + SAHTREE_RLOCK_TRACKER; + struct secashead *sah; + struct secasvar *sav; + + IPSEC_ASSERT(saidx->proto == IPPROTO_TCP, ("wrong proto")); + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + if (sah->saidx.proto != IPPROTO_TCP) + continue; + if (!key_sockaddrcmp(&saidx->dst.sa, &sah->saidx.dst.sa, 0)) + break; + } + if (sah != NULL) { + if (V_key_preferred_oldsa) + sav = TAILQ_LAST(&sah->savtree_alive, secasvar_queue); + else + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav != NULL) { + SAV_ADDREF(sav); + SAHTREE_RUNLOCK(); + return (sav); + } + } + if (spi == NULL) { + /* No SPI required */ + SAHTREE_RUNLOCK(); + return (NULL); + } + /* Check that SPI is unique */ + LIST_FOREACH(sav, SAVHASH_HASH(*spi), spihash) { + if (sav->spi == *spi) + break; + } + if (sav == NULL) { + SAHTREE_RUNLOCK(); + /* SPI is already unique */ + return (NULL); + } + SAHTREE_RUNLOCK(); + /* XXX: not optimal */ + *spi = key_do_getnewspi(NULL, saidx); + return (NULL); +} + +static int +key_updateaddresses(struct socket *so, struct mbuf *m, + const struct sadb_msghdr *mhp, struct secasvar *sav, + struct secasindex *saidx) +{ + struct sockaddr *newaddr; + struct secashead *sah; + struct secasvar *newsav, *tmp; + struct mbuf *n; + int error, isnew; + + /* Check that we need to change SAH */ + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_SRC)) { + newaddr = (struct sockaddr *)( + ((struct sadb_address *) + mhp->ext[SADB_X_EXT_NEW_ADDRESS_SRC]) + 1); + bcopy(newaddr, &saidx->src, newaddr->sa_len); + key_porttosaddr(&saidx->src.sa, 0); + } + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_DST)) { + newaddr = (struct sockaddr *)( + ((struct sadb_address *) + mhp->ext[SADB_X_EXT_NEW_ADDRESS_DST]) + 1); + bcopy(newaddr, &saidx->dst, newaddr->sa_len); + key_porttosaddr(&saidx->dst.sa, 0); + } + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_SRC) || + !SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_DST)) { + error = key_checksockaddrs(&saidx->src.sa, &saidx->dst.sa); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid new sockaddr.\n", + __func__)); + return (error); + } + + sah = key_getsah(saidx); + if (sah == NULL) { + /* create a new SA index */ + sah = key_newsah(saidx); + if (sah == NULL) { + ipseclog((LOG_DEBUG, + "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } + isnew = 2; /* SAH is new */ + } else + isnew = 1; /* existing SAH is referenced */ + } else { + /* + * src and dst addresses are still the same. + * Do we want to change NAT-T config? + */ + if (sav->sah->saidx.proto != IPPROTO_ESP || + SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_TYPE) || + SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_SPORT) || + SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_DPORT)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); + return (EINVAL); + } + /* We hold reference to SA, thus SAH will be referenced too. */ + sah = sav->sah; + isnew = 0; + } + + newsav = malloc(sizeof(struct secasvar), M_IPSEC_SA, + M_NOWAIT | M_ZERO); + if (newsav == NULL) { + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + error = ENOBUFS; + goto fail; + } + + /* Clone SA's content into newsav */ + SAV_INITREF(newsav); + bcopy(sav, newsav, offsetof(struct secasvar, chain)); + /* + * We create new NAT-T config if it is needed. + * Old NAT-T config will be freed by key_cleansav() when + * last reference to SA will be released. + */ + newsav->natt = NULL; + newsav->sah = sah; + newsav->state = SADB_SASTATE_MATURE; + error = key_setnatt(sav, mhp); + if (error != 0) + goto fail; + + SAHTREE_WLOCK(); + /* Check that SA is still alive */ + if (sav->state == SADB_SASTATE_DEAD) { + /* SA was unlinked */ + SAHTREE_WUNLOCK(); + error = ESRCH; + goto fail; + } + + /* Unlink SA from SAH and SPI hash */ + IPSEC_ASSERT((sav->flags & SADB_X_EXT_F_CLONED) == 0, + ("SA is already cloned")); + IPSEC_ASSERT(sav->state == SADB_SASTATE_MATURE || + sav->state == SADB_SASTATE_DYING, + ("Wrong SA state %u\n", sav->state)); + TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + + /* + * Link new SA with SAH. Keep SAs ordered by + * create time (newer are first). + */ + TAILQ_FOREACH(tmp, &sah->savtree_alive, chain) { + if (newsav->created > tmp->created) { + TAILQ_INSERT_BEFORE(tmp, newsav, chain); + break; + } + } + if (tmp == NULL) + TAILQ_INSERT_TAIL(&sah->savtree_alive, newsav, chain); + + /* Add new SA into SPI hash. */ + LIST_INSERT_HEAD(SAVHASH_HASH(newsav->spi), newsav, spihash); + + /* Add new SAH into SADB. */ + if (isnew == 2) { + TAILQ_INSERT_HEAD(&V_sahtree, sah, chain); + LIST_INSERT_HEAD(SAHADDRHASH_HASH(saidx), sah, addrhash); + sah->state = SADB_SASTATE_MATURE; + SAH_ADDREF(sah); /* newsav references new SAH */ + } + /* + * isnew == 1 -> @sah was referenced by key_getsah(). + * isnew == 0 -> we use the same @sah, that was used by @sav, + * and we use its reference for @newsav. + */ + SECASVAR_LOCK(sav); + /* XXX: replace cntr with pointer? */ + newsav->cntr = sav->cntr; + sav->flags |= SADB_X_EXT_F_CLONED; + SECASVAR_UNLOCK(sav); + + SAHTREE_WUNLOCK(); + + KEYDBG(KEY_STAMP, + printf("%s: SA(%p) cloned into SA(%p)\n", + __func__, sav, newsav)); + KEYDBG(KEY_DATA, kdebug_secasv(newsav)); + + key_freesav(&sav); /* release last reference */ + + /* set msg buf from mhp */ + n = key_getmsgbuf_x1(m, mhp); + if (n == NULL) { + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } + m_freem(m); + key_sendup_mbuf(so, n, KEY_SENDUP_ALL); + return (0); +fail: + if (isnew != 0) + key_freesah(&sah); + if (newsav != NULL) { + if (newsav->natt != NULL) + free(newsav->natt, M_IPSEC_MISC); + free(newsav, M_IPSEC_SA); + } + return (error); +} + +/* * SADB_UPDATE processing * receive * msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_SA] == NULL || - mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && - mhp->ext[SADB_EXT_KEY_ENCRYPT] == NULL) || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && - mhp->ext[SADB_EXT_KEY_AUTH] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_SA) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || - mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_SA) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_X_EXT_SA2] != NULL) { - mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; - reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; - } else { + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { mode = IPSEC_MODE_ANY; reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return key_senderror(so, m, EINVAL); + } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } - /* XXX boundary checking for other extensions */ sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. + * Only SADB_SASTATE_MATURE SAs may be submitted in an + * SADB_UPDATE message. */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - - if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || - mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - type = (struct sadb_x_nat_t_type *) - mhp->ext[SADB_X_EXT_NAT_T_TYPE]; - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - } else { - type = NULL; - sport = dport = NULL; - } - if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_OAI] < sizeof(*iaddr) || - mhp->extlen[SADB_X_EXT_NAT_T_OAR] < sizeof(*raddr)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - iaddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; - raddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; - ipseclog((LOG_DEBUG, "%s: NAT-T OAi/r present\n", __func__)); - } else { - iaddr = raddr = NULL; - } - if (mhp->ext[SADB_X_EXT_NAT_T_FRAG] != NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_FRAG] < sizeof(*frag)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - frag = (struct sadb_x_nat_t_frag *) - mhp->ext[SADB_X_EXT_NAT_T_FRAG]; - } else { - frag = NULL; - } + if (sa0->sadb_sa_state != SADB_SASTATE_MATURE) { + ipseclog((LOG_DEBUG, "%s: invalid state.\n", __func__)); +#ifdef PFKEY_STRICT_CHECKS + return key_senderror(so, m, EINVAL); #endif - - /* get a SA header */ - if ((sah = key_getsah(&saidx)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no SA index found.\n", __func__)); - return key_senderror(so, m, ENOENT); } - - /* set spidx if there */ - /* XXX rewrite */ - error = key_setident(sah, m, mhp); - if (error) + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); return key_senderror(so, m, error); - - /* find a SA with sequence number. */ -#ifdef IPSEC_DOSEQCHECK - if (mhp->msg->sadb_msg_seq != 0 - && (sav = key_getsavbyseq(sah, mhp->msg->sadb_msg_seq)) == NULL) { - ipseclog((LOG_DEBUG, "%s: no larval SA with sequence %u " - "exists.\n", __func__, mhp->msg->sadb_msg_seq)); - return key_senderror(so, m, ENOENT); } -#else - SAHTREE_LOCK(); - sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); - SAHTREE_UNLOCK(); + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); + sav = key_getsavbyspi(sa0->sadb_sa_spi); if (sav == NULL) { - ipseclog((LOG_DEBUG, "%s: no such a SA found (spi:%u)\n", - __func__, (u_int32_t)ntohl(sa0->sadb_sa_spi))); + ipseclog((LOG_DEBUG, "%s: no SA found for SPI %u\n", + __func__, ntohl(sa0->sadb_sa_spi))); return key_senderror(so, m, EINVAL); } -#endif - - /* validity check */ - if (sav->sah->saidx.proto != proto) { - ipseclog((LOG_DEBUG, "%s: protocol mismatched " - "(DB=%u param=%u)\n", __func__, - sav->sah->saidx.proto, proto)); - return key_senderror(so, m, EINVAL); - } -#ifdef IPSEC_DOSEQCHECK - if (sav->spi != sa0->sadb_sa_spi) { - ipseclog((LOG_DEBUG, "%s: SPI mismatched (DB:%u param:%u)\n", - __func__, - (u_int32_t)ntohl(sav->spi), - (u_int32_t)ntohl(sa0->sadb_sa_spi))); - return key_senderror(so, m, EINVAL); - } -#endif + /* + * Check that SADB_UPDATE issued by the same process that did + * SADB_GETSPI or SADB_ADD. + */ if (sav->pid != mhp->msg->sadb_msg_pid) { - ipseclog((LOG_DEBUG, "%s: pid mismatched (DB:%u param:%u)\n", - __func__, sav->pid, mhp->msg->sadb_msg_pid)); + ipseclog((LOG_DEBUG, + "%s: pid mismatched (SPI %u, pid %u vs. %u)\n", __func__, + ntohl(sav->spi), sav->pid, mhp->msg->sadb_msg_pid)); + key_freesav(&sav); return key_senderror(so, m, EINVAL); } - - /* copy sav values */ - error = key_setsaval(sav, m, mhp); - if (error) { - KEY_FREESAV(&sav); - return key_senderror(so, m, error); + /* saidx should match with SA. */ + if (key_cmpsaidx(&sav->sah->saidx, &saidx, CMP_MODE_REQID) == 0) { + ipseclog((LOG_DEBUG, "%s: saidx mismatched for SPI %u", + __func__, ntohl(sav->spi))); + key_freesav(&sav); + return key_senderror(so, m, ESRCH); } -#ifdef IPSEC_NAT_T - /* - * Handle more NAT-T info if present, - * now that we have a sav to fill. - */ - if (type) - sav->natt_type = type->sadb_x_nat_t_type_type; - - if (sport) - KEY_PORTTOSADDR(&sav->sah->saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&sav->sah->saidx.dst, - dport->sadb_x_nat_t_port_port); - -#if 0 - /* - * In case SADB_X_EXT_NAT_T_FRAG was not given, leave it at 0. - * We should actually check for a minimum MTU here, if we - * want to support it in ip_output. - */ - if (frag) - sav->natt_esp_frag_len = frag->sadb_x_nat_t_frag_fraglen; -#endif -#endif - - /* check SA values to be mature. */ - if ((mhp->msg->sadb_msg_errno = key_mature(sav)) != 0) { - KEY_FREESAV(&sav); - return key_senderror(so, m, 0); + if (sav->state == SADB_SASTATE_LARVAL) { + if ((mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && + SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT)) || + (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && + SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); + key_freesav(&sav); + return key_senderror(so, m, EINVAL); + } + /* + * We can set any values except src, dst and SPI. + */ + error = key_setsaval(sav, mhp); + if (error != 0) { + key_freesav(&sav); + return (key_senderror(so, m, error)); + } + /* Change SA state to MATURE */ + SAHTREE_WLOCK(); + if (sav->state != SADB_SASTATE_LARVAL) { + /* SA was deleted or another thread made it MATURE. */ + SAHTREE_WUNLOCK(); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); + } + /* + * NOTE: we keep SAs in savtree_alive ordered by created + * time. When SA's state changed from LARVAL to MATURE, + * we update its created time in key_setsaval() and move + * it into head of savtree_alive. + */ + TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); + TAILQ_INSERT_HEAD(&sav->sah->savtree_alive, sav, chain); + sav->state = SADB_SASTATE_MATURE; + SAHTREE_WUNLOCK(); + } else { + /* + * For DYING and MATURE SA we can change only state + * and lifetimes. Report EINVAL if something else attempted + * to change. + */ + if (!SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT) || + !SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH)) { + key_freesav(&sav); + return (key_senderror(so, m, EINVAL)); + } + error = key_updatelifetimes(sav, mhp); + if (error != 0) { + key_freesav(&sav); + return (key_senderror(so, m, error)); + } + /* + * This is FreeBSD extension to RFC2367. + * IKEd can specify SADB_X_EXT_NEW_ADDRESS_SRC and/or + * SADB_X_EXT_NEW_ADDRESS_DST when it wants to change + * SA addresses (for example to implement MOBIKE protocol + * as described in RFC4555). Also we allow to change + * NAT-T config. + */ + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_SRC) || + !SADB_CHECKHDR(mhp, SADB_X_EXT_NEW_ADDRESS_DST) || + !SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_TYPE) || + sav->natt != NULL) { + error = key_updateaddresses(so, m, mhp, sav, &saidx); + key_freesav(&sav); + if (error != 0) + return (key_senderror(so, m, error)); + return (0); + } + /* Check that SA is still alive */ + SAHTREE_WLOCK(); + if (sav->state == SADB_SASTATE_DEAD) { + /* SA was unlinked */ + SAHTREE_WUNLOCK(); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); + } + /* + * NOTE: there is possible state moving from DYING to MATURE, + * but this doesn't change created time, so we won't reorder + * this SA. + */ + sav->state = SADB_SASTATE_MATURE; + SAHTREE_WUNLOCK(); } + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + key_freesav(&sav); { struct mbuf *n; @@ -5209,40 +5367,6 @@ } /* - * search SAD with sequence for a SA which state is SADB_SASTATE_LARVAL. - * only called by key_update(). - * OUT: - * NULL : not found - * others : found, pointer to a SA. - */ -#ifdef IPSEC_DOSEQCHECK -static struct secasvar * -key_getsavbyseq(struct secashead *sah, u_int32_t seq) -{ - struct secasvar *sav; - u_int state; - - state = SADB_SASTATE_LARVAL; - - /* search SAD with sequence number ? */ - LIST_FOREACH(sav, &sah->savtree[state], chain) { - - KEY_CHKSASTATE(state, sav->state, __func__); - - if (sav->seq == seq) { - sa_addref(sav); - KEYDEBUG(KEYDEBUG_IPSEC_STAMP, - printf("DP %s cause refcnt++:%d SA:%p\n", - __func__, sav->refcnt, sav)); - return sav; - } - } - - return NULL; -} -#endif - -/* * SADB_ADD processing * add an entry to SA database, when received * msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_SA] == NULL || - mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && - mhp->ext[SADB_EXT_KEY_ENCRYPT] == NULL) || - (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && - mhp->ext[SADB_EXT_KEY_AUTH] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] != NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] == NULL) || - (mhp->ext[SADB_EXT_LIFETIME_HARD] == NULL && - mhp->ext[SADB_EXT_LIFETIME_SOFT] != NULL)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_SA) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + (mhp->msg->sadb_msg_satype == SADB_SATYPE_ESP && ( + SADB_CHECKHDR(mhp, SADB_EXT_KEY_ENCRYPT) || + SADB_CHECKLEN(mhp, SADB_EXT_KEY_ENCRYPT))) || + (mhp->msg->sadb_msg_satype == SADB_SATYPE_AH && ( + SADB_CHECKHDR(mhp, SADB_EXT_KEY_AUTH) || + SADB_CHECKLEN(mhp, SADB_EXT_KEY_AUTH))) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT)) || + (SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_SOFT) && + !SADB_CHECKHDR(mhp, SADB_EXT_LIFETIME_HARD))) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || - mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - /* XXX need more */ - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_SA) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_X_EXT_SA2] != NULL) { - mode = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; - reqid = ((struct sadb_x_sa2 *)mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; - } else { + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { mode = IPSEC_MODE_ANY; reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return key_senderror(so, m, EINVAL); + } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; } sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. + * Only SADB_SASTATE_MATURE SAs may be submitted in an + * SADB_ADD message. */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T + if (sa0->sadb_sa_state != SADB_SASTATE_MATURE) { + ipseclog((LOG_DEBUG, "%s: invalid state.\n", __func__)); +#ifdef PFKEY_STRICT_CHECKS + return key_senderror(so, m, EINVAL); +#endif + } + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, error); + } + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); + spi = sa0->sadb_sa_spi; /* - * Handle NAT-T info if present. + * For TCP-MD5 SAs we don't use SPI. Check the uniqueness using + * secasindex. + * XXXAE: IPComp seems also doesn't use SPI. */ - if (mhp->ext[SADB_X_EXT_NAT_T_TYPE] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_TYPE] < sizeof(*type) || - mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", + if (proto == IPPROTO_TCP) { + sav = key_getsav_tcpmd5(&saidx, &spi); + if (sav == NULL && spi == 0) { + /* Failed to allocate SPI */ + ipseclog((LOG_DEBUG, "%s: SA already exists.\n", __func__)); - return key_senderror(so, m, EINVAL); + return key_senderror(so, m, EEXIST); } - - type = (struct sadb_x_nat_t_type *) - mhp->ext[SADB_X_EXT_NAT_T_TYPE]; - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); + /* XXX: SPI that we report back can have another value */ } else { - type = NULL; + /* We can create new SA only if SPI is different. */ + sav = key_getsavbyspi(spi); } - if (mhp->ext[SADB_X_EXT_NAT_T_OAI] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_OAR] != NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_OAI] < sizeof(*iaddr) || - mhp->extlen[SADB_X_EXT_NAT_T_OAR] < sizeof(*raddr)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - iaddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; - raddr = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; - ipseclog((LOG_DEBUG, "%s: NAT-T OAi/r present\n", __func__)); - } else { - iaddr = raddr = NULL; - } - if (mhp->ext[SADB_X_EXT_NAT_T_FRAG] != NULL) { - if (mhp->extlen[SADB_X_EXT_NAT_T_FRAG] < sizeof(*frag)) { - ipseclog((LOG_DEBUG, "%s: invalid message\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - frag = (struct sadb_x_nat_t_frag *) - mhp->ext[SADB_X_EXT_NAT_T_FRAG]; - } else { - frag = NULL; - } -#endif - - /* get a SA header */ - if ((newsah = key_getsah(&saidx)) == NULL) { - /* create a new SA header */ - if ((newsah = key_newsah(&saidx)) == NULL) { - ipseclog((LOG_DEBUG, "%s: No more memory.\n",__func__)); - return key_senderror(so, m, ENOBUFS); - } - } - - /* set spidx if there */ - /* XXX rewrite */ - error = key_setident(newsah, m, mhp); - if (error) { - return key_senderror(so, m, error); - } - - /* create new SA entry. */ - /* We can create new SA only if SPI is differenct. */ - SAHTREE_LOCK(); - newsav = key_getsavbyspi(newsah, sa0->sadb_sa_spi); - SAHTREE_UNLOCK(); - if (newsav != NULL) { + if (sav != NULL) { + key_freesav(&sav); ipseclog((LOG_DEBUG, "%s: SA already exists.\n", __func__)); return key_senderror(so, m, EEXIST); } - newsav = KEY_NEWSAV(m, mhp, newsah, &error); - if (newsav == NULL) { - return key_senderror(so, m, error); - } -#ifdef IPSEC_NAT_T - /* - * Handle more NAT-T info if present, - * now that we have a sav to fill. - */ - if (type) - newsav->natt_type = type->sadb_x_nat_t_type_type; - -#if 0 - /* - * In case SADB_X_EXT_NAT_T_FRAG was not given, leave it at 0. - * We should actually check for a minimum MTU here, if we - * want to support it in ip_output. - */ - if (frag) - newsav->natt_esp_frag_len = frag->sadb_x_nat_t_frag_fraglen; -#endif -#endif - - /* check SA values to be mature. */ - if ((error = key_mature(newsav)) != 0) { - KEY_FREESAV(&newsav); + sav = key_newsav(mhp, &saidx, spi, &error); + if (sav == NULL) return key_senderror(so, m, error); - } - + KEYDBG(KEY_STAMP, + printf("%s: return SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); /* - * don't call key_freesav() here, as we would like to keep the SA - * in the database on success. + * If SADB_ADD was in response to SADB_ACQUIRE, we need to schedule + * ACQ for deletion. */ + if (sav->seq != 0) + key_acqdone(&saidx, sav->seq); { + /* + * Don't call key_freesav() on error here, as we would like to + * keep the SA in the database. + */ struct mbuf *n; /* set msg buf from mhp */ @@ -5467,31 +5523,199 @@ } } -/* m is retained */ +/* + * NAT-T support. + * IKEd may request the use ESP in UDP encapsulation when it detects the + * presence of NAT. It uses NAT-T extension headers for such SAs to specify + * parameters needed for encapsulation and decapsulation. These PF_KEY + * extension headers are not standardized, so this comment addresses our + * implementation. + * SADB_X_EXT_NAT_T_TYPE specifies type of encapsulation, we support only + * UDP_ENCAP_ESPINUDP as described in RFC3948. + * SADB_X_EXT_NAT_T_SPORT/DPORT specifies source and destination ports for + * UDP header. We use these ports in UDP encapsulation procedure, also we + * can check them in UDP decapsulation procedure. + * SADB_X_EXT_NAT_T_OA[IR] specifies original address of initiator or + * responder. These addresses can be used for transport mode to adjust + * checksum after decapsulation and decryption. Since original IP addresses + * used by peer usually different (we detected presence of NAT), TCP/UDP + * pseudo header checksum and IP header checksum was calculated using original + * addresses. After decapsulation and decryption we need to adjust checksum + * to have correct datagram. + * + * We expect presence of NAT-T extension headers only in SADB_ADD and + * SADB_UPDATE messages. We report NAT-T extension headers in replies + * to SADB_ADD, SADB_UPDATE, SADB_GET, and SADB_DUMP messages. + */ static int -key_setident(struct secashead *sah, struct mbuf *m, - const struct sadb_msghdr *mhp) +key_setnatt(struct secasvar *sav, const struct sadb_msghdr *mhp) { + struct sadb_x_nat_t_port *port; + struct sadb_x_nat_t_type *type; + struct sadb_address *oai, *oar; + struct sockaddr *sa; + uint32_t addr; + uint16_t cksum; + + IPSEC_ASSERT(sav->natt == NULL, ("natt is already initialized")); + /* + * Ignore NAT-T headers if sproto isn't ESP. + */ + if (sav->sah->saidx.proto != IPPROTO_ESP) + return (0); + + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_TYPE) && + !SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_SPORT) && + !SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_DPORT)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_TYPE) || + SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_SPORT) || + SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_DPORT)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return (EINVAL); + } + } else + return (0); + + type = (struct sadb_x_nat_t_type *)mhp->ext[SADB_X_EXT_NAT_T_TYPE]; + if (type->sadb_x_nat_t_type_type != UDP_ENCAP_ESPINUDP) { + ipseclog((LOG_DEBUG, "%s: unsupported NAT-T type %u.\n", + __func__, type->sadb_x_nat_t_type_type)); + return (EINVAL); + } + /* + * Allocate storage for NAT-T config. + * On error it will be released by key_cleansav(). + */ + sav->natt = malloc(sizeof(struct secnatt), M_IPSEC_MISC, + M_NOWAIT | M_ZERO); + if (sav->natt == NULL) { + PFKEYSTAT_INC(in_nomem); + ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); + return (ENOBUFS); + } + port = (struct sadb_x_nat_t_port *)mhp->ext[SADB_X_EXT_NAT_T_SPORT]; + if (port->sadb_x_nat_t_port_port == 0) { + ipseclog((LOG_DEBUG, "%s: invalid NAT-T sport specified.\n", + __func__)); + return (EINVAL); + } + sav->natt->sport = port->sadb_x_nat_t_port_port; + port = (struct sadb_x_nat_t_port *)mhp->ext[SADB_X_EXT_NAT_T_DPORT]; + if (port->sadb_x_nat_t_port_port == 0) { + ipseclog((LOG_DEBUG, "%s: invalid NAT-T dport specified.\n", + __func__)); + return (EINVAL); + } + sav->natt->dport = port->sadb_x_nat_t_port_port; + + /* + * SADB_X_EXT_NAT_T_OAI and SADB_X_EXT_NAT_T_OAR are optional + * and needed only for transport mode IPsec. + * Usually NAT translates only one address, but it is possible, + * that both addresses could be translated. + * NOTE: Value of SADB_X_EXT_NAT_T_OAI is equal to SADB_X_EXT_NAT_T_OA. + */ + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_OAI)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_OAI)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return (EINVAL); + } + oai = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAI]; + } else + oai = NULL; + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_NAT_T_OAR)) { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_NAT_T_OAR)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", + __func__)); + return (EINVAL); + } + oar = (struct sadb_address *)mhp->ext[SADB_X_EXT_NAT_T_OAR]; + } else + oar = NULL; + + /* Initialize addresses only for transport mode */ + if (sav->sah->saidx.mode != IPSEC_MODE_TUNNEL) { + cksum = 0; + if (oai != NULL) { + /* Currently we support only AF_INET */ + sa = (struct sockaddr *)(oai + 1); + if (sa->sa_family != AF_INET || + sa->sa_len != sizeof(struct sockaddr_in)) { + ipseclog((LOG_DEBUG, + "%s: wrong NAT-OAi header.\n", + __func__)); + return (EINVAL); + } + /* Ignore address if it the same */ + if (((struct sockaddr_in *)sa)->sin_addr.s_addr != + sav->sah->saidx.src.sin.sin_addr.s_addr) { + bcopy(sa, &sav->natt->oai.sa, sa->sa_len); + sav->natt->flags |= IPSEC_NATT_F_OAI; + /* Calculate checksum delta */ + addr = sav->sah->saidx.src.sin.sin_addr.s_addr; + cksum = in_addword(cksum, ~addr >> 16); + cksum = in_addword(cksum, ~addr & 0xffff); + addr = sav->natt->oai.sin.sin_addr.s_addr; + cksum = in_addword(cksum, addr >> 16); + cksum = in_addword(cksum, addr & 0xffff); + } + } + if (oar != NULL) { + /* Currently we support only AF_INET */ + sa = (struct sockaddr *)(oar + 1); + if (sa->sa_family != AF_INET || + sa->sa_len != sizeof(struct sockaddr_in)) { + ipseclog((LOG_DEBUG, + "%s: wrong NAT-OAr header.\n", + __func__)); + return (EINVAL); + } + /* Ignore address if it the same */ + if (((struct sockaddr_in *)sa)->sin_addr.s_addr != + sav->sah->saidx.dst.sin.sin_addr.s_addr) { + bcopy(sa, &sav->natt->oar.sa, sa->sa_len); + sav->natt->flags |= IPSEC_NATT_F_OAR; + /* Calculate checksum delta */ + addr = sav->sah->saidx.dst.sin.sin_addr.s_addr; + cksum = in_addword(cksum, ~addr >> 16); + cksum = in_addword(cksum, ~addr & 0xffff); + addr = sav->natt->oar.sin.sin_addr.s_addr; + cksum = in_addword(cksum, addr >> 16); + cksum = in_addword(cksum, addr & 0xffff); + } + } + sav->natt->cksum = cksum; + } + return (0); +} + +static int +key_setident(struct secashead *sah, const struct sadb_msghdr *mhp) +{ const struct sadb_ident *idsrc, *iddst; int idsrclen, iddstlen; IPSEC_ASSERT(sah != NULL, ("null secashead")); - IPSEC_ASSERT(m != NULL, ("null mbuf")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* don't make buffer if not there */ - if (mhp->ext[SADB_EXT_IDENTITY_SRC] == NULL && - mhp->ext[SADB_EXT_IDENTITY_DST] == NULL) { + if (SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_SRC) && + SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_DST)) { sah->idents = NULL; sah->identd = NULL; - return 0; + return (0); } - - if (mhp->ext[SADB_EXT_IDENTITY_SRC] == NULL || - mhp->ext[SADB_EXT_IDENTITY_DST] == NULL) { + + if (SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_IDENTITY_DST)) { ipseclog((LOG_DEBUG, "%s: invalid identity.\n", __func__)); - return EINVAL; + return (EINVAL); } idsrc = (const struct sadb_ident *)mhp->ext[SADB_EXT_IDENTITY_SRC]; @@ -5540,7 +5764,10 @@ /* * m will not be freed on return. - * it is caller's responsibility to free the result. + * it is caller's responsibility to free the result. + * + * Called from SADB_ADD and SADB_UPDATE. Reply will contain headers + * from the request in defined order. */ static struct mbuf * key_getmsgbuf_x1(struct mbuf *m, const struct sadb_msghdr *mhp) @@ -5552,11 +5779,15 @@ IPSEC_ASSERT(mhp->msg != NULL, ("null msg")); /* create new sadb_msg to reply. */ - n = key_gather_mbuf(m, mhp, 1, 9, SADB_EXT_RESERVED, + n = key_gather_mbuf(m, mhp, 1, 16, SADB_EXT_RESERVED, SADB_EXT_SA, SADB_X_EXT_SA2, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST, SADB_EXT_LIFETIME_HARD, SADB_EXT_LIFETIME_SOFT, - SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST); + SADB_EXT_IDENTITY_SRC, SADB_EXT_IDENTITY_DST, + SADB_X_EXT_NAT_T_TYPE, SADB_X_EXT_NAT_T_SPORT, + SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, + SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NEW_ADDRESS_SRC, + SADB_X_EXT_NEW_ADDRESS_DST); if (!n) return NULL; @@ -5586,12 +5817,11 @@ static int key_delete(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct sadb_sa *sa0; - struct sadb_address *src0, *dst0; struct secasindex saidx; - struct secashead *sah; - struct secasvar *sav = NULL; - u_int16_t proto; + struct sadb_address *src0, *dst0; + struct secasvar *sav; + struct sadb_sa *sa0; + uint8_t proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -5601,110 +5831,70 @@ /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } + src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); + dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - if (mhp->ext[SADB_EXT_SA] == NULL) { + if (key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)) != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return (key_senderror(so, m, EINVAL)); + } + KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); + if (SADB_CHECKHDR(mhp, SADB_EXT_SA)) { /* * Caller wants us to delete all non-LARVAL SAs * that match the src/dst. This is used during * IKE INITIAL-CONTACT. + * XXXAE: this looks like some extension to RFC2367. */ ipseclog((LOG_DEBUG, "%s: doing delete all.\n", __func__)); - return key_delete_all(so, m, mhp, proto); - } else if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); - return key_senderror(so, m, EINVAL); + return (key_delete_all(so, m, mhp, &saidx)); } - + if (SADB_CHECKLEN(mhp, SADB_EXT_SA)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); + return (key_senderror(so, m, EINVAL)); + } sa0 = (struct sadb_sa *)mhp->ext[SADB_EXT_SA]; - src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); - dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); + if (proto == IPPROTO_TCP) + sav = key_getsav_tcpmd5(&saidx, NULL); + else + sav = key_getsavbyspi(sa0->sadb_sa_spi); + if (sav == NULL) { + ipseclog((LOG_DEBUG, "%s: no SA found for SPI %u.\n", + __func__, ntohl(sa0->sadb_sa_spi))); + return (key_senderror(so, m, ESRCH)); } -#endif - - /* get a SA header */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) - continue; - - /* get a SA with SPI. */ - sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); - if (sav) - break; + if (key_cmpsaidx(&sav->sah->saidx, &saidx, CMP_HEAD) == 0) { + ipseclog((LOG_DEBUG, "%s: saidx mismatched for SPI %u.\n", + __func__, ntohl(sav->spi))); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); } - if (sah == NULL) { - SAHTREE_UNLOCK(); - ipseclog((LOG_DEBUG, "%s: no SA found.\n", __func__)); - return key_senderror(so, m, ENOENT); - } + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + key_unlinksav(sav); + key_freesav(&sav); - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - KEY_FREESAV(&sav); - SAHTREE_UNLOCK(); - { struct mbuf *n; struct sadb_msg *newmsg; /* create new sadb_msg to reply. */ - /* XXX-BZ NAT-T extensions? */ n = key_gather_mbuf(m, mhp, 1, 4, SADB_EXT_RESERVED, SADB_EXT_SA, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) @@ -5729,94 +5919,43 @@ */ static int key_delete_all(struct socket *so, struct mbuf *m, - const struct sadb_msghdr *mhp, u_int16_t proto) + const struct sadb_msghdr *mhp, struct secasindex *saidx) { - struct sadb_address *src0, *dst0; - struct secasindex saidx; + struct secasvar_queue drainq; struct secashead *sah; struct secasvar *sav, *nextsav; - u_int stateidx, state; - src0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_SRC]); - dst0 = (struct sadb_address *)(mhp->ext[SADB_EXT_ADDRESS_DST]); - - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); - } -#endif - - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) + TAILQ_INIT(&drainq); + SAHTREE_WLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(saidx), addrhash) { + if (key_cmpsaidx(&sah->saidx, saidx, CMP_HEAD) == 0) continue; - if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) - continue; - - /* Delete all non-LARVAL SAs. */ - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_alive); - stateidx++) { - state = saorder_state_alive[stateidx]; - if (state == SADB_SASTATE_LARVAL) - continue; - for (sav = LIST_FIRST(&sah->savtree[state]); - sav != NULL; sav = nextsav) { - nextsav = LIST_NEXT(sav, chain); - /* sanity check */ - if (sav->state != state) { - ipseclog((LOG_DEBUG, "%s: invalid " - "sav->state (queue %d SA %d)\n", - __func__, state, sav->state)); - continue; - } - - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - KEY_FREESAV(&sav); - } - } + /* Move all ALIVE SAs into drainq */ + TAILQ_CONCAT(&drainq, &sah->savtree_alive, chain); } - SAHTREE_UNLOCK(); + /* Unlink all queued SAs from SPI hash */ + TAILQ_FOREACH(sav, &drainq, chain) { + sav->state = SADB_SASTATE_DEAD; + LIST_REMOVE(sav, spihash); + } + SAHTREE_WUNLOCK(); + /* Now we can release reference for all SAs in drainq */ + sav = TAILQ_FIRST(&drainq); + while (sav != NULL) { + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + nextsav = TAILQ_NEXT(sav, chain); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; + } + { struct mbuf *n; struct sadb_msg *newmsg; /* create new sadb_msg to reply. */ - /* XXX-BZ NAT-T extensions? */ n = key_gather_mbuf(m, mhp, 1, 3, SADB_EXT_RESERVED, SADB_EXT_ADDRESS_SRC, SADB_EXT_ADDRESS_DST); if (!n) @@ -5837,6 +5976,52 @@ } /* + * Delete all alive SAs for corresponding xform. + * Larval SAs have not initialized tdb_xform, so it is safe to leave them + * here when xform disappears. + */ +static void +key_delete_xform(const struct xformsw *xsp) +{ + struct secasvar_queue drainq; + struct secashead *sah; + struct secasvar *sav, *nextsav; + + TAILQ_INIT(&drainq); + SAHTREE_WLOCK(); + TAILQ_FOREACH(sah, &V_sahtree, chain) { + sav = TAILQ_FIRST(&sah->savtree_alive); + if (sav == NULL) + continue; + if (sav->tdb_xform != xsp) + continue; + /* + * It is supposed that all SAs in the chain are related to + * one xform. + */ + TAILQ_CONCAT(&drainq, &sah->savtree_alive, chain); + } + /* Unlink all queued SAs from SPI hash */ + TAILQ_FOREACH(sav, &drainq, chain) { + sav->state = SADB_SASTATE_DEAD; + LIST_REMOVE(sav, spihash); + } + SAHTREE_WUNLOCK(); + + /* Now we can release reference for all SAs in drainq */ + sav = TAILQ_FIRST(&drainq); + while (sav != NULL) { + KEYDBG(KEY_STAMP, + printf("%s: SA(%p)\n", __func__, sav)); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); + nextsav = TAILQ_NEXT(sav, chain); + key_freesah(&sav->sah); /* release reference from SAV */ + key_freesav(&sav); /* release last reference */ + sav = nextsav; + } +} + +/* * SADB_GET processing * receive * @@ -5851,12 +6036,11 @@ static int key_get(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - struct sadb_sa *sa0; - struct sadb_address *src0, *dst0; struct secasindex saidx; - struct secashead *sah; - struct secasvar *sav = NULL; - u_int16_t proto; + struct sadb_address *src0, *dst0; + struct sadb_sa *sa0; + struct secasvar *sav; + uint8_t proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -5870,18 +6054,19 @@ return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_SA] == NULL || - mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_SA) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_SA] < sizeof(struct sadb_sa) || - mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address)) { - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_SA) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } @@ -5889,79 +6074,45 @@ src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - /* XXX boundary check against sa_len */ + if (key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)) != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, EINVAL); + } KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifdef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", - __func__)); - return key_senderror(so, m, EINVAL); - } - - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; - - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); - } -#endif - - /* get a SA header */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; - if (key_cmpsaidx(&sah->saidx, &saidx, CMP_HEAD) == 0) - continue; - - /* get a SA with SPI. */ - sav = key_getsavbyspi(sah, sa0->sadb_sa_spi); - if (sav) - break; - } - SAHTREE_UNLOCK(); - if (sah == NULL) { + if (proto == IPPROTO_TCP) + sav = key_getsav_tcpmd5(&saidx, NULL); + else + sav = key_getsavbyspi(sa0->sadb_sa_spi); + if (sav == NULL) { ipseclog((LOG_DEBUG, "%s: no SA found.\n", __func__)); - return key_senderror(so, m, ENOENT); + return key_senderror(so, m, ESRCH); } + if (key_cmpsaidx(&sav->sah->saidx, &saidx, CMP_HEAD) == 0) { + ipseclog((LOG_DEBUG, "%s: saidx mismatched for SPI %u.\n", + __func__, ntohl(sa0->sadb_sa_spi))); + key_freesav(&sav); + return (key_senderror(so, m, ESRCH)); + } { struct mbuf *n; - u_int8_t satype; + uint8_t satype; /* map proto to satype */ - if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { + if ((satype = key_proto2satype(sav->sah->saidx.proto)) == 0) { ipseclog((LOG_DEBUG, "%s: there was invalid proto in SAD.\n", - __func__)); + __func__)); + key_freesav(&sav); return key_senderror(so, m, EINVAL); } /* create new sadb_msg to reply. */ n = key_setdumpsa(sav, SADB_GET, satype, mhp->msg->sadb_msg_seq, mhp->msg->sadb_msg_pid); + + key_freesav(&sav); if (!n) return key_senderror(so, m, ENOBUFS); @@ -5990,10 +6141,10 @@ * XXX no idea if the user wants ESP authentication or not */ static struct mbuf * -key_getcomb_esp() +key_getcomb_ealg(void) { struct sadb_comb *comb; - struct enc_xform *algo; + const struct enc_xform *algo; struct mbuf *result = NULL, *m, *n; int encmin; int i, off, o; @@ -6002,7 +6153,7 @@ m = NULL; for (i = 1; i <= SADB_EALG_MAX; i++) { - algo = esp_algorithm_lookup(i); + algo = enc_algorithm_lookup(i); if (algo == NULL) continue; @@ -6095,8 +6246,8 @@ static struct mbuf * key_getcomb_ah() { + const struct auth_hash *algo; struct sadb_comb *comb; - struct auth_hash *algo; struct mbuf *m; u_int16_t minkeysize, maxkeysize; int i; @@ -6113,7 +6264,7 @@ i != SADB_X_AALG_SHA2_512) continue; #endif - algo = ah_algorithm_lookup(i); + algo = auth_algorithm_lookup(i); if (!algo) continue; key_getsizes_ah(algo, i, &minkeysize, &maxkeysize); @@ -6153,15 +6304,15 @@ static struct mbuf * key_getcomb_ipcomp() { + const struct comp_algo *algo; struct sadb_comb *comb; - struct comp_algo *algo; struct mbuf *m; int i; const int l = PFKEY_ALIGN8(sizeof(struct sadb_comb)); m = NULL; for (i = 1; i <= SADB_X_CALG_MAX; i++) { - algo = ipcomp_algorithm_lookup(i); + algo = comp_algorithm_lookup(i); if (!algo) continue; @@ -6204,7 +6355,7 @@ switch (saidx->proto) { case IPPROTO_ESP: - m = key_getcomb_esp(); + m = key_getcomb_ealg(); break; case IPPROTO_AH: m = key_getcomb_ah(); @@ -6260,11 +6411,10 @@ { union sockaddr_union addr; struct mbuf *result, *m; - struct secacq *newacq; - u_int32_t seq; + uint32_t seq; int error; - u_int16_t ul_proto; - u_int8_t mask, satype; + uint16_t ul_proto; + uint8_t mask, satype; IPSEC_ASSERT(saidx != NULL, ("null saidx")); satype = key_proto2satype(saidx->proto); @@ -6273,30 +6423,12 @@ error = -1; result = NULL; ul_proto = IPSEC_ULPROTO_ANY; - /* - * We never do anything about acquirng SA. There is anather - * solution that kernel blocks to send SADB_ACQUIRE message until - * getting something message from IKEd. In later case, to be - * managed with ACQUIRING list. - */ - /* Get an entry to check whether sending message or not. */ - if ((newacq = key_getacq(saidx)) != NULL) { - if (V_key_blockacq_count < newacq->count) { - /* reset counter and do send message. */ - newacq->count = 0; - } else { - /* increment counter and do nothing. */ - newacq->count++; - return 0; - } - } else { - /* make new entry for blocking to send SADB_ACQUIRE. */ - if ((newacq = key_newacq(saidx)) == NULL) - return ENOBUFS; - } + /* Get seq number to check whether sending message or not. */ + seq = key_getacq(saidx, &error); + if (seq == 0) + return (error); - seq = newacq->seq; m = key_setsadbmsg(SADB_ACQUIRE, 0, satype, seq, 0, 0); if (!m) { error = ENOBUFS; @@ -6305,15 +6437,14 @@ result = m; /* - * No SADB_X_EXT_NAT_T_* here: we do not know - * anything related to NAT-T at this time. - */ - - /* * set sadb_address for saidx's. * * Note that if sp is supplied, then we're being called from - * key_checkrequest and should supply port and protocol information. + * key_allocsa_policy() and should supply port and protocol + * information. + * XXXAE: why only TCP and UDP? ICMP and SCTP looks applicable too. + * XXXAE: probably we can handle this in the ipsec[46]_allocsa(). + * XXXAE: it looks like we should save this info in the ACQ entry. */ if (sp != NULL && (sp->spidx.ul_proto == IPPROTO_TCP || sp->spidx.ul_proto == IPPROTO_UDP)) @@ -6331,7 +6462,8 @@ break; case AF_INET6: if (sp->spidx.src.sin6.sin6_port != IPSEC_PORT_ANY) { - addr.sin6.sin6_port = sp->spidx.src.sin6.sin6_port; + addr.sin6.sin6_port = + sp->spidx.src.sin6.sin6_port; mask = sp->spidx.prefs; } break; @@ -6358,7 +6490,8 @@ break; case AF_INET6: if (sp->spidx.dst.sin6.sin6_port != IPSEC_PORT_ANY) { - addr.sin6.sin6_port = sp->spidx.dst.sin6.sin6_port; + addr.sin6.sin6_port = + sp->spidx.dst.sin6.sin6_port; mask = sp->spidx.prefd; } break; @@ -6376,8 +6509,9 @@ /* XXX proxy address (optional) */ /* set sadb_x_policy */ - if (sp) { - m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id, sp->priority); + if (sp != NULL) { + m = key_setsadbxpolicy(sp->policy, sp->spidx.dir, sp->id, + sp->priority); if (!m) { error = ENOBUFS; goto fail; @@ -6469,6 +6603,10 @@ mtod(result, struct sadb_msg *)->sadb_msg_len = PFKEY_UNIT64(result->m_pkthdr.len); + KEYDBG(KEY_STAMP, + printf("%s: SP(%p)\n", __func__, sp)); + KEYDBG(KEY_DATA, kdebug_secasindex(saidx, NULL)); + return key_sendup_mbuf(NULL, result, KEY_SENDUP_REGISTERED); fail: @@ -6477,60 +6615,122 @@ return error; } -static struct secacq * -key_newacq(const struct secasindex *saidx) +static uint32_t +key_newacq(const struct secasindex *saidx, int *perror) { - struct secacq *newacq; + struct secacq *acq; + uint32_t seq; - /* get new entry */ - newacq = malloc(sizeof(struct secacq), M_IPSEC_SAQ, M_NOWAIT|M_ZERO); - if (newacq == NULL) { + acq = malloc(sizeof(*acq), M_IPSEC_SAQ, M_NOWAIT | M_ZERO); + if (acq == NULL) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); - return NULL; + *perror = ENOBUFS; + return (0); } /* copy secindex */ - bcopy(saidx, &newacq->saidx, sizeof(newacq->saidx)); - newacq->seq = (V_acq_seq == ~0 ? 1 : ++V_acq_seq); - newacq->created = time_second; - newacq->count = 0; + bcopy(saidx, &acq->saidx, sizeof(acq->saidx)); + acq->created = time_second; + acq->count = 0; /* add to acqtree */ ACQ_LOCK(); - LIST_INSERT_HEAD(&V_acqtree, newacq, chain); + seq = acq->seq = (V_acq_seq == ~0 ? 1 : ++V_acq_seq); + LIST_INSERT_HEAD(&V_acqtree, acq, chain); + LIST_INSERT_HEAD(ACQADDRHASH_HASH(saidx), acq, addrhash); + LIST_INSERT_HEAD(ACQSEQHASH_HASH(seq), acq, seqhash); ACQ_UNLOCK(); - - return newacq; + *perror = 0; + return (seq); } -static struct secacq * -key_getacq(const struct secasindex *saidx) +static uint32_t +key_getacq(const struct secasindex *saidx, int *perror) { struct secacq *acq; + uint32_t seq; ACQ_LOCK(); - LIST_FOREACH(acq, &V_acqtree, chain) { - if (key_cmpsaidx(saidx, &acq->saidx, CMP_EXACTLY)) + LIST_FOREACH(acq, ACQADDRHASH_HASH(saidx), addrhash) { + if (key_cmpsaidx(&acq->saidx, saidx, CMP_EXACTLY)) { + if (acq->count > V_key_blockacq_count) { + /* + * Reset counter and send message. + * Also reset created time to keep ACQ for + * this saidx. + */ + acq->created = time_second; + acq->count = 0; + seq = acq->seq; + } else { + /* + * Increment counter and do nothing. + * We send SADB_ACQUIRE message only + * for each V_key_blockacq_count packet. + */ + acq->count++; + seq = 0; + } break; + } } ACQ_UNLOCK(); - - return acq; + if (acq != NULL) { + *perror = 0; + return (seq); + } + /* allocate new entry */ + return (key_newacq(saidx, perror)); } -static struct secacq * -key_getacqbyseq(u_int32_t seq) +static int +key_acqreset(uint32_t seq) { struct secacq *acq; ACQ_LOCK(); - LIST_FOREACH(acq, &V_acqtree, chain) { - if (acq->seq == seq) + LIST_FOREACH(acq, ACQSEQHASH_HASH(seq), seqhash) { + if (acq->seq == seq) { + acq->count = 0; + acq->created = time_second; break; + } } ACQ_UNLOCK(); + if (acq == NULL) + return (ESRCH); + return (0); +} +/* + * Mark ACQ entry as stale to remove it in key_flush_acq(). + * Called after successful SADB_GETSPI message. + */ +static int +key_acqdone(const struct secasindex *saidx, uint32_t seq) +{ + struct secacq *acq; - return acq; + ACQ_LOCK(); + LIST_FOREACH(acq, ACQSEQHASH_HASH(seq), seqhash) { + if (acq->seq == seq) + break; + } + if (acq != NULL) { + if (key_cmpsaidx(&acq->saidx, saidx, CMP_EXACTLY) == 0) { + ipseclog((LOG_DEBUG, + "%s: Mismatched saidx for ACQ %u", __func__, seq)); + acq = NULL; + } else { + acq->created = 0; + } + } else { + ipseclog((LOG_DEBUG, + "%s: ACQ %u is not found.", __func__, seq)); + } + ACQ_UNLOCK(); + if (acq == NULL) + return (ESRCH); + return (0); } static struct secspacq * @@ -6592,11 +6792,13 @@ static int key_acquire2(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { - const struct sadb_address *src0, *dst0; + SAHTREE_RLOCK_TRACKER; + struct sadb_address *src0, *dst0; struct secasindex saidx; struct secashead *sah; - u_int16_t proto; + uint32_t reqid; int error; + uint8_t mode, proto; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -6610,30 +6812,23 @@ * We do not raise error even if error occurred in this function. */ if (mhp->msg->sadb_msg_len == PFKEY_UNIT64(sizeof(struct sadb_msg))) { - struct secacq *acq; - /* check sequence number */ - if (mhp->msg->sadb_msg_seq == 0) { + if (mhp->msg->sadb_msg_seq == 0 || + mhp->msg->sadb_msg_errno == 0) { ipseclog((LOG_DEBUG, "%s: must specify sequence " - "number.\n", __func__)); - m_freem(m); - return 0; - } - - if ((acq = key_getacqbyseq(mhp->msg->sadb_msg_seq)) == NULL) { + "number and errno.\n", __func__)); + } else { /* - * the specified larval SA is already gone, or we got - * a bogus sequence number. we can silently ignore it. + * IKEd reported that error occurred. + * XXXAE: what it expects from the kernel? + * Probably we should send SADB_ACQUIRE again? + * If so, reset ACQ's state. + * XXXAE: it looks useless. */ - m_freem(m); - return 0; + key_acqreset(mhp->msg->sadb_msg_seq); } - - /* reset acq counter in order to deletion by timehander. */ - acq->created = time_second; - acq->count = 0; m_freem(m); - return 0; + return (0); } /* @@ -6643,79 +6838,60 @@ /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->ext[SADB_EXT_ADDRESS_SRC] == NULL || - mhp->ext[SADB_EXT_ADDRESS_DST] == NULL || - mhp->ext[SADB_EXT_PROPOSAL] == NULL) { - /* error */ - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKHDR(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKHDR(mhp, SADB_EXT_PROPOSAL)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: missing required header.\n", + __func__)); return key_senderror(so, m, EINVAL); } - if (mhp->extlen[SADB_EXT_ADDRESS_SRC] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_ADDRESS_DST] < sizeof(struct sadb_address) || - mhp->extlen[SADB_EXT_PROPOSAL] < sizeof(struct sadb_prop)) { - /* error */ - ipseclog((LOG_DEBUG, "%s: invalid message is passed.\n", - __func__)); + if (SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_SRC) || + SADB_CHECKLEN(mhp, SADB_EXT_ADDRESS_DST) || + SADB_CHECKLEN(mhp, SADB_EXT_PROPOSAL)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } - src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; - dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - - /* XXX boundary check against sa_len */ - KEY_SETSECASIDX(proto, IPSEC_MODE_ANY, 0, src0 + 1, dst0 + 1, &saidx); - - /* - * Make sure the port numbers are zero. - * In case of NAT-T we will update them later if needed. - */ - KEY_PORTTOSADDR(&saidx.src, 0); - KEY_PORTTOSADDR(&saidx.dst, 0); - -#ifndef IPSEC_NAT_T - /* - * Handle NAT-T info if present. - */ - - if (mhp->ext[SADB_X_EXT_NAT_T_SPORT] != NULL && - mhp->ext[SADB_X_EXT_NAT_T_DPORT] != NULL) { - struct sadb_x_nat_t_port *sport, *dport; - - if (mhp->extlen[SADB_X_EXT_NAT_T_SPORT] < sizeof(*sport) || - mhp->extlen[SADB_X_EXT_NAT_T_DPORT] < sizeof(*dport)) { - ipseclog((LOG_DEBUG, "%s: invalid message.\n", + if (SADB_CHECKHDR(mhp, SADB_X_EXT_SA2)) { + mode = IPSEC_MODE_ANY; + reqid = 0; + } else { + if (SADB_CHECKLEN(mhp, SADB_X_EXT_SA2)) { + ipseclog((LOG_DEBUG, + "%s: invalid message: wrong header size.\n", __func__)); return key_senderror(so, m, EINVAL); } + mode = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_mode; + reqid = ((struct sadb_x_sa2 *) + mhp->ext[SADB_X_EXT_SA2])->sadb_x_sa2_reqid; + } - sport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_SPORT]; - dport = (struct sadb_x_nat_t_port *) - mhp->ext[SADB_X_EXT_NAT_T_DPORT]; + src0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_SRC]; + dst0 = (struct sadb_address *)mhp->ext[SADB_EXT_ADDRESS_DST]; - if (sport) - KEY_PORTTOSADDR(&saidx.src, - sport->sadb_x_nat_t_port_port); - if (dport) - KEY_PORTTOSADDR(&saidx.dst, - dport->sadb_x_nat_t_port_port); + error = key_checksockaddrs((struct sockaddr *)(src0 + 1), + (struct sockaddr *)(dst0 + 1)); + if (error != 0) { + ipseclog((LOG_DEBUG, "%s: invalid sockaddr.\n", __func__)); + return key_senderror(so, m, EINVAL); } -#endif + KEY_SETSECASIDX(proto, mode, reqid, src0 + 1, dst0 + 1, &saidx); /* get a SA index */ - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (sah->state == SADB_SASTATE_DEAD) - continue; + SAHTREE_RLOCK(); + LIST_FOREACH(sah, SAHADDRHASH_HASH(&saidx), addrhash) { if (key_cmpsaidx(&sah->saidx, &saidx, CMP_MODE_REQID)) break; } - SAHTREE_UNLOCK(); + SAHTREE_RUNLOCK(); if (sah != NULL) { ipseclog((LOG_DEBUG, "%s: a SA exists already.\n", __func__)); return key_senderror(so, m, EEXIST); @@ -6723,12 +6899,13 @@ error = key_acquire(&saidx, NULL); if (error != 0) { - ipseclog((LOG_DEBUG, "%s: error %d returned from key_acquire\n", - __func__, mhp->msg->sadb_msg_errno)); + ipseclog((LOG_DEBUG, + "%s: error %d returned from key_acquire()\n", + __func__, error)); return key_senderror(so, m, error); } - - return key_sendup_mbuf(so, m, KEY_SENDUP_REGISTERED); + m_freem(m); + return (0); } /* @@ -6801,14 +6978,14 @@ /* create new sadb_msg to reply. */ alen = 0; for (i = 1; i <= SADB_AALG_MAX; i++) { - if (ah_algorithm_lookup(i)) + if (auth_algorithm_lookup(i)) alen += sizeof(struct sadb_alg); } if (alen) alen += sizeof(struct sadb_supported); elen = 0; for (i = 1; i <= SADB_EALG_MAX; i++) { - if (esp_algorithm_lookup(i)) + if (enc_algorithm_lookup(i)) elen += sizeof(struct sadb_alg); } if (elen) @@ -6847,10 +7024,10 @@ off += PFKEY_ALIGN8(sizeof(*sup)); for (i = 1; i <= SADB_AALG_MAX; i++) { - struct auth_hash *aalgo; + const struct auth_hash *aalgo; u_int16_t minkeysize, maxkeysize; - aalgo = ah_algorithm_lookup(i); + aalgo = auth_algorithm_lookup(i); if (!aalgo) continue; alg = (struct sadb_alg *)(mtod(n, caddr_t) + off); @@ -6871,9 +7048,9 @@ off += PFKEY_ALIGN8(sizeof(*sup)); for (i = 1; i <= SADB_EALG_MAX; i++) { - struct enc_xform *ealgo; + const struct enc_xform *ealgo; - ealgo = esp_algorithm_lookup(i); + ealgo = enc_algorithm_lookup(i); if (!ealgo) continue; alg = (struct sadb_alg *)(mtod(n, caddr_t) + off); @@ -6936,16 +7113,19 @@ static int key_expire(struct secasvar *sav, int hard) { - int satype; struct mbuf *result = NULL, *m; - int len; - int error = -1; struct sadb_lifetime *lt; - u_int32_t replay_count; + uint32_t replay_count; + int error, len; + uint8_t satype; IPSEC_ASSERT (sav != NULL, ("null sav")); IPSEC_ASSERT (sav->sah != NULL, ("null sa header")); + KEYDBG(KEY_STAMP, + printf("%s: SA(%p) expired %s lifetime\n", __func__, + sav, hard ? "hard": "soft")); + KEYDBG(KEY_DATA, kdebug_secasv(sav)); /* set msg header */ satype = key_proto2satype(sav->sah->saidx.proto); IPSEC_ASSERT(satype != 0, ("invalid proto, satype %u", satype)); @@ -6999,10 +7179,12 @@ lt = mtod(m, struct sadb_lifetime *); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; - lt->sadb_lifetime_allocations = sav->lft_c->allocations; - lt->sadb_lifetime_bytes = sav->lft_c->bytes; - lt->sadb_lifetime_addtime = sav->lft_c->addtime; - lt->sadb_lifetime_usetime = sav->lft_c->usetime; + lt->sadb_lifetime_allocations = + (uint32_t)counter_u64_fetch(sav->lft_c_allocations); + lt->sadb_lifetime_bytes = + counter_u64_fetch(sav->lft_c_bytes); + lt->sadb_lifetime_addtime = sav->created; + lt->sadb_lifetime_usetime = sav->firstused; lt = (struct sadb_lifetime *)(mtod(m, caddr_t) + len / 2); lt->sadb_lifetime_len = PFKEY_UNIT64(sizeof(struct sadb_lifetime)); if (hard) { @@ -7042,6 +7224,8 @@ /* * XXX-BZ Handle NAT-T extensions here. + * XXXAE: it doesn't seem quite useful. IKEs should not depend on + * this information, we report only significant SA fields. */ if ((result->m_flags & M_PKTHDR) == 0) { @@ -7072,6 +7256,36 @@ return error; } +static void +key_freesah_flushed(struct secashead_queue *flushq) +{ + struct secashead *sah, *nextsah; + struct secasvar *sav, *nextsav; + + sah = TAILQ_FIRST(flushq); + while (sah != NULL) { + sav = TAILQ_FIRST(&sah->savtree_larval); + while (sav != NULL) { + nextsav = TAILQ_NEXT(sav, chain); + TAILQ_REMOVE(&sah->savtree_larval, sav, chain); + key_freesav(&sav); /* release last reference */ + key_freesah(&sah); /* release reference from SAV */ + sav = nextsav; + } + sav = TAILQ_FIRST(&sah->savtree_alive); + while (sav != NULL) { + nextsav = TAILQ_NEXT(sav, chain); + TAILQ_REMOVE(&sah->savtree_alive, sav, chain); + key_freesav(&sav); /* release last reference */ + key_freesah(&sah); /* release reference from SAV */ + sav = nextsav; + } + nextsah = TAILQ_NEXT(sah, chain); + key_freesah(&sah); /* release last reference */ + sah = nextsah; + } +} + /* * SADB_FLUSH processing * receive @@ -7087,12 +7301,12 @@ static int key_flush(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + struct secashead_queue flushq; struct sadb_msg *newmsg; struct secashead *sah, *nextsah; - struct secasvar *sav, *nextsav; - u_int16_t proto; - u_int8_t state; - u_int stateidx; + struct secasvar *sav; + uint8_t proto; + int i; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(mhp != NULL, ("null msghdr")); @@ -7104,37 +7318,71 @@ __func__)); return key_senderror(so, m, EINVAL); } + KEYDBG(KEY_STAMP, + printf("%s: proto %u\n", __func__, proto)); - /* no SATYPE specified, i.e. flushing all SA. */ - SAHTREE_LOCK(); - for (sah = LIST_FIRST(&V_sahtree); - sah != NULL; - sah = nextsah) { - nextsah = LIST_NEXT(sah, chain); - - if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC - && proto != sah->saidx.proto) - continue; - - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_alive); - stateidx++) { - state = saorder_state_any[stateidx]; - for (sav = LIST_FIRST(&sah->savtree[state]); - sav != NULL; - sav = nextsav) { - - nextsav = LIST_NEXT(sav, chain); - - key_sa_chgstate(sav, SADB_SASTATE_DEAD); - KEY_FREESAV(&sav); + TAILQ_INIT(&flushq); + if (proto == IPSEC_PROTO_ANY) { + /* no SATYPE specified, i.e. flushing all SA. */ + SAHTREE_WLOCK(); + /* Move all SAHs into flushq */ + TAILQ_CONCAT(&flushq, &V_sahtree, chain); + /* Flush all buckets in SPI hash */ + for (i = 0; i < V_savhash_mask + 1; i++) + LIST_INIT(&V_savhashtbl[i]); + /* Flush all buckets in SAHADDRHASH */ + for (i = 0; i < V_sahaddrhash_mask + 1; i++) + LIST_INIT(&V_sahaddrhashtbl[i]); + /* Mark all SAHs as unlinked */ + TAILQ_FOREACH(sah, &flushq, chain) { + sah->state = SADB_SASTATE_DEAD; + /* + * Callout handler makes its job using + * RLOCK and drain queues. In case, when this + * function will be called just before it + * acquires WLOCK, we need to mark SAs as + * unlinked to prevent second unlink. + */ + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + sav->state = SADB_SASTATE_DEAD; } + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + sav->state = SADB_SASTATE_DEAD; + } } - - sah->state = SADB_SASTATE_DEAD; + SAHTREE_WUNLOCK(); + } else { + SAHTREE_WLOCK(); + sah = TAILQ_FIRST(&V_sahtree); + while (sah != NULL) { + IPSEC_ASSERT(sah->state != SADB_SASTATE_DEAD, + ("DEAD SAH %p in SADB_FLUSH", sah)); + nextsah = TAILQ_NEXT(sah, chain); + if (sah->saidx.proto != proto) { + sah = nextsah; + continue; + } + sah->state = SADB_SASTATE_DEAD; + TAILQ_REMOVE(&V_sahtree, sah, chain); + LIST_REMOVE(sah, addrhash); + /* Unlink all SAs from SPI hash */ + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + } + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + LIST_REMOVE(sav, spihash); + sav->state = SADB_SASTATE_DEAD; + } + /* Add SAH into flushq */ + TAILQ_INSERT_HEAD(&flushq, sah, chain); + sah = nextsah; + } + SAHTREE_WUNLOCK(); } - SAHTREE_UNLOCK(); + key_freesah_flushed(&flushq); + /* Free all queued SAs and SAHs */ if (m->m_len < sizeof(struct sadb_msg) || sizeof(struct sadb_msg) > m->m_len + M_TRAILINGSPACE(m)) { ipseclog((LOG_DEBUG, "%s: No more memory.\n", __func__)); @@ -7167,15 +7415,13 @@ static int key_dump(struct socket *so, struct mbuf *m, const struct sadb_msghdr *mhp) { + SAHTREE_RLOCK_TRACKER; struct secashead *sah; struct secasvar *sav; - u_int16_t proto; - u_int stateidx; - u_int8_t satype; - u_int8_t state; - int cnt; struct sadb_msg *newmsg; struct mbuf *n; + uint32_t cnt; + uint8_t proto, satype; IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); @@ -7185,69 +7431,66 @@ /* map satype to proto */ if ((proto = key_satype2proto(mhp->msg->sadb_msg_satype)) == 0) { ipseclog((LOG_DEBUG, "%s: invalid satype is passed.\n", - __func__)); + __func__)); return key_senderror(so, m, EINVAL); } /* count sav entries to be sent to the userland. */ cnt = 0; - SAHTREE_LOCK(); - LIST_FOREACH(sah, &V_sahtree, chain) { - if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC - && proto != sah->saidx.proto) + SAHTREE_RLOCK(); + TAILQ_FOREACH(sah, &V_sahtree, chain) { + if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && + proto != sah->saidx.proto) continue; - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_any); - stateidx++) { - state = saorder_state_any[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - cnt++; - } - } + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) + cnt++; + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) + cnt++; } if (cnt == 0) { - SAHTREE_UNLOCK(); + SAHTREE_RUNLOCK(); return key_senderror(so, m, ENOENT); } /* send this to the userland, one at a time. */ newmsg = NULL; - LIST_FOREACH(sah, &V_sahtree, chain) { - if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC - && proto != sah->saidx.proto) + TAILQ_FOREACH(sah, &V_sahtree, chain) { + if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && + proto != sah->saidx.proto) continue; /* map proto to satype */ if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { - SAHTREE_UNLOCK(); + SAHTREE_RUNLOCK(); ipseclog((LOG_DEBUG, "%s: there was invalid proto in " - "SAD.\n", __func__)); + "SAD.\n", __func__)); return key_senderror(so, m, EINVAL); } - - for (stateidx = 0; - stateidx < _ARRAYLEN(saorder_state_any); - stateidx++) { - state = saorder_state_any[stateidx]; - LIST_FOREACH(sav, &sah->savtree[state], chain) { - n = key_setdumpsa(sav, SADB_DUMP, satype, - --cnt, mhp->msg->sadb_msg_pid); - if (!n) { - SAHTREE_UNLOCK(); - return key_senderror(so, m, ENOBUFS); - } - key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + n = key_setdumpsa(sav, SADB_DUMP, satype, + --cnt, mhp->msg->sadb_msg_pid); + if (n == NULL) { + SAHTREE_RUNLOCK(); + return key_senderror(so, m, ENOBUFS); } + key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + n = key_setdumpsa(sav, SADB_DUMP, satype, + --cnt, mhp->msg->sadb_msg_pid); + if (n == NULL) { + SAHTREE_RUNLOCK(); + return key_senderror(so, m, ENOBUFS); + } + key_sendup_mbuf(so, n, KEY_SENDUP_ONE); + } } - SAHTREE_UNLOCK(); - + SAHTREE_RUNLOCK(); m_freem(m); - return 0; + return (0); } - /* * SADB_X_PROMISC processing * @@ -7351,12 +7594,6 @@ IPSEC_ASSERT(so != NULL, ("null socket")); IPSEC_ASSERT(m != NULL, ("null mbuf")); -#if 0 /*kdebug_sadb assumes msg in linear buffer*/ - KEYDEBUG(KEYDEBUG_KEY_DUMP, - ipseclog((LOG_DEBUG, "%s: passed sadb_msg\n", __func__)); - kdebug_sadb(msg)); -#endif - if (m->m_len < sizeof(struct sadb_msg)) { m = m_pullup(m, sizeof(struct sadb_msg)); if (!m) @@ -7653,15 +7890,15 @@ case SADB_EXT_SPIRANGE: case SADB_X_EXT_POLICY: case SADB_X_EXT_SA2: -#ifdef IPSEC_NAT_T case SADB_X_EXT_NAT_T_TYPE: case SADB_X_EXT_NAT_T_SPORT: case SADB_X_EXT_NAT_T_DPORT: case SADB_X_EXT_NAT_T_OAI: case SADB_X_EXT_NAT_T_OAR: case SADB_X_EXT_NAT_T_FRAG: -#endif case SADB_X_EXT_SA_REPLAY: + case SADB_X_EXT_NEW_ADDRESS_SRC: + case SADB_X_EXT_NEW_ADDRESS_DST: /* duplicate check */ /* * XXX Are there duplication payloads of either @@ -7737,6 +7974,10 @@ case SADB_EXT_ADDRESS_SRC: case SADB_EXT_ADDRESS_DST: case SADB_EXT_ADDRESS_PROXY: + case SADB_X_EXT_NAT_T_OAI: + case SADB_X_EXT_NAT_T_OAR: + case SADB_X_EXT_NEW_ADDRESS_SRC: + case SADB_X_EXT_NEW_ADDRESS_DST: baselen = PFKEY_ALIGN8(sizeof(struct sadb_address)); checktype = ADDR; break; @@ -7774,11 +8015,25 @@ { int i; - for (i = 0; i < IPSEC_DIR_MAX; i++) + for (i = 0; i < IPSEC_DIR_MAX; i++) { TAILQ_INIT(&V_sptree[i]); + TAILQ_INIT(&V_sptree_ifnet[i]); + } - LIST_INIT(&V_sahtree); + V_key_lft_zone = uma_zcreate("IPsec SA lft_c", + sizeof(uint64_t) * 2, NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, UMA_ZONE_PCPU); + TAILQ_INIT(&V_sahtree); + V_sphashtbl = hashinit(SPHASH_NHASH, M_IPSEC_SP, &V_sphash_mask); + V_savhashtbl = hashinit(SAVHASH_NHASH, M_IPSEC_SA, &V_savhash_mask); + V_sahaddrhashtbl = hashinit(SAHHASH_NHASH, M_IPSEC_SAH, + &V_sahaddrhash_mask); + V_acqaddrhashtbl = hashinit(ACQHASH_NHASH, M_IPSEC_SAQ, + &V_acqaddrhash_mask); + V_acqseqhashtbl = hashinit(ACQHASH_NHASH, M_IPSEC_SAQ, + &V_acqseqhash_mask); + for (i = 0; i <= SADB_SATYPE_MAX; i++) LIST_INIT(&V_regtree[i]); @@ -7788,6 +8043,7 @@ if (!IS_DEFAULT_VNET(curvnet)) return; + XFORMS_LOCK_INIT(); SPTREE_LOCK_INIT(); REGTREE_LOCK_INIT(); SAHTREE_LOCK_INIT(); @@ -7810,37 +8066,57 @@ void key_destroy(void) { - TAILQ_HEAD(, secpolicy) drainq; + struct secashead_queue sahdrainq; + struct secpolicy_queue drainq; struct secpolicy *sp, *nextsp; struct secacq *acq, *nextacq; struct secspacq *spacq, *nextspacq; - struct secashead *sah, *nextsah; + struct secashead *sah; + struct secasvar *sav; struct secreg *reg; int i; + /* + * XXX: can we just call free() for each object without + * walking through safe way with releasing references? + */ TAILQ_INIT(&drainq); SPTREE_WLOCK(); for (i = 0; i < IPSEC_DIR_MAX; i++) { TAILQ_CONCAT(&drainq, &V_sptree[i], chain); + TAILQ_CONCAT(&drainq, &V_sptree_ifnet[i], chain); } SPTREE_WUNLOCK(); sp = TAILQ_FIRST(&drainq); while (sp != NULL) { nextsp = TAILQ_NEXT(sp, chain); - KEY_FREESP(&sp); + key_freesp(&sp); sp = nextsp; } - SAHTREE_LOCK(); - for (sah = LIST_FIRST(&V_sahtree); sah != NULL; sah = nextsah) { - nextsah = LIST_NEXT(sah, chain); - if (__LIST_CHAINED(sah)) { - LIST_REMOVE(sah, chain); - free(sah, M_IPSEC_SAH); + TAILQ_INIT(&sahdrainq); + SAHTREE_WLOCK(); + TAILQ_CONCAT(&sahdrainq, &V_sahtree, chain); + for (i = 0; i < V_savhash_mask + 1; i++) + LIST_INIT(&V_savhashtbl[i]); + for (i = 0; i < V_sahaddrhash_mask + 1; i++) + LIST_INIT(&V_sahaddrhashtbl[i]); + TAILQ_FOREACH(sah, &sahdrainq, chain) { + sah->state = SADB_SASTATE_DEAD; + TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { + sav->state = SADB_SASTATE_DEAD; } + TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { + sav->state = SADB_SASTATE_DEAD; + } } - SAHTREE_UNLOCK(); + SAHTREE_WUNLOCK(); + key_freesah_flushed(&sahdrainq); + hashdestroy(V_sphashtbl, M_IPSEC_SP, V_sphash_mask); + hashdestroy(V_savhashtbl, M_IPSEC_SA, V_savhash_mask); + hashdestroy(V_sahaddrhashtbl, M_IPSEC_SAH, V_sahaddrhash_mask); + REGTREE_LOCK(); for (i = 0; i <= SADB_SATYPE_MAX; i++) { LIST_FOREACH(reg, &V_regtree[i], chain) { @@ -7854,12 +8130,12 @@ REGTREE_UNLOCK(); ACQ_LOCK(); - for (acq = LIST_FIRST(&V_acqtree); acq != NULL; acq = nextacq) { + acq = LIST_FIRST(&V_acqtree); + while (acq != NULL) { nextacq = LIST_NEXT(acq, chain); - if (__LIST_CHAINED(acq)) { - LIST_REMOVE(acq, chain); - free(acq, M_IPSEC_SAQ); - } + LIST_REMOVE(acq, chain); + free(acq, M_IPSEC_SAQ); + acq = nextacq; } ACQ_UNLOCK(); @@ -7873,51 +8149,31 @@ } } SPACQ_UNLOCK(); + hashdestroy(V_acqaddrhashtbl, M_IPSEC_SAQ, V_acqaddrhash_mask); + hashdestroy(V_acqseqhashtbl, M_IPSEC_SAQ, V_acqseqhash_mask); + uma_zdestroy(V_key_lft_zone); } #endif -/* - * XXX: maybe This function is called after INBOUND IPsec processing. - * - * Special check for tunnel-mode packets. - * We must make some checks for consistency between inner and outer IP header. - * - * xxx more checks to be provided - */ -int -key_checktunnelsanity(struct secasvar *sav, u_int family, caddr_t src, - caddr_t dst) -{ - IPSEC_ASSERT(sav->sah != NULL, ("null SA header")); - - /* XXX: check inner IP header */ - - return 1; -} - /* record data transfer on SA, and update timestamps */ void key_sa_recordxfer(struct secasvar *sav, struct mbuf *m) { IPSEC_ASSERT(sav != NULL, ("Null secasvar")); IPSEC_ASSERT(m != NULL, ("Null mbuf")); - if (!sav->lft_c) - return; /* * XXX Currently, there is a difference of bytes size * between inbound and outbound processing. */ - sav->lft_c->bytes += m->m_pkthdr.len; - /* to check bytes lifetime is done in key_timehandler(). */ + counter_u64_add(sav->lft_c_bytes, m->m_pkthdr.len); /* * We use the number of packets as the unit of * allocations. We increment the variable * whenever {esp,ah}_{in,out}put is called. */ - sav->lft_c->allocations++; - /* XXX check for expires? */ + counter_u64_add(sav->lft_c_allocations, 1); /* * NOTE: We record CURRENT usetime by using wall clock, @@ -7930,26 +8186,10 @@ * <--------------> HARD * <-----> SOFT */ - sav->lft_c->usetime = time_second; - /* XXX check for expires? */ - - return; + if (sav->firstused == 0) + sav->firstused = time_second; } -static void -key_sa_chgstate(struct secasvar *sav, u_int8_t state) -{ - IPSEC_ASSERT(sav != NULL, ("NULL sav")); - SAHTREE_LOCK_ASSERT(); - - if (sav->state != state) { - if (__LIST_CHAINED(sav)) - LIST_REMOVE(sav, chain); - sav->state = state; - LIST_INSERT_HEAD(&sav->sah->savtree[state], sav, chain); - } -} - /* * Take one of the kernel's security keys and convert it into a PF_KEY * structure within an mbuf, suitable for sending up to a waiting @@ -7964,7 +8204,7 @@ */ static struct mbuf * -key_setkey(struct seckey *src, u_int16_t exttype) +key_setkey(struct seckey *src, uint16_t exttype) { struct mbuf *m; struct sadb_key *p; @@ -8004,7 +8244,7 @@ */ static struct mbuf * -key_setlifetime(struct seclifetime *src, u_int16_t exttype) +key_setlifetime(struct seclifetime *src, uint16_t exttype) { struct mbuf *m = NULL; struct sadb_lifetime *p; @@ -8031,3 +8271,104 @@ return m; } + +const struct enc_xform * +enc_algorithm_lookup(int alg) +{ + int i; + + for (i = 0; i < nitems(supported_ealgs); i++) + if (alg == supported_ealgs[i].sadb_alg) + return (supported_ealgs[i].xform); + return (NULL); +} + +const struct auth_hash * +auth_algorithm_lookup(int alg) +{ + int i; + + for (i = 0; i < nitems(supported_aalgs); i++) + if (alg == supported_aalgs[i].sadb_alg) + return (supported_aalgs[i].xform); + return (NULL); +} + +const struct comp_algo * +comp_algorithm_lookup(int alg) +{ + int i; + + for (i = 0; i < nitems(supported_calgs); i++) + if (alg == supported_calgs[i].sadb_alg) + return (supported_calgs[i].xform); + return (NULL); +} + +/* + * Register a transform. + */ +static int +xform_register(struct xformsw* xsp) +{ + struct xformsw *entry; + + XFORMS_LOCK(); + LIST_FOREACH(entry, &xforms, chain) { + if (entry->xf_type == xsp->xf_type) { + XFORMS_UNLOCK(); + return (EEXIST); + } + } + LIST_INSERT_HEAD(&xforms, xsp, chain); + XFORMS_UNLOCK(); + return (0); +} + +void +xform_attach(void *data) +{ + struct xformsw *xsp = (struct xformsw *)data; + + if (xform_register(xsp) != 0) + printf("%s: failed to register %s xform\n", __func__, + xsp->xf_name); +} + +void +xform_detach(void *data) +{ + struct xformsw *xsp = (struct xformsw *)data; + + XFORMS_LOCK(); + LIST_REMOVE(xsp, chain); + XFORMS_UNLOCK(); + + /* Delete all SAs related to this xform. */ + key_delete_xform(xsp); +} + +/* + * Initialize transform support in an sav. + */ +static int +xform_init(struct secasvar *sav, u_short xftype) +{ + struct xformsw *entry; + int ret; + + IPSEC_ASSERT(sav->tdb_xform == NULL, + ("tdb_xform is already initialized")); + + ret = EINVAL; + XFORMS_LOCK(); + LIST_FOREACH(entry, &xforms, chain) { + if (entry->xf_type == xftype) { + ret = (*entry->xf_init)(sav, entry); + break; + } + } + XFORMS_UNLOCK(); + return (ret); +} + Index: head/sys/netipsec/key_debug.h =================================================================== --- head/sys/netipsec/key_debug.h +++ head/sys/netipsec/key_debug.h @@ -53,10 +53,12 @@ #define KEYDEBUG_IPSEC_DATA (KEYDEBUG_IPSEC | KEYDEBUG_DATA) #define KEYDEBUG_IPSEC_DUMP (KEYDEBUG_IPSEC | KEYDEBUG_DUMP) -#define KEYDEBUG(lev,arg) \ - do { if ((V_key_debug_level & (lev)) == (lev)) { arg; } } while (0) +#define KEYDBG(lev, arg) \ + if ((V_key_debug_level & (KEYDEBUG_ ## lev)) == (KEYDEBUG_ ## lev)) { \ + arg; \ + } -VNET_DECLARE(u_int32_t, key_debug_level); +VNET_DECLARE(uint32_t, key_debug_level); #define V_key_debug_level VNET(key_debug_level) #endif /*_KERNEL*/ @@ -69,15 +71,26 @@ struct secpolicy; struct secpolicyindex; struct secasindex; +struct secashead; struct secasvar; struct secreplay; struct mbuf; -extern void kdebug_secpolicy(struct secpolicy *); -extern void kdebug_secpolicyindex(struct secpolicyindex *); -extern void kdebug_secasindex(struct secasindex *); -extern void kdebug_secasv(struct secasvar *); -extern void kdebug_mbufhdr(const struct mbuf *); -extern void kdebug_mbuf(const struct mbuf *); +union sockaddr_union; +const char* kdebug_secpolicy_state(u_int); +const char* kdebug_secpolicy_policy(u_int); +const char* kdebug_secpolicyindex_dir(u_int); +const char* kdebug_ipsecrequest_level(u_int); +const char* kdebug_secasindex_mode(u_int); +const char* kdebug_secasv_state(u_int); +void kdebug_secpolicy(struct secpolicy *); +void kdebug_secpolicyindex(struct secpolicyindex *, const char *); +void kdebug_secasindex(const struct secasindex *, const char *); +void kdebug_secash(struct secashead *, const char *); +void kdebug_secasv(struct secasvar *); +void kdebug_mbufhdr(const struct mbuf *); +void kdebug_mbuf(const struct mbuf *); +char *ipsec_address(const union sockaddr_union *, char *, socklen_t); +char *ipsec_sa2str(struct secasvar *, char *, size_t); #endif /*_KERNEL*/ struct sockaddr; Index: head/sys/netipsec/key_debug.c =================================================================== --- head/sys/netipsec/key_debug.c +++ head/sys/netipsec/key_debug.c @@ -56,6 +56,7 @@ #include #ifdef _KERNEL #include +#include #endif #ifndef _KERNEL @@ -458,137 +459,219 @@ #ifdef _KERNEL /* %%%: about SPD and SAD */ -void -kdebug_secpolicy(struct secpolicy *sp) +const char* +kdebug_secpolicy_state(u_int state) { - /* sanity check */ - if (sp == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - printf("secpolicy{ refcnt=%u policy=%u\n", - sp->refcnt, sp->policy); + switch (state) { + case IPSEC_SPSTATE_DEAD: + return ("dead"); + case IPSEC_SPSTATE_LARVAL: + return ("larval"); + case IPSEC_SPSTATE_ALIVE: + return ("alive"); + case IPSEC_SPSTATE_PCB: + return ("pcb"); + case IPSEC_SPSTATE_IFNET: + return ("ifnet"); + } + return ("unknown"); +} - kdebug_secpolicyindex(&sp->spidx); +const char* +kdebug_secpolicy_policy(u_int policy) +{ - switch (sp->policy) { + switch (policy) { case IPSEC_POLICY_DISCARD: - printf(" type=discard }\n"); - break; + return ("discard"); case IPSEC_POLICY_NONE: - printf(" type=none }\n"); - break; + return ("none"); case IPSEC_POLICY_IPSEC: - { - struct ipsecrequest *isr; - for (isr = sp->req; isr != NULL; isr = isr->next) { - - printf(" level=%u\n", isr->level); - kdebug_secasindex(&isr->saidx); - - if (isr->sav != NULL) - kdebug_secasv(isr->sav); - } - printf(" }\n"); - } - break; - case IPSEC_POLICY_BYPASS: - printf(" type=bypass }\n"); - break; + return ("ipsec"); case IPSEC_POLICY_ENTRUST: - printf(" type=entrust }\n"); - break; - default: - printf("%s: Invalid policy found. %d\n", __func__, sp->policy); - break; + return ("entrust"); + case IPSEC_POLICY_BYPASS: + return ("bypass"); } - - return; + return ("unknown"); } -void -kdebug_secpolicyindex(struct secpolicyindex *spidx) +const char* +kdebug_secpolicyindex_dir(u_int dir) { - char buf[INET6_ADDRSTRLEN]; - /* sanity check */ - if (spidx == NULL) - panic("%s: NULL pointer was passed.\n", __func__); + switch (dir) { + case IPSEC_DIR_ANY: + return ("any"); + case IPSEC_DIR_INBOUND: + return ("in"); + case IPSEC_DIR_OUTBOUND: + return ("out"); + } + return ("unknown"); +} - printf("secpolicyindex{ dir=%u prefs=%u prefd=%u ul_proto=%u\n", - spidx->dir, spidx->prefs, spidx->prefd, spidx->ul_proto); +const char* +kdebug_ipsecrequest_level(u_int level) +{ - printf("%s -> ", ipsec_address(&spidx->src, buf, sizeof(buf))); - printf("%s }\n", ipsec_address(&spidx->dst, buf, sizeof(buf))); + switch (level) { + case IPSEC_LEVEL_DEFAULT: + return ("default"); + case IPSEC_LEVEL_USE: + return ("use"); + case IPSEC_LEVEL_REQUIRE: + return ("require"); + case IPSEC_LEVEL_UNIQUE: + return ("unique"); + } + return ("unknown"); } -void -kdebug_secasindex(struct secasindex *saidx) +const char* +kdebug_secasindex_mode(u_int mode) { - char buf[INET6_ADDRSTRLEN]; - /* sanity check */ - if (saidx == NULL) - panic("%s: NULL pointer was passed.\n", __func__); + switch (mode) { + case IPSEC_MODE_ANY: + return ("any"); + case IPSEC_MODE_TRANSPORT: + return ("transport"); + case IPSEC_MODE_TUNNEL: + return ("tunnel"); + case IPSEC_MODE_TCPMD5: + return ("tcp-md5"); + } + return ("unknown"); +} - printf("secasindex{ mode=%u proto=%u\n", - saidx->mode, saidx->proto); +const char* +kdebug_secasv_state(u_int state) +{ - printf("%s -> ", ipsec_address(&saidx->src, buf, sizeof(buf))); - printf("%s }\n", ipsec_address(&saidx->dst, buf, sizeof(buf))); + switch (state) { + case SADB_SASTATE_LARVAL: + return ("larval"); + case SADB_SASTATE_MATURE: + return ("mature"); + case SADB_SASTATE_DYING: + return ("dying"); + case SADB_SASTATE_DEAD: + return ("dead"); + } + return ("unknown"); } -static void -kdebug_sec_lifetime(struct seclifetime *lft) +static char* +kdebug_port2str(const struct sockaddr *sa, char *buf, size_t len) { - /* sanity check */ - if (lft == NULL) - panic("%s: NULL pointer was passed.\n", __func__); + uint16_t port; - printf("sec_lifetime{ alloc=%u, bytes=%u\n", - lft->allocations, (u_int32_t)lft->bytes); - printf(" addtime=%u, usetime=%u }\n", - (u_int32_t)lft->addtime, (u_int32_t)lft->usetime); + IPSEC_ASSERT(sa != NULL, ("null sa")); + switch (sa->sa_family) { +#ifdef INET + case AF_INET: + port = ntohs(((const struct sockaddr_in *)sa)->sin_port); + break; +#endif +#ifdef INET6 + case AF_INET6: + port = ntohs(((const struct sockaddr_in6 *)sa)->sin6_port); + break; +#endif + default: + port = 0; + } + if (port == 0) + return ("*"); + snprintf(buf, len, "%u", port); + return (buf); +} - return; +void +kdebug_secpolicy(struct secpolicy *sp) +{ + u_int idx; + + IPSEC_ASSERT(sp != NULL, ("null sp")); + printf("SP { refcnt=%u id=%u priority=%u state=%s policy=%s\n", + sp->refcnt, sp->id, sp->priority, + kdebug_secpolicy_state(sp->state), + kdebug_secpolicy_policy(sp->policy)); + kdebug_secpolicyindex(&sp->spidx, " "); + for (idx = 0; idx < sp->tcount; idx++) { + printf(" req[%u]{ level=%s ", idx, + kdebug_ipsecrequest_level(sp->req[idx]->level)); + kdebug_secasindex(&sp->req[idx]->saidx, NULL); + printf(" }\n"); + } + printf("}\n"); } void -kdebug_secasv(struct secasvar *sav) +kdebug_secpolicyindex(struct secpolicyindex *spidx, const char *indent) { - /* sanity check */ - if (sav == NULL) - panic("%s: NULL pointer was passed.\n", __func__); + char buf[IPSEC_ADDRSTRLEN]; - printf("secas{"); - kdebug_secasindex(&sav->sah->saidx); + IPSEC_ASSERT(spidx != NULL, ("null spidx")); + if (indent != NULL) + printf("%s", indent); + printf("spidx { dir=%s ul_proto=", + kdebug_secpolicyindex_dir(spidx->dir)); + if (spidx->ul_proto == IPSEC_ULPROTO_ANY) + printf("* "); + else + printf("%u ", spidx->ul_proto); + printf("%s/%u -> ", ipsec_address(&spidx->src, buf, sizeof(buf)), + spidx->prefs); + printf("%s/%u }\n", ipsec_address(&spidx->dst, buf, sizeof(buf)), + spidx->prefd); +} - printf(" refcnt=%u state=%u auth=%u enc=%u\n", - sav->refcnt, sav->state, sav->alg_auth, sav->alg_enc); - printf(" spi=%u flags=%u\n", - (u_int32_t)ntohl(sav->spi), sav->flags); +void +kdebug_secasindex(const struct secasindex *saidx, const char *indent) +{ + char buf[IPSEC_ADDRSTRLEN], port[6]; - if (sav->key_auth != NULL) - kdebug_sadb_key((struct sadb_ext *)sav->key_auth); - if (sav->key_enc != NULL) - kdebug_sadb_key((struct sadb_ext *)sav->key_enc); + IPSEC_ASSERT(saidx != NULL, ("null saidx")); + if (indent != NULL) + printf("%s", indent); + printf("saidx { mode=%s proto=%u reqid=%u ", + kdebug_secasindex_mode(saidx->mode), saidx->proto, saidx->reqid); + printf("%s:%s -> ", ipsec_address(&saidx->src, buf, sizeof(buf)), + kdebug_port2str(&saidx->src.sa, port, sizeof(port))); + printf("%s:%s }\n", ipsec_address(&saidx->dst, buf, sizeof(buf)), + kdebug_port2str(&saidx->dst.sa, port, sizeof(port))); +} - if (sav->replay != NULL) { - SECASVAR_LOCK(sav); - kdebug_secreplay(sav->replay); - SECASVAR_UNLOCK(sav); - } - if (sav->lft_c != NULL) - kdebug_sec_lifetime(sav->lft_c); - if (sav->lft_h != NULL) - kdebug_sec_lifetime(sav->lft_h); - if (sav->lft_s != NULL) - kdebug_sec_lifetime(sav->lft_s); +static void +kdebug_sec_lifetime(struct seclifetime *lft, const char *indent) +{ -#ifdef notyet - /* XXX: misc[123] ? */ -#endif + IPSEC_ASSERT(lft != NULL, ("null lft")); + if (indent != NULL) + printf("%s", indent); + printf("lifetime { alloc=%u, bytes=%ju addtime=%ju usetime=%ju }\n", + lft->allocations, (uintmax_t)lft->bytes, (uintmax_t)lft->addtime, + (uintmax_t)lft->usetime); +} - return; +void +kdebug_secash(struct secashead *sah, const char *indent) +{ + + IPSEC_ASSERT(sah != NULL, ("null sah")); + if (indent != NULL) + printf("%s", indent); + printf("SAH { refcnt=%u state=%s\n", sah->refcnt, + kdebug_secasv_state(sah->state)); + if (indent != NULL) + printf("%s", indent); + kdebug_secasindex(&sah->saidx, indent); + if (indent != NULL) + printf("%s", indent); + printf("}\n"); } static void @@ -596,30 +679,83 @@ { int len, l; - /* sanity check */ - if (rpl == NULL) - panic("%s: NULL pointer was passed.\n", __func__); - + IPSEC_ASSERT(rpl != NULL, ("null rpl")); printf(" secreplay{ count=%u bitmap_size=%u wsize=%u seq=%u lastseq=%u", rpl->count, rpl->bitmap_size, rpl->wsize, rpl->seq, rpl->lastseq); if (rpl->bitmap == NULL) { - printf(" }\n"); + printf(" }\n"); return; } - printf("\n bitmap { "); - + printf("\n bitmap { "); for (len = 0; len < rpl->bitmap_size*4; len++) { for (l = 7; l >= 0; l--) printf("%u", (((rpl->bitmap)[len] >> l) & 1) ? 1 : 0); } - printf(" }\n"); + printf(" }\n"); +} - return; +static void +kdebug_secnatt(struct secnatt *natt) +{ + char buf[IPSEC_ADDRSTRLEN]; + + IPSEC_ASSERT(natt != NULL, ("null natt")); + printf(" natt{ sport=%u dport=%u ", ntohs(natt->sport), + ntohs(natt->dport)); + if (natt->flags & IPSEC_NATT_F_OAI) + printf("oai=%s ", ipsec_address(&natt->oai, buf, sizeof(buf))); + if (natt->flags & IPSEC_NATT_F_OAR) + printf("oar=%s ", ipsec_address(&natt->oar, buf, sizeof(buf))); + printf("}\n"); } void +kdebug_secasv(struct secasvar *sav) +{ + struct seclifetime lft_c; + + IPSEC_ASSERT(sav != NULL, ("null sav")); + + printf("SA { refcnt=%u spi=%u seq=%u pid=%u flags=0x%x state=%s\n", + sav->refcnt, ntohl(sav->spi), sav->seq, (uint32_t)sav->pid, + sav->flags, kdebug_secasv_state(sav->state)); + kdebug_secash(sav->sah, " "); + + lft_c.addtime = sav->created; + lft_c.allocations = (uint32_t)counter_u64_fetch( + sav->lft_c_allocations); + lft_c.bytes = counter_u64_fetch(sav->lft_c_bytes); + lft_c.usetime = sav->firstused; + kdebug_sec_lifetime(&lft_c, " c_"); + if (sav->lft_h != NULL) + kdebug_sec_lifetime(sav->lft_h, " h_"); + if (sav->lft_s != NULL) + kdebug_sec_lifetime(sav->lft_s, " s_"); + + if (sav->tdb_authalgxform != NULL) + printf(" alg_auth=%s\n", sav->tdb_authalgxform->name); + if (sav->key_auth != NULL) + KEYDBG(DUMP, + kdebug_sadb_key((struct sadb_ext *)sav->key_auth)); + if (sav->tdb_encalgxform != NULL) + printf(" alg_enc=%s\n", sav->tdb_encalgxform->name); + if (sav->key_enc != NULL) + KEYDBG(DUMP, + kdebug_sadb_key((struct sadb_ext *)sav->key_enc)); + if (sav->natt != NULL) + kdebug_secnatt(sav->natt); + if (sav->replay != NULL) { + KEYDBG(DUMP, + SECASVAR_LOCK(sav); + kdebug_secreplay(sav->replay); + SECASVAR_UNLOCK(sav)); + } + printf("}\n"); +} + +void kdebug_mbufhdr(const struct mbuf *m) { /* sanity check */ @@ -668,6 +804,47 @@ return; } + +/* Return a printable string for the address. */ +char * +ipsec_address(const union sockaddr_union* sa, char *buf, socklen_t size) +{ + + switch (sa->sa.sa_family) { +#ifdef INET + case AF_INET: + return (inet_ntop(AF_INET, &sa->sin.sin_addr, buf, size)); +#endif /* INET */ +#ifdef INET6 + case AF_INET6: + if (IN6_IS_SCOPE_LINKLOCAL(&sa->sin6.sin6_addr)) { + snprintf(buf, size, "%s%%%u", inet_ntop(AF_INET6, + &sa->sin6.sin6_addr, buf, size), + sa->sin6.sin6_scope_id); + return (buf); + } else + return (inet_ntop(AF_INET6, &sa->sin6.sin6_addr, + buf, size)); +#endif /* INET6 */ + case 0: + return ("*"); + default: + return ("(unknown address family)"); + } +} + +char * +ipsec_sa2str(struct secasvar *sav, char *buf, size_t size) +{ + char sbuf[IPSEC_ADDRSTRLEN], dbuf[IPSEC_ADDRSTRLEN]; + + snprintf(buf, size, "SA(SPI=%08lx src=%s dst=%s)", + (u_long)ntohl(sav->spi), + ipsec_address(&sav->sah->saidx.src, sbuf, sizeof(sbuf)), + ipsec_address(&sav->sah->saidx.dst, dbuf, sizeof(dbuf))); + return (buf); +} + #endif /* _KERNEL */ void Index: head/sys/netipsec/keydb.h =================================================================== --- head/sys/netipsec/keydb.h +++ head/sys/netipsec/keydb.h @@ -34,7 +34,7 @@ #define _NETIPSEC_KEYDB_H_ #ifdef _KERNEL - +#include #include #include @@ -57,9 +57,9 @@ struct secasindex { union sockaddr_union src; /* source address for SA */ union sockaddr_union dst; /* destination address for SA */ - u_int16_t proto; /* IPPROTO_ESP or IPPROTO_AH */ - u_int8_t mode; /* mode of protocol, see ipsec.h */ - u_int32_t reqid; /* reqid id who owned this SA */ + uint8_t proto; /* IPPROTO_ESP or IPPROTO_AH */ + uint8_t mode; /* mode of protocol, see ipsec.h */ + uint32_t reqid; /* reqid id who owned this SA */ /* see IPSEC_MANUAL_REQID_MAX. */ }; @@ -88,9 +88,23 @@ u_int64_t usetime; }; +struct secnatt { + union sockaddr_union oai; /* original addresses of initiator */ + union sockaddr_union oar; /* original address of responder */ + uint16_t sport; /* source port */ + uint16_t dport; /* destination port */ + uint16_t cksum; /* checksum delta */ + uint16_t flags; +#define IPSEC_NATT_F_OAI 0x0001 +#define IPSEC_NATT_F_OAR 0x0002 +}; + /* Security Association Data Base */ +TAILQ_HEAD(secasvar_queue, secasvar); struct secashead { - LIST_ENTRY(secashead) chain; + TAILQ_ENTRY(secashead) chain; + LIST_ENTRY(secashead) addrhash; /* hash by sproto+src+dst addresses */ + LIST_ENTRY(secashead) drainq; /* used ONLY by flush callout */ struct secasindex saidx; @@ -98,10 +112,10 @@ struct secident *identd; /* destination identity */ /* XXX I don't know how to use them. */ - u_int8_t state; /* MATURE or DEAD. */ - LIST_HEAD(_satree, secasvar) savtree[SADB_SASTATE_MAX+1]; - /* SA chain */ - /* The first of this list is newer SA */ + volatile u_int refcnt; /* reference count */ + uint8_t state; /* MATURE or DEAD. */ + struct secasvar_queue savtree_alive; /* MATURE and DYING SA */ + struct secasvar_queue savtree_larval; /* LARVAL SA */ }; struct xformsw; @@ -109,63 +123,70 @@ struct auth_hash; struct comp_algo; -/* Security Association */ +/* + * Security Association + * + * For INBOUND packets we do SA lookup using SPI, thus only SPIHASH is used. + * For OUTBOUND packets there may be several SA suitable for packet. + * We use key_preferred_oldsa variable to choose better SA. First of we do + * lookup for suitable SAH using packet's saidx. Then we use SAH's savtree + * to search better candidate. The newer SA (by created time) are placed + * in the beginning of the savtree list. There is no preference between + * DYING and MATURE. + * + * NB: Fields with a tdb_ prefix are part of the "glue" used + * to interface to the OpenBSD crypto support. This was done + * to distinguish this code from the mainline KAME code. + * NB: Fields are sorted on the basis of the frequency of changes, i.e. + * constants and unchangeable fields are going first. + * NB: if you want to change this structure, check that this will not break + * key_updateaddresses(). + */ struct secasvar { - LIST_ENTRY(secasvar) chain; - struct mtx lock; /* update/access lock */ + uint32_t spi; /* SPI Value, network byte order */ + uint32_t flags; /* holder for SADB_KEY_FLAGS */ + uint32_t seq; /* sequence number */ + pid_t pid; /* message's pid */ + u_int ivlen; /* length of IV */ - u_int refcnt; /* reference count */ - u_int8_t state; /* Status of this Association */ - - u_int8_t alg_auth; /* Authentication Algorithm Identifier*/ - u_int8_t alg_enc; /* Cipher Algorithm Identifier */ - u_int8_t alg_comp; /* Compression Algorithm Identifier */ - u_int32_t spi; /* SPI Value, network byte order */ - u_int32_t flags; /* holder for SADB_KEY_FLAGS */ - + struct secashead *sah; /* back pointer to the secashead */ struct seckey *key_auth; /* Key for Authentication */ struct seckey *key_enc; /* Key for Encryption */ - u_int ivlen; /* length of IV */ - void *sched; /* intermediate encryption key */ - size_t schedlen; - uint64_t cntr; /* counter for GCM and CTR */ - struct secreplay *replay; /* replay prevention */ - time_t created; /* for lifetime */ + struct secnatt *natt; /* NAT-T config */ + struct mtx *lock; /* update/access lock */ - struct seclifetime *lft_c; /* CURRENT lifetime, it's constant. */ + const struct xformsw *tdb_xform; /* transform */ + const struct enc_xform *tdb_encalgxform;/* encoding algorithm */ + const struct auth_hash *tdb_authalgxform;/* authentication algorithm */ + const struct comp_algo *tdb_compalgxform;/* compression algorithm */ + uint64_t tdb_cryptoid; /* crypto session id */ + + uint8_t alg_auth; /* Authentication Algorithm Identifier*/ + uint8_t alg_enc; /* Cipher Algorithm Identifier */ + uint8_t alg_comp; /* Compression Algorithm Identifier */ + uint8_t state; /* Status of this SA (pfkeyv2.h) */ + + counter_u64_t lft_c; /* CURRENT lifetime */ +#define lft_c_allocations lft_c +#define lft_c_bytes lft_c + 1 struct seclifetime *lft_h; /* HARD lifetime */ struct seclifetime *lft_s; /* SOFT lifetime */ - u_int32_t seq; /* sequence number */ - pid_t pid; /* message's pid */ + uint64_t created; /* time when SA was created */ + uint64_t firstused; /* time when SA was first used */ - struct secashead *sah; /* back pointer to the secashead */ + TAILQ_ENTRY(secasvar) chain; + LIST_ENTRY(secasvar) spihash; + LIST_ENTRY(secasvar) drainq; /* used ONLY by flush callout */ - /* - * NB: Fields with a tdb_ prefix are part of the "glue" used - * to interface to the OpenBSD crypto support. This was done - * to distinguish this code from the mainline KAME code. - */ - struct xformsw *tdb_xform; /* transform */ - struct enc_xform *tdb_encalgxform; /* encoding algorithm */ - struct auth_hash *tdb_authalgxform; /* authentication algorithm */ - struct comp_algo *tdb_compalgxform; /* compression algorithm */ - u_int64_t tdb_cryptoid; /* crypto session id */ - - /* - * NAT-Traversal. - */ - u_int16_t natt_type; /* IKE/ESP-marker in output. */ - u_int16_t natt_esp_frag_len; /* MTU for payload fragmentation. */ + uint64_t cntr; /* counter for GCM and CTR */ + volatile u_int refcnt; /* reference count */ }; -#define SECASVAR_LOCK_INIT(_sav) \ - mtx_init(&(_sav)->lock, "ipsec association", NULL, MTX_DEF) -#define SECASVAR_LOCK(_sav) mtx_lock(&(_sav)->lock) -#define SECASVAR_UNLOCK(_sav) mtx_unlock(&(_sav)->lock) -#define SECASVAR_LOCK_DESTROY(_sav) mtx_destroy(&(_sav)->lock) -#define SECASVAR_LOCK_ASSERT(_sav) mtx_assert(&(_sav)->lock, MA_OWNED) +#define SECASVAR_LOCK(_sav) mtx_lock((_sav)->lock) +#define SECASVAR_UNLOCK(_sav) mtx_unlock((_sav)->lock) +#define SECASVAR_LOCK_ASSERT(_sav) mtx_assert((_sav)->lock, MA_OWNED) #define SAV_ISGCM(_sav) \ ((_sav)->alg_enc == SADB_X_EALG_AESGCM8 || \ (_sav)->alg_enc == SADB_X_EALG_AESGCM12 || \ @@ -197,10 +218,11 @@ /* acquiring list table. */ struct secacq { LIST_ENTRY(secacq) chain; + LIST_ENTRY(secacq) addrhash; + LIST_ENTRY(secacq) seqhash; struct secasindex saidx; - - u_int32_t seq; /* sequence number */ + uint32_t seq; /* sequence number */ time_t created; /* for lifetime */ int count; /* for lifetime */ }; Index: head/sys/netipsec/keysock.c =================================================================== --- head/sys/netipsec/keysock.c +++ head/sys/netipsec/keysock.c @@ -115,7 +115,7 @@ M_ASSERTPKTHDR(m); - KEYDEBUG(KEYDEBUG_KEY_DUMP, kdebug_mbuf(m)); + KEYDBG(KEY_DUMP, kdebug_mbuf(m)); msg = mtod(m, struct sadb_msg *); PFKEYSTAT_INC(out_msgtype[msg->sadb_msg_type]); @@ -181,9 +181,9 @@ if (so == NULL || msg == NULL) panic("%s: NULL pointer was passed.\n", __func__); - KEYDEBUG(KEYDEBUG_KEY_DUMP, - printf("%s: \n", __func__); - kdebug_sadb(msg)); + KEYDBG(KEY_DUMP, + printf("%s: \n", __func__); + kdebug_sadb(msg)); /* * we increment statistics here, just in case we have ENOBUFS Index: head/sys/netipsec/subr_ipsec.c =================================================================== --- head/sys/netipsec/subr_ipsec.c +++ head/sys/netipsec/subr_ipsec.c @@ -0,0 +1,350 @@ +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +/* + * This file is build in the kernel only when 'options IPSEC' or + * 'options IPSEC_SUPPORT' is enabled. + */ + +#ifdef INET +void +ipsec4_setsockaddrs(const struct mbuf *m, union sockaddr_union *src, + union sockaddr_union *dst) +{ + static const struct sockaddr_in template = { + sizeof (struct sockaddr_in), + AF_INET, + 0, { 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 } + }; + + src->sin = template; + dst->sin = template; + + if (m->m_len < sizeof (struct ip)) { + m_copydata(m, offsetof(struct ip, ip_src), + sizeof (struct in_addr), + (caddr_t) &src->sin.sin_addr); + m_copydata(m, offsetof(struct ip, ip_dst), + sizeof (struct in_addr), + (caddr_t) &dst->sin.sin_addr); + } else { + const struct ip *ip = mtod(m, const struct ip *); + src->sin.sin_addr = ip->ip_src; + dst->sin.sin_addr = ip->ip_dst; + } +} +#endif +#ifdef INET6 +void +ipsec6_setsockaddrs(const struct mbuf *m, union sockaddr_union *src, + union sockaddr_union *dst) +{ + struct ip6_hdr ip6buf; + const struct ip6_hdr *ip6; + + if (m->m_len >= sizeof(*ip6)) + ip6 = mtod(m, const struct ip6_hdr *); + else { + m_copydata(m, 0, sizeof(ip6buf), (caddr_t)&ip6buf); + ip6 = &ip6buf; + } + + bzero(&src->sin6, sizeof(struct sockaddr_in6)); + src->sin6.sin6_family = AF_INET6; + src->sin6.sin6_len = sizeof(struct sockaddr_in6); + bcopy(&ip6->ip6_src, &src->sin6.sin6_addr, sizeof(ip6->ip6_src)); + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { + src->sin6.sin6_addr.s6_addr16[1] = 0; + src->sin6.sin6_scope_id = ntohs(ip6->ip6_src.s6_addr16[1]); + } + + bzero(&dst->sin6, sizeof(struct sockaddr_in6)); + dst->sin6.sin6_family = AF_INET6; + dst->sin6.sin6_len = sizeof(struct sockaddr_in6); + bcopy(&ip6->ip6_dst, &dst->sin6.sin6_addr, sizeof(ip6->ip6_dst)); + if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) { + dst->sin6.sin6_addr.s6_addr16[1] = 0; + dst->sin6.sin6_scope_id = ntohs(ip6->ip6_dst.s6_addr16[1]); + } +} +#endif + +#ifdef IPSEC_SUPPORT +/* + * Declare IPSEC_SUPPORT as module even if IPSEC is defined. + * tcpmd5.ko module depends from IPSEC_SUPPORT. + */ +#define IPSEC_MODULE_INCR 2 +static int +ipsec_kmod_enter(volatile u_int *cntr) +{ + u_int old, new; + + do { + old = *cntr; + if ((old & IPSEC_MODULE_ENABLED) == 0) + return (ENXIO); + new = old + IPSEC_MODULE_INCR; + } while(atomic_cmpset_acq_int(cntr, old, new) == 0); + return (0); +} + +static void +ipsec_kmod_exit(volatile u_int *cntr) +{ + u_int old, new; + + do { + old = *cntr; + new = old - IPSEC_MODULE_INCR; + } while (atomic_cmpset_rel_int(cntr, old, new) == 0); +} + +static void +ipsec_kmod_drain(volatile u_int *cntr) +{ + u_int old, new; + + do { + old = *cntr; + new = old & ~IPSEC_MODULE_ENABLED; + } while (atomic_cmpset_acq_int(cntr, old, new) == 0); + while (atomic_cmpset_int(cntr, 0, 0) == 0) + pause("ipsecd", hz/2); +} + +#define METHOD_DECL(...) __VA_ARGS__ +#define METHOD_ARGS(...) __VA_ARGS__ +#define IPSEC_KMOD_METHOD(type, name, sc, method, decl, args) \ +type name (decl) \ +{ \ + type ret = (type)ipsec_kmod_enter(&sc->enabled); \ + if (ret == 0) { \ + ret = (*sc->methods->method)(args); \ + ipsec_kmod_exit(&sc->enabled); \ + } \ + return (ret); \ +} + +#ifndef TCP_SIGNATURE +/* Declare TCP-MD5 support as kernel module. */ +static struct tcpmd5_support tcpmd5_ipsec = { + .enabled = 0, + .methods = NULL +}; +struct tcpmd5_support * const tcp_ipsec_support = &tcpmd5_ipsec; + +IPSEC_KMOD_METHOD(int, tcpmd5_kmod_input, sc, + input, METHOD_DECL(struct tcpmd5_support * const sc, struct mbuf *m, + struct tcphdr *th, u_char *buf), METHOD_ARGS(m, th, buf) +) + +IPSEC_KMOD_METHOD(int, tcpmd5_kmod_output, sc, + output, METHOD_DECL(struct tcpmd5_support * const sc, struct mbuf *m, + struct tcphdr *th, u_char *buf), METHOD_ARGS(m, th, buf) +) + +IPSEC_KMOD_METHOD(int, tcpmd5_kmod_pcbctl, sc, + pcbctl, METHOD_DECL(struct tcpmd5_support * const sc, struct inpcb *inp, + struct sockopt *sopt), METHOD_ARGS(inp, sopt) +) + +void +tcpmd5_support_enable(const struct tcpmd5_methods * const methods) +{ + + KASSERT(tcp_ipsec_support->enabled == 0, ("TCP-MD5 already enabled")); + tcp_ipsec_support->methods = methods; + tcp_ipsec_support->enabled |= IPSEC_MODULE_ENABLED; +} + +void +tcpmd5_support_disable(void) +{ + + if (tcp_ipsec_support->enabled & IPSEC_MODULE_ENABLED) { + ipsec_kmod_drain(&tcp_ipsec_support->enabled); + tcp_ipsec_support->methods = NULL; + } +} +#endif + +static int +ipsec_support_modevent(module_t mod, int type, void *data) +{ + + switch (type) { + case MOD_LOAD: + return (0); + case MOD_UNLOAD: + return (EBUSY); + default: + return (EOPNOTSUPP); + } +} + +static moduledata_t ipsec_support_mod = { + "ipsec_support", + ipsec_support_modevent, + 0 +}; +DECLARE_MODULE(ipsec_support, ipsec_support_mod, SI_SUB_PROTO_DOMAIN, + SI_ORDER_ANY); +MODULE_VERSION(ipsec_support, 1); + +#ifndef IPSEC +/* + * IPsec support is build as kernel module. + */ +#ifdef INET +static struct ipsec_support ipv4_ipsec = { + .enabled = 0, + .methods = NULL +}; +struct ipsec_support * const ipv4_ipsec_support = &ipv4_ipsec; + +IPSEC_KMOD_METHOD(int, ipsec_kmod_udp_input, sc, + udp_input, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + int off, int af), METHOD_ARGS(m, off, af) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_udp_pcbctl, sc, + udp_pcbctl, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp, + struct sockopt *sopt), METHOD_ARGS(inp, sopt) +) +#endif + +#ifdef INET6 +static struct ipsec_support ipv6_ipsec = { + .enabled = 0, + .methods = NULL +}; +struct ipsec_support * const ipv6_ipsec_support = &ipv6_ipsec; +#endif + +IPSEC_KMOD_METHOD(int, ipsec_kmod_input, sc, + input, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + int offset, int proto), METHOD_ARGS(m, offset, proto) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_check_policy, sc, + check_policy, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + struct inpcb *inp), METHOD_ARGS(m, inp) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_forward, sc, + forward, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m), + (m) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_output, sc, + output, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + struct inpcb *inp), METHOD_ARGS(m, inp) +) + +IPSEC_KMOD_METHOD(int, ipsec_kmod_pcbctl, sc, + pcbctl, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp, + struct sockopt *sopt), METHOD_ARGS(inp, sopt) +) + +IPSEC_KMOD_METHOD(size_t, ipsec_kmod_hdrsize, sc, + hdrsize, METHOD_DECL(struct ipsec_support * const sc, struct inpcb *inp), + (inp) +) + +static IPSEC_KMOD_METHOD(int, ipsec_kmod_caps, sc, + capability, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, + u_int cap), METHOD_ARGS(m, cap) +) + +int +ipsec_kmod_capability(struct ipsec_support * const sc, struct mbuf *m, + u_int cap) +{ + + /* + * Since PF_KEY is build in the kernel, we can directly + * call key_havesp() without additional synchronizations. + */ + if (cap == IPSEC_CAP_OPERABLE) + return (key_havesp(IPSEC_DIR_INBOUND) != 0 || + key_havesp(IPSEC_DIR_OUTBOUND) != 0); + return (ipsec_kmod_caps(sc, m, cap)); +} + +void +ipsec_support_enable(struct ipsec_support * const sc, + const struct ipsec_methods * const methods) +{ + + KASSERT(sc->enabled == 0, ("IPsec already enabled")); + sc->methods = methods; + sc->enabled |= IPSEC_MODULE_ENABLED; +} + +void +ipsec_support_disable(struct ipsec_support * const sc) +{ + + if (sc->enabled & IPSEC_MODULE_ENABLED) { + ipsec_kmod_drain(&sc->enabled); + sc->methods = NULL; + } +} +#endif /* !IPSEC */ +#endif /* IPSEC_SUPPORT */ Index: head/sys/netipsec/udpencap.c =================================================================== --- head/sys/netipsec/udpencap.c +++ head/sys/netipsec/udpencap.c @@ -0,0 +1,294 @@ +/*- + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_ipsec.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Handle UDP_ENCAP socket option. Always return with released INP_WLOCK. + */ +int +udp_ipsec_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + struct udpcb *up; + int error, optval; + + INP_WLOCK_ASSERT(inp); + if (sopt->sopt_name != UDP_ENCAP) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + + up = intoudpcb(inp); + if (sopt->sopt_dir == SOPT_GET) { + if (up->u_flags & UF_ESPINUDP) + optval = UDP_ENCAP_ESPINUDP; + else + optval = 0; + INP_WUNLOCK(inp); + return (sooptcopyout(sopt, &optval, sizeof(optval))); + } + INP_WUNLOCK(inp); + + error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); + if (error != 0) + return (error); + + INP_WLOCK(inp); + switch (optval) { + case 0: + up->u_flags &= ~UF_ESPINUDP; + break; + case UDP_ENCAP_ESPINUDP: + up->u_flags |= UF_ESPINUDP; + break; + default: + error = EINVAL; + } + INP_WUNLOCK(inp); + return (error); +} + +/* + * Potentially decap ESP in UDP frame. Check for an ESP header. + * If present, strip the UDP header and push the result through IPSec. + * + * Returns error if mbuf consumed and/or processed, otherwise 0. + */ +int +udp_ipsec_input(struct mbuf *m, int off, int af) +{ + union sockaddr_union dst; + struct secasvar *sav; + struct udphdr *udp; + struct ip *ip; + uint32_t spi; + int error, hlen; + + /* + * Just return if packet doesn't have enough data. + * We need at least [IP header + UDP header + ESP header]. + * NAT-Keepalive packet has only one byte of payload, so it + * by default will not be processed. + */ + if (m->m_pkthdr.len < off + sizeof(struct esp)) + return (0); + + m_copydata(m, off, sizeof(uint32_t), (caddr_t)&spi); + if (spi == 0) /* Non-ESP marker. */ + return (0); + + /* + * Find SA and check that it is configured for UDP + * encapsulation. + */ + bzero(&dst, sizeof(dst)); + dst.sa.sa_family = af; + switch (af) { +#ifdef INET + case AF_INET: + dst.sin.sin_len = sizeof(struct sockaddr_in); + ip = mtod(m, struct ip *); + ip->ip_p = IPPROTO_ESP; + off = offsetof(struct ip, ip_p); + hlen = ip->ip_hl << 2; + dst.sin.sin_addr = ip->ip_dst; + break; +#endif +#ifdef INET6 + case AF_INET6: + /* Not yet */ + /* FALLTHROUGH */ +#endif + default: + ESPSTAT_INC(esps_nopf); + m_freem(m); + return (EPFNOSUPPORT); + } + + sav = key_allocsa(&dst, IPPROTO_ESP, spi); + if (sav == NULL) { + ESPSTAT_INC(esps_notdb); + m_freem(m); + return (ENOENT); + } + udp = mtodo(m, hlen); + if (sav->natt == NULL || + sav->natt->sport != udp->uh_sport || + sav->natt->dport != udp->uh_dport) { + /* XXXAE: should we check source address? */ + ESPSTAT_INC(esps_notdb); + key_freesav(&sav); + m_freem(m); + return (ENOENT); + } + /* + * Remove the UDP header + * Before: + * <--- off ---> + * +----+------+-----+ + * | IP | UDP | ESP | + * +----+------+-----+ + * <-skip-> + * After: + * +----+-----+ + * | IP | ESP | + * +----+-----+ + * <-skip-> + */ + m_striphdr(m, hlen, sizeof(*udp)); + /* + * We cannot yet update the cksums so clear any h/w cksum flags + * as they are no longer valid. + */ + if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) + m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + /* + * We can update ip_len and ip_sum here, but ipsec4_input_cb() + * will do this anyway, so don't touch them here. + */ + ESPSTAT_INC(esps_input); + error = (*sav->tdb_xform->xf_input)(m, sav, hlen, off); + if (error != 0) + key_freesav(&sav); + + return (EINPROGRESS); /* Consumed by IPsec. */ +} + +int +udp_ipsec_output(struct mbuf *m, struct secasvar *sav) +{ + struct udphdr *udp; + struct mbuf *n; + struct ip *ip; + int hlen, off; + + IPSEC_ASSERT(sav->natt != NULL, ("UDP encapsulation isn't required.")); + + if (sav->sah->saidx.dst.sa.sa_family == AF_INET6) + return (EAFNOSUPPORT); + + ip = mtod(m, struct ip *); + hlen = ip->ip_hl << 2; + n = m_makespace(m, hlen, sizeof(*udp), &off); + if (n == NULL) { + DPRINTF(("%s: m_makespace for udphdr failed\n", __func__)); + return (ENOBUFS); + } + + udp = mtodo(n, off); + udp->uh_dport = sav->natt->dport; + udp->uh_sport = sav->natt->sport; + udp->uh_sum = 0; + udp->uh_ulen = htons(m->m_pkthdr.len - hlen); + + ip = mtod(m, struct ip *); + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_p = IPPROTO_UDP; + return (0); +} + +void +udp_ipsec_adjust_cksum(struct mbuf *m, struct secasvar *sav, int proto, + int skip) +{ + struct ip *ip; + uint16_t cksum, off; + + IPSEC_ASSERT(sav->natt != NULL, ("NAT-T isn't required")); + IPSEC_ASSERT(proto == IPPROTO_UDP || proto == IPPROTO_TCP, + ("unexpected protocol %u", proto)); + + if (proto == IPPROTO_UDP) + off = offsetof(struct udphdr, uh_sum); + else + off = offsetof(struct tcphdr, th_sum); + + if (V_natt_cksum_policy == 0) { /* auto */ + if (sav->natt->cksum != 0) { + /* Incrementally recompute. */ + m_copydata(m, skip + off, sizeof(cksum), + (caddr_t)&cksum); + cksum = in_addword(cksum, sav->natt->cksum); + } else { + /* No OA from IKEd. */ + if (proto == IPPROTO_TCP) { + /* Ignore for TCP. */ + m->m_pkthdr.csum_data = 0xffff; + m->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | + CSUM_PSEUDO_HDR); + return; + } + cksum = 0; /* Reset for UDP. */ + } + m_copyback(m, skip + off, sizeof(cksum), (caddr_t)&cksum); + } else { /* Fully recompute */ + ip = mtod(m, struct ip *); + cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(m->m_pkthdr.len - skip + proto)); + m_copyback(m, skip + off, sizeof(cksum), (caddr_t)&cksum); + m->m_pkthdr.csum_flags = + (proto == IPPROTO_UDP) ? CSUM_UDP: CSUM_TCP; + m->m_pkthdr.csum_data = off; + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +} + Index: head/sys/netipsec/xform.h =================================================================== --- head/sys/netipsec/xform.h +++ head/sys/netipsec/xform.h @@ -42,6 +42,7 @@ #define _NETIPSEC_XFORM_H_ #include +#include #include #include @@ -49,78 +50,68 @@ #define AH_HMAC_MAXHASHLEN (SHA2_512_HASH_LEN/2) /* Keep this updated */ #define AH_HMAC_INITIAL_RPL 1 /* replay counter initial value */ +#ifdef _KERNEL +struct secpolicy; +struct secasvar; + /* * Packet tag assigned on completion of IPsec processing; used - * to speedup processing when/if the packet comes back for more - * processing. + * to speedup security policy checking for INBOUND packets. */ -struct tdb_ident { - u_int32_t spi; - union sockaddr_union dst; - u_int8_t proto; - /* Cache those two for enc(4) in xform_ipip. */ - u_int8_t alg_auth; - u_int8_t alg_enc; +struct xform_history { + union sockaddr_union dst; /* destination address */ + uint32_t spi; /* Security Parameters Index */ + uint8_t proto; /* IPPROTO_ESP or IPPROTO_AH */ + uint8_t mode; /* transport or tunnel */ }; /* * Opaque data structure hung off a crypto operation descriptor. */ -struct tdb_crypto { - struct ipsecrequest *tc_isr; /* ipsec request state */ - u_int32_t tc_spi; /* associated SPI */ - union sockaddr_union tc_dst; /* dst addr of packet */ - u_int8_t tc_proto; /* current protocol, e.g. AH */ - u_int8_t tc_nxt; /* next protocol, e.g. IPV4 */ - int tc_protoff; /* current protocol offset */ - int tc_skip; /* data offset */ - caddr_t tc_ptr; /* associated crypto data */ - struct secasvar *tc_sav; /* related SA */ +struct xform_data { + struct secpolicy *sp; /* security policy */ + struct secasvar *sav; /* related SA */ + uint64_t cryptoid; /* used crypto session id */ + u_int idx; /* IPsec request index */ + int protoff; /* current protocol offset */ + int skip; /* data offset */ + uint8_t nxt; /* next protocol, e.g. IPV4 */ }; -struct secasvar; -struct ipescrequest; - -struct xformsw { - u_short xf_type; /* xform ID */ #define XF_IP4 1 /* unused */ #define XF_AH 2 /* AH */ #define XF_ESP 3 /* ESP */ #define XF_TCPSIGNATURE 5 /* TCP MD5 Signature option, RFC 2358 */ #define XF_IPCOMP 6 /* IPCOMP */ - u_short xf_flags; -#define XFT_AUTH 0x0001 -#define XFT_CONF 0x0100 -#define XFT_COMP 0x1000 - char *xf_name; /* human-readable name */ + +struct xformsw { + u_short xf_type; /* xform ID */ + char *xf_name; /* human-readable name */ int (*xf_init)(struct secasvar*, struct xformsw*); /* setup */ int (*xf_zeroize)(struct secasvar*); /* cleanup */ int (*xf_input)(struct mbuf*, struct secasvar*, /* input */ int, int); - int (*xf_output)(struct mbuf*, /* output */ - struct ipsecrequest *, struct mbuf **, int, int); - struct xformsw *xf_next; /* list of registered xforms */ + int (*xf_output)(struct mbuf*, /* output */ + struct secpolicy *, struct secasvar *, u_int, int, int); + LIST_ENTRY(xformsw) chain; }; -#ifdef _KERNEL -extern void xform_register(struct xformsw*); -extern int xform_init(struct secasvar *sav, int xftype); -extern int xform_ah_authsize(struct auth_hash *esph); +const struct enc_xform * enc_algorithm_lookup(int); +const struct auth_hash * auth_algorithm_lookup(int); +const struct comp_algo * comp_algorithm_lookup(int); -struct cryptoini; +void xform_attach(void *); +void xform_detach(void *); +struct cryptoini; /* XF_AH */ +int xform_ah_authsize(const struct auth_hash *); extern int ah_init0(struct secasvar *, struct xformsw *, struct cryptoini *); extern int ah_zeroize(struct secasvar *sav); -extern struct auth_hash *ah_algorithm_lookup(int alg); extern size_t ah_hdrsiz(struct secasvar *); /* XF_ESP */ -extern struct enc_xform *esp_algorithm_lookup(int alg); extern size_t esp_hdrsiz(struct secasvar *sav); - -/* XF_COMP */ -extern struct comp_algo *ipcomp_algorithm_lookup(int alg); #endif /* _KERNEL */ #endif /* _NETIPSEC_XFORM_H_ */ Index: head/sys/netipsec/xform_ah.c =================================================================== --- head/sys/netipsec/xform_ah.c +++ head/sys/netipsec/xform_ah.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include @@ -113,7 +113,7 @@ static int ah_output_cb(struct cryptop*); int -xform_ah_authsize(struct auth_hash *esph) +xform_ah_authsize(const struct auth_hash *esph) { int alen; @@ -141,43 +141,6 @@ return alen; } -/* - * NB: this is public for use by the PF_KEY support. - */ -struct auth_hash * -ah_algorithm_lookup(int alg) -{ - if (alg > SADB_AALG_MAX) - return NULL; - switch (alg) { - case SADB_X_AALG_NULL: - return &auth_hash_null; - case SADB_AALG_MD5HMAC: - return &auth_hash_hmac_md5; - case SADB_AALG_SHA1HMAC: - return &auth_hash_hmac_sha1; - case SADB_X_AALG_RIPEMD160HMAC: - return &auth_hash_hmac_ripemd_160; - case SADB_X_AALG_MD5: - return &auth_hash_key_md5; - case SADB_X_AALG_SHA: - return &auth_hash_key_sha1; - case SADB_X_AALG_SHA2_256: - return &auth_hash_hmac_sha2_256; - case SADB_X_AALG_SHA2_384: - return &auth_hash_hmac_sha2_384; - case SADB_X_AALG_SHA2_512: - return &auth_hash_hmac_sha2_512; - case SADB_X_AALG_AES128GMAC: - return &auth_hash_nist_gmac_aes_128; - case SADB_X_AALG_AES192GMAC: - return &auth_hash_nist_gmac_aes_192; - case SADB_X_AALG_AES256GMAC: - return &auth_hash_nist_gmac_aes_256; - } - return NULL; -} - size_t ah_hdrsiz(struct secasvar *sav) { @@ -202,10 +165,10 @@ int ah_init0(struct secasvar *sav, struct xformsw *xsp, struct cryptoini *cria) { - struct auth_hash *thash; + const struct auth_hash *thash; int keylen; - thash = ah_algorithm_lookup(sav->alg_auth); + thash = auth_algorithm_lookup(sav->alg_auth); if (thash == NULL) { DPRINTF(("%s: unsupported authentication algorithm %u\n", __func__, sav->alg_auth)); @@ -582,14 +545,14 @@ ah_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { char buf[128]; - struct auth_hash *ahx; - struct tdb_crypto *tc; + const struct auth_hash *ahx; + struct cryptodesc *crda; + struct cryptop *crp; + struct xform_data *xd; struct newah *ah; + uint64_t cryptoid; int hl, rplen, authsize, error; - struct cryptodesc *crda; - struct cryptop *crp; - IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->key_auth != NULL, ("null authentication key")); IPSEC_ASSERT(sav->tdb_authalgxform != NULL, @@ -608,13 +571,18 @@ } /* Check replay window, if applicable. */ - if (sav->replay && !ipsec_chkreplay(ntohl(ah->ah_seq), sav)) { + SECASVAR_LOCK(sav); + if (sav->replay != NULL && sav->replay->wsize != 0 && + ipsec_chkreplay(ntohl(ah->ah_seq), sav) == 0) { + SECASVAR_UNLOCK(sav); AHSTAT_INC(ahs_replay); DPRINTF(("%s: packet replay failure: %s\n", __func__, - ipsec_logsastr(sav, buf, sizeof(buf)))); + ipsec_sa2str(sav, buf, sizeof(buf)))); m_freem(m); - return ENOBUFS; + return (EACCES); } + cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); /* Verify AH header length. */ hl = ah->ah_len * sizeof (u_int32_t); @@ -635,7 +603,8 @@ /* Get crypto descriptors. */ crp = crypto_getreq(1); if (crp == NULL) { - DPRINTF(("%s: failed to acquire crypto descriptor\n",__func__)); + DPRINTF(("%s: failed to acquire crypto descriptor\n", + __func__)); AHSTAT_INC(ahs_crypto); m_freem(m); return ENOBUFS; @@ -654,10 +623,10 @@ crda->crd_key = sav->key_auth->key_data; /* Allocate IPsec-specific opaque crypto info. */ - tc = (struct tdb_crypto *) malloc(sizeof (struct tdb_crypto) + - skip + rplen + authsize, M_XDATA, M_NOWAIT | M_ZERO); - if (tc == NULL) { - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + xd = malloc(sizeof(*xd) + skip + rplen + authsize, M_XDATA, + M_NOWAIT | M_ZERO); + if (xd == NULL) { + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); AHSTAT_INC(ahs_crypto); crypto_freereq(crp); m_freem(m); @@ -668,7 +637,7 @@ * Save the authenticator, the skipped portion of the packet, * and the AH header. */ - m_copydata(m, 0, skip + rplen + authsize, (caddr_t)(tc+1)); + m_copydata(m, 0, skip + rplen + authsize, (caddr_t)(xd + 1)); /* Zeroize the authenticator on the packet. */ m_copyback(m, skip + rplen, authsize, ipseczeroes); @@ -679,7 +648,7 @@ if (error != 0) { /* NB: mbuf is free'd by ah_massage_headers */ AHSTAT_INC(ahs_hdrops); - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); return (error); } @@ -689,18 +658,15 @@ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ah_input_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_sid = cryptoid; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback. */ - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_nxt = ah->ah_nxt; - tc->tc_protoff = protoff; - tc->tc_skip = skip; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; + xd->sav = sav; + xd->nxt = ah->ah_nxt; + xd->protoff = protoff; + xd->skip = skip; + xd->cryptoid = cryptoid; return (crypto_dispatch(crp)); } @@ -710,46 +676,43 @@ static int ah_input_cb(struct cryptop *crp) { - char buf[INET6_ADDRSTRLEN]; - int rplen, error, skip, protoff; + char buf[IPSEC_ADDRSTRLEN]; unsigned char calc[AH_ALEN_MAX]; + const struct auth_hash *ahx; struct mbuf *m; struct cryptodesc *crd; - struct auth_hash *ahx; - struct tdb_crypto *tc; + struct xform_data *xd; struct secasvar *sav; struct secasindex *saidx; - u_int8_t nxt; caddr_t ptr; - int authsize; + uint64_t cryptoid; + int authsize, rplen, error, skip, protoff; + uint8_t nxt; crd = crp->crp_desc; - - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); - skip = tc->tc_skip; - nxt = tc->tc_nxt; - protoff = tc->tc_protoff; m = (struct mbuf *) crp->crp_buf; - - sav = tc->tc_sav; - IPSEC_ASSERT(sav != NULL, ("null SA!")); - + xd = (struct xform_data *) crp->crp_opaque; + sav = xd->sav; + skip = xd->skip; + nxt = xd->nxt; + protoff = xd->protoff; + cryptoid = xd->cryptoid; saidx = &sav->sah->saidx; IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET || saidx->dst.sa.sa_family == AF_INET6, ("unexpected protocol family %u", saidx->dst.sa.sa_family)); - ahx = (struct auth_hash *) sav->tdb_authalgxform; + ahx = sav->tdb_authalgxform; /* Check for crypto errors. */ if (crp->crp_etype) { - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - - if (crp->crp_etype == EAGAIN) + if (crp->crp_etype == EAGAIN) { + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; return (crypto_dispatch(crp)); - + } AHSTAT_INC(ahs_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; @@ -776,7 +739,7 @@ m_copydata(m, skip + rplen, authsize, calc); /* Verify authenticator. */ - ptr = (caddr_t) (tc + 1); + ptr = (caddr_t) (xd + 1); if (timingsafe_bcmp(ptr + skip + rplen, calc, authsize)) { DPRINTF(("%s: authentication hash mismatch for packet " "in SA %s/%08lx\n", __func__, @@ -787,11 +750,11 @@ goto bad; } /* Fix the Next Protocol field. */ - ((u_int8_t *) ptr)[protoff] = nxt; + ((uint8_t *) ptr)[protoff] = nxt; /* Copyback the saved (uncooked) network headers. */ m_copyback(m, 0, skip, ptr); - free(tc, M_XDATA), tc = NULL; /* No longer needed */ + free(xd, M_XDATA), xd = NULL; /* No longer needed */ /* * Header is now authenticated. @@ -806,11 +769,14 @@ m_copydata(m, skip + offsetof(struct newah, ah_seq), sizeof (seq), (caddr_t) &seq); + SECASVAR_LOCK(sav); if (ipsec_updatereplay(ntohl(seq), sav)) { + SECASVAR_UNLOCK(sav); AHSTAT_INC(ahs_replay); - error = ENOBUFS; /*XXX as above*/ + error = EACCES; goto bad; } + SECASVAR_UNLOCK(sav); } /* @@ -840,41 +806,38 @@ panic("%s: Unexpected address family: %d saidx=%p", __func__, saidx->dst.sa.sa_family, saidx); } - - KEY_FREESAV(&sav); return error; bad: if (sav) - KEY_FREESAV(&sav); + key_freesav(&sav); if (m != NULL) m_freem(m); - if (tc != NULL) - free(tc, M_XDATA); + if (xd != NULL) + free(xd, M_XDATA); if (crp != NULL) crypto_freereq(crp); return error; } /* - * AH output routine, called by ipsec[46]_process_packet(). + * AH output routine, called by ipsec[46]_perform_request(). */ static int -ah_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, - int skip, int protoff) +ah_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx, int skip, int protoff) { - char buf[INET6_ADDRSTRLEN]; - struct secasvar *sav; - struct auth_hash *ahx; + char buf[IPSEC_ADDRSTRLEN]; + const struct auth_hash *ahx; struct cryptodesc *crda; - struct tdb_crypto *tc; + struct xform_data *xd; struct mbuf *mi; struct cryptop *crp; - u_int16_t iplen; - int error, rplen, authsize, maxpacketsize, roff; - u_int8_t prot; struct newah *ah; + uint64_t cryptoid; + uint16_t iplen; + int error, rplen, authsize, maxpacketsize, roff; + uint8_t prot; - sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); ahx = sav->tdb_authalgxform; IPSEC_ASSERT(ahx != NULL, ("null authentication xform")); @@ -960,9 +923,8 @@ m_copyback(m, skip + rplen, authsize, ipseczeroes); /* Insert packet replay counter, as requested. */ + SECASVAR_LOCK(sav); if (sav->replay) { - SECASVAR_LOCK(sav); - if (sav->replay->count == ~0 && (sav->flags & SADB_X_EXT_CYCSEQ) == 0) { SECASVAR_UNLOCK(sav); @@ -970,7 +932,7 @@ __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); AHSTAT_INC(ahs_wrap); - error = EINVAL; + error = EACCES; goto bad; } #ifdef REGRESSION @@ -979,9 +941,9 @@ #endif sav->replay->count++; ah->ah_seq = htonl(sav->replay->count); - - SECASVAR_UNLOCK(sav); } + cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); /* Get crypto descriptors. */ crp = crypto_getreq(1); @@ -994,7 +956,6 @@ } crda = crp->crp_desc; - crda->crd_skip = 0; crda->crd_inject = skip + rplen; crda->crd_len = m->m_pkthdr.len; @@ -1005,18 +966,18 @@ crda->crd_klen = _KEYBITS(sav->key_auth); /* Allocate IPsec-specific opaque crypto info. */ - tc = (struct tdb_crypto *) malloc( - sizeof(struct tdb_crypto) + skip, M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { + xd = malloc(sizeof(struct xform_data) + skip, M_XDATA, + M_NOWAIT | M_ZERO); + if (xd == NULL) { crypto_freereq(crp); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); AHSTAT_INC(ahs_crypto); error = ENOBUFS; goto bad; } /* Save the skipped portion of the packet. */ - m_copydata(m, 0, skip, (caddr_t) (tc + 1)); + m_copydata(m, 0, skip, (caddr_t) (xd + 1)); /* * Fix IP header length on the header used for @@ -1026,7 +987,7 @@ switch (sav->sah->saidx.dst.sa.sa_family) { #ifdef INET case AF_INET: - bcopy(((caddr_t)(tc + 1)) + + bcopy(((caddr_t)(xd + 1)) + offsetof(struct ip, ip_len), (caddr_t) &iplen, sizeof(u_int16_t)); iplen = htons(ntohs(iplen) + rplen + authsize); @@ -1037,29 +998,29 @@ #ifdef INET6 case AF_INET6: - bcopy(((caddr_t)(tc + 1)) + + bcopy(((caddr_t)(xd + 1)) + offsetof(struct ip6_hdr, ip6_plen), - (caddr_t) &iplen, sizeof(u_int16_t)); + (caddr_t) &iplen, sizeof(uint16_t)); iplen = htons(ntohs(iplen) + rplen + authsize); m_copyback(m, offsetof(struct ip6_hdr, ip6_plen), - sizeof(u_int16_t), (caddr_t) &iplen); + sizeof(uint16_t), (caddr_t) &iplen); break; #endif /* INET6 */ } /* Fix the Next Header field in saved header. */ - ((u_int8_t *) (tc + 1))[protoff] = IPPROTO_AH; + ((uint8_t *) (xd + 1))[protoff] = IPPROTO_AH; /* Update the Next Protocol field in the IP header. */ prot = IPPROTO_AH; - m_copyback(m, protoff, sizeof(u_int8_t), (caddr_t) &prot); + m_copyback(m, protoff, sizeof(uint8_t), (caddr_t) &prot); /* "Massage" the packet headers for crypto processing. */ error = ah_massage_headers(&m, sav->sah->saidx.dst.sa.sa_family, skip, ahx->type, 1); if (error != 0) { m = NULL; /* mbuf was free'd by ah_massage_headers. */ - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); goto bad; } @@ -1069,19 +1030,15 @@ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ah_output_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_sid = cryptoid; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback. */ - key_addref(isr->sp); - tc->tc_isr = isr; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_skip = skip; - tc->tc_protoff = protoff; + xd->sp = sp; + xd->sav = sav; + xd->skip = skip; + xd->idx = idx; + xd->cryptoid = cryptoid; return crypto_dispatch(crp); bad: @@ -1096,45 +1053,37 @@ static int ah_output_cb(struct cryptop *crp) { - int skip, protoff, error; - struct tdb_crypto *tc; - struct ipsecrequest *isr; + struct xform_data *xd; + struct secpolicy *sp; struct secasvar *sav; struct mbuf *m; + uint64_t cryptoid; caddr_t ptr; + u_int idx; + int skip, error; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque data area!")); - skip = tc->tc_skip; - protoff = tc->tc_protoff; - ptr = (caddr_t) (tc + 1); m = (struct mbuf *) crp->crp_buf; + xd = (struct xform_data *) crp->crp_opaque; + sp = xd->sp; + sav = xd->sav; + skip = xd->skip; + idx = xd->idx; + cryptoid = xd->cryptoid; + ptr = (caddr_t) (xd + 1); - isr = tc->tc_isr; - IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); - IPSECREQUEST_LOCK(isr); - sav = tc->tc_sav; - /* With the isr lock released SA pointer can be updated. */ - if (sav != isr->sav) { - AHSTAT_INC(ahs_notdb); - DPRINTF(("%s: SA expired while in crypto\n", __func__)); - error = ENOBUFS; /*XXX*/ - goto bad; - } - /* Check for crypto errors. */ if (crp->crp_etype) { - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - IPSECREQUEST_UNLOCK(isr); + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; return (crypto_dispatch(crp)); } - AHSTAT_INC(ahs_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; + m_freem(m); goto bad; } @@ -1145,18 +1094,15 @@ error = EINVAL; goto bad; } - AHSTAT_INC(ahs_hist[sav->alg_auth]); - /* * Copy original headers (with the new protocol number) back * in place. */ m_copyback(m, 0, skip, ptr); - /* No longer needed. */ - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); - + AHSTAT_INC(ahs_hist[sav->alg_auth]); #ifdef REGRESSION /* Emulate man-in-the-middle attack when ipsec_integrity is TRUE. */ if (V_ipsec_integrity) { @@ -1172,33 +1118,26 @@ #endif /* NB: m is reclaimed by ipsec_process_done. */ - error = ipsec_process_done(m, isr); - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); + error = ipsec_process_done(m, sp, sav, idx); return (error); bad: - if (sav) - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); - if (m) - m_freem(m); - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); + key_freesav(&sav); + key_freesp(&sp); return (error); } static struct xformsw ah_xformsw = { - XF_AH, XFT_AUTH, "IPsec AH", - ah_init, ah_zeroize, ah_input, ah_output, + .xf_type = XF_AH, + .xf_name = "IPsec AH", + .xf_init = ah_init, + .xf_zeroize = ah_zeroize, + .xf_input = ah_input, + .xf_output = ah_output, }; -static void -ah_attach(void) -{ - - xform_register(&ah_xformsw); -} - -SYSINIT(ah_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ah_attach, NULL); +SYSINIT(ah_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_attach, &ah_xformsw); +SYSUNINIT(ah_xform_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_detach, &ah_xformsw); Index: head/sys/netipsec/xform_esp.c =================================================================== --- head/sys/netipsec/xform_esp.c +++ head/sys/netipsec/xform_esp.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include #include @@ -97,40 +97,6 @@ static int esp_input_cb(struct cryptop *op); static int esp_output_cb(struct cryptop *crp); -/* - * NB: this is public for use by the PF_KEY support. - * NB: if you add support here; be sure to add code to esp_attach below! - */ -struct enc_xform * -esp_algorithm_lookup(int alg) -{ - if (alg >= ESP_ALG_MAX) - return NULL; - switch (alg) { - case SADB_EALG_DESCBC: - return &enc_xform_des; - case SADB_EALG_3DESCBC: - return &enc_xform_3des; - case SADB_X_EALG_AES: - return &enc_xform_rijndael128; - case SADB_X_EALG_BLOWFISHCBC: - return &enc_xform_blf; - case SADB_X_EALG_CAST128CBC: - return &enc_xform_cast5; - case SADB_EALG_NULL: - return &enc_xform_null; - case SADB_X_EALG_CAMELLIACBC: - return &enc_xform_camellia; - case SADB_X_EALG_AESCTR: - return &enc_xform_aes_icm; - case SADB_X_EALG_AESGCM16: - return &enc_xform_aes_nist_gcm; - case SADB_X_EALG_AESGMAC: - return &enc_xform_aes_nist_gmac; - } - return NULL; -} - size_t esp_hdrsiz(struct secasvar *sav) { @@ -168,12 +134,12 @@ static int esp_init(struct secasvar *sav, struct xformsw *xsp) { - struct enc_xform *txform; + const struct enc_xform *txform; struct cryptoini cria, crie; int keylen; int error; - txform = esp_algorithm_lookup(sav->alg_enc); + txform = enc_algorithm_lookup(sav->alg_enc); if (txform == NULL) { DPRINTF(("%s: unsupported encryption algorithm %d\n", __func__, sav->alg_enc)); @@ -298,14 +264,15 @@ esp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { char buf[128]; - struct auth_hash *esph; - struct enc_xform *espx; - struct tdb_crypto *tc; - uint8_t *ivp; - int plen, alen, hlen; - struct newesp *esp; + const struct auth_hash *esph; + const struct enc_xform *espx; + struct xform_data *xd; struct cryptodesc *crde; struct cryptop *crp; + struct newesp *esp; + uint8_t *ivp; + uint64_t cryptoid; + int plen, alen, hlen; IPSEC_ASSERT(sav != NULL, ("null SA")); IPSEC_ASSERT(sav->tdb_encalgxform != NULL, ("null encoding xform")); @@ -354,14 +321,19 @@ /* * Check sequence number. */ - if (esph != NULL && sav->replay != NULL && - !ipsec_chkreplay(ntohl(esp->esp_seq), sav)) { - DPRINTF(("%s: packet replay check for %s\n", __func__, - ipsec_logsastr(sav, buf, sizeof(buf)))); /*XXX*/ - ESPSTAT_INC(esps_replay); - m_freem(m); - return ENOBUFS; /*XXX*/ + SECASVAR_LOCK(sav); + if (esph != NULL && sav->replay != NULL && sav->replay->wsize != 0) { + if (ipsec_chkreplay(ntohl(esp->esp_seq), sav) == 0) { + SECASVAR_UNLOCK(sav); + DPRINTF(("%s: packet replay check for %s\n", __func__, + ipsec_sa2str(sav, buf, sizeof(buf)))); + ESPSTAT_INC(esps_replay); + m_freem(m); + return (EACCES); + } } + cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); /* Update the counters */ ESPSTAT_ADD(esps_ibytes, m->m_pkthdr.len - (skip + hlen + alen)); @@ -377,12 +349,11 @@ } /* Get IPsec-specific opaque pointer */ - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto) + alen, - M_XDATA, M_NOWAIT | M_ZERO); - if (tc == NULL) { - crypto_freereq(crp); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + xd = malloc(sizeof(*xd) + alen, M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); ESPSTAT_INC(esps_crypto); + crypto_freereq(crp); m_freem(m); return ENOBUFS; } @@ -404,7 +375,7 @@ /* Copy the authenticator */ m_copydata(m, m->m_pkthdr.len - alen, alen, - (caddr_t) (tc + 1)); + (caddr_t) (xd + 1)); /* Chain authentication request */ crde = crda->crd_next; @@ -417,17 +388,14 @@ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_input_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_sid = cryptoid; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback */ - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_protoff = protoff; - tc->tc_skip = skip; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; + xd->sav = sav; + xd->protoff = protoff; + xd->skip = skip; + xd->cryptoid = cryptoid; /* Decryption descriptor */ IPSEC_ASSERT(crde != NULL, ("null esp crypto descriptor")); @@ -467,45 +435,39 @@ { char buf[128]; u_int8_t lastthree[3], aalg[AH_HMAC_MAXHASHLEN]; - int hlen, skip, protoff, error, alen; + const struct auth_hash *esph; + const struct enc_xform *espx; struct mbuf *m; struct cryptodesc *crd; - struct auth_hash *esph; - struct enc_xform *espx; - struct tdb_crypto *tc; + struct xform_data *xd; struct secasvar *sav; struct secasindex *saidx; caddr_t ptr; + uint64_t cryptoid; + int hlen, skip, protoff, error, alen; crd = crp->crp_desc; IPSEC_ASSERT(crd != NULL, ("null crypto descriptor!")); - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); - skip = tc->tc_skip; - protoff = tc->tc_protoff; m = (struct mbuf *) crp->crp_buf; - - sav = tc->tc_sav; - IPSEC_ASSERT(sav != NULL, ("null SA!")); - + xd = (struct xform_data *) crp->crp_opaque; + sav = xd->sav; + skip = xd->skip; + protoff = xd->protoff; + cryptoid = xd->cryptoid; saidx = &sav->sah->saidx; - IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET || - saidx->dst.sa.sa_family == AF_INET6, - ("unexpected protocol family %u", saidx->dst.sa.sa_family)); - esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; /* Check for crypto errors */ if (crp->crp_etype) { - /* Reset the session ID */ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - - if (crp->crp_etype == EAGAIN) + if (crp->crp_etype == EAGAIN) { + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; return (crypto_dispatch(crp)); - + } ESPSTAT_INC(esps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; @@ -527,7 +489,7 @@ AHSTAT_INC(ahs_hist[sav->alg_auth]); /* Copy the authenticator from the packet */ m_copydata(m, m->m_pkthdr.len - alen, alen, aalg); - ptr = (caddr_t) (tc + 1); + ptr = (caddr_t) (xd + 1); /* Verify authenticator */ if (timingsafe_bcmp(ptr, aalg, alen) != 0) { @@ -539,13 +501,13 @@ error = EACCES; goto bad; } - + m->m_flags |= M_AUTHIPDGM; /* Remove trailing authenticator */ m_adj(m, -alen); } /* Release the crypto descriptors */ - free(tc, M_XDATA), tc = NULL; + free(xd, M_XDATA), xd = NULL; crypto_freereq(crp), crp = NULL; /* @@ -561,13 +523,16 @@ m_copydata(m, skip + offsetof(struct newesp, esp_seq), sizeof (seq), (caddr_t) &seq); + SECASVAR_LOCK(sav); if (ipsec_updatereplay(ntohl(seq), sav)) { + SECASVAR_UNLOCK(sav); DPRINTF(("%s: packet replay check for %s\n", __func__, - ipsec_logsastr(sav, buf, sizeof(buf)))); + ipsec_sa2str(sav, buf, sizeof(buf)))); ESPSTAT_INC(esps_replay); - error = ENOBUFS; + error = EACCES; goto bad; } + SECASVAR_UNLOCK(sav); } /* Determine the ESP header length */ @@ -635,46 +600,40 @@ panic("%s: Unexpected address family: %d saidx=%p", __func__, saidx->dst.sa.sa_family, saidx); } - - KEY_FREESAV(&sav); return error; bad: - if (sav) - KEY_FREESAV(&sav); + if (sav != NULL) + key_freesav(&sav); if (m != NULL) m_freem(m); - if (tc != NULL) - free(tc, M_XDATA); + if (xd != NULL) + free(xd, M_XDATA); if (crp != NULL) crypto_freereq(crp); return error; } - /* - * ESP output routine, called by ipsec[46]_process_packet(). + * ESP output routine, called by ipsec[46]_perform_request(). */ static int -esp_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, - int skip, int protoff) +esp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx, int skip, int protoff) { - char buf[INET6_ADDRSTRLEN]; - struct enc_xform *espx; - struct auth_hash *esph; - uint8_t *ivp; - uint64_t cntr; - int hlen, rlen, padding, blks, alen, i, roff; - struct mbuf *mo = (struct mbuf *) NULL; - struct tdb_crypto *tc; - struct secasvar *sav; + char buf[IPSEC_ADDRSTRLEN]; + struct cryptodesc *crde = NULL, *crda = NULL; + struct cryptop *crp; + const struct auth_hash *esph; + const struct enc_xform *espx; + struct mbuf *mo = NULL; + struct xform_data *xd; struct secasindex *saidx; unsigned char *pad; - u_int8_t prot; + uint8_t *ivp; + uint64_t cntr, cryptoid; + int hlen, rlen, padding, blks, alen, i, roff; int error, maxpacketsize; + uint8_t prot; - struct cryptodesc *crde = NULL, *crda = NULL; - struct cryptop *crp; - - sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); esph = sav->tdb_authalgxform; espx = sav->tdb_encalgxform; @@ -720,8 +679,9 @@ error = EPFNOSUPPORT; goto bad; } + /* DPRINTF(("%s: skip %d hlen %d rlen %d padding %d alen %d blksd %d\n", - __func__, skip, hlen, rlen, padding, alen, blks)); + __func__, skip, hlen, rlen, padding, alen, blks)); */ if (skip + hlen + rlen + padding + alen > maxpacketsize) { DPRINTF(("%s: packet in SA %s/%08lx got too big " "(len %u, max len %u)\n", __func__, @@ -752,28 +712,32 @@ DPRINTF(("%s: %u byte ESP hdr inject failed for SA %s/%08lx\n", __func__, hlen, ipsec_address(&saidx->dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); - ESPSTAT_INC(esps_hdrops); /* XXX diffs from openbsd */ + ESPSTAT_INC(esps_hdrops); /* XXX diffs from openbsd */ error = ENOBUFS; goto bad; } /* Initialize ESP header. */ - bcopy((caddr_t) &sav->spi, mtod(mo, caddr_t) + roff, sizeof(u_int32_t)); + bcopy((caddr_t) &sav->spi, mtod(mo, caddr_t) + roff, + sizeof(uint32_t)); + SECASVAR_LOCK(sav); if (sav->replay) { - u_int32_t replay; + uint32_t replay; - SECASVAR_LOCK(sav); #ifdef REGRESSION /* Emulate replay attack when ipsec_replay is TRUE. */ if (!V_ipsec_replay) #endif sav->replay->count++; replay = htonl(sav->replay->count); - SECASVAR_UNLOCK(sav); - bcopy((caddr_t) &replay, - mtod(mo, caddr_t) + roff + sizeof(u_int32_t), - sizeof(u_int32_t)); + + bcopy((caddr_t) &replay, mtod(mo, caddr_t) + roff + + sizeof(uint32_t), sizeof(uint32_t)); } + cryptoid = sav->tdb_cryptoid; + if (SAV_ISCTRORGCM(sav)) + cntr = sav->cntr++; + SECASVAR_UNLOCK(sav); /* * Add padding -- better to do it ourselves than use the crypto engine, @@ -825,11 +789,10 @@ } /* IPsec-specific opaque crypto info. */ - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto), - M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { + xd = malloc(sizeof(struct xform_data), M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { crypto_freereq(crp); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); ESPSTAT_INC(esps_crypto); error = ENOBUFS; goto bad; @@ -855,13 +818,10 @@ /* Nonce is last four bytes of key, RFC3686 5.1 */ memcpy(ivp, sav->key_enc->key_data + _KEYLEN(sav->key_enc) - 4, 4); - SECASVAR_LOCK(sav); - cntr = sav->cntr++; - SECASVAR_UNLOCK(sav); be64enc(&ivp[4], cntr); - if (SAV_ISCTR(sav)) { /* Initial block counter is 1, RFC3686 4 */ + /* XXXAE: should we use this only for first packet? */ be32enc(&ivp[sav->ivlen + 4], 1); } @@ -870,21 +830,18 @@ } /* Callback parameters */ - key_addref(isr->sp); - tc->tc_isr = isr; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; - tc->tc_spi = sav->spi; - tc->tc_dst = saidx->dst; - tc->tc_proto = saidx->proto; + xd->sp = sp; + xd->sav = sav; + xd->idx = idx; + xd->cryptoid = cryptoid; /* Crypto operation descriptor. */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = esp_output_cb; - crp->crp_opaque = (caddr_t) tc; - crp->crp_sid = sav->tdb_cryptoid; + crp->crp_opaque = (caddr_t) xd; + crp->crp_sid = cryptoid; if (esph) { /* Authentication descriptor. */ @@ -903,53 +860,40 @@ m_freem(m); return (error); } - /* * ESP output callback from the crypto driver. */ static int esp_output_cb(struct cryptop *crp) { - char buf[INET6_ADDRSTRLEN]; - struct tdb_crypto *tc; - struct ipsecrequest *isr; + struct xform_data *xd; + struct secpolicy *sp; struct secasvar *sav; struct mbuf *m; + uint64_t cryptoid; + u_int idx; int error; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque data area!")); + xd = (struct xform_data *) crp->crp_opaque; m = (struct mbuf *) crp->crp_buf; + sp = xd->sp; + sav = xd->sav; + idx = xd->idx; + cryptoid = xd->cryptoid; - isr = tc->tc_isr; - IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); - IPSECREQUEST_LOCK(isr); - sav = tc->tc_sav; - - /* With the isr lock released, SA pointer may have changed. */ - if (sav != isr->sav) { - ESPSTAT_INC(esps_notdb); - DPRINTF(("%s: SA gone during crypto (SA %s/%08lx proto %u)\n", - __func__, ipsec_address(&tc->tc_dst, buf, sizeof(buf)), - (u_long) ntohl(tc->tc_spi), tc->tc_proto)); - error = ENOBUFS; /*XXX*/ - goto bad; - } - /* Check for crypto errors. */ if (crp->crp_etype) { - /* Reset session ID. */ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - IPSECREQUEST_UNLOCK(isr); + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; return (crypto_dispatch(crp)); } - ESPSTAT_INC(esps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); error = crp->crp_etype; + m_freem(m); goto bad; } @@ -960,19 +904,17 @@ error = EINVAL; goto bad; } + free(xd, M_XDATA); + crypto_freereq(crp); ESPSTAT_INC(esps_hist[sav->alg_enc]); if (sav->tdb_authalgxform != NULL) AHSTAT_INC(ahs_hist[sav->alg_auth]); - /* Release crypto descriptors. */ - free(tc, M_XDATA); - crypto_freereq(crp); - #ifdef REGRESSION /* Emulate man-in-the-middle attack when ipsec_integrity is TRUE. */ if (V_ipsec_integrity) { static unsigned char ipseczeroes[AH_HMAC_MAXHASHLEN]; - struct auth_hash *esph; + const struct auth_hash *esph; /* * Corrupt HMAC if we want to test integrity verification of @@ -990,33 +932,26 @@ #endif /* NB: m is reclaimed by ipsec_process_done. */ - error = ipsec_process_done(m, isr); - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); + error = ipsec_process_done(m, sp, sav, idx); return (error); bad: - if (sav) - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); - if (m) - m_freem(m); - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); + key_freesav(&sav); + key_freesp(&sp); return (error); } static struct xformsw esp_xformsw = { - XF_ESP, XFT_CONF|XFT_AUTH, "IPsec ESP", - esp_init, esp_zeroize, esp_input, - esp_output + .xf_type = XF_ESP, + .xf_name = "IPsec ESP", + .xf_init = esp_init, + .xf_zeroize = esp_zeroize, + .xf_input = esp_input, + .xf_output = esp_output, }; -static void -esp_attach(void) -{ - - xform_register(&esp_xformsw); -} -SYSINIT(esp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, esp_attach, NULL); +SYSINIT(esp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_attach, &esp_xformsw); +SYSUNINIT(esp_xform_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + xform_detach, &esp_xformsw); Index: head/sys/netipsec/xform_ipcomp.c =================================================================== --- head/sys/netipsec/xform_ipcomp.c +++ head/sys/netipsec/xform_ipcomp.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -65,6 +64,7 @@ #include #include +#include #include #include @@ -88,18 +88,6 @@ static int ipcomp_input_cb(struct cryptop *crp); static int ipcomp_output_cb(struct cryptop *crp); -struct comp_algo * -ipcomp_algorithm_lookup(int alg) -{ - if (alg >= IPCOMP_ALG_MAX) - return NULL; - switch (alg) { - case SADB_X_CALG_DEFLATE: - return &comp_algo_deflate; - } - return NULL; -} - /* * RFC 3173 p 2.2. Non-Expansion Policy: * If the total size of a compressed payload and the IPComp header, as @@ -116,10 +104,10 @@ { struct secasvar *sav; - sav = KEY_ALLOCSA_TUNNEL(src, dst, IPPROTO_IPCOMP); + sav = key_allocsa_tunnel(src, dst, IPPROTO_IPCOMP); if (sav == NULL) return (0); - KEY_FREESAV(&sav); + key_freesav(&sav); if (src->sa.sa_family == AF_INET) return (sizeof(struct in_addr) << 4); @@ -161,11 +149,11 @@ static int ipcomp_init(struct secasvar *sav, struct xformsw *xsp) { - struct comp_algo *tcomp; + const struct comp_algo *tcomp; struct cryptoini cric; /* NB: algorithm really comes in alg_enc and not alg_comp! */ - tcomp = ipcomp_algorithm_lookup(sav->alg_enc); + tcomp = comp_algorithm_lookup(sav->alg_enc); if (tcomp == NULL) { DPRINTF(("%s: unsupported compression algorithm %d\n", __func__, sav->alg_comp)); @@ -201,7 +189,7 @@ static int ipcomp_input(struct mbuf *m, struct secasvar *sav, int skip, int protoff) { - struct tdb_crypto *tc; + struct xform_data *xd; struct cryptodesc *crdc; struct cryptop *crp; struct ipcomp *ipcomp; @@ -236,12 +224,12 @@ return ENOBUFS; } /* Get IPsec-specific opaque pointer */ - tc = (struct tdb_crypto *) malloc(sizeof (*tc), M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { - m_freem(m); - crypto_freereq(crp); - DPRINTF(("%s: cannot allocate tdb_crypto\n", __func__)); + xd = malloc(sizeof(*xd), M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { + DPRINTF(("%s: cannot allocate xform_data\n", __func__)); IPCOMPSTAT_INC(ipcomps_crypto); + crypto_freereq(crp); + m_freem(m); return ENOBUFS; } crdc = crp->crp_desc; @@ -250,28 +238,26 @@ crdc->crd_len = m->m_pkthdr.len - (skip + hlen); crdc->crd_inject = skip; - tc->tc_ptr = 0; - /* Decompression operation */ crdc->crd_alg = sav->tdb_compalgxform->type; + /* Crypto operation descriptor */ crp->crp_ilen = m->m_pkthdr.len - (skip + hlen); crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ipcomp_input_cb; - crp->crp_sid = sav->tdb_cryptoid; - crp->crp_opaque = (caddr_t) tc; + crp->crp_opaque = (caddr_t) xd; /* These are passed as-is to the callback */ - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_protoff = protoff; - tc->tc_skip = skip; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; + xd->sav = sav; + xd->protoff = protoff; + xd->skip = skip; + SECASVAR_LOCK(sav); + crp->crp_sid = xd->cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); + return crypto_dispatch(crp); } @@ -281,28 +267,26 @@ static int ipcomp_input_cb(struct cryptop *crp) { - char buf[INET6_ADDRSTRLEN]; + char buf[IPSEC_ADDRSTRLEN]; struct cryptodesc *crd; - struct tdb_crypto *tc; - int skip, protoff; + struct xform_data *xd; struct mbuf *m; struct secasvar *sav; struct secasindex *saidx; - int hlen = IPCOMP_HLENGTH, error, clen; - u_int8_t nproto; caddr_t addr; + uint64_t cryptoid; + int hlen = IPCOMP_HLENGTH, error, clen; + int skip, protoff; + uint8_t nproto; crd = crp->crp_desc; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque crypto data area!")); - skip = tc->tc_skip; - protoff = tc->tc_protoff; m = (struct mbuf *) crp->crp_buf; - - sav = tc->tc_sav; - IPSEC_ASSERT(sav != NULL, ("null SA!")); - + xd = (struct xform_data *) crp->crp_opaque; + sav = xd->sav; + skip = xd->skip; + protoff = xd->protoff; + cryptoid = xd->cryptoid; saidx = &sav->sah->saidx; IPSEC_ASSERT(saidx->dst.sa.sa_family == AF_INET || saidx->dst.sa.sa_family == AF_INET6, @@ -310,12 +294,12 @@ /* Check for crypto errors */ if (crp->crp_etype) { - /* Reset the session ID */ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - return crypto_dispatch(crp); + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; + return (crypto_dispatch(crp)); } IPCOMPSTAT_INC(ipcomps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); @@ -334,7 +318,7 @@ clen = crp->crp_olen; /* Length of data after processing */ /* Release the crypto descriptors */ - free(tc, M_XDATA), tc = NULL; + free(xd, M_XDATA), xd = NULL; crypto_freereq(crp), crp = NULL; /* In case it's not done already, adjust the size of the mbuf chain */ @@ -379,37 +363,33 @@ panic("%s: Unexpected address family: %d saidx=%p", __func__, saidx->dst.sa.sa_family, saidx); } - - KEY_FREESAV(&sav); return error; bad: - if (sav) - KEY_FREESAV(&sav); - if (m) + if (sav != NULL) + key_freesav(&sav); + if (m != NULL) m_freem(m); - if (tc != NULL) - free(tc, M_XDATA); - if (crp) + if (xd != NULL) + free(xd, M_XDATA); + if (crp != NULL) crypto_freereq(crp); return error; } /* - * IPComp output routine, called by ipsec[46]_process_packet() + * IPComp output routine, called by ipsec[46]_perform_request() */ static int -ipcomp_output(struct mbuf *m, struct ipsecrequest *isr, struct mbuf **mp, - int skip, int protoff) +ipcomp_output(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav, + u_int idx, int skip, int protoff) { - char buf[INET6_ADDRSTRLEN]; - struct secasvar *sav; - struct comp_algo *ipcompx; - int error, ralen, maxpacketsize; + char buf[IPSEC_ADDRSTRLEN]; + const struct comp_algo *ipcompx; struct cryptodesc *crdc; struct cryptop *crp; - struct tdb_crypto *tc; + struct xform_data *xd; + int error, ralen, maxpacketsize; - sav = isr->sav; IPSEC_ASSERT(sav != NULL, ("null SA")); ipcompx = sav->tdb_compalgxform; IPSEC_ASSERT(ipcompx != NULL, ("null compression xform")); @@ -422,7 +402,7 @@ */ if (m->m_pkthdr.len <= ipcompx->minlen) { IPCOMPSTAT_INC(ipcomps_threshold); - return ipsec_process_done(m, isr); + return ipsec_process_done(m, sp, sav, idx); } ralen = m->m_pkthdr.len - skip; /* Raw payload length before comp. */ @@ -496,34 +476,32 @@ crdc->crd_alg = ipcompx->type; /* IPsec-specific opaque crypto info */ - tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto), - M_XDATA, M_NOWAIT|M_ZERO); - if (tc == NULL) { + xd = malloc(sizeof(struct xform_data), M_XDATA, M_NOWAIT | M_ZERO); + if (xd == NULL) { IPCOMPSTAT_INC(ipcomps_crypto); - DPRINTF(("%s: failed to allocate tdb_crypto\n", __func__)); + DPRINTF(("%s: failed to allocate xform_data\n", __func__)); crypto_freereq(crp); error = ENOBUFS; goto bad; } - key_addref(isr->sp); - tc->tc_isr = isr; - KEY_ADDREFSA(sav); - tc->tc_sav = sav; - tc->tc_spi = sav->spi; - tc->tc_dst = sav->sah->saidx.dst; - tc->tc_proto = sav->sah->saidx.proto; - tc->tc_protoff = protoff; - tc->tc_skip = skip; + xd->sp = sp; + xd->sav = sav; + xd->idx = idx; + xd->skip = skip; + xd->protoff = protoff; /* Crypto operation descriptor */ crp->crp_ilen = m->m_pkthdr.len; /* Total input length */ crp->crp_flags = CRYPTO_F_IMBUF | CRYPTO_F_CBIFSYNC; crp->crp_buf = (caddr_t) m; crp->crp_callback = ipcomp_output_cb; - crp->crp_opaque = (caddr_t) tc; - crp->crp_sid = sav->tdb_cryptoid; + crp->crp_opaque = (caddr_t) xd; + SECASVAR_LOCK(sav); + crp->crp_sid = xd->cryptoid = sav->tdb_cryptoid; + SECASVAR_UNLOCK(sav); + return crypto_dispatch(crp); bad: if (m) @@ -537,39 +515,32 @@ static int ipcomp_output_cb(struct cryptop *crp) { - char buf[INET6_ADDRSTRLEN]; - struct tdb_crypto *tc; - struct ipsecrequest *isr; + char buf[IPSEC_ADDRSTRLEN]; + struct xform_data *xd; + struct secpolicy *sp; struct secasvar *sav; struct mbuf *m; - int error, skip; + uint64_t cryptoid; + u_int idx; + int error, skip, protoff; - tc = (struct tdb_crypto *) crp->crp_opaque; - IPSEC_ASSERT(tc != NULL, ("null opaque data area!")); m = (struct mbuf *) crp->crp_buf; - skip = tc->tc_skip; + xd = (struct xform_data *) crp->crp_opaque; + idx = xd->idx; + sp = xd->sp; + sav = xd->sav; + skip = xd->skip; + protoff = xd->protoff; + cryptoid = xd->cryptoid; - isr = tc->tc_isr; - IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp")); - IPSECREQUEST_LOCK(isr); - sav = tc->tc_sav; - /* With the isr lock released SA pointer can be updated. */ - if (sav != isr->sav) { - IPCOMPSTAT_INC(ipcomps_notdb); - DPRINTF(("%s: SA expired while in crypto\n", __func__)); - error = ENOBUFS; /*XXX*/ - goto bad; - } - /* Check for crypto errors */ if (crp->crp_etype) { - /* Reset the session ID */ - if (sav->tdb_cryptoid != 0) - sav->tdb_cryptoid = crp->crp_sid; - if (crp->crp_etype == EAGAIN) { - IPSECREQUEST_UNLOCK(isr); - return crypto_dispatch(crp); + /* Reset the session ID */ + if (ipsec_updateid(sav, &crp->crp_sid, &cryptoid) != 0) + crypto_freesession(cryptoid); + xd->cryptoid = crp->crp_sid; + return (crypto_dispatch(crp)); } IPCOMPSTAT_INC(ipcomps_noxform); DPRINTF(("%s: crypto error %d\n", __func__, crp->crp_etype)); @@ -595,7 +566,8 @@ mo = m_makespace(m, skip, IPCOMP_HLENGTH, &roff); if (mo == NULL) { IPCOMPSTAT_INC(ipcomps_wrap); - DPRINTF(("%s: IPCOMP header inject failed for IPCA %s/%08lx\n", + DPRINTF(("%s: IPCOMP header inject failed " + "for IPCA %s/%08lx\n", __func__, ipsec_address(&sav->sah->saidx.dst, buf, sizeof(buf)), (u_long) ntohl(sav->spi))); error = ENOBUFS; @@ -622,7 +594,7 @@ /* Fix Next Protocol in IPv4/IPv6 header */ prot = IPPROTO_IPCOMP; - m_copyback(m, tc->tc_protoff, sizeof(u_int8_t), + m_copyback(m, protoff, sizeof(u_int8_t), (u_char *)&prot); /* Adjust the length in the IP header */ @@ -658,33 +630,22 @@ } /* Release the crypto descriptor */ - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); /* NB: m is reclaimed by ipsec_process_done. */ - error = ipsec_process_done(m, isr); - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); + error = ipsec_process_done(m, sp, sav, idx); return (error); bad: - if (sav) - KEY_FREESAV(&sav); - IPSECREQUEST_UNLOCK(isr); - KEY_FREESP(&isr->sp); if (m) m_freem(m); - free(tc, M_XDATA); + free(xd, M_XDATA); crypto_freereq(crp); + key_freesav(&sav); + key_freesp(&sp); return (error); } -static struct xformsw ipcomp_xformsw = { - XF_IPCOMP, XFT_COMP, "IPcomp", - ipcomp_init, ipcomp_zeroize, ipcomp_input, - ipcomp_output -}; - #ifdef INET static const struct encaptab *ipe4_cookie = NULL; extern struct domain inetdomain; @@ -768,6 +729,15 @@ } #endif +static struct xformsw ipcomp_xformsw = { + .xf_type = XF_IPCOMP, + .xf_name = "IPcomp", + .xf_init = ipcomp_init, + .xf_zeroize = ipcomp_zeroize, + .xf_input = ipcomp_input, + .xf_output = ipcomp_output, +}; + static void ipcomp_attach(void) { @@ -780,8 +750,23 @@ ipe6_cookie = encap_attach_func(AF_INET6, -1, ipcomp6_nonexp_encapcheck, &ipcomp6_protosw, NULL); #endif - xform_register(&ipcomp_xformsw); + xform_attach(&ipcomp_xformsw); } +static void +ipcomp_detach(void) +{ + +#ifdef INET + encap_detach(ipe4_cookie); +#endif +#ifdef INET6 + encap_detach(ipe6_cookie); +#endif + xform_detach(&ipcomp_xformsw); +} + SYSINIT(ipcomp_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ipcomp_attach, NULL); +SYSUNINIT(ipcomp_xform_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, + ipcomp_detach, NULL); Index: head/sys/netipsec/xform_tcp.c =================================================================== --- head/sys/netipsec/xform_tcp.c +++ head/sys/netipsec/xform_tcp.c @@ -1,7 +1,6 @@ -/* $FreeBSD$ */ - /*- * Copyright (c) 2003 Bruce M. Simpson + * Copyright (c) 2016 Andrey V. Elsukov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -28,19 +27,27 @@ */ /* TCP MD5 Signature Option (RFC2385) */ +#include +__FBSDID("$FreeBSD$"); + #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_ipsec.h" #include #include #include #include +#include +#include #include +#include #include +#include #include -#include #include +#include #include #include #include @@ -50,6 +57,7 @@ #include #include +#include #include #ifdef INET6 @@ -60,13 +68,266 @@ #include #include +#define TCP_SIGLEN 16 /* length of computed digest in bytes */ +#define TCP_KEYLEN_MIN 1 /* minimum length of TCP-MD5 key */ +#define TCP_KEYLEN_MAX 80 /* maximum length of TCP-MD5 key */ + +static inline void +tcp_fields_to_net(struct tcphdr *th) +{ + + th->th_seq = htonl(th->th_seq); + th->th_ack = htonl(th->th_ack); + th->th_win = htons(th->th_win); + th->th_urp = htons(th->th_urp); +} + +static int +tcp_ipsec_pcbctl(struct inpcb *inp, struct sockopt *sopt) +{ + struct tcpcb *tp; + int error, optval; + + INP_WLOCK_ASSERT(inp); + if (sopt->sopt_name != TCP_MD5SIG) { + INP_WUNLOCK(inp); + return (ENOPROTOOPT); + } + + tp = intotcpcb(inp); + if (sopt->sopt_dir == SOPT_GET) { + optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0; + INP_WUNLOCK(inp); + + /* On success return with released INP_WLOCK */ + return (sooptcopyout(sopt, &optval, sizeof(optval))); + } + + INP_WUNLOCK(inp); + + error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); + if (error != 0) + return (error); + + /* INP_WLOCK_RECHECK */ + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_WUNLOCK(inp); + return (ECONNRESET); + } + if (optval > 0) + tp->t_flags |= TF_SIGNATURE; + else + tp->t_flags &= ~TF_SIGNATURE; + + /* On success return with acquired INP_WLOCK */ + return (error); +} + /* + * Callback function invoked by m_apply() to digest TCP segment data + * contained within an mbuf chain. + */ +static int +tcp_signature_apply(void *fstate, void *data, u_int len) +{ + + MD5Update(fstate, (u_char *)data, len); + return (0); +} + +#ifdef INET +static int +ip_pseudo_compute(struct mbuf *m, MD5_CTX *ctx) +{ + struct ippseudo ipp; + struct ip *ip; + + ip = mtod(m, struct ip *); + ipp.ippseudo_src.s_addr = ip->ip_src.s_addr; + ipp.ippseudo_dst.s_addr = ip->ip_dst.s_addr; + ipp.ippseudo_p = IPPROTO_TCP; + ipp.ippseudo_pad = 0; + ipp.ippseudo_len = htons(m->m_pkthdr.len - (ip->ip_hl << 2)); + MD5Update(ctx, (char *)&ipp, sizeof(ipp)); + return (ip->ip_hl << 2); +} +#endif + +#ifdef INET6 +static int +ip6_pseudo_compute(struct mbuf *m, MD5_CTX *ctx) +{ + struct ip6_pseudo { + struct in6_addr src, dst; + uint32_t len; + uint32_t nxt; + } ip6p __aligned(4); + struct ip6_hdr *ip6; + + ip6 = mtod(m, struct ip6_hdr *); + ip6p.src = ip6->ip6_src; + ip6p.dst = ip6->ip6_dst; + ip6p.len = htonl(m->m_pkthdr.len - sizeof(*ip6)); /* XXX: ext headers */ + ip6p.nxt = htonl(IPPROTO_TCP); + MD5Update(ctx, (char *)&ip6p, sizeof(ip6p)); + return (sizeof(*ip6)); +} +#endif + +static int +tcp_signature_compute(struct mbuf *m, struct tcphdr *th, + struct secasvar *sav, u_char *buf) +{ + MD5_CTX ctx; + int len; + u_short csum; + + MD5Init(&ctx); + /* Step 1: Update MD5 hash with IP(v6) pseudo-header. */ + switch (sav->sah->saidx.dst.sa.sa_family) { +#ifdef INET + case AF_INET: + len = ip_pseudo_compute(m, &ctx); + break; +#endif +#ifdef INET6 + case AF_INET6: + len = ip6_pseudo_compute(m, &ctx); + break; +#endif + default: + return (EAFNOSUPPORT); + } + /* + * Step 2: Update MD5 hash with TCP header, excluding options. + * The TCP checksum must be set to zero. + */ + csum = th->th_sum; + th->th_sum = 0; + MD5Update(&ctx, (char *)th, sizeof(struct tcphdr)); + th->th_sum = csum; + /* + * Step 3: Update MD5 hash with TCP segment data. + * Use m_apply() to avoid an early m_pullup(). + */ + len += (th->th_off << 2); + if (m->m_pkthdr.len - len > 0) + m_apply(m, len, m->m_pkthdr.len - len, + tcp_signature_apply, &ctx); + /* + * Step 4: Update MD5 hash with shared secret. + */ + MD5Update(&ctx, sav->key_auth->key_data, _KEYLEN(sav->key_auth)); + MD5Final(buf, &ctx); + key_sa_recordxfer(sav, m); + return (0); +} + +static void +setsockaddrs(const struct mbuf *m, union sockaddr_union *src, + union sockaddr_union *dst) +{ + struct ip *ip; + + IPSEC_ASSERT(m->m_len >= sizeof(*ip), ("unexpected mbuf len")); + + ip = mtod(m, struct ip *); + switch (ip->ip_v) { +#ifdef INET + case IPVERSION: + ipsec4_setsockaddrs(m, src, dst); + break; +#endif +#ifdef INET6 + case (IPV6_VERSION >> 4): + ipsec6_setsockaddrs(m, src, dst); + break; +#endif + default: + bzero(src, sizeof(*src)); + bzero(dst, sizeof(*dst)); + } +} + +/* + * Compute TCP-MD5 hash of an *INBOUND* TCP segment. + * Parameters: + * m pointer to head of mbuf chain + * th pointer to TCP header + * buf pointer to storage for computed MD5 digest + * + * Return 0 if successful, otherwise return -1. + */ +static int +tcp_ipsec_input(struct mbuf *m, struct tcphdr *th, u_char *buf) +{ + char tmpdigest[TCP_SIGLEN]; + struct secasindex saidx; + struct secasvar *sav; + + setsockaddrs(m, &saidx.src, &saidx.dst); + saidx.proto = IPPROTO_TCP; + saidx.mode = IPSEC_MODE_TCPMD5; + saidx.reqid = 0; + sav = key_allocsa_tcpmd5(&saidx); + if (sav == NULL) { + KMOD_TCPSTAT_INC(tcps_sig_err_buildsig); + return (EACCES); + } + /* + * tcp_input() operates with TCP header fields in host + * byte order. We expect them in network byte order. + */ + tcp_fields_to_net(th); + tcp_signature_compute(m, th, sav, tmpdigest); + tcp_fields_to_host(th); + key_freesav(&sav); + if (bcmp(buf, tmpdigest, TCP_SIGLEN) != 0) { + KMOD_TCPSTAT_INC(tcps_sig_rcvbadsig); + return (EACCES); + } + KMOD_TCPSTAT_INC(tcps_sig_rcvgoodsig); + return (0); +} + +/* + * Compute TCP-MD5 hash of an *OUTBOUND* TCP segment. + * Parameters: + * m pointer to head of mbuf chain + * th pointer to TCP header + * buf pointer to storage for computed MD5 digest + * + * Return 0 if successful, otherwise return error code. + */ +static int +tcp_ipsec_output(struct mbuf *m, struct tcphdr *th, u_char *buf) +{ + struct secasindex saidx; + struct secasvar *sav; + + setsockaddrs(m, &saidx.src, &saidx.dst); + saidx.proto = IPPROTO_TCP; + saidx.mode = IPSEC_MODE_TCPMD5; + saidx.reqid = 0; + sav = key_allocsa_tcpmd5(&saidx); + if (sav == NULL) { + KMOD_TCPSTAT_INC(tcps_sig_err_buildsig); + return (EACCES); + } + tcp_signature_compute(m, th, sav, buf); + key_freesav(&sav); + return (0); +} + +/* * Initialize a TCP-MD5 SA. Called when the SA is being set up. * * We don't need to set up the tdb prefixed fields, as we don't use the * opencrypto code; we just perform a key length check. * - * XXX: Currently we only allow a single 'magic' SPI to be used. + * XXX: Currently we have used single 'magic' SPI and need to still + * support this. * * This allows per-host granularity without affecting the userland * interface, which is a simple socket option toggle switch, @@ -85,11 +346,6 @@ { int keylen; - if (sav->spi != htonl(TCP_SIG_SPI)) { - DPRINTF(("%s: SPI must be TCP_SIG_SPI (0x1000)\n", - __func__)); - return (EINVAL); - } if (sav->alg_auth != SADB_X_AALG_TCP_MD5) { DPRINTF(("%s: unsupported authentication algorithm %u\n", __func__, sav->alg_auth)); @@ -104,67 +360,76 @@ DPRINTF(("%s: invalid key length %u\n", __func__, keylen)); return (EINVAL); } - + sav->tdb_xform = xsp; return (0); } /* - * Paranoia. - * * Called when the SA is deleted. */ static int tcpsignature_zeroize(struct secasvar *sav) { - if (sav->key_auth) + if (sav->key_auth != NULL) bzero(sav->key_auth->key_data, _KEYLEN(sav->key_auth)); - - sav->tdb_cryptoid = 0; - sav->tdb_authalgxform = NULL; sav->tdb_xform = NULL; - return (0); } -/* - * Verify that an input packet passes authentication. - * Called from the ipsec layer. - * We do this from within tcp itself, so this routine is just a stub. - */ -static int -tcpsignature_input(struct mbuf *m, struct secasvar *sav, int skip, - int protoff) -{ +static struct xformsw tcpsignature_xformsw = { + .xf_type = XF_TCPSIGNATURE, + .xf_name = "TCP-MD5", + .xf_init = tcpsignature_init, + .xf_zeroize = tcpsignature_zeroize, +}; - return (0); -} +static const struct tcpmd5_methods tcpmd5_methods = { + .input = tcp_ipsec_input, + .output = tcp_ipsec_output, + .pcbctl = tcp_ipsec_pcbctl, +}; -/* - * Prepend the authentication header. - * Called from the ipsec layer. - * We do this from within tcp itself, so this routine is just a stub. - */ +#ifndef KLD_MODULE +/* TCP-MD5 support is build in the kernel */ +static const struct tcpmd5_support tcpmd5_ipsec = { + .enabled = IPSEC_MODULE_ENABLED, + .methods = &tcpmd5_methods +}; +const struct tcpmd5_support * const tcp_ipsec_support = &tcpmd5_ipsec; +#endif /* !KLD_MODULE */ + static int -tcpsignature_output(struct mbuf *m, struct ipsecrequest *isr, - struct mbuf **mp, int skip, int protoff) +tcpmd5_modevent(module_t mod, int type, void *data) { - return (EINVAL); + switch (type) { + case MOD_LOAD: + xform_attach(&tcpsignature_xformsw); +#ifdef KLD_MODULE + tcpmd5_support_enable(&tcpmd5_methods); +#endif + break; + case MOD_UNLOAD: +#ifdef KLD_MODULE + tcpmd5_support_disable(); +#endif + xform_detach(&tcpsignature_xformsw); + break; + default: + return (EOPNOTSUPP); + } + return (0); } -static struct xformsw tcpsignature_xformsw = { - XF_TCPSIGNATURE, XFT_AUTH, "TCPMD5", - tcpsignature_init, tcpsignature_zeroize, - tcpsignature_input, tcpsignature_output +static moduledata_t tcpmd5_mod = { + "tcpmd5", + tcpmd5_modevent, + 0 }; -static void -tcpsignature_attach(void) -{ - - xform_register(&tcpsignature_xformsw); -} - -SYSINIT(tcpsignature_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, - tcpsignature_attach, NULL); +DECLARE_MODULE(tcpmd5, tcpmd5_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); +MODULE_VERSION(tcpmd5, 1); +#ifdef KLD_MODULE +MODULE_DEPEND(tcpmd5, ipsec_support, 1, 1, 1); +#endif Index: head/usr.bin/netstat/inet.c =================================================================== --- head/usr.bin/netstat/inet.c +++ head/usr.bin/netstat/inet.c @@ -858,12 +858,25 @@ "{N:/successful ECN handshake%s}\n"); p(tcps_ecn_rcwnd, "\t{:congestion-reductions/%ju} " "{N:/time%s ECN reduced the congestion window}\n"); + + xo_close_container("ecn"); + xo_open_container("tcp-signature"); + p(tcps_sig_rcvgoodsig, "\t{:received-good-signature/%ju} " + "{N:/packet%s with matching signature received}\n"); + p(tcps_sig_rcvbadsig, "\t{:received-bad-signature/%ju} " + "{N:/packet%s with bad signature received}\n"); + p(tcps_sig_err_buildsig, "\t{:failed-make-signature/%ju} " + "{N:/time%s failed to make signature due to no SA}\n"); + p(tcps_sig_err_sigopt, "\t{:no-signature-expected/%ju} " + "{N:/time%s unexpected signature received}\n"); + p(tcps_sig_err_nosigopt, "\t{:no-signature-provided/%ju} " + "{N:/time%s no signature provided by segment}\n"); #undef p #undef p1a #undef p2 #undef p2a #undef p3 - xo_close_container("ecn"); + xo_close_container("tcp-signature"); xo_open_container("TCP connection count by state"); xo_emit("{T:/TCP connection count by state}:\n");