diff --git a/lib/libipsec/pfkey.c b/lib/libipsec/pfkey.c --- a/lib/libipsec/pfkey.c +++ b/lib/libipsec/pfkey.c @@ -1677,7 +1677,8 @@ /* duplicate check */ /* XXX Are there duplication either KEY_AUTH or KEY_ENCRYPT ?*/ - if (mhp[ext->sadb_ext_type] != NULL) { + if (mhp[ext->sadb_ext_type] != NULL && + ext->sadb_ext_type != SADB_X_EXT_IF_HW_OFFL /* XXXKIB */) { __ipsec_errcode = EIPSEC_INVAL_EXTTYPE; return -1; } @@ -1713,6 +1714,9 @@ case SADB_X_EXT_SA_REPLAY: case SADB_X_EXT_NEW_ADDRESS_SRC: case SADB_X_EXT_NEW_ADDRESS_DST: + case SADB_X_EXT_LFT_CUR_SW_OFFL: + case SADB_X_EXT_LFT_CUR_HW_OFFL: + case SADB_X_EXT_IF_HW_OFFL: mhp[ext->sadb_ext_type] = (caddr_t)ext; break; default: diff --git a/lib/libipsec/pfkey_dump.c b/lib/libipsec/pfkey_dump.c --- a/lib/libipsec/pfkey_dump.c +++ b/lib/libipsec/pfkey_dump.c @@ -43,6 +43,7 @@ #include #include +#include #include #include #include @@ -201,7 +202,7 @@ caddr_t mhp[SADB_EXT_MAX + 1]; struct sadb_sa *m_sa; struct sadb_x_sa2 *m_sa2; - struct sadb_lifetime *m_lftc, *m_lfth, *m_lfts; + struct sadb_lifetime *m_lftc, *m_lfth, *m_lfts, *m_lft_sw, *m_lft_hw; struct sadb_address *m_saddr, *m_daddr, *m_paddr; struct sadb_key *m_auth, *m_enc; struct sadb_ident *m_sid, *m_did; @@ -210,6 +211,10 @@ struct sadb_x_nat_t_type *natt_type; struct sadb_x_nat_t_port *natt_sport, *natt_dport; struct sadb_address *natt_oai, *natt_oar; + struct sadb_x_if_hw_offl *if_hw_offl; + caddr_t p, ep; + struct sadb_ext *ext; + bool first; /* check pfkey message. 
*/ if (pfkey_align(m, mhp)) { @@ -240,7 +245,9 @@ natt_dport = (struct sadb_x_nat_t_port *)mhp[SADB_X_EXT_NAT_T_DPORT]; natt_oai = (struct sadb_address *)mhp[SADB_X_EXT_NAT_T_OAI]; natt_oar = (struct sadb_address *)mhp[SADB_X_EXT_NAT_T_OAR]; - + m_lft_sw = (struct sadb_lifetime *)mhp[SADB_X_EXT_LFT_CUR_SW_OFFL]; + m_lft_hw = (struct sadb_lifetime *)mhp[SADB_X_EXT_LFT_CUR_HW_OFFL]; + if_hw_offl = (struct sadb_x_if_hw_offl *)mhp[SADB_X_EXT_IF_HW_OFFL]; /* source address */ if (m_saddr == NULL) { @@ -332,6 +339,27 @@ GETMSGSTR(str_state, m_sa->sadb_sa_state); printf("\n"); + /* hw offload interface */ + if (if_hw_offl != NULL) { + p = (caddr_t)m; + ep = p + PFKEY_UNUNIT64(m->sadb_msg_len); + p += sizeof(struct sadb_msg); + printf("\thw offl if: "); + + for (first = true; p < ep; p += PFKEY_EXTLEN(ext)) { + ext = (struct sadb_ext *)p; + if (ext->sadb_ext_type != SADB_X_EXT_IF_HW_OFFL) + continue; + if_hw_offl = (struct sadb_x_if_hw_offl *)ext; + if (first) + first = false; + else + printf(","); + printf("%s", if_hw_offl->sadb_x_if_hw_offl_if); + } + printf("\n"); + } + /* lifetime */ if (m_lftc != NULL) { time_t tmp_time = time(0); @@ -381,7 +409,23 @@ /* XXX DEBUG */ printf("refcnt=%u\n", m->sadb_msg_reserved); - return; + if (m_lft_sw != NULL) { + printf("\tsw offl use: %s", + str_time(m_lft_sw->sadb_lifetime_usetime)); + printf("\tsw offl allocated: %lu", + (unsigned long)m_lft_sw->sadb_lifetime_allocations); + str_lifetime_byte(m_lft_sw, "sw offl"); + printf("\n"); + } + + if (m_lft_hw != NULL) { + printf("\thw offl use: %s", + str_time(m_lft_hw->sadb_lifetime_usetime)); + printf("\thw offl allocated: %lu", + (unsigned long)m_lft_hw->sadb_lifetime_allocations); + str_lifetime_byte(m_lft_hw, "hw offl"); + printf("\n"); + } } void diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c --- a/sbin/ifconfig/ifconfig.c +++ b/sbin/ifconfig/ifconfig.c @@ -2068,6 +2068,8 @@ setifcapnv), DEF_CMD_SARG("-rxtls", "-"IFCAP2_RXTLS4_NAME ",-" IFCAP2_RXTLS6_NAME, 
setifcapnv), + DEF_CMD_SARG("ipsec_accel", IFCAP2_IPSEC_ACCEL_NAME, setifcapnv), + DEF_CMD_SARG("-ipsec_accel", "-"IFCAP2_IPSEC_ACCEL_NAME, setifcapnv), DEF_CMD("wol", IFCAP_WOL, setifcap), DEF_CMD("-wol", IFCAP_WOL, clearifcap), DEF_CMD("wol_ucast", IFCAP_WOL_UCAST, setifcap), diff --git a/sbin/setkey/parse.y b/sbin/setkey/parse.y --- a/sbin/setkey/parse.y +++ b/sbin/setkey/parse.y @@ -46,6 +46,7 @@ #include #include +#include #include #include #include @@ -68,6 +69,8 @@ struct addrinfo *p_natt_oai, *p_natt_oar; int p_natt_sport, p_natt_dport; int p_natt_fraglen; +bool esn; +vchar_t p_hwif; static int p_aiflags = 0, p_aifamily = PF_UNSPEC; @@ -115,6 +118,7 @@ %token SPDADD SPDDELETE SPDDUMP SPDFLUSH %token F_POLICY PL_REQUESTS %token F_AIFLAGS F_NATT F_NATT_MTU +%token F_ESN F_HWIF %token TAGGED %type prefix protocol_spec upper_spec @@ -539,12 +543,21 @@ { p_natt_fraglen = $2; } + | F_ESN + { + esn = true; + p_ext |= SADB_X_SAFLAGS_ESN; + } + | F_HWIF STRING + { + p_hwif = $2; + } ; /* definition about command for SPD management */ /* spdadd */ spdadd_command - : SPDADD ipaddropts STRING prefix portstr STRING prefix portstr upper_spec upper_misc_spec policy_spec EOT + : SPDADD ipaddropts STRING prefix portstr STRING prefix portstr upper_spec upper_misc_spec policy_spec spd_hwif EOT { int status; struct addrinfo *src, *dst; @@ -648,6 +661,14 @@ | ipaddropts ipaddropt ; +spd_hwif + : + | F_HWIF STRING + { + p_hwif = $2; + } + ; + ipaddropt : F_AIFLAGS { @@ -831,6 +852,7 @@ char buf[BUFSIZ]; int l, l0; struct sadb_address m_addr; + struct sadb_x_if_hw_offl m_if_hw; struct addrinfo *s, *d; int n; int plen; @@ -849,6 +871,20 @@ memcpy(buf + l, policy->buf, policy->len); l += policy->len; + if (p_hwif.len != 0) { + l0 = sizeof(struct sadb_x_if_hw_offl); + m_if_hw.sadb_x_if_hw_offl_len = PFKEY_UNIT64(l0); + m_if_hw.sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + m_if_hw.sadb_x_if_hw_offl_flags = 0; + memset(&m_if_hw.sadb_x_if_hw_offl_if[0], 0, + 
sizeof(m_if_hw.sadb_x_if_hw_offl_if)); + strlcpy(&m_if_hw.sadb_x_if_hw_offl_if[0], p_hwif.buf, + sizeof(m_if_hw.sadb_x_if_hw_offl_if)); + + memcpy(buf + l, &m_if_hw, l0); + l += l0; + } + l0 = l; n = 0; @@ -1040,6 +1076,7 @@ struct sadb_x_nat_t_type m_natt_type; struct sadb_x_nat_t_port m_natt_port; struct sadb_x_nat_t_frag m_natt_frag; + struct sadb_x_if_hw_offl m_if_hw; int n; int plen; struct sockaddr *sa; @@ -1256,6 +1293,20 @@ } } + if (p_hwif.len != 0) { + len = sizeof(struct sadb_x_if_hw_offl); + m_if_hw.sadb_x_if_hw_offl_len = PFKEY_UNIT64(len); + m_if_hw.sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + m_if_hw.sadb_x_if_hw_offl_flags = 0; + memset(&m_if_hw.sadb_x_if_hw_offl_if[0], 0, + sizeof(m_if_hw.sadb_x_if_hw_offl_if)); + strlcpy(&m_if_hw.sadb_x_if_hw_offl_if[0], p_hwif.buf, + sizeof(m_if_hw.sadb_x_if_hw_offl_if)); + + memcpy(buf + l, &m_if_hw, len); + l += len; + } + if (n == 0) return -1; else @@ -1355,6 +1406,10 @@ p_natt_oai = p_natt_oar = NULL; p_natt_sport = p_natt_dport = 0; p_natt_fraglen = -1; + + esn = false; + p_hwif.len = 0; + p_hwif.buf = NULL; } void diff --git a/sbin/setkey/setkey.8 b/sbin/setkey/setkey.8 --- a/sbin/setkey/setkey.8 +++ b/sbin/setkey/setkey.8 @@ -341,6 +341,8 @@ not indication of the optional components. .It Fl natt_mtu Ar fragsize Configure NAT-T fragment size. +.It Fl esn +Enable Extended Sequence Number extension for this SA. .El .\" .Pp diff --git a/sbin/setkey/token.l b/sbin/setkey/token.l --- a/sbin/setkey/token.l +++ b/sbin/setkey/token.l @@ -187,6 +187,8 @@ {hyphen}ls { return(F_LIFETIME_SOFT); } {hyphen}natt { return(F_NATT); } {hyphen}natt_mtu { return(F_NATT_MTU); } +{hyphen}esn { return(F_ESN); } +{hyphen}hwif { return(F_HWIF); } /* ... 
*/ any { return(ANY); } diff --git a/sys/conf/options b/sys/conf/options --- a/sys/conf/options +++ b/sys/conf/options @@ -465,6 +465,7 @@ IPSEC opt_ipsec.h IPSEC_DEBUG opt_ipsec.h IPSEC_SUPPORT opt_ipsec.h +IPSEC_ACCEL opt_ipsec.h IPSTEALTH KERN_TLS KRPC diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c --- a/sys/dev/mlx5/mlx5_en/mlx5_en_main.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_main.c @@ -4455,6 +4455,32 @@ IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL); } +static int +mlx5e_sa_in_install(struct ifnet *ifp, void *sav, void *dst_addr, + int sproto, uint32_t spi, uint32_t drv_spi, void **privp) +{ + return (0); +} + +static int +mlx5e_sa_out_install(struct ifnet *ifp, void *sp, + void *sav, uint32_t drv_spi, void **privp) +{ + return (0); +} + +static int +mlx5e_sa_deinstall(struct ifnet *ifp, uint32_t drv_spi, void *priv) +{ + return (0); +} + +static const struct if_ipsec_accel_methods mlx5e_ipsec_funcs = { + .if_sa_in_install = mlx5e_sa_in_install, + .if_sa_out_install = mlx5e_sa_out_install, + .if_sa_deinstall = mlx5e_sa_deinstall, +}; + static void * mlx5e_create_ifp(struct mlx5_core_dev *mdev) { @@ -4523,7 +4549,7 @@ #endif if_setcapabilitiesbit(ifp, IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO, 0); if_setcapabilities2bit(ifp, IFCAP2_BIT(IFCAP2_RXTLS4) | - IFCAP2_BIT(IFCAP2_RXTLS6), 0); + IFCAP2_BIT(IFCAP2_RXTLS6) | IFCAP2_BIT(IFCAP2_IPSEC_ACCEL), 0); if_setsndtagallocfn(ifp, mlx5e_snd_tag_alloc); #ifdef RATELIMIT if_setratelimitqueryfn(ifp, mlx5e_ratelimit_query); @@ -4549,6 +4575,7 @@ if (if_getcapabilities(ifp) & IFCAP_VXLAN_HWTSO) if_sethwassistbits(ifp, CSUM_INNER_IP6_TSO | CSUM_INNER_IP_TSO, 0); + if_setipsec_accel_methods(ifp, &mlx5e_ipsec_funcs); /* ifnet sysctl tree */ sysctl_ctx_init(&priv->sysctl_ctx); priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev), diff --git a/sys/modules/ipsec/Makefile b/sys/modules/ipsec/Makefile --- a/sys/modules/ipsec/Makefile +++ b/sys/modules/ipsec/Makefile 
@@ -2,8 +2,8 @@ .PATH: ${SRCTOP}/sys/net ${SRCTOP}/sys/netipsec KMOD= ipsec -SRCS= if_ipsec.c ipsec.c ipsec_input.c ipsec_mbuf.c ipsec_mod.c \ - ipsec_output.c xform_ah.c xform_esp.c xform_ipcomp.c \ +SRCS= if_ipsec.c ipsec.c ipsec_accel.c ipsec_input.c ipsec_mbuf.c \ + ipsec_mod.c ipsec_output.c xform_ah.c xform_esp.c xform_ipcomp.c \ opt_inet.h opt_inet6.h opt_ipsec.h opt_kern_tls.h opt_sctp.h .if "${MK_INET}" != "no" || "${MK_INET6}" != "no" SRCS+= udpencap.c diff --git a/sys/net/if.h b/sys/net/if.h --- a/sys/net/if.h +++ b/sys/net/if.h @@ -255,7 +255,8 @@ #define IFCAP_B_TXTLS_RTLMT 31 /* can do TLS with rate limiting */ #define IFCAP_B_RXTLS4 32 /* can to TLS receive for TCP */ #define IFCAP_B_RXTLS6 33 /* can to TLS receive for TCP6 */ -#define __IFCAP_B_SIZE 34 +#define IFCAP_B_IPSEC_ACCEL 34 /* inline IPSEC offload */ +#define __IFCAP_B_SIZE 35 #define IFCAP_B_MAX (__IFCAP_B_MAX - 1) #define IFCAP_B_SIZE (__IFCAP_B_SIZE) @@ -298,6 +299,7 @@ /* IFCAP2_* are integers, not bits. */ #define IFCAP2_RXTLS4 (IFCAP_B_RXTLS4 - 32) #define IFCAP2_RXTLS6 (IFCAP_B_RXTLS6 - 32) +#define IFCAP2_IPSEC_ACCEL (IFCAP_B_IPSEC_ACCEL - 32) #define IFCAP2_BIT(x) (1UL << (x)) diff --git a/sys/net/if.c b/sys/net/if.c --- a/sys/net/if.c +++ b/sys/net/if.c @@ -2413,6 +2413,7 @@ const struct ifcap_nv_bit_name ifcap2_nv_bit_names[] = { CAP2NV(RXTLS4), CAP2NV(RXTLS6), + CAP2NV(IPSEC_ACCEL), {0, NULL} }; #undef CAPNV @@ -5167,6 +5168,12 @@ return (ifp->if_l2com); } +void +if_setipsec_accel_methods(if_t ifp, const struct if_ipsec_accel_methods *m) +{ + ifp->if_ipsec_accel_m = m; +} + #ifdef DDB static void if_show_ifnet(struct ifnet *ifp) diff --git a/sys/net/if_ipsec.c b/sys/net/if_ipsec.c --- a/sys/net/if_ipsec.c +++ b/sys/net/if_ipsec.c @@ -415,12 +415,12 @@ switch (af) { #ifdef INET case AF_INET: - error = ipsec4_process_packet(m, sp, NULL); + error = ipsec4_process_packet(ifp, m, sp, NULL, ifp->if_mtu); break; #endif #ifdef INET6 case AF_INET6: - error = ipsec6_process_packet(m, sp, 
NULL); + error = ipsec6_process_packet(ifp, m, sp, NULL, ifp->if_mtu); break; #endif default: @@ -901,8 +901,10 @@ } return (0); fail: - for (i = 0; i < IPSEC_SPCOUNT; i++) - key_freesp(&sp[i]); + for (i = 0; i < IPSEC_SPCOUNT; i++) { + if (sp[i] != NULL) + key_freesp(&sp[i]); + } return (ENOMEM); } diff --git a/sys/net/if_private.h b/sys/net/if_private.h --- a/sys/net/if_private.h +++ b/sys/net/if_private.h @@ -138,6 +138,8 @@ int (*if_requestencap) /* make link header from request */ (struct ifnet *, struct if_encap_req *); + const struct if_ipsec_accel_methods *if_ipsec_accel_m; + /* Statistics. */ counter_u64_t if_counters[IFCOUNTERS]; diff --git a/sys/net/if_strings.h b/sys/net/if_strings.h --- a/sys/net/if_strings.h +++ b/sys/net/if_strings.h @@ -60,9 +60,11 @@ #define IFCAP_TXTLS_RTLMT_NAME "TXTLS_RTLMT" #define IFCAP_RXTLS4_NAME "RXTLS4" #define IFCAP_RXTLS6_NAME "RXTLS6" +#define IFCAP_IPSEC_ACCEL_NAME "IPSEC" #define IFCAP2_RXTLS4_NAME IFCAP_RXTLS4_NAME #define IFCAP2_RXTLS6_NAME IFCAP_RXTLS6_NAME +#define IFCAP2_IPSEC_ACCEL_NAME IFCAP_IPSEC_ACCEL_NAME static const char *ifcap_bit_names[] = { IFCAP_RXCSUM_NAME, @@ -99,6 +101,7 @@ IFCAP_TXTLS_RTLMT_NAME, IFCAP_RXTLS4_NAME, IFCAP_RXTLS6_NAME, + IFCAP_IPSEC_ACCEL_NAME, }; #ifdef IFCAP_B_SIZE diff --git a/sys/net/if_var.h b/sys/net/if_var.h --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -131,6 +131,27 @@ typedef int (*if_transmit_fn_t)(if_t, struct mbuf *); typedef uint64_t (*if_get_counter_t)(if_t, ift_counter); typedef void (*if_reassign_fn_t)(if_t, struct vnet *, char *); +typedef int (*if_spdadd_fn_t)(if_t, void *sp, void *inp, void **priv); +typedef int (*if_spddel_fn_t)(if_t, void *sp, void *priv); +typedef int (*if_sa_in_install_fn_t)(if_t ifp, void *sav, + void *dst_addr, int sproto, uint32_t spi, uint32_t drv_spi,void **privp); +typedef int (*if_sa_deinstall_fn_t)(if_t ifp, uint32_t drv_spi, void *priv); +typedef int (*if_sa_out_install_fn_t)(if_t ifp, void *sp, + void *sav, uint32_t drv_spi, void 
**privp); +struct seclifetime; +#define IF_SA_CNT_UPD 0x80000000 +enum IF_SA_CNT_WHICH { + IF_SA_CNT_IFP_HW_VAL = 1, + IF_SA_CNT_TOTAL_SW_VAL, + IF_SA_CNT_TOTAL_HW_VAL, + IF_SA_CNT_IFP_HW_UPD = IF_SA_CNT_IFP_HW_VAL | IF_SA_CNT_UPD, + IF_SA_CNT_TOTAL_SW_UPD = IF_SA_CNT_TOTAL_SW_VAL | IF_SA_CNT_UPD, + IF_SA_CNT_TOTAL_HW_UPD = IF_SA_CNT_TOTAL_HW_VAL | IF_SA_CNT_UPD, +}; +typedef int (*if_sa_cnt_fn_t)(if_t ifp, void *sa, + uint32_t drv_spi, void *priv, struct seclifetime *lt); +typedef int (*if_ipsec_hwassist_fn_t)(if_t ifp, void *sav, + uint32_t drv_spi,void *priv); struct ifnet_hw_tsomax { u_int tsomaxbytes; /* TSO total burst length limit in bytes */ @@ -700,6 +721,17 @@ void if_setreassignfn(if_t ifp, if_reassign_fn_t); void if_setratelimitqueryfn(if_t ifp, if_ratelimit_query_t); +struct if_ipsec_accel_methods { + if_spdadd_fn_t if_spdadd; + if_spddel_fn_t if_spddel; + if_sa_in_install_fn_t if_sa_in_install; + if_sa_deinstall_fn_t if_sa_deinstall; + if_sa_out_install_fn_t if_sa_out_install; + if_sa_cnt_fn_t if_sa_cnt; + if_ipsec_hwassist_fn_t if_hwassist; +}; +void if_setipsec_accel_methods(if_t ifp, const struct if_ipsec_accel_methods *); + /* TSO */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *); int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *); diff --git a/sys/net/pfkeyv2.h b/sys/net/pfkeyv2.h --- a/sys/net/pfkeyv2.h +++ b/sys/net/pfkeyv2.h @@ -296,6 +296,13 @@ }; _Static_assert(sizeof(struct sadb_x_sa_replay) == 8, "struct size mismatch"); +struct sadb_x_if_hw_offl { + u_int16_t sadb_x_if_hw_offl_len; + u_int16_t sadb_x_if_hw_offl_exttype; + u_int32_t sadb_x_if_hw_offl_flags; + u_int8_t sadb_x_if_hw_offl_if[32]; /* IF_NAMESIZE is 16, keep room */ +}; + #define SADB_EXT_RESERVED 0 #define SADB_EXT_SA 1 #define SADB_EXT_LIFETIME_CURRENT 2 @@ -326,7 +333,10 @@ #define SADB_X_EXT_SA_REPLAY 26 /* Replay window override. 
*/ #define SADB_X_EXT_NEW_ADDRESS_SRC 27 #define SADB_X_EXT_NEW_ADDRESS_DST 28 -#define SADB_EXT_MAX 28 +#define SADB_X_EXT_LFT_CUR_SW_OFFL 29 +#define SADB_X_EXT_LFT_CUR_HW_OFFL 30 +#define SADB_X_EXT_IF_HW_OFFL 31 +#define SADB_EXT_MAX 31 #define SADB_SATYPE_UNSPEC 0 #define SADB_SATYPE_AH 2 diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -673,7 +673,7 @@ error = ENOBUFS; goto bad; } - if ((error = IPSEC_OUTPUT(ipv4, m, inp)) != 0) { + if ((error = IPSEC_OUTPUT(ipv4, ifp, m, inp, mtu)) != 0) { if (error == EINPROGRESS) error = 0; goto done; diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -3924,6 +3924,8 @@ tp->t_tsomax = cap.tsomax; tp->t_tsomaxsegcount = cap.tsomaxsegcount; tp->t_tsomaxsegsize = cap.tsomaxsegsize; + if (cap.ipsec_tso) + tp->t_flags2 |= TF2_IPSEC_TSO; } } diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -201,9 +201,7 @@ struct tcphdr *th; u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen, ulen; -#if defined(IPSEC) || defined(IPSEC_SUPPORT) unsigned ipsec_optlen = 0; -#endif int idle, sendalot, curticks; int sack_rxmit, sack_bytes_rxmt; struct sackhole *p; @@ -547,15 +545,15 @@ offsetof(struct ipoption, ipopt_list); else ipoptlen = 0; -#if defined(IPSEC) || defined(IPSEC_SUPPORT) ipoptlen += ipsec_optlen; -#endif if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && (tp->t_port == 0) && ((tp->t_flags & TF_SIGNATURE) == 0) && tp->rcv_numsacks == 0 && sack_rxmit == 0 && - ipoptlen == 0 && !(flags & TH_SYN)) + (ipoptlen == 0 || (ipoptlen == ipsec_optlen && + (tp->t_flags2 & TF2_IPSEC_TSO) != 0)) && + !(flags & TH_SYN)) tso = 1; if (SEQ_LT((sack_rxmit ? 
p->rxmit : tp->snd_nxt) + len, @@ -911,7 +909,7 @@ * overflowing or exceeding the maximum length * allowed by the network interface: */ - KASSERT(ipoptlen == 0, + KASSERT(ipoptlen == ipsec_optlen, ("%s: TSO can't do IP options", __func__)); /* @@ -920,8 +918,8 @@ */ if (if_hw_tsomax != 0) { /* compute maximum TSO length */ - max_len = (if_hw_tsomax - hdrlen - - max_linkhdr); + max_len = if_hw_tsomax - hdrlen - + ipsec_optlen - max_linkhdr; if (max_len <= 0) { len = 0; } else if (len > max_len) { @@ -935,7 +933,7 @@ * fractional unless the send sockbuf can be * emptied: */ - max_len = (tp->t_maxseg - optlen); + max_len = tp->t_maxseg - optlen - ipsec_optlen; if (((uint32_t)off + (uint32_t)len) < sbavail(&so->so_snd)) { moff = len % max_len; @@ -1384,10 +1382,10 @@ * The TCP pseudo header checksum is always provided. */ if (tso) { - KASSERT(len > tp->t_maxseg - optlen, + KASSERT(len > tp->t_maxseg - optlen - ipsec_optlen, ("%s: len <= tso_segsz", __func__)); m->m_pkthdr.csum_flags |= CSUM_TSO; - m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen; + m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen - ipsec_optlen; } KASSERT(len + hdrlen == m_length(m, NULL), diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -3379,6 +3379,9 @@ cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; + /* XXXKIB IFCAP2_IPSEC_ACCEL_TSO */ + cap->ipsec_tso = (ifp->if_capenable2 & + IFCAP2_BIT(IFCAP2_IPSEC_ACCEL)) != 0; } } } @@ -3418,6 +3421,7 @@ cap->tsomax = ifp->if_hw_tsomax; cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; + cap->ipsec_tso = false; /* XXXKIB */ } } } diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -39,6 +39,8 @@ #include #include #include +#else +#include #endif #define TCP_END_BYTE_INFO 8 /* Bytes that makeup the "end 
information array" */ @@ -849,6 +851,7 @@ #define TF2_MBUF_QUEUE_READY 0x00020000 /* Inputs can be queued */ #define TF2_DONT_SACK_QUEUE 0x00040000 /* Don't wake on sack */ #define TF2_CANNOT_DO_ECN 0x00080000 /* The stack does not do ECN */ +#define TF2_IPSEC_TSO 0x00100000 /* IPSEC + TSO supported */ /* * Structure to hold TCP options that are only used during segment @@ -904,6 +907,7 @@ u_int tsomax; u_int tsomaxsegcount; u_int tsomaxsegsize; + bool ipsec_tso; }; #ifndef _NETINET_IN_PCB_H_ diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -450,27 +450,6 @@ #endif } -#if defined(IPSEC) || defined(IPSEC_SUPPORT) - /* - * IPSec checking which handles several cases. - * FAST IPSEC: We re-injected the packet. - * XXX: need scope argument. - */ - if (IPSEC_ENABLED(ipv6)) { - m = mb_unmapped_to_ext(m); - if (m == NULL) { - IP6STAT_INC(ip6s_odropped); - error = ENOBUFS; - goto bad; - } - if ((error = IPSEC_OUTPUT(ipv6, m, inp)) != 0) { - if (error == EINPROGRESS) - error = 0; - goto done; - } - } -#endif /* IPSEC */ - /* Source address validation. */ ip6 = mtod(m, struct ip6_hdr *); if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && @@ -801,6 +780,27 @@ KASSERT((ifp != NULL), ("output interface must not be NULL")); KASSERT((origifp != NULL), ("output address interface must not be NULL")); +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + /* + * IPSec checking which handles several cases. + * FAST IPSEC: We re-injected the packet. + * XXX: need scope argument. + */ + if (IPSEC_ENABLED(ipv6)) { + m = mb_unmapped_to_ext(m); + if (m == NULL) { + IP6STAT_INC(ip6s_odropped); + error = ENOBUFS; + goto bad; + } + if ((error = IPSEC_OUTPUT(ipv6, ifp, m, inp, mtu)) != 0) { + if (error == EINPROGRESS) + error = 0; + goto done; + } + } +#endif /* IPSEC */ + if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. 
*/ in6_ifstat_inc(ifp, ifs6_out_request); diff --git a/sys/netipsec/esp_var.h b/sys/netipsec/esp_var.h --- a/sys/netipsec/esp_var.h +++ b/sys/netipsec/esp_var.h @@ -74,6 +74,8 @@ #include VNET_DECLARE(int, esp_enable); +VNET_DECLARE(int, esp_ctr_compatibility); +#define V_esp_ctr_compatibility VNET(esp_ctr_compatibility) VNET_PCPUSTAT_DECLARE(struct espstat, espstat); #define ESPSTAT_ADD(name, val) \ diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -71,6 +71,12 @@ u_int level; /* IPsec level defined below. */ }; +struct ipsec_accel_adddel_sp_tq { + struct vnet *adddel_vnet; + struct task adddel_task; + int adddel_scheduled; +}; + /* Security Policy Data Base */ struct secpolicy { TAILQ_ENTRY(secpolicy) chain; @@ -102,6 +108,13 @@ time_t lastused; /* updated every when kernel sends a packet */ long lifetime; /* duration of the lifetime of this policy */ long validtime; /* duration this policy is valid without use */ +#ifdef IPSEC_ACCEL + CK_LIST_HEAD(, ifp_handle_sp) accel_ifps; + struct ipsec_accel_adddel_sp_tq accel_add_tq; + struct ipsec_accel_adddel_sp_tq accel_del_tq; + struct inpcb *ipsec_accel_add_sp_inp; + const char *accel_ifname; +#endif }; /* @@ -336,8 +349,9 @@ void ipsec4_setsockaddrs(const struct mbuf *, union sockaddr_union *, union sockaddr_union *); int ipsec4_common_input_cb(struct mbuf *, struct secasvar *, int, int); -int ipsec4_check_pmtu(struct mbuf *, struct secpolicy *, int); -int ipsec4_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); +int ipsec4_check_pmtu(struct ifnet *, struct mbuf *, struct secpolicy *, int); +int ipsec4_process_packet(struct ifnet *, struct mbuf *, struct secpolicy *, + struct inpcb *, u_long); int ipsec_process_done(struct mbuf *, struct secpolicy *, struct secasvar *, u_int); diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -85,6 +85,7 @@ #ifdef INET6 #include #endif 
+#include #include #include #include /*XXX*/ @@ -636,8 +637,16 @@ ipsec4_in_reject(const struct mbuf *m, struct inpcb *inp) { struct secpolicy *sp; +#ifdef IPSEC_ACCEL + struct ipsec_accel_in_tag *tag; +#endif int result; +#ifdef IPSEC_ACCEL + tag = ipsec_accel_input_tag_lookup(m); + if (tag != NULL) + return (0); +#endif sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); @@ -802,8 +811,16 @@ ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) { struct secpolicy *sp; +#ifdef IPSEC_ACCEL + struct ipsec_accel_in_tag *tag; +#endif int result; +#ifdef IPSEC_ACCEL + tag = ipsec_accel_input_tag_lookup(m); + if (tag != NULL) + return (0); +#endif sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); diff --git a/sys/netipsec/ipsec6.h b/sys/netipsec/ipsec6.h --- a/sys/netipsec/ipsec6.h +++ b/sys/netipsec/ipsec6.h @@ -66,8 +66,9 @@ void ipsec6_setsockaddrs(const struct mbuf *, union sockaddr_union *, union sockaddr_union *); int ipsec6_common_input_cb(struct mbuf *, struct secasvar *, int, int); -int ipsec6_check_pmtu(struct mbuf *, struct secpolicy *, int); -int ipsec6_process_packet(struct mbuf *, struct secpolicy *, struct inpcb *); +int ipsec6_check_pmtu(struct ifnet *, struct mbuf *, struct secpolicy *, int); +int ipsec6_process_packet(struct ifnet *, struct mbuf *, struct secpolicy *, + struct inpcb *, u_long); int ip6_ipsec_filtertunnel(struct mbuf *); int ip6_ipsec_pcbctl(struct inpcb *, struct sockopt *); diff --git a/sys/netipsec/ipsec_accel.h b/sys/netipsec/ipsec_accel.h new file mode 100644 --- /dev/null +++ b/sys/netipsec/ipsec_accel.h @@ -0,0 +1,200 @@ +/*- + * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _NETIPSEC_IPSEC_ACCEL_H_ +#define _NETIPSEC_IPSEC_ACCEL_H_ + +#ifdef _KERNEL +#include +#include +#include + +struct secpolicy; +struct secasvar; +struct inpcb; + +struct ipsec_accel_out_tag { + struct m_tag tag; + uint16_t drv_spi; +}; + +struct ipsec_accel_in_tag { + struct m_tag tag; + uint16_t drv_spi; +}; + +#define IPSEC_ACCEL_DRV_SPI_BYPASS 2 +#define IPSEC_ACCEL_DRV_SPI_MIN 3 +#define IPSEC_ACCEL_DRV_SPI_MAX 0xffff + +extern void (*ipsec_accel_sa_install_input_p)(struct secasvar *sav, + const union sockaddr_union *dst_address, int sproto, uint32_t spi); +extern void (*ipsec_accel_forget_sav_p)(struct secasvar *sav); +extern void (*ipsec_accel_spdadd_p)(struct secpolicy *sp, struct inpcb *inp); +extern void (*ipsec_accel_spddel_p)(struct secpolicy *sp); +extern int (*ipsec_accel_sa_lifetime_op_p)(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +extern void (*ipsec_accel_sync_p)(void); +extern bool (*ipsec_accel_is_accel_sav_p)(struct secasvar *sav); +extern struct mbuf *(*ipsec_accel_key_setaccelif_p)(struct secasvar *sav); + +#ifdef IPSEC_ACCEL +/* + * Have to use ipsec_accel_sa_install_input_p indirection because + * key.c is unconditionally included into the static kernel. 
+ */ +static inline void +ipsec_accel_sa_install_input(struct secasvar *sav, + const union sockaddr_union *dst_address, int sproto, uint32_t spi) +{ + void (*p)(struct secasvar *sav, const union sockaddr_union *, + int, uint32_t); + + p = atomic_load_ptr(&ipsec_accel_sa_install_input_p); + if (p != NULL) + p(sav, dst_address, sproto, spi); +} + +static inline void +ipsec_accel_forget_sav(struct secasvar *sav) +{ + void (*p)(struct secasvar *sav); + + p = atomic_load_ptr(&ipsec_accel_forget_sav_p); + if (p != NULL) + p(sav); +} + +static inline void +ipsec_accel_spdadd(struct secpolicy *sp, struct inpcb *inp) +{ + void (*p)(struct secpolicy *sp, struct inpcb *inp); + + p = atomic_load_ptr(&ipsec_accel_spdadd_p); + if (p != NULL) + p(sp, inp); +} + +static inline void +ipsec_accel_spddel(struct secpolicy *sp) +{ + void (*p)(struct secpolicy *sp); + + p = atomic_load_ptr(&ipsec_accel_spddel_p); + if (p != NULL) + p(sp); +} + +static inline int +ipsec_accel_sa_lifetime_op(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp) +{ + int (*p)(struct secasvar *sav, struct seclifetime *lft_c, if_t ifp, + enum IF_SA_CNT_WHICH op, struct rm_priotracker *sahtree_trackerp); + + p = atomic_load_ptr(&ipsec_accel_sa_lifetime_op_p); + if (p != NULL) + return (p(sav, lft_c, ifp, op, sahtree_trackerp)); + return (ENOTSUP); +} + +static inline void +ipsec_accel_sync(void) +{ + void (*p)(void); + + p = atomic_load_ptr(&ipsec_accel_sync_p); + if (p != NULL) + p(); +} + +static inline bool +ipsec_accel_is_accel_sav(struct secasvar *sav) +{ + bool (*p)(struct secasvar *sav); + + p = atomic_load_ptr(&ipsec_accel_is_accel_sav_p); + if (p != NULL) + return (p(sav)); + return (false); +} + +static inline struct mbuf * +ipsec_accel_key_setaccelif(struct secasvar *sav) +{ + struct mbuf *(*p)(struct secasvar *sav); + + p = atomic_load_ptr(&ipsec_accel_key_setaccelif_p); + if (p != NULL) + return (p(sav)); + return (NULL); 
+} + + +#else +#define ipsec_accel_sa_install_input(a, b, c, d) +#define ipsec_accel_forget_sav(a) +#define ipsec_accel_spdadd(a, b) +#define ipsec_accel_spddel(a) +#define ipsec_accel_sa_lifetime_op(a, b, c, d, e) +#define ipsec_accel_sync() +#define ipsec_accel_is_accel_sav(a) +#define ipsec_accel_key_setaccelif(a) +#endif + +void ipsec_accel_sa_install_input_impl(struct secasvar *sav, + const union sockaddr_union *dst_address, int sproto, uint32_t spi); +void ipsec_accel_forget_sav_impl(struct secasvar *sav); +void ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp); +void ipsec_accel_spddel_impl(struct secpolicy *sp); + +#ifdef IPSEC_ACCEL +void ipsec_accel_sa_install_output(struct ifnet *ifp, struct secpolicy *sp, + struct secasvar *sav); +int ipsec_accel_input(struct mbuf *m, int offset, int proto); +bool ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, + struct inpcb *inp, struct secpolicy *sp, struct secasvar *sav, int af, + int mtu, int *hwassist); +void ipsec_accel_forget_sav(struct secasvar *sav); +#else +#define ipsec_accel_sa_install_output(a, b, c) +#define ipsec_accel_input(a, b, c) (ENXIO) +#define ipsec_accel_output(a, b, c, d, e, f, g, h) ({ \ + *(h) = 0; /* stub: clear caller's hwassist; parenthesized so any pointer expression works */ \ + false; \ +}) +#define ipsec_accel_forget_sav(a) +#endif + +struct ipsec_accel_in_tag *ipsec_accel_input_tag_lookup(const struct mbuf *); +void ipsec_accel_on_ifdown(struct ifnet *ifp); +void ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, + uint64_t octets, uint64_t allocs); + +#endif /* _KERNEL */ + +#endif /* _NETIPSEC_IPSEC_ACCEL_H_ */ diff --git a/sys/netipsec/ipsec_accel.c b/sys/netipsec/ipsec_accel.c new file mode 100644 --- /dev/null +++ b/sys/netipsec/ipsec_accel.c @@ -0,0 +1,1266 @@ +/*- + * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef IPSEC_ACCEL + +static struct mtx ipsec_accel_sav_tmp; +static struct unrhdr *drv_spi_unr; +static struct mtx ipsec_accel_cnt_lock; + +struct ipsec_accel_install_input_tq { + union sockaddr_union dst_address; + int sproto; + uint32_t spi; + struct secasvar *sav; + struct vnet *install_vnet; + struct task install_task; +}; + +struct ipsec_accel_install_output_tq { + struct ifnet *ifp; + struct secpolicy *sp; + struct secasvar *sav; + struct vnet *install_vnet; + struct task install_task; +}; + +struct ipsec_accel_forget_tq { + struct vnet *forget_vnet; + struct task forget_task; + struct secasvar *sav; +}; + +struct ifp_handle_sav { + CK_LIST_ENTRY(ifp_handle_sav) sav_link; + CK_LIST_ENTRY(ifp_handle_sav) sav_allh_link; + struct secasvar *sav; + struct ifnet *ifp; + void *ifdata; + uint64_t drv_spi; + uint32_t flags; + size_t hdr_ext_size; + uint64_t cnt_octets; + uint64_t cnt_allocs; +}; + +#define IFP_HS_INFLUX 0x00000001 +#define IFP_HS_HANDLED 0x00000002 +#define IFP_HS_REJECTED 0x00000004 +#define IFP_HS_INPUT 0x00000008 +#define IFP_HS_OUTPUT 0x00000010 +#define IFP_HS_MARKER 0x00000020 + +static CK_LIST_HEAD(, ifp_handle_sav) ipsec_accel_all_sav_handles; + +struct ifp_handle_sp { + CK_LIST_ENTRY(ifp_handle_sp) sp_link; + CK_LIST_ENTRY(ifp_handle_sp) sp_allh_link; + struct secpolicy *sp; + struct ifnet *ifp; + void *ifdata; + uint32_t flags; +}; + +#define IFP_HP_HANDLED 0x00000001 +#define IFP_HP_REJECTED 0x00000002 +#define IFP_HP_MARKER 0x00000004 + +static CK_LIST_HEAD(, ifp_handle_sp) ipsec_accel_all_sp_handles; + +static void * +drvspi_sa_trie_alloc(struct pctrie 
*ptree) +{ + void *res; + + res = malloc(pctrie_node_size(), M_IPSEC_MISC, M_ZERO | M_NOWAIT); + if (res != NULL) + pctrie_zone_init(res, 0, 0); + return (res); +} + +static void +drvspi_sa_trie_free(struct pctrie *ptree, void *node) +{ + free(node, M_IPSEC_MISC); +} + +PCTRIE_DEFINE(DRVSPI_SA, ifp_handle_sav, drv_spi, + drvspi_sa_trie_alloc, drvspi_sa_trie_free); +static struct pctrie drv_spi_pctrie; + +static int ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, + uint32_t drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires); +static void ipsec_accel_forget_sav_clear(struct secasvar *sav); +static struct ifp_handle_sav *ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, + struct ifnet *ifp); +static bool ipsec_accel_do_not_install_sav_out(struct secasvar *sav, + struct ifnet *ifp); +static int ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +static void ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m); +static void ipsec_accel_sync_imp(void); +static bool ipsec_accel_is_accel_sav_impl(struct secasvar *sav); +static struct mbuf *ipsec_accel_key_setaccelif_impl(struct secasvar *sav); + +static void +ipsec_accel_init(void *arg) +{ + mtx_init(&ipsec_accel_sav_tmp, "ipasat", MTX_DEF, 0); + mtx_init(&ipsec_accel_cnt_lock, "ipascn", MTX_DEF, 0); + drv_spi_unr = new_unrhdr(IPSEC_ACCEL_DRV_SPI_MIN, + IPSEC_ACCEL_DRV_SPI_MAX, &ipsec_accel_sav_tmp); + ipsec_accel_sa_install_input_p = ipsec_accel_sa_install_input_impl; + ipsec_accel_forget_sav_p = ipsec_accel_forget_sav_impl; + ipsec_accel_spdadd_p = ipsec_accel_spdadd_impl; + ipsec_accel_spddel_p = ipsec_accel_spddel_impl; + ipsec_accel_sa_lifetime_op_p = ipsec_accel_sa_lifetime_op_impl; + ipsec_accel_sync_p = ipsec_accel_sync_imp; + ipsec_accel_is_accel_sav_p = ipsec_accel_is_accel_sav_impl; + ipsec_accel_key_setaccelif_p = ipsec_accel_key_setaccelif_impl; + 
pctrie_init(&drv_spi_pctrie); +} +SYSINIT(ipsec_accel_init, SI_SUB_VNET_DONE, SI_ORDER_ANY, + ipsec_accel_init, NULL); + +static void +ipsec_accel_fini(void *arg) +{ + ipsec_accel_sa_install_input_p = NULL; + ipsec_accel_forget_sav_p = NULL; + ipsec_accel_spdadd_p = NULL; + ipsec_accel_spddel_p = NULL; + ipsec_accel_sa_lifetime_op_p = NULL; + ipsec_accel_sync_p = NULL; + ipsec_accel_is_accel_sav_p = NULL; + ipsec_accel_key_setaccelif_p = NULL; + ipsec_accel_sync_imp(); + clean_unrhdr(drv_spi_unr); /* avoid panic, should go later */ + clear_unrhdr(drv_spi_unr); + delete_unrhdr(drv_spi_unr); + mtx_destroy(&ipsec_accel_sav_tmp); + mtx_destroy(&ipsec_accel_cnt_lock); +} +SYSUNINIT(ipsec_accel_fini, SI_SUB_VNET_DONE, SI_ORDER_ANY, + ipsec_accel_fini, NULL); + +static void +ipsec_accel_alloc_forget_tq(struct secasvar *sav) +{ + void *ftq; + + if (sav->accel_forget_tq != 0) + return; + + ftq = malloc(sizeof(struct ipsec_accel_forget_tq), M_TEMP, M_WAITOK); + if (!atomic_cmpset_ptr(&sav->accel_forget_tq, 0, (uintptr_t)ftq)) + free(ftq, M_TEMP); +} + +static bool +ipsec_accel_sa_install_match(if_t ifp, void *arg) +{ + if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_ACCEL)) == 0) + return (false); + if (ifp->if_ipsec_accel_m->if_sa_in_install == NULL) { + printf("driver bug ifp %s if_sa_in_install NULL\n", + if_name(ifp)); + return (false); + } + return (true); +} + +static int +ipsec_accel_sa_install_cb(if_t ifp, void *arg) +{ + struct ipsec_accel_install_input_tq *tq; + void *priv; + uint32_t drv_spi; + int error; + + tq = arg; + + printf("ipsec_accel_sa_install_act: ifp %s h %p spi %#x " + "flags %#x seq %d\n", + if_name(ifp), ifp->if_ipsec_accel_m->if_sa_in_install, + be32toh(tq->spi), tq->sav->flags, tq->sav->seq); + priv = NULL; + drv_spi = alloc_unr(drv_spi_unr); + if (tq->sav->accel_ifname != NULL && + strcmp(tq->sav->accel_ifname, if_name(ifp)) != 0) { + error = ipsec_accel_handle_sav(tq->sav, + ifp, drv_spi, priv, IFP_HS_REJECTED | IFP_HS_INPUT, NULL); + goto 
out; + } + if (drv_spi == -1) { + /* XXXKIB */ + printf("ipsec_accel_sa_install_input: cannot alloc " + "drv_spi if %s spi %#x\n", if_name(ifp), be32toh(tq->spi)); + return (ENOMEM); + } + error = ifp->if_ipsec_accel_m->if_sa_in_install(ifp, tq->sav, + &tq->dst_address, tq->sproto, tq->spi, drv_spi, &priv); + if (error != 0) { + if (error == EOPNOTSUPP) { + printf("ipsec_accel_sa_install_input: driver " + "refused sa if %s spi %#x\n", + if_name(ifp), be32toh(tq->spi)); + error = ipsec_accel_handle_sav(tq->sav, + ifp, drv_spi, priv, IFP_HS_REJECTED | IFP_HS_INPUT, + NULL); + /* XXXKIB */ + } else { + printf("ipsec_accel_sa_install_input: driver " + "error %d if %s spi %#x\n", + error, if_name(ifp), be32toh(tq->spi)); + /* XXXKIB */ + } + } else { + error = ipsec_accel_handle_sav(tq->sav, ifp, + drv_spi, priv, IFP_HS_HANDLED | IFP_HS_INPUT, NULL); + if (error != 0) { + /* XXXKIB */ + printf("ipsec_accel_sa_install_input: handle_sav " + "err %d if %s spi %#x\n", error, if_name(ifp), + be32toh(tq->spi)); + } + } +out: + return (error); +} + +static void +ipsec_accel_sa_install_act(void *context, int pending) +{ + struct ipsec_accel_install_input_tq *tq; + void *tqf; + struct secasvar *sav; + + tq = context; + tqf = NULL; + sav = tq->sav; + CURVNET_SET(tq->install_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + if ((sav->accel_flags & (SADB_KEY_ACCEL_INST | + SADB_KEY_ACCEL_DEINST)) == 0 && + sav->state < SADB_SASTATE_DYING) { + sav->accel_flags |= SADB_KEY_ACCEL_INST; + mtx_unlock(&ipsec_accel_sav_tmp); + if_foreach_sleep(ipsec_accel_sa_install_match, context, + ipsec_accel_sa_install_cb, context); + ipsec_accel_alloc_forget_tq(sav); + mtx_lock(&ipsec_accel_sav_tmp); + + /* + * If ipsec_accel_forget_sav() raced with us and set + * the flag, do its work. Its task cannot execute in + * parallel since taskqueue_thread is single-threaded. 
+		 */
+		if ((sav->accel_flags & SADB_KEY_ACCEL_DEINST) != 0) {
+			tqf = (void *)sav->accel_forget_tq;
+			sav->accel_forget_tq = 0;
+			ipsec_accel_forget_sav_clear(sav);
+		}
+	}
+	mtx_unlock(&ipsec_accel_sav_tmp);
+	key_freesav(&tq->sav);
+	CURVNET_RESTORE();
+	free(tq, M_TEMP);
+	free(tqf, M_TEMP);
+}
+
+/*
+ * Schedule installation of an input SA into offload-capable interfaces.
+ *
+ * Nothing is done when the SA is already marked as being installed or
+ * deinstalled.  Otherwise a request carrying a new reference on sav, a
+ * copy of dst_address, sproto and spi (printed via be32toh(), so
+ * presumably network byte order) is queued on taskqueue_thread and
+ * processed by ipsec_accel_sa_install_act().  The request is allocated
+ * with M_NOWAIT; on allocation failure the SA is simply not offloaded.
+ *
+ * Typically, exclusive rm sahtree and exclusive sx so_snd_sx are
+ * owned by the caller.
+ */
+void
+ipsec_accel_sa_install_input_impl(struct secasvar *sav,
+    const union sockaddr_union *dst_address, int sproto, uint32_t spi)
+{
+	struct ipsec_accel_install_input_tq *tq;
+
+	if ((sav->accel_flags & (SADB_KEY_ACCEL_INST |
+	    SADB_KEY_ACCEL_DEINST)) != 0)
+		return;
+
+	printf(
+	    "ipsec_accel_sa_install_input: spi %#x flags %#x seq %d "
+	    "sproto %d\n",
+	    be32toh(spi), sav->flags, sav->seq, sproto);
+
+	tq = malloc(sizeof(*tq), M_TEMP, M_NOWAIT);
+	if (tq == NULL) {
+		/* tq is NULL here: report the spi argument, not tq->spi. */
+		printf("ipsec_accel_sa_install_input: no memory for tq, "
+		    "spi %#x\n", be32toh(spi));
+		/* XXXKIB */
+		return;
+	}
+
+	/* The reference is dropped by ipsec_accel_sa_install_act(). */
+	refcount_acquire(&sav->refcnt);
+
+	TASK_INIT(&tq->install_task, 0, ipsec_accel_sa_install_act, tq);
+	tq->sav = sav;
+	tq->install_vnet = curthread->td_vnet;	/* XXXKIB liveness */
+	memcpy(&tq->dst_address, dst_address, sizeof(*dst_address));
+	tq->sproto = sproto;
+	tq->spi = spi;
+	taskqueue_enqueue(taskqueue_thread, &tq->install_task);
+}
+
+/*
+ * Taskqueue handler queued by ipsec_accel_sa_install_output(): install
+ * the output SA from the request into the request's interface.
+ */
+static void
+ipsec_accel_sa_install_output_act(void *context, int pending)
+{
+	struct ifp_handle_sav *i;
+	struct ipsec_accel_install_output_tq *tq;
+	struct secasvar *sav;
+	if_t ifp;
+	void *priv, *tqf;
+	uint32_t drv_spi;
+	int error;
+
+	tqf = NULL;
+	tq = context;
+	ifp = tq->ifp;
+	CURVNET_SET(tq->install_vnet);
+	sav = tq->sav;
+
+	mtx_lock(&ipsec_accel_sav_tmp);
+	if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_ACCEL)) == 0 ||
+	    sav->state >= SADB_SASTATE_DYING ||
+	    (sav->accel_flags & (SADB_KEY_ACCEL_INST |
+	    SADB_KEY_ACCEL_DEINST)) != 0) {
+		mtx_unlock(&ipsec_accel_sav_tmp);
+		goto out;
+	}
+	sav->accel_flags |=
SADB_KEY_ACCEL_INST; + mtx_unlock(&ipsec_accel_sav_tmp); + + priv = NULL; + drv_spi = alloc_unr(drv_spi_unr); + if (drv_spi == -1) { + printf("ipsec_accel_sa_install_output: cannot alloc " + "drv_spi if %s spi %#x\n", + if_name(ifp), be32toh(sav->spi)); + /* XXXKIB */ + goto out; + } + + if (sav->accel_ifname != NULL && + strcmp(sav->accel_ifname, if_name(ifp)) != 0) { + (void)ipsec_accel_handle_sav(sav, ifp, + drv_spi, priv, IFP_HS_REJECTED | IFP_HS_OUTPUT, NULL); + goto out; + } + + ipsec_accel_alloc_forget_tq(sav); + + if (ifp->if_ipsec_accel_m->if_sa_out_install == NULL) { + printf("driver bug ifp %s if_sa_out_install NULL\n", + if_name(ifp)); + free_unr(drv_spi_unr, drv_spi); + goto out; + } + + error = ipsec_accel_handle_sav(sav, ifp, drv_spi, NULL, + IFP_HS_OUTPUT | IFP_HS_INFLUX, &i); + if (error != 0) { + free_unr(drv_spi_unr, drv_spi); + goto out; + } + + printf("ipsec_accel_sa_install_out_act: ifp %s h %p spi %#x " + "flags %#x seq %d drv_spi %#x\n", + if_name(ifp), ifp->if_ipsec_accel_m->if_sa_out_install, + be32toh(sav->spi), sav->flags, sav->seq, drv_spi); + error = ifp->if_ipsec_accel_m->if_sa_out_install(ifp, + tq->sp, sav, drv_spi, &priv); + if (error != 0) { + if (error == EOPNOTSUPP) { + printf("ipsec_accel_sa_install_output: " + "driver refused sa if %s spi %#x\n", + if_name(ifp), be32toh(sav->spi)); + } else { + printf("ipsec_accel_sa_install_output: driver " + "error %d if %s spi %#x\n", + error, if_name(ifp), be32toh(sav->spi)); + } + } + mtx_lock(&ipsec_accel_sav_tmp); + i->flags &= ~IFP_HS_INFLUX; + i->flags |= (error == 0 ? 
IFP_HS_HANDLED : IFP_HS_REJECTED); + i->ifdata = priv; + wakeup(i); + mtx_unlock(&ipsec_accel_sav_tmp); + +out: + mtx_lock(&ipsec_accel_sav_tmp); + if ((sav->accel_flags & SADB_KEY_ACCEL_DEINST) != 0) { + tqf = (void *)sav->accel_forget_tq; + sav->accel_forget_tq = 0; + ipsec_accel_forget_sav_clear(sav); + } + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesp(&tq->sp); + key_freesav(&sav); + if_rele(ifp); + CURVNET_RESTORE(); + free(tq, M_TEMP); + free(tqf, M_TEMP); +} + +void +ipsec_accel_sa_install_output(struct ifnet *ifp, struct secpolicy *sp, + struct secasvar *sav) +{ + struct ipsec_accel_install_output_tq *tq; + + if ((sav->accel_flags & (SADB_KEY_ACCEL_INST | + SADB_KEY_ACCEL_DEINST)) != 0) + return; + + if (ipsec_accel_do_not_install_sav_out(sav, ifp)) + return; + + printf("ipsec_accel_sa_install_output: spi %#x\n", + be32toh(sav->spi)); + tq = malloc(sizeof(*tq), M_TEMP, M_NOWAIT); + if (tq == NULL) { + printf("ipsec_accel_sa_install_output %s sp %p sav %p spi %d:" + " cannot alloc tq\n", + if_name(ifp), sp, sav, be32toh(sav->spi)); + return; + } + if (!if_try_ref(ifp)) { + free(tq, M_TEMP); + printf("ipsec_accel_sa_install_output %s sp %p sav %p spi %d:" + " cannot ref ifp\n", + if_name(ifp), sp, sav, be32toh(sav->spi)); + return; + } + refcount_acquire(&sav->refcnt); + key_addref(sp); + + TASK_INIT(&tq->install_task, 0, ipsec_accel_sa_install_output_act, tq); + tq->ifp = ifp; + tq->sav = sav; + tq->sp = sp; + tq->install_vnet = curthread->td_vnet; /* XXXKIB liveness */ + taskqueue_enqueue(taskqueue_thread, &tq->install_task); +} + +static int +ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, + uint32_t drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires) +{ + struct ifp_handle_sav *ihs, *i; + int error; + + MPASS(__bitcount(flags & (IFP_HS_INPUT | IFP_HS_OUTPUT)) == 1); + MPASS(__bitcount(flags & (IFP_HS_INFLUX | IFP_HS_HANDLED | + IFP_HS_REJECTED)) == 1); + + ihs = malloc(sizeof(*ihs), M_IPSEC_MISC, M_WAITOK | M_ZERO); + 
ihs->ifp = ifp; + ihs->sav = sav; + ihs->drv_spi = drv_spi; + ihs->ifdata = priv; + ihs->flags = flags; + if ((flags & IFP_HS_OUTPUT) != 0) + ihs->hdr_ext_size = esp_hdrsiz(sav); + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp && + (i->flags & (IFP_HS_INPUT | IFP_HS_OUTPUT)) == + (flags & (IFP_HS_INPUT | IFP_HS_OUTPUT))) { + error = EALREADY; + goto errout; + } + } + error = DRVSPI_SA_PCTRIE_INSERT(&drv_spi_pctrie, ihs); + if (error != 0) + goto errout; + if_ref(ihs->ifp); + CK_LIST_INSERT_HEAD(&sav->accel_ifps, ihs, sav_link); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, ihs, sav_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + if (ires != NULL) + *ires = ihs; + return (0); +errout: + mtx_unlock(&ipsec_accel_sav_tmp); + free(ihs, M_IPSEC_MISC); + if (ires != NULL) + *ires = NULL; + return (error); +} + +static void +ipsec_accel_forget_handle_sav(struct ifp_handle_sav *i, bool freesav) +{ + struct ifnet *ifp; + struct secasvar *sav; + + mtx_assert(&ipsec_accel_sav_tmp, MA_OWNED); + + while ((i->flags & IFP_HS_INFLUX) != 0) + msleep(i, &ipsec_accel_sav_tmp, PSOCK, "ipsacf", 0); + + CK_LIST_REMOVE(i, sav_link); + CK_LIST_REMOVE(i, sav_allh_link); + DRVSPI_SA_PCTRIE_REMOVE(&drv_spi_pctrie, i->drv_spi); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + ifp = i->ifp; + sav = i->sav; + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == + IFP_HS_HANDLED) { + printf("sa deinstall %s %p spi %#x ifl %#x\n", + if_name(ifp), sav, be32toh(sav->spi), i->flags); + ifp->if_ipsec_accel_m->if_sa_deinstall(ifp, + i->drv_spi, i->ifdata); + } + if_rele(ifp); + free_unr(drv_spi_unr, i->drv_spi); + free(i, M_IPSEC_MISC); + if (freesav) + key_freesav(&sav); + mtx_lock(&ipsec_accel_sav_tmp); +} + +static void +ipsec_accel_forget_sav_clear(struct secasvar *sav) +{ + struct ifp_handle_sav *i; + + for (;;) { + i = CK_LIST_FIRST(&sav->accel_ifps); + if (i == NULL) + break; + ipsec_accel_forget_handle_sav(i, false); + 
} +} + +static void +ipsec_accel_forget_sav_act(void *arg, int pending) +{ + struct ipsec_accel_forget_tq *tq; + struct secasvar *sav; + + tq = arg; + sav = tq->sav; + CURVNET_SET(tq->forget_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + ipsec_accel_forget_sav_clear(sav); + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesav(&sav); + CURVNET_RESTORE(); + free(tq, M_TEMP); +} + +void +ipsec_accel_forget_sav_impl(struct secasvar *sav) +{ + struct ipsec_accel_forget_tq *tq; + + mtx_lock(&ipsec_accel_sav_tmp); + sav->accel_flags |= SADB_KEY_ACCEL_DEINST; + tq = (void *)atomic_load_ptr(&sav->accel_forget_tq); + if (tq == NULL || !atomic_cmpset_ptr(&sav->accel_forget_tq, + (uintptr_t)tq, 0)) { + mtx_unlock(&ipsec_accel_sav_tmp); + return; + } + mtx_unlock(&ipsec_accel_sav_tmp); + + refcount_acquire(&sav->refcnt); + TASK_INIT(&tq->forget_task, 0, ipsec_accel_forget_sav_act, tq); + tq->forget_vnet = curthread->td_vnet; + tq->sav = sav; + taskqueue_enqueue(taskqueue_thread, &tq->forget_task); +} + +static void +ipsec_accel_on_ifdown_sav(struct ifnet *ifp) +{ + struct ifp_handle_sav *i, *marker; + + marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO); + marker->flags = IFP_HS_MARKER; + + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, marker, + sav_allh_link); + for (;;) { + i = CK_LIST_NEXT(marker, sav_allh_link); + if (i == NULL) + break; + CK_LIST_REMOVE(marker, sav_allh_link); + CK_LIST_INSERT_AFTER(i, marker, sav_allh_link); + if (i->ifp == ifp) { + refcount_acquire(&i->sav->refcnt); /* XXXKIB wrap ? 
*/ + ipsec_accel_forget_handle_sav(i, true); + } + } + CK_LIST_REMOVE(marker, sav_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + free(marker, M_IPSEC_MISC); +} + +static struct ifp_handle_sav * +ipsec_accel_is_accel_sav_ptr_raw(struct secasvar *sav, struct ifnet *ifp) +{ + struct ifp_handle_sav *i; + + if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_ACCEL)) == 0) + return (NULL); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp) { + if ((i->flags & IFP_HS_HANDLED) != 0) + return (i); + return (NULL); + } + } + return (NULL); +} + +static struct ifp_handle_sav * +ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, struct ifnet *ifp) +{ + NET_EPOCH_ASSERT(); + return (ipsec_accel_is_accel_sav_ptr_raw(sav, ifp)); +} + +static bool +ipsec_accel_is_accel_sav_impl(struct secasvar *sav) +{ + return (!CK_LIST_EMPTY(&sav->accel_ifps)); +} + +static bool +ipsec_accel_do_not_install_sav_out(struct secasvar *sav, struct ifnet *ifp) +{ + struct ifp_handle_sav *i; + + NET_EPOCH_ASSERT(); + + if (ifp == NULL || (ifp->if_capenable2 & + IFCAP2_BIT(IFCAP2_IPSEC_ACCEL)) == 0) + return (true); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp && (i->flags & IFP_HS_OUTPUT) != 0) + return (true); + } + return (false); +} + +static struct secasvar * +ipsec_accel_drvspi_to_sa(u_int drv_spi) +{ + struct ifp_handle_sav *i; + + i = DRVSPI_SA_PCTRIE_LOOKUP(&drv_spi_pctrie, drv_spi); + if (i == NULL) + return (NULL); + return (i->sav); +} + +static struct ifp_handle_sp * +ipsec_accel_find_accel_sp(struct secpolicy *sp, if_t ifp) +{ + struct ifp_handle_sp *i; + + CK_LIST_FOREACH(i, &sp->accel_ifps, sp_link) { + if (i->ifp == ifp) + return (i); + } + return (NULL); +} + +static bool +ipsec_accel_is_accel_sp(struct secpolicy *sp, if_t ifp) +{ + return (ipsec_accel_find_accel_sp(sp, ifp) != NULL); +} + +static int +ipsec_accel_remember_sp(struct secpolicy *sp, if_t ifp, + struct ifp_handle_sp **ip) +{ + struct ifp_handle_sp *i; + + i = 
malloc(sizeof(*i), M_IPSEC_MISC, M_WAITOK | M_ZERO); + i->sp = sp; + i->ifp = ifp; + if_ref(ifp); + i->flags = IFP_HP_HANDLED; + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&sp->accel_ifps, i, sp_link); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + *ip = i; + return (0); +} + +static bool +ipsec_accel_spdadd_match(if_t ifp, void *arg) +{ + struct secpolicy *sp; + + if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_ACCEL)) == 0 || + ifp->if_ipsec_accel_m->if_spdadd == NULL) + return (false); + sp = arg; + if (sp->accel_ifname != NULL && + strcmp(sp->accel_ifname, if_name(ifp)) != 0) + return (false); + if (ipsec_accel_is_accel_sp(sp, ifp)) + return (false); + return (true); +} + +static int +ipsec_accel_spdadd_cb(if_t ifp, void *arg) +{ + struct secpolicy *sp; + struct inpcb *inp; + struct ifp_handle_sp *i; + int error; + + sp = arg; + inp = sp->ipsec_accel_add_sp_inp; + printf("ipsec_accel_spdadd_act: ifp %s m %p sp %p inp %p\n", + if_name(ifp), ifp->if_ipsec_accel_m->if_spdadd, sp, inp); + error = ipsec_accel_remember_sp(sp, ifp, &i); + if (error != 0) { + printf("ipsec_accel_spdadd: %s if_spdadd %p remember res %d\n", + if_name(ifp), sp, error); + return (error); + } + error = ifp->if_ipsec_accel_m->if_spdadd(ifp, sp, inp, &i->ifdata); + if (error != 0) { + i->flags |= IFP_HP_REJECTED; + printf("ipsec_accel_spdadd: %s if_spdadd %p res %d\n", + if_name(ifp), sp, error); + } + return (error); +} + +static void +ipsec_accel_spdadd_act(void *arg, int pending) +{ + struct secpolicy *sp; + struct inpcb *inp; + + sp = arg; + CURVNET_SET(sp->accel_add_tq.adddel_vnet); + if_foreach_sleep(ipsec_accel_spdadd_match, arg, + ipsec_accel_spdadd_cb, arg); + inp = sp->ipsec_accel_add_sp_inp; + if (inp != NULL) { + INP_WLOCK(inp); + if (!in_pcbrele_wlocked(inp)) + INP_WUNLOCK(inp); + sp->ipsec_accel_add_sp_inp = NULL; + } + CURVNET_RESTORE(); + key_freesp(&sp); +} + +void +ipsec_accel_spdadd_impl(struct 
secpolicy *sp, struct inpcb *inp) +{ + struct ipsec_accel_adddel_sp_tq *tq; + + if (sp == NULL) + return; + if (sp->tcount == 0 && inp == NULL) + return; + tq = &sp->accel_add_tq; + if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0) + return; + tq->adddel_vnet = curthread->td_vnet; + sp->ipsec_accel_add_sp_inp = inp; + if (inp != NULL) + in_pcbref(inp); + TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spdadd_act, sp); + key_addref(sp); + taskqueue_enqueue(taskqueue_thread, &tq->adddel_task); +} + +static void +ipsec_accel_spddel_act(void *arg, int pending) +{ + struct ifp_handle_sp *i; + struct secpolicy *sp; + int error; + + sp = arg; + CURVNET_SET(sp->accel_del_tq.adddel_vnet); + mtx_lock(&ipsec_accel_sav_tmp); + for (;;) { + i = CK_LIST_FIRST(&sp->accel_ifps); + if (i == NULL) + break; + CK_LIST_REMOVE(i, sp_link); + CK_LIST_REMOVE(i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) == + IFP_HP_HANDLED) { + printf("spd deinstall %s %p\n", if_name(i->ifp), sp); + error = i->ifp->if_ipsec_accel_m->if_spddel(i->ifp, + sp, i->ifdata); + if (error != 0) { + printf( + "ipsec_accel_spddel: %s if_spddel %p res %d\n", + if_name(i->ifp), sp, error); + } + } + if_rele(i->ifp); + free(i, M_IPSEC_MISC); + mtx_lock(&ipsec_accel_sav_tmp); + } + mtx_unlock(&ipsec_accel_sav_tmp); + key_freesp(&sp); + CURVNET_RESTORE(); +} + +void +ipsec_accel_spddel_impl(struct secpolicy *sp) +{ + struct ipsec_accel_adddel_sp_tq *tq; + + if (sp == NULL) + return; + + tq = &sp->accel_del_tq; + if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0) + return; + tq->adddel_vnet = curthread->td_vnet; + TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spddel_act, sp); + key_addref(sp); + taskqueue_enqueue(taskqueue_thread, &tq->adddel_task); +} + +static void +ipsec_accel_on_ifdown_sp(struct ifnet *ifp) +{ + struct ifp_handle_sp *i, *marker; + struct secpolicy *sp; + int error; + + marker = malloc(sizeof(*marker), M_IPSEC_MISC, 
M_WAITOK | M_ZERO); + marker->flags = IFP_HS_MARKER; + + mtx_lock(&ipsec_accel_sav_tmp); + CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, marker, + sp_allh_link); + for (;;) { + i = CK_LIST_NEXT(marker, sp_allh_link); + if (i == NULL) + break; + CK_LIST_REMOVE(marker, sp_allh_link); + CK_LIST_INSERT_AFTER(i, marker, sp_allh_link); + if (i->ifp != ifp) + continue; + + sp = i->sp; + key_addref(sp); + CK_LIST_REMOVE(i, sp_link); + CK_LIST_REMOVE(i, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + NET_EPOCH_WAIT(); + if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) == + IFP_HP_HANDLED) { + printf("spd deinstall %s %p\n", if_name(ifp), sp); + error = ifp->if_ipsec_accel_m->if_spddel(ifp, + sp, i->ifdata); + } + if (error != 0) { + printf( + "ipsec_accel_on_ifdown_sp: %s if_spddel %p res %d\n", + if_name(ifp), sp, error); + } + key_freesp(&sp); + if_rele(ifp); + free(i, M_IPSEC_MISC); + mtx_lock(&ipsec_accel_sav_tmp); + } + CK_LIST_REMOVE(marker, sp_allh_link); + mtx_unlock(&ipsec_accel_sav_tmp); + free(marker, M_IPSEC_MISC); +} + +void +ipsec_accel_on_ifdown(struct ifnet *ifp) +{ + ipsec_accel_on_ifdown_sp(ifp); + ipsec_accel_on_ifdown_sav(ifp); +} + +static bool +ipsec_accel_output_pad(struct mbuf *m, struct secasvar *sav, int skip, int mtu) +{ + int alen, blks, hlen, padding, rlen; + + rlen = m->m_pkthdr.len - skip; + hlen = ((sav->flags & SADB_X_EXT_OLD) != 0 ? sizeof(struct esp) : + sizeof(struct newesp)) + sav->ivlen; + blks = MAX(4, SAV_ISCTR(sav) && VNET(esp_ctr_compatibility) ? 
+	    sav->tdb_encalgxform->native_blocksize :
+	    sav->tdb_encalgxform->blocksize);
+	padding = ((blks - ((rlen + 2) % blks)) % blks) + 2;
+	alen = xform_ah_authsize(sav->tdb_authalgxform);
+
+	return (skip + hlen + rlen + padding + alen <= mtu);
+}
+
+/*
+ * Attach a PACKET_TAG_IPSEC_ACCEL_OUT tag carrying drv_spi to the
+ * mbuf.  Returns false when the tag cannot be allocated.
+ */
+static bool
+ipsec_accel_output_tag(struct mbuf *m, uint16_t drv_spi)
+{
+	struct ipsec_accel_out_tag *tag;
+
+	tag = (struct ipsec_accel_out_tag *)m_tag_get(
+	    PACKET_TAG_IPSEC_ACCEL_OUT, sizeof(*tag), M_NOWAIT);
+	if (tag == NULL)
+		return (false);
+	tag->drv_spi = drv_spi;
+	m_tag_prepend(m, &tag->tag);
+	return (true);
+}
+
+/*
+ * Try to pass an outgoing packet to the IPsec offload of ifp.
+ *
+ * With sav == NULL the packet is only tagged with
+ * IPSEC_ACCEL_DRV_SPI_BYPASS and the tagging result is returned.
+ * Otherwise the packet is offloaded only if sav has a handled offload
+ * handle on ifp, the packet (unless it is a TSO packet) fits into mtu
+ * after ESP encapsulation, and the tag can be attached.
+ *
+ * On the offloaded path the references on sav and sp are released
+ * here and *hwassist receives the driver's if_hwassist() result; in
+ * all other cases *hwassist is 0.  For a TCP inpcb the TF2_IPSEC_TSO
+ * flag is updated to reflect whether the offload provides TSO.
+ *
+ * Returns true when the packet was tagged for the driver.
+ */
+bool
+ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp,
+    struct secpolicy *sp, struct secasvar *sav, int af, int mtu, int *hwassist)
+{
+	struct ifp_handle_sav *i;
+	struct ip *ip;
+	struct tcpcb *tp;
+	u_long ip_len, skip;
+	bool res;
+
+	*hwassist = 0;
+	res = false;
+	if (ifp == NULL)
+		return (res);
+
+	M_ASSERTPKTHDR(m);
+	NET_EPOCH_ASSERT();
+
+	if (sav == NULL) {
+		res = ipsec_accel_output_tag(m, IPSEC_ACCEL_DRV_SPI_BYPASS);
+		goto out;
+	}
+
+	i = ipsec_accel_is_accel_sav_ptr(sav, ifp);
+	if (i == NULL)
+		goto out;
+
+	if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
+		ip_len = m->m_pkthdr.len;
+		if (ip_len + i->hdr_ext_size > mtu)
+			goto out;
+		switch (af) {
+		case AF_INET:
+			ip = mtod(m, struct ip *);
+			skip = ip->ip_hl << 2;
+			break;
+		case AF_INET6:
+			skip = sizeof(struct ip6_hdr);
+			break;
+		default:
+			__unreachable();
+		}
+		if (!ipsec_accel_output_pad(m, sav, skip, mtu))
+			goto out;
+	}
+
+	if (!ipsec_accel_output_tag(m, i->drv_spi))
+		goto out;
+
+	ipsec_accel_sa_recordxfer(sav, m);
+
+	/*
+	 * Query the driver while our reference still keeps sav valid;
+	 * after key_freesav() the pointer must not be used.
+	 */
+	*hwassist = ifp->if_ipsec_accel_m->if_hwassist(ifp, sav,
+	    i->drv_spi, i->ifdata);
+
+	key_freesav(&sav);
+	if (sp != NULL)
+		key_freesp(&sp);
+
+	res = true;
+out:
+	if (inp != NULL && inp->inp_pcbinfo == &V_tcbinfo) {
+		INP_WLOCK_ASSERT(inp);
+		tp = (struct tcpcb *)inp;
+		if (res && (*hwassist & (CSUM_TSO | CSUM_IP6_TSO)) != 0) {
+			tp->t_flags2 |= TF2_IPSEC_TSO;
+		} else {
+
tp->t_flags2 &= ~TF2_IPSEC_TSO; + } + } + return (res); +} + +struct ipsec_accel_in_tag * +ipsec_accel_input_tag_lookup(const struct mbuf *m) +{ + struct ipsec_accel_in_tag *tag; + struct m_tag *xtag; + + xtag = m_tag_find(__DECONST(struct mbuf *, m), + PACKET_TAG_IPSEC_ACCEL_IN, NULL); + if (xtag == NULL) + return (NULL); + tag = __containerof(xtag, struct ipsec_accel_in_tag, tag); + return (tag); +} + +int +ipsec_accel_input(struct mbuf *m, int offset, int proto) +{ + struct secasvar *sav; + struct ipsec_accel_in_tag *tag; + + tag = ipsec_accel_input_tag_lookup(m); + if (tag == NULL) + return (ENXIO); + + if (tag->drv_spi < IPSEC_ACCEL_DRV_SPI_MIN || + tag->drv_spi > IPSEC_ACCEL_DRV_SPI_MAX) { + printf("if %s mbuf %p drv_spi %d invalid, packet dropped\n", + (m->m_flags & M_PKTHDR) != 0 ? if_name(m->m_pkthdr.rcvif) : + "", m, tag->drv_spi); + m_freem(m); + return (EINPROGRESS); + } + + sav = ipsec_accel_drvspi_to_sa(tag->drv_spi); + if (sav != NULL) + ipsec_accel_sa_recordxfer(sav, m); + return (0); +} + +static void +ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m) +{ + counter_u64_add(sav->accel_lft_sw, 1); + counter_u64_add(sav->accel_lft_sw + 1, m->m_pkthdr.len); + if (sav->accel_firstused == 0) + sav->accel_firstused = time_second; +} + +static void +ipsec_accel_sa_lifetime_update(struct seclifetime *lft_c, + const struct seclifetime *lft_l) +{ + lft_c->allocations += lft_l->allocations; + lft_c->bytes += lft_l->bytes; + lft_c->usetime = min(lft_c->usetime, lft_l->usetime); +} + +void +ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, + uint64_t octets, uint64_t allocs) +{ + struct epoch_tracker et; + struct ifp_handle_sav *i; + uint64_t odiff, adiff; + + NET_EPOCH_ENTER(et); + mtx_lock(&ipsec_accel_cnt_lock); + + if (allocs != 0) { + if (sav->firstused == 0) + sav->firstused = time_second; + if (sav->accel_firstused == 0) + sav->accel_firstused = time_second; + } + + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if 
(i->ifp == ifp) + break; + } + if (i == NULL) + goto out; + + odiff = octets - i->cnt_octets; + adiff = allocs - i->cnt_allocs; + + if (sav->lft_c != NULL) { + counter_u64_add(sav->lft_c_bytes, odiff); + counter_u64_add(sav->lft_c_allocations, adiff); + } + + i->cnt_octets = octets; + i->cnt_allocs = allocs; + sav->accel_hw_octets += odiff; + sav->accel_hw_allocs += adiff; + +out: + mtx_unlock(&ipsec_accel_cnt_lock); + NET_EPOCH_EXIT(et); +} + +static int +ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp) +{ + struct seclifetime lft_l, lft_s; + struct ifp_handle_sav *i; + if_t ifp1; + if_sa_cnt_fn_t p; + int error; + + error = 0; + memset(&lft_l, 0, sizeof(lft_l)); + + switch (op & ~IF_SA_CNT_UPD) { + case IF_SA_CNT_IFP_HW_VAL: + IFNET_RLOCK_ASSERT(); + i = ipsec_accel_is_accel_sav_ptr(sav, ifp); + if (i == NULL) { + error = ENOENT; + break; + } + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) != + IFP_HS_HANDLED) { + error = ENOENT; + break; + } + p = ifp->if_ipsec_accel_m->if_sa_cnt; + if (p == NULL) + error = ENOTSUP; + else + error = p(ifp, sav, i->drv_spi, i->ifdata, &lft_l); + break; + + case IF_SA_CNT_TOTAL_SW_VAL: + lft_l.allocations = (uint32_t)counter_u64_fetch( + sav->accel_lft_sw); + lft_l.bytes = counter_u64_fetch(sav->accel_lft_sw + 1); + lft_l.usetime = sav->accel_firstused; + break; + + case IF_SA_CNT_TOTAL_HW_VAL: + IFNET_RLOCK_ASSERT(); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) != + IFP_HS_HANDLED) + continue; + ifp1 = i->ifp; + p = ifp1->if_ipsec_accel_m->if_sa_cnt; + if (p == NULL) + continue; + memset(&lft_s, 0, sizeof(lft_s)); + if (sahtree_trackerp != NULL) + ipsec_sahtree_runlock(sahtree_trackerp); + error = p(ifp1, sav, i->drv_spi, i->ifdata, &lft_s); + if (sahtree_trackerp != NULL) + ipsec_sahtree_rlock(sahtree_trackerp); + if (error == 0) + 
ipsec_accel_sa_lifetime_update(&lft_l, &lft_s); + } + break; + } + + if (error == 0) { + if ((op & IF_SA_CNT_UPD) == 0) + memset(lft_c, 0, sizeof(*lft_c)); + ipsec_accel_sa_lifetime_update(lft_c, &lft_l); + } + + return (error); +} + +static void +ipsec_accel_sync_imp(void) +{ + taskqueue_drain_all(taskqueue_thread); +} + +static struct mbuf * +ipsec_accel_key_setaccelif_impl(struct secasvar *sav) +{ + struct mbuf *m, *m1; + struct ifp_handle_sav *i; + struct epoch_tracker et; + + if (sav->accel_ifname != NULL) + return (key_setaccelif(sav->accel_ifname)); + + m = m1 = NULL; + + NET_EPOCH_ENTER(et); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == + IFP_HS_HANDLED) { + m1 = key_setaccelif(if_name(i->ifp)); + if (m == NULL) + m = m1; + else if (m1 != NULL) + m_cat(m, m1); + } + } + NET_EPOCH_EXIT(et); + return (m); +} + +#endif /* IPSEC_ACCEL */ diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include @@ -219,6 +220,12 @@ return ENXIO; } + /* + * We still own a ref on sav, which allows the input hook to + * get one more reference reliably. + */ + ipsec_accel_sa_install_input(sav, &dst_address, sproto, spi); + /* * Call appropriate transform and return -- callback takes care of * everything else. 
@@ -237,6 +244,11 @@ int ipsec4_input(struct mbuf *m, int offset, int proto) { + int error; + + error = ipsec_accel_input(m, offset, proto); + if (error != ENXIO) + return (error); switch (proto) { case IPPROTO_AH: @@ -536,7 +548,12 @@ int ipsec6_input(struct mbuf *m, int offset, int proto) { + int error; + error = ipsec_accel_input(m, offset, proto); + if (error != ENXIO) + return (error); + switch (proto) { case IPPROTO_AH: case IPPROTO_ESP: diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -84,6 +84,7 @@ #include #endif #include +#include #include #include #include @@ -110,7 +111,8 @@ #ifdef INET static struct secasvar * -ipsec4_allocsa(struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) +ipsec4_allocsa(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + u_int *pidx, int *error) { struct secasindex *saidx, tmpsaidx; struct ipsecrequest *isr; @@ -179,6 +181,7 @@ return (NULL); } IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform")); + ipsec_accel_sa_install_output(ifp, sp, sav); return (sav); } @@ -186,14 +189,15 @@ * IPsec output logic for IPv4. */ static int -ipsec4_perform_request(struct mbuf *m, struct secpolicy *sp, - struct inpcb *inp, u_int idx) +ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp, u_int idx, u_long mtu) { struct ipsec_ctx_data ctx; union sockaddr_union *dst; struct secasvar *sav; struct ip *ip; - int error, i, off; + int error, hwassist, i, off; + bool accel; IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); @@ -206,9 +210,11 @@ * determine next transform. At the end of transform we can * release reference to SP. 
*/ - sav = ipsec4_allocsa(m, sp, &idx, &error); + sav = ipsec4_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ + (void)ipsec_accel_output(ifp, m, inp, sp, NULL, + AF_INET, mtu, &hwassist); key_freesp(&sp); return (error); } @@ -221,6 +227,30 @@ if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; + hwassist = 0; + accel = ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET, mtu, + &hwassist); + + /* + * Do delayed checksums now because we send before + * this is done in the normal processing path. + */ + if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~hwassist) != 0) { + in_delayed_cksum(m); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; + } +#if defined(SCTP) || defined(SCTP_SUPPORT) + if ((m->m_pkthdr.csum_flags & CSUM_SCTP & ~hwassist) != 0) { + struct ip *ip; + + ip = mtod(m, struct ip *); + sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); + m->m_pkthdr.csum_flags &= ~CSUM_SCTP; + } +#endif + if (accel) + return (EJUSTRETURN); + ip = mtod(m, struct ip *); dst = &sav->sah->saidx.dst; /* Do the appropriate encapsulation, if necessary */ @@ -288,15 +318,16 @@ } int -ipsec4_process_packet(struct mbuf *m, struct secpolicy *sp, - struct inpcb *inp) +ipsec4_process_packet(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp, u_long mtu) { - return (ipsec4_perform_request(m, sp, inp, 0)); + return (ipsec4_perform_request(ifp, m, sp, inp, 0, mtu)); } int -ipsec4_check_pmtu(struct mbuf *m, struct secpolicy *sp, int forwarding) +ipsec4_check_pmtu(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + int forwarding) { struct secasvar *sav; struct ip *ip; @@ -317,7 +348,7 @@ setdf: idx = sp->tcount - 1; - sav = ipsec4_allocsa(m, sp, &idx, &error); + sav = ipsec4_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { key_freesp(&sp); /* @@ -368,7 +399,8 @@ } static int -ipsec4_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) +ipsec4_common_output(struct 
ifnet *ifp, struct mbuf *m, struct inpcb *inp, + int forwarding, u_long mtu) { struct secpolicy *sp; int error; @@ -392,27 +424,9 @@ * packets, and thus, even if they are forwarded, the replies will * return back to us. */ - if (!forwarding) { - /* - * Do delayed checksums now because we send before - * this is done in the normal processing path. - */ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - in_delayed_cksum(m); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; - } -#if defined(SCTP) || defined(SCTP_SUPPORT) - if (m->m_pkthdr.csum_flags & CSUM_SCTP) { - struct ip *ip; - ip = mtod(m, struct ip *); - sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); - m->m_pkthdr.csum_flags &= ~CSUM_SCTP; - } -#endif - } /* NB: callee frees mbuf and releases reference to SP */ - error = ipsec4_check_pmtu(m, sp, forwarding); + error = ipsec4_check_pmtu(ifp, m, sp, forwarding); if (error != 0) { if (error == EJUSTRETURN) return (0); @@ -420,7 +434,7 @@ return (error); } - error = ipsec4_process_packet(m, sp, inp); + error = ipsec4_process_packet(ifp, m, sp, inp, mtu); if (error == EJUSTRETURN) { /* * We had a SP with a level of 'use' and no SA. We @@ -440,7 +454,7 @@ * other values - mbuf consumed by IPsec. 
*/ int -ipsec4_output(struct mbuf *m, struct inpcb *inp) +ipsec4_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, u_long mtu) { /* @@ -451,7 +465,7 @@ if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) return (0); - return (ipsec4_common_output(m, inp, 0)); + return (ipsec4_common_output(ifp, m, inp, 0, mtu)); } /* @@ -471,7 +485,7 @@ m_freem(m); return (EACCES); } - return (ipsec4_common_output(m, NULL, 1)); + return (ipsec4_common_output(NULL /* XXXKIB */, m, NULL, 1, 0)); } #endif @@ -491,7 +505,8 @@ } static struct secasvar * -ipsec6_allocsa(struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) +ipsec6_allocsa(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + u_int *pidx, int *error) { struct secasindex *saidx, tmpsaidx; struct ipsecrequest *isr; @@ -572,6 +587,7 @@ return (NULL); } IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform")); + ipsec_accel_sa_install_output(ifp, sp, sav); return (sav); } @@ -579,20 +595,23 @@ * IPsec output logic for IPv6. 
*/ static int -ipsec6_perform_request(struct mbuf *m, struct secpolicy *sp, - struct inpcb *inp, u_int idx) +ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp, u_int idx, u_long mtu) { struct ipsec_ctx_data ctx; union sockaddr_union *dst; struct secasvar *sav; struct ip6_hdr *ip6; - int error, i, off; + int error, hwassist, i, off; + bool accel; IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx)); - sav = ipsec6_allocsa(m, sp, &idx, &error); + sav = ipsec6_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ + (void)ipsec_accel_output(ifp, m, inp, sp, NULL, + AF_INET6, mtu, &hwassist); key_freesp(&sp); return (error); } @@ -607,6 +626,28 @@ if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; + hwassist = 0; + accel = ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET6, mtu, + &hwassist); + + /* + * Do delayed checksums now because we send before + * this is done in the normal processing path. 
+ */ + if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & ~hwassist) != 0) { + in6_delayed_cksum(m, m->m_pkthdr.len - + sizeof(struct ip6_hdr), sizeof(struct ip6_hdr)); + m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; + } +#if defined(SCTP) || defined(SCTP_SUPPORT) + if ((m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6 & ~hwassist) != 0) { + sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); + m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; + } +#endif + if (accel) + return (EJUSTRETURN); + ip6 = mtod(m, struct ip6_hdr *); /* pfil can change mbuf */ dst = &sav->sah->saidx.dst; @@ -671,18 +712,19 @@ } int -ipsec6_process_packet(struct mbuf *m, struct secpolicy *sp, - struct inpcb *inp) +ipsec6_process_packet(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + struct inpcb *inp, u_long mtu) { - return (ipsec6_perform_request(m, sp, inp, 0)); + return (ipsec6_perform_request(ifp, m, sp, inp, 0, mtu)); } /* * IPv6 implementation is based on IPv4 implementation. */ int -ipsec6_check_pmtu(struct mbuf *m, struct secpolicy *sp, int forwarding) +ipsec6_check_pmtu(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, + int forwarding) { struct secasvar *sav; size_t hlen, pmtu; @@ -699,7 +741,7 @@ return (0); idx = sp->tcount - 1; - sav = ipsec6_allocsa(m, sp, &idx, &error); + sav = ipsec6_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { key_freesp(&sp); /* @@ -745,7 +787,8 @@ } static int -ipsec6_common_output(struct mbuf *m, struct inpcb *inp, int forwarding) +ipsec6_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, + int forwarding, u_long mtu) { struct secpolicy *sp; int error; @@ -761,25 +804,7 @@ return (0); /* No IPsec required. */ } - if (!forwarding) { - /* - * Do delayed checksums now because we send before - * this is done in the normal processing path. 
- */ - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) { - in6_delayed_cksum(m, m->m_pkthdr.len - - sizeof(struct ip6_hdr), sizeof(struct ip6_hdr)); - m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6; - } -#if defined(SCTP) || defined(SCTP_SUPPORT) - if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) { - sctp_delayed_cksum(m, sizeof(struct ip6_hdr)); - m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6; - } -#endif - } - - error = ipsec6_check_pmtu(m, sp, forwarding); + error = ipsec6_check_pmtu(ifp, m, sp, forwarding); if (error != 0) { if (error == EJUSTRETURN) return (0); @@ -788,7 +813,7 @@ } /* NB: callee frees mbuf and releases reference to SP */ - error = ipsec6_process_packet(m, sp, inp); + error = ipsec6_process_packet(ifp, m, sp, inp, mtu); if (error == EJUSTRETURN) { /* * We had a SP with a level of 'use' and no SA. We @@ -808,7 +833,7 @@ * other values - mbuf consumed by IPsec. */ int -ipsec6_output(struct mbuf *m, struct inpcb *inp) +ipsec6_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, u_long mtu) { /* @@ -819,7 +844,7 @@ if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) return (0); - return (ipsec6_common_output(m, inp, 0)); + return (ipsec6_common_output(ifp, m, inp, 0, mtu)); } /* @@ -839,7 +864,7 @@ m_freem(m); return (EACCES); } - return (ipsec6_common_output(m, NULL, 1)); + return (ipsec6_common_output(NULL /* XXXKIB */, m, NULL, 1, 0)); } #endif /* INET6 */ @@ -853,6 +878,10 @@ struct m_tag *mtag; int error; + if (sav->state >= SADB_SASTATE_DEAD) { + error = ESRCH; + goto bad; + } saidx = &sav->sah->saidx; switch (saidx->dst.sa.sa_family) { #ifdef INET @@ -916,14 +945,16 @@ case AF_INET: key_freesav(&sav); IPSECSTAT_INC(ips_out_bundlesa); - return (ipsec4_perform_request(m, sp, NULL, idx)); + return (ipsec4_perform_request(NULL, m, sp, NULL, + idx, 0)); /* NOTREACHED */ #endif #ifdef INET6 case AF_INET6: key_freesav(&sav); IPSEC6STAT_INC(ips_out_bundlesa); - return (ipsec6_perform_request(m, sp, NULL, idx)); + return 
(ipsec6_perform_request(NULL, m, sp, NULL, + idx, 0)); /* NOTREACHED */ #endif /* INET6 */ default: diff --git a/sys/netipsec/ipsec_pcb.c b/sys/netipsec/ipsec_pcb.c --- a/sys/netipsec/ipsec_pcb.c +++ b/sys/netipsec/ipsec_pcb.c @@ -49,6 +49,7 @@ #include #include #include +#include MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy"); @@ -166,18 +167,26 @@ int ipsec_delete_pcbpolicy(struct inpcb *inp) { + struct inpcbpolicy *inp_sp; - if (inp->inp_sp == NULL) + inp_sp = inp->inp_sp; + if (inp_sp == NULL) return (0); + inp->inp_sp = NULL; - if (inp->inp_sp->sp_in != NULL) - key_freesp(&inp->inp_sp->sp_in); + if (inp_sp->sp_in != NULL) { + if ((inp_sp->flags & INP_INBOUND_POLICY) != 0) + ipsec_accel_spddel(inp_sp->sp_in); + key_freesp(&inp_sp->sp_in); + } - if (inp->inp_sp->sp_out != NULL) - key_freesp(&inp->inp_sp->sp_out); + if (inp_sp->sp_out != NULL) { + if ((inp_sp->flags & INP_OUTBOUND_POLICY) != 0) + ipsec_accel_spddel(inp_sp->sp_out); + key_freesp(&inp_sp->sp_out); + } - free(inp->inp_sp, M_IPSEC_INPCB); - inp->inp_sp = NULL; + free(inp_sp, M_IPSEC_INPCB); return (0); } @@ -248,20 +257,26 @@ if (sp == NULL) return (ENOBUFS); ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_INBOUND); - if (new->inp_sp->sp_in != NULL) + if (new->inp_sp->sp_in != NULL) { + ipsec_accel_spddel(new->inp_sp->sp_in); key_freesp(&new->inp_sp->sp_in); + } new->inp_sp->sp_in = sp; new->inp_sp->flags |= INP_INBOUND_POLICY; + ipsec_accel_spdadd(sp, new); } if (old->inp_sp->flags & INP_OUTBOUND_POLICY) { sp = ipsec_deepcopy_pcbpolicy(old->inp_sp->sp_out); if (sp == NULL) return (ENOBUFS); ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_OUTBOUND); - if (new->inp_sp->sp_out != NULL) + if (new->inp_sp->sp_out != NULL) { + ipsec_accel_spddel(new->inp_sp->sp_out); key_freesp(&new->inp_sp->sp_out); + } new->inp_sp->sp_out = sp; new->inp_sp->flags |= INP_OUTBOUND_POLICY; + ipsec_accel_spdadd(sp, new); } return (0); } @@ -339,8 +354,10 @@ flags = INP_OUTBOUND_POLICY; } /* Clear 
old SP and set new SP. */ - if (*spp != NULL) + if (*spp != NULL) { + ipsec_accel_spddel(*spp); key_freesp(spp); + } *spp = newsp; KEYDBG(IPSEC_DUMP, printf("%s: new SP(%p)\n", __func__, newsp)); @@ -348,6 +365,7 @@ inp->inp_sp->flags &= ~flags; else { inp->inp_sp->flags |= flags; + ipsec_accel_spdadd(newsp, inp); KEYDBG(IPSEC_DUMP, kdebug_secpolicy(newsp)); } INP_WUNLOCK(inp); diff --git a/sys/netipsec/ipsec_support.h b/sys/netipsec/ipsec_support.h --- a/sys/netipsec/ipsec_support.h +++ b/sys/netipsec/ipsec_support.h @@ -29,6 +29,7 @@ #ifdef _KERNEL #if defined(IPSEC) || defined(IPSEC_SUPPORT) +struct ifnet; struct mbuf; struct inpcb; struct tcphdr; @@ -58,7 +59,7 @@ int ipsec4_input(struct mbuf *, int, int); int ipsec4_forward(struct mbuf *); int ipsec4_pcbctl(struct inpcb *, struct sockopt *); -int ipsec4_output(struct mbuf *, struct inpcb *); +int ipsec4_output(struct ifnet *, struct mbuf *, struct inpcb *, u_long); int ipsec4_capability(struct mbuf *, u_int); int ipsec4_ctlinput(ipsec_ctlinput_param_t); #endif /* INET */ @@ -68,7 +69,7 @@ int ipsec6_in_reject(const struct mbuf *, struct inpcb *); int ipsec6_forward(struct mbuf *); int ipsec6_pcbctl(struct inpcb *, struct sockopt *); -int ipsec6_output(struct mbuf *, struct inpcb *); +int ipsec6_output(struct ifnet *, struct mbuf *, struct inpcb *, u_long); int ipsec6_capability(struct mbuf *, u_int); int ipsec6_ctlinput(ipsec_ctlinput_param_t); #endif /* INET6 */ @@ -77,7 +78,8 @@ int (*input)(struct mbuf *, int, int); int (*check_policy)(const struct mbuf *, struct inpcb *); int (*forward)(struct mbuf *); - int (*output)(struct mbuf *, struct inpcb *); + int (*output)(struct ifnet *, struct mbuf *, struct inpcb *, + u_long); int (*pcbctl)(struct inpcb *, struct sockopt *); size_t (*hdrsize)(struct inpcb *); int (*capability)(struct mbuf *, u_int); @@ -187,8 +189,8 @@ int ipsec_kmod_check_policy(struct ipsec_support * const, struct mbuf *, struct inpcb *); int ipsec_kmod_forward(struct ipsec_support * const, 
struct mbuf *); -int ipsec_kmod_output(struct ipsec_support * const, struct mbuf *, - struct inpcb *); +int ipsec_kmod_output(struct ipsec_support * const, struct ifnet *, + struct mbuf *, struct inpcb *, u_long); int ipsec_kmod_pcbctl(struct ipsec_support * const, struct inpcb *, struct sockopt *); int ipsec_kmod_capability(struct ipsec_support * const, struct mbuf *, u_int); diff --git a/sys/netipsec/key.h b/sys/netipsec/key.h --- a/sys/netipsec/key.h +++ b/sys/netipsec/key.h @@ -36,6 +36,7 @@ #ifdef _KERNEL +struct mbuf; struct secpolicy; struct secpolicyindex; struct secasvar; @@ -49,6 +50,7 @@ struct secpolicy *key_newsp(void); struct secpolicy *key_allocsp(struct secpolicyindex *, u_int); +struct secpolicy *key_do_allocsp(struct secpolicyindex *spidx, u_int dir); struct secpolicy *key_msg2sp(struct sadb_x_policy *, size_t, int *); int key_sp2msg(struct secpolicy *, void *, size_t *); void key_addref(struct secpolicy *); @@ -59,6 +61,7 @@ void key_bumpspgen(void); uint32_t key_getspgen(void); uint32_t key_newreqid(void); +struct mbuf *key_setaccelif(const char *ifname); struct secasvar *key_allocsa(union sockaddr_union *, uint8_t, uint32_t); struct secasvar *key_allocsa_tunnel(union sockaddr_union *, @@ -84,6 +87,10 @@ uint16_t key_portfromsaddr(struct sockaddr *); void key_porttosaddr(struct sockaddr *, uint16_t port); +struct rm_priotracker; +void ipsec_sahtree_runlock(struct rm_priotracker *); +void ipsec_sahtree_rlock(struct rm_priotracker *); + #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IPSEC_SA); MALLOC_DECLARE(M_IPSEC_SAH); diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -83,6 +83,7 @@ #include #include #include +#include #include #ifdef INET6 @@ -90,12 +91,27 @@ #endif #include +#include #include #include /* randomness */ #include +#ifdef IPSEC_ACCEL +void (*ipsec_accel_sa_install_input_p)(struct secasvar *sav, + const union sockaddr_union *dst_address, int sproto, uint32_t spi); +void 
(*ipsec_accel_forget_sav_p)(struct secasvar *sav); +void (*ipsec_accel_spdadd_p)(struct secpolicy *sp, struct inpcb *inp); +void (*ipsec_accel_spddel_p)(struct secpolicy *sp); +int (*ipsec_accel_sa_lifetime_op_p)(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +void (*ipsec_accel_sync_p)(void); +bool (*ipsec_accel_is_accel_sav_p)(struct secasvar *sav); +struct mbuf *(*ipsec_accel_key_setaccelif_p)(struct secasvar *sav); +#endif + #define FULLMASK 0xff #define _BITS(bytes) ((bytes) << 3) @@ -391,6 +407,9 @@ [SADB_X_EXT_SA_REPLAY] = sizeof(struct sadb_x_sa_replay), [SADB_X_EXT_NEW_ADDRESS_SRC] = sizeof(struct sadb_address), [SADB_X_EXT_NEW_ADDRESS_DST] = sizeof(struct sadb_address), + [SADB_X_EXT_LFT_CUR_SW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_LFT_CUR_HW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_IF_HW_OFFL] = sizeof(struct sadb_x_if_hw_offl), }; _Static_assert(nitems(minsize) == SADB_EXT_MAX + 1, "minsize size mismatch"); @@ -424,6 +443,9 @@ [SADB_X_EXT_SA_REPLAY] = sizeof(struct sadb_x_sa_replay), [SADB_X_EXT_NEW_ADDRESS_SRC] = 0, [SADB_X_EXT_NEW_ADDRESS_DST] = 0, + [SADB_X_EXT_LFT_CUR_SW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_LFT_CUR_HW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_IF_HW_OFFL] = sizeof(struct sadb_x_if_hw_offl), }; _Static_assert(nitems(maxsize) == SADB_EXT_MAX + 1, "maxsize size mismatch"); @@ -622,7 +644,6 @@ static void key_unlink(struct secpolicy *); static void key_detach(struct secpolicy *); -static struct secpolicy *key_do_allocsp(struct secpolicyindex *spidx, u_int dir); static struct secpolicy *key_getsp(struct secpolicyindex *); static struct secpolicy *key_getspbyid(u_int32_t); static struct mbuf *key_gather_mbuf(struct mbuf *, @@ -662,7 +683,7 @@ const struct sadb_msghdr *, struct secasvar *, struct secasindex *); static struct mbuf *key_setdumpsa(struct secasvar *, u_int8_t, - u_int8_t, u_int32_t, u_int32_t); + 
u_int8_t, u_int32_t, u_int32_t, struct rm_priotracker *); static struct mbuf *key_setsadbmsg(u_int8_t, u_int16_t, u_int8_t, u_int32_t, pid_t, u_int16_t); static struct mbuf *key_setsadbsa(struct secasvar *); @@ -1228,6 +1249,11 @@ KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); *spp = NULL; +#ifdef IPSEC_ACCEL + KASSERT(CK_LIST_EMPTY(&sp->accel_ifps), + ("key_freesp: sp %p still offloaded", sp)); + free(__DECONST(char *, sp->accel_ifname), M_IPSEC_MISC); +#endif while (sp->tcount > 0) ipsec_delisr(sp->req[--sp->tcount]); free(sp, M_IPSEC_SP); @@ -1241,6 +1267,7 @@ SPTREE_WUNLOCK(); if (SPDCACHE_ENABLED()) spdcache_clear(); + ipsec_accel_sync(); key_freesp(&sp); } @@ -1259,6 +1286,7 @@ return; } sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); V_spd_size--; LIST_REMOVE(sp, idhash); @@ -1286,6 +1314,7 @@ newsp->state = IPSEC_SPSTATE_ALIVE; V_spd_size++; V_sp_genid++; + ipsec_accel_spdadd(newsp, NULL); } /* @@ -1330,6 +1359,7 @@ */ LIST_INSERT_HEAD(SPHASH_HASH(spp[i]->id), spp[i], idhash); spp[i]->state = IPSEC_SPSTATE_IFNET; + ipsec_accel_spdadd(spp[i], NULL); } SPTREE_WUNLOCK(); /* @@ -1358,6 +1388,7 @@ if (spp[i]->state != IPSEC_SPSTATE_IFNET) continue; spp[i]->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(spp[i]); TAILQ_REMOVE(&V_sptree_ifnet[spp[i]->spidx.dir], spp[i], chain); V_spd_size--; @@ -1366,6 +1397,7 @@ SPTREE_WUNLOCK(); if (SPDCACHE_ENABLED()) spdcache_clear(); + ipsec_accel_sync(); for (i = 0; i < count; i++) { m = key_setdumpsp(spp[i], SADB_X_SPDDELETE, 0, 0); @@ -1425,6 +1457,7 @@ /* Unlink from SPI hash */ LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sah = sav->sah; SAHTREE_WUNLOCK(); key_freesav(&sav); @@ -1822,6 +1855,9 @@ size_t xlen, ilen; caddr_t p; int error, i; +#ifdef IPSEC_ACCEL + struct sadb_x_if_hw_offl *xif; +#endif IPSEC_ASSERT(sp != NULL, ("null policy")); @@ -1877,6 +1913,18 @@ } } xpl->sadb_x_policy_len = PFKEY_UNIT64(xlen); +#ifdef 
IPSEC_ACCEL + if (error == 0 && sp->accel_ifname != NULL) { + xif = (struct sadb_x_if_hw_offl *)(xpl + 1); + bzero(xif, sizeof(*xif)); + xif->sadb_x_if_hw_offl_len = PFKEY_UNIT64(sizeof(*xif)); + xif->sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + xif->sadb_x_if_hw_offl_flags = 0; + strncpy(xif->sadb_x_if_hw_offl_if, sp->accel_ifname, + sizeof(xif->sadb_x_if_hw_offl_if)); + xlen += sizeof(*xif); + } +#endif if (error == 0) *len = xlen; else @@ -2089,6 +2137,27 @@ newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; newsp->validtime = lft ? lft->sadb_lifetime_usetime : 0; bcopy(&spidx, &newsp->spidx, sizeof(spidx)); +#ifdef IPSEC_ACCEL + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_IF_HW_OFFL) && + !SADB_CHECKLEN(mhp, SADB_X_EXT_IF_HW_OFFL)) { + struct sadb_x_if_hw_offl *xof; + + xof = (struct sadb_x_if_hw_offl *)mhp->ext[ + SADB_X_EXT_IF_HW_OFFL]; + newsp->accel_ifname = malloc(sizeof(xof->sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (newsp->accel_ifname == NULL) { + ipseclog((LOG_DEBUG, "%s: cannot alloc accel_ifname.\n", + __func__)); + key_freesp(&newsp); + return (key_senderror(so, m, error)); + } + strncpy(__DECONST(char *, newsp->accel_ifname), + xof->sadb_x_if_hw_offl_if, + sizeof(xof->sadb_x_if_hw_offl_if)); + } + +#endif SPTREE_WLOCK(); if ((newsp->id = key_getnewspid()) == 0) { @@ -2096,6 +2165,7 @@ key_detach(oldsp); SPTREE_WUNLOCK(); if (oldsp != NULL) { + ipsec_accel_sync(); key_freesp(&oldsp); /* first for key_detach */ IPSEC_ASSERT(oldsp != NULL, ("null oldsp: refcount bug")); key_freesp(&oldsp); /* second for our reference */ @@ -2110,6 +2180,7 @@ key_insertsp(newsp); SPTREE_WUNLOCK(); if (oldsp != NULL) { + ipsec_accel_sync(); key_freesp(&oldsp); /* first for key_detach */ IPSEC_ASSERT(oldsp != NULL, ("null oldsp: refcount bug")); key_freesp(&oldsp); /* second for our reference */ @@ -2291,6 +2362,7 @@ KEYDBG(KEY_STAMP, printf("%s: SP(%p)\n", __func__, sp)); KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); + ipsec_accel_spddel(sp); key_unlink(sp); 
key_freesp(&sp); @@ -2562,6 +2634,7 @@ */ TAILQ_FOREACH(sp, &drainq, chain) { sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); LIST_REMOVE(sp, idhash); } V_sp_genid++; @@ -2765,6 +2838,10 @@ tlen += PFKEY_ALIGN8(len); } +#ifdef IPSEC_ACCEL + if (sp->accel_ifname != NULL) + tlen += sizeof(struct sadb_x_if_hw_offl); +#endif return (tlen); } @@ -3006,6 +3083,32 @@ sav->state = SADB_SASTATE_LARVAL; sav->pid = (pid_t)mhp->msg->sadb_msg_pid; SAV_INITREF(sav); +#ifdef IPSEC_ACCEL + CK_LIST_INIT(&sav->accel_ifps); + sav->accel_forget_tq = 0; + sav->accel_lft_sw = uma_zalloc_pcpu(ipsec_key_lft_zone, + M_NOWAIT | M_ZERO); + if (sav->accel_lft_sw == NULL) { + *errp = ENOBUFS; + goto done; + } + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_IF_HW_OFFL) && + !SADB_CHECKLEN(mhp, SADB_X_EXT_IF_HW_OFFL)) { + struct sadb_x_if_hw_offl *xof; + + xof = (struct sadb_x_if_hw_offl *)mhp->ext[ + SADB_X_EXT_IF_HW_OFFL]; + sav->accel_ifname = malloc(sizeof(xof->sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (sav->accel_ifname == NULL) { + *errp = ENOBUFS; + goto done; + } + strncpy(__DECONST(char *, sav->accel_ifname), + xof->sadb_x_if_hw_offl_if, + sizeof(xof->sadb_x_if_hw_offl_if)); + } +#endif again: sah = key_getsah(saidx); if (sah == NULL) { @@ -3086,6 +3189,13 @@ } if (sav->lft_c != NULL) uma_zfree_pcpu(ipsec_key_lft_zone, sav->lft_c); +#ifdef IPSEC_ACCEL + if (sav->accel_lft_sw != NULL) + uma_zfree_pcpu(ipsec_key_lft_zone, + sav->accel_lft_sw); + free(__DECONST(char *, sav->accel_ifname), + M_IPSEC_MISC); +#endif free(sav, M_IPSEC_SA), sav = NULL; } if (sah != NULL) @@ -3154,6 +3264,10 @@ ("attempt to free non DEAD SA %p", sav)); IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", sav->refcnt)); +#ifdef IPSEC_ACCEL + KASSERT(CK_LIST_EMPTY(&sav->accel_ifps), + ("key_unlinksav: sav %p still offloaded", sav)); +#endif /* * SA must be unlinked from the chain and hashtbl. 
@@ -3166,6 +3280,12 @@ free(sav->lock, M_IPSEC_MISC); uma_zfree_pcpu(ipsec_key_lft_zone, sav->lft_c); } +#ifdef IPSEC_ACCEL + /* XXXKIB should this be moved to key_cleansav()? */ + if (sav->accel_lft_sw != NULL) + uma_zfree_pcpu(ipsec_key_lft_zone, sav->accel_lft_sw); + free(__DECONST(char *, sav->accel_ifname), M_IPSEC_MISC); +#endif free(sav, M_IPSEC_SA); } @@ -3589,7 +3709,7 @@ */ static struct mbuf * key_setdumpsa(struct secasvar *sav, uint8_t type, uint8_t satype, - uint32_t seq, uint32_t pid) + uint32_t seq, uint32_t pid, struct rm_priotracker *sahtree_trackerp) { struct seclifetime lft_c; struct mbuf *result = NULL, *tres = NULL, *m; @@ -3605,8 +3725,15 @@ SADB_X_EXT_NAT_T_SPORT, SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NAT_T_FRAG, +#ifdef IPSEC_ACCEL + SADB_X_EXT_LFT_CUR_SW_OFFL, SADB_X_EXT_LFT_CUR_HW_OFFL, + SADB_X_EXT_IF_HW_OFFL, +#endif }; uint32_t replay_count; +#ifdef IPSEC_ACCEL + int error; +#endif SECASVAR_RLOCK_TRACKER; @@ -3753,6 +3880,44 @@ case SADB_X_EXT_NAT_T_FRAG: /* We do not (yet) support those. 
*/ continue; +#ifdef IPSEC_ACCEL + case SADB_X_EXT_LFT_CUR_SW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + SAV_ADDREF(sav); + error = ipsec_accel_sa_lifetime_op(sav, &lft_c, + NULL, IF_SA_CNT_TOTAL_SW_VAL, sahtree_trackerp); + if (error != 0) { + m = NULL; + goto fail; + } + m = key_setlifetime(&lft_c, dumporder[i]); + if (m == NULL) + goto fail; + key_freesav(&sav); + if (sav == NULL) { + m_freem(m); + goto fail; + } + break; + case SADB_X_EXT_LFT_CUR_HW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + memset(&lft_c, 0, sizeof(lft_c)); + lft_c.bytes = sav->accel_hw_octets; + lft_c.allocations = sav->accel_hw_allocs; + m = key_setlifetime(&lft_c, dumporder[i]); + if (m == NULL) + goto fail; + break; + case SADB_X_EXT_IF_HW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + m = ipsec_accel_key_setaccelif(sav); + if (m == NULL) + continue; /* benigh */ + break; +#endif case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_IDENTITY_SRC: @@ -4503,6 +4668,7 @@ V_spd_size--; LIST_REMOVE(sp, idhash); sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); sp = nextsp; } V_sp_genid++; @@ -4626,6 +4792,7 @@ TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sav = nextsav; } /* Unlink all SAs with expired HARD lifetime */ @@ -4642,6 +4809,7 @@ TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sav = nextsav; } /* Mark all SAs with expired SOFT lifetime as DYING */ @@ -5240,6 +5408,30 @@ /* Clone SA's content into newsav */ SAV_INITREF(newsav); bcopy(sav, newsav, offsetof(struct secasvar, chain)); +#ifdef IPSEC_ACCEL + CK_LIST_INIT(&newsav->accel_ifps); + newsav->accel_forget_tq = 0; + newsav->accel_lft_sw = uma_zalloc_pcpu(ipsec_key_lft_zone, + M_NOWAIT | M_ZERO); + if (newsav->accel_lft_sw == NULL) { + error = ENOBUFS; + goto fail; + } + if (sav->accel_ifname != NULL) { + 
struct sadb_x_if_hw_offl xof; + + newsav->accel_ifname = malloc(sizeof(xof.sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (newsav->accel_ifname == NULL) { + error = ENOBUFS; + goto fail; + } + strncpy(__DECONST(char *, newsav->accel_ifname), + sav->accel_ifname, + sizeof(xof.sadb_x_if_hw_offl_if)); + } +#endif + /* * We create new NAT-T config if it is needed. * Old NAT-T config will be freed by key_cleansav() when @@ -5270,6 +5462,7 @@ TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); /* * Link new SA with SAH. Keep SAs ordered by @@ -5327,6 +5520,12 @@ if (isnew != 0) key_freesah(&sah); if (newsav != NULL) { +#ifdef IPSEC_ACCEL + if (newsav->accel_lft_sw != NULL) + uma_zfree_pcpu(ipsec_key_lft_zone, + newsav->accel_lft_sw); + free(__DECONST(char *, newsav->accel_ifname), M_IPSEC_MISC); +#endif if (newsav->natt != NULL) free(newsav->natt, M_IPSEC_MISC); free(newsav, M_IPSEC_SA); @@ -6197,6 +6396,7 @@ /* Unlink all queued SAs from SPI hash */ TAILQ_FOREACH(sav, &drainq, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); LIST_REMOVE(sav, spihash); } SAHTREE_WUNLOCK(); @@ -6265,6 +6465,7 @@ /* Unlink all queued SAs from SPI hash */ TAILQ_FOREACH(sav, &drainq, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); LIST_REMOVE(sav, spihash); } SAHTREE_WUNLOCK(); @@ -6373,7 +6574,7 @@ /* create new sadb_msg to reply. 
*/ n = key_setdumpsa(sav, SADB_GET, satype, mhp->msg->sadb_msg_seq, - mhp->msg->sadb_msg_pid); + mhp->msg->sadb_msg_pid, NULL); key_freesav(&sav); if (!n) @@ -7615,9 +7816,11 @@ */ TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } } SAHTREE_WUNLOCK(); @@ -7639,10 +7842,12 @@ TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } /* Add SAH into flushq */ TAILQ_INSERT_HEAD(&flushq, sah, chain); @@ -7706,6 +7911,7 @@ /* count sav entries to be sent to the userland. */ cnt = 0; + IFNET_RLOCK(); SAHTREE_RLOCK(); TAILQ_FOREACH(sah, &V_sahtree, chain) { if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && @@ -7720,6 +7926,7 @@ if (cnt == 0) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOENT); } @@ -7732,30 +7939,34 @@ /* map proto to satype */ if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); ipseclog((LOG_DEBUG, "%s: there was invalid proto in " "SAD.\n", __func__)); return key_senderror(so, m, EINVAL); } TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { n = key_setdumpsa(sav, SADB_DUMP, satype, - --cnt, mhp->msg->sadb_msg_pid); + --cnt, mhp->msg->sadb_msg_pid, &sahtree_tracker); if (n == NULL) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOBUFS); } key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { n = key_setdumpsa(sav, SADB_DUMP, satype, - --cnt, mhp->msg->sadb_msg_pid); + --cnt, mhp->msg->sadb_msg_pid, &sahtree_tracker); if (n == NULL) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOBUFS); } key_sendup_mbuf(so, 
n, KEY_SENDUP_ONE); } } SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); m_freem(m); return (0); } @@ -8176,6 +8387,11 @@ case SADB_X_EXT_SA_REPLAY: case SADB_X_EXT_NEW_ADDRESS_SRC: case SADB_X_EXT_NEW_ADDRESS_DST: +#ifdef IPSEC_ACCEL + case SADB_X_EXT_LFT_CUR_SW_OFFL: + case SADB_X_EXT_LFT_CUR_HW_OFFL: + case SADB_X_EXT_IF_HW_OFFL: +#endif /* duplicate check */ /* * XXX Are there duplication payloads of either @@ -8484,9 +8700,11 @@ sah->state = SADB_SASTATE_DEAD; TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } } SAHTREE_WUNLOCK(); @@ -8634,6 +8852,32 @@ return m; } +#ifdef IPSEC_ACCEL +struct mbuf * +key_setaccelif(const char *ifname) +{ + struct mbuf *m = NULL; + struct sadb_x_if_hw_offl *p; + int len = PFKEY_ALIGN8(sizeof(*p)); + + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) + return (m); + m_align(m, len); + m->m_len = len; + p = mtod(m, struct sadb_x_if_hw_offl *); + + bzero(p, len); + p->sadb_x_if_hw_offl_len = PFKEY_UNIT64(len); + p->sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + p->sadb_x_if_hw_offl_flags = 0; + strncpy(p->sadb_x_if_hw_offl_if, ifname, + sizeof(p->sadb_x_if_hw_offl_if) - 1); + + return (m); +} +#endif + /* * Take one of the kernel's lifetime data structures and convert it * into a PF_KEY structure within an mbuf, suitable for sending up to @@ -8709,3 +8953,15 @@ return (supported_calgs[i].xform); return (NULL); } + +void +ipsec_sahtree_runlock(struct rm_priotracker *sahtree_trackerp) +{ + rm_runlock(&sahtree_lock, sahtree_trackerp); +} + +void +ipsec_sahtree_rlock(struct rm_priotracker *sahtree_trackerp) +{ + rm_rlock(&sahtree_lock, sahtree_trackerp); +} diff --git a/sys/netipsec/key_debug.c b/sys/netipsec/key_debug.c --- a/sys/netipsec/key_debug.c +++ b/sys/netipsec/key_debug.c @@ -155,6 +155,8 @@ X_NAME(SA_REPLAY); X_NAME(NEW_ADDRESS_SRC); 
X_NAME(NEW_ADDRESS_DST); + X_NAME(LFT_CUR_SW_OFFL); + X_NAME(LFT_CUR_HW_OFFL); default: return ("UNKNOWN"); }; @@ -251,6 +253,10 @@ case SADB_X_EXT_NAT_T_DPORT: kdebug_sadb_x_natt(ext); break; + case SADB_X_EXT_LFT_CUR_SW_OFFL: + case SADB_X_EXT_LFT_CUR_HW_OFFL: + kdebug_sadb_lifetime(ext); + break; default: printf("%s: invalid ext_type %u\n", __func__, ext->sadb_ext_type); diff --git a/sys/netipsec/keydb.h b/sys/netipsec/keydb.h --- a/sys/netipsec/keydb.h +++ b/sys/netipsec/keydb.h @@ -36,9 +36,11 @@ #ifdef _KERNEL #include +#include #include #include #include +#include #include #include @@ -125,6 +127,7 @@ struct enc_xform; struct auth_hash; struct comp_algo; +struct ifp_handle_sav; /* * Security Association @@ -185,8 +188,21 @@ uint64_t cntr; /* counter for GCM and CTR */ volatile u_int refcnt; /* reference count */ +#ifdef IPSEC_ACCEL + CK_LIST_HEAD(, ifp_handle_sav) accel_ifps; + uintptr_t accel_forget_tq; + const char *accel_ifname; + uint32_t accel_flags; + counter_u64_t accel_lft_sw; + uint64_t accel_hw_allocs; + uint64_t accel_hw_octets; + uint64_t accel_firstused; +#endif }; +#define SADB_KEY_ACCEL_INST 0x00000001 +#define SADB_KEY_ACCEL_DEINST 0x00000002 + #define SECASVAR_RLOCK_TRACKER struct rm_priotracker _secas_tracker #define SECASVAR_RLOCK(_sav) rm_rlock((_sav)->lock, &_secas_tracker) #define SECASVAR_RUNLOCK(_sav) rm_runlock((_sav)->lock, &_secas_tracker) diff --git a/sys/netipsec/subr_ipsec.c b/sys/netipsec/subr_ipsec.c --- a/sys/netipsec/subr_ipsec.c +++ b/sys/netipsec/subr_ipsec.c @@ -368,9 +368,10 @@ ipsec_ctlinput_param_t param), METHOD_ARGS(param) ) -IPSEC_KMOD_METHOD(int, ipsec_kmod_output, sc, - output, METHOD_DECL(struct ipsec_support * const sc, struct mbuf *m, - struct inpcb *inp), METHOD_ARGS(m, inp) +IPSEC_KMOD_METHOD(int, ipsec_kmod_output, sc, output, + METHOD_DECL(struct ipsec_support * const sc, struct ifnet *ifp, + struct mbuf *m, struct inpcb *inp, u_long mtu), + METHOD_ARGS(ifp, m, inp, mtu) ) IPSEC_KMOD_METHOD(int, ipsec_kmod_pcbctl, 
sc, diff --git a/sys/netipsec/udpencap.c b/sys/netipsec/udpencap.c --- a/sys/netipsec/udpencap.c +++ b/sys/netipsec/udpencap.c @@ -61,6 +61,7 @@ #include #include #include +#include #include /* @@ -191,6 +192,8 @@ m_freem(m); return (ENOENT); } + ipsec_accel_sa_install_input(sav, &dst, IPPROTO_ESP, spi); + /* * Remove the UDP header * Before: diff --git a/sys/netipsec/xform_esp.c b/sys/netipsec/xform_esp.c --- a/sys/netipsec/xform_esp.c +++ b/sys/netipsec/xform_esp.c @@ -83,8 +83,7 @@ #define SPI_SIZE 4 VNET_DEFINE(int, esp_enable) = 1; -VNET_DEFINE_STATIC(int, esp_ctr_compatibility) = 1; -#define V_esp_ctr_compatibility VNET(esp_ctr_compatibility) +VNET_DEFINE(int, esp_ctr_compatibility) = 1; VNET_PCPUSTAT_DEFINE(struct espstat, espstat); VNET_PCPUSTAT_SYSINIT(espstat); diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -1383,6 +1383,8 @@ #define PACKET_TAG_IPSEC_NAT_T_PORTS 29 /* two uint16_t */ #define PACKET_TAG_ND_OUTGOING 30 /* ND outgoing */ #define PACKET_TAG_PF_REASSEMBLED 31 +#define PACKET_TAG_IPSEC_ACCEL_OUT 32 /* IPSEC accel out */ +#define PACKET_TAG_IPSEC_ACCEL_IN 33 /* IPSEC accel in */ /* Specific cookies and tags. */