diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -4464,6 +4464,8 @@ netipsec/ipsec_input.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mbuf.c optional ipsec inet | ipsec inet6 netipsec/ipsec_mod.c optional ipsec inet | ipsec inet6 +netipsec/ipsec_offload.c optional ipsec ipsec_offload inet | \ + ipsec ipsec_offload inet6 netipsec/ipsec_output.c optional ipsec inet | ipsec inet6 netipsec/ipsec_pcb.c optional ipsec inet | ipsec inet6 | \ ipsec_support inet | ipsec_support inet6 diff --git a/sys/conf/options b/sys/conf/options --- a/sys/conf/options +++ b/sys/conf/options @@ -466,6 +466,7 @@ IPSEC opt_ipsec.h IPSEC_DEBUG opt_ipsec.h IPSEC_SUPPORT opt_ipsec.h +IPSEC_OFFLOAD opt_ipsec.h IPSTEALTH KERN_TLS KRPC diff --git a/sys/modules/ipsec/Makefile b/sys/modules/ipsec/Makefile --- a/sys/modules/ipsec/Makefile +++ b/sys/modules/ipsec/Makefile @@ -2,8 +2,9 @@ .PATH: ${SRCTOP}/sys/net ${SRCTOP}/sys/netipsec KMOD= ipsec -SRCS= if_ipsec.c ipsec.c ipsec_input.c ipsec_mbuf.c ipsec_mod.c \ - ipsec_output.c xform_ah.c xform_esp.c xform_ipcomp.c \ +SRCS= if_ipsec.c ipsec.c ipsec_input.c ipsec_mbuf.c \ + ipsec_mod.c ipsec_offload.c ipsec_output.c \ + xform_ah.c xform_esp.c xform_ipcomp.c \ opt_inet.h opt_inet6.h opt_ipsec.h opt_kern_tls.h opt_sctp.h .if "${MK_INET}" != "no" || "${MK_INET6}" != "no" SRCS+= udpencap.c diff --git a/sys/netipsec/ipsec.h b/sys/netipsec/ipsec.h --- a/sys/netipsec/ipsec.h +++ b/sys/netipsec/ipsec.h @@ -71,6 +71,12 @@ u_int level; /* IPsec level defined below. 
*/ }; +struct ipsec_accel_adddel_sp_tq { + struct vnet *adddel_vnet; + struct task adddel_task; + int adddel_scheduled; +}; + /* Security Policy Data Base */ struct secpolicy { TAILQ_ENTRY(secpolicy) chain; @@ -102,6 +108,11 @@ time_t lastused; /* updated every when kernel sends a packet */ long lifetime; /* duration of the lifetime of this policy */ long validtime; /* duration this policy is valid without use */ + CK_LIST_HEAD(, ifp_handle_sp) accel_ifps; + struct ipsec_accel_adddel_sp_tq accel_add_tq; + struct ipsec_accel_adddel_sp_tq accel_del_tq; + struct inpcb *ipsec_accel_add_sp_inp; + const char *accel_ifname; }; /* diff --git a/sys/netipsec/ipsec.c b/sys/netipsec/ipsec.c --- a/sys/netipsec/ipsec.c +++ b/sys/netipsec/ipsec.c @@ -85,6 +85,7 @@ #ifdef INET6 #include #endif +#include #include #include #include /*XXX*/ @@ -636,8 +637,16 @@ ipsec4_in_reject(const struct mbuf *m, struct inpcb *inp) { struct secpolicy *sp; +#ifdef IPSEC_OFFLOAD + struct ipsec_accel_in_tag *tag; +#endif int result; +#ifdef IPSEC_OFFLOAD + tag = ipsec_accel_input_tag_lookup(m); + if (tag != NULL) + return (0); +#endif sp = ipsec4_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); @@ -802,8 +811,16 @@ ipsec6_in_reject(const struct mbuf *m, struct inpcb *inp) { struct secpolicy *sp; +#ifdef IPSEC_OFFLOAD + struct ipsec_accel_in_tag *tag; +#endif int result; +#ifdef IPSEC_OFFLOAD + tag = ipsec_accel_input_tag_lookup(m); + if (tag != NULL) + return (0); +#endif sp = ipsec6_getpolicy(m, inp, IPSEC_DIR_INBOUND, 0); result = ipsec_in_reject(sp, inp, m); key_freesp(&sp); diff --git a/sys/netipsec/ipsec_input.c b/sys/netipsec/ipsec_input.c --- a/sys/netipsec/ipsec_input.c +++ b/sys/netipsec/ipsec_input.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include @@ -237,6 +238,11 @@ int ipsec4_input(struct mbuf *m, int offset, int proto) { + int error; + + error = ipsec_accel_input(m, offset, proto); + if (error != ENXIO) + return 
(error); switch (proto) { case IPPROTO_AH: @@ -536,7 +542,12 @@ int ipsec6_input(struct mbuf *m, int offset, int proto) { + int error; + error = ipsec_accel_input(m, offset, proto); + if (error != ENXIO) + return (error); + switch (proto) { case IPPROTO_AH: case IPPROTO_ESP: diff --git a/sys/netipsec/ipsec_offload.h b/sys/netipsec/ipsec_offload.h new file mode 100644 --- /dev/null +++ b/sys/netipsec/ipsec_offload.h @@ -0,0 +1,191 @@ +/*- + * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _NETIPSEC_IPSEC_OFFLOAD_H_ +#define _NETIPSEC_IPSEC_OFFLOAD_H_ + +#ifdef _KERNEL +#include +#include +#include + +struct secpolicy; +struct secasvar; +struct inpcb; + +struct ipsec_accel_out_tag { + struct m_tag tag; + uint16_t drv_spi; +}; + +struct ipsec_accel_in_tag { + struct m_tag tag; + uint16_t drv_spi; +}; + +#define IPSEC_ACCEL_DRV_SPI_BYPASS 2 +#define IPSEC_ACCEL_DRV_SPI_MIN 3 +#define IPSEC_ACCEL_DRV_SPI_MAX 0xffff + +extern void (*ipsec_accel_sa_newkey_p)(struct secasvar *sav); +extern void (*ipsec_accel_sa_install_input_p)(struct secasvar *sav, + const union sockaddr_union *dst_address, int sproto, uint32_t spi); +extern void (*ipsec_accel_forget_sav_p)(struct secasvar *sav); +extern void (*ipsec_accel_spdadd_p)(struct secpolicy *sp, struct inpcb *inp); +extern void (*ipsec_accel_spddel_p)(struct secpolicy *sp); +extern int (*ipsec_accel_sa_lifetime_op_p)(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +extern void (*ipsec_accel_sync_p)(void); +extern bool (*ipsec_accel_is_accel_sav_p)(struct secasvar *sav); +extern struct mbuf *(*ipsec_accel_key_setaccelif_p)(struct secasvar *sav); + +#ifdef IPSEC_OFFLOAD +/* + * Have to use ipsec_accel_sa_install_input_p indirection because + * key.c is unconditionally included into the static kernel. 
+ */
+static inline void
+ipsec_accel_sa_newkey(struct secasvar *sav)
+{
+	void (*p)(struct secasvar *sav);
+	/* Load the hook once so the NULL test and the call use one value. */
+	p = atomic_load_ptr(&ipsec_accel_sa_newkey_p);
+	if (p != NULL)
+		p(sav);
+}
+/*
+ * Call the ipsec_accel_forget_sav_p hook for sav; no-op when the hook
+ * pointer is NULL.
+ */
+static inline void
+ipsec_accel_forget_sav(struct secasvar *sav)
+{
+	void (*p)(struct secasvar *sav);
+
+	p = atomic_load_ptr(&ipsec_accel_forget_sav_p);
+	if (p != NULL)
+		p(sav);
+}
+/*
+ * Call the ipsec_accel_spdadd_p hook with (sp, inp); no-op when the hook
+ * pointer is NULL.
+ */
+static inline void
+ipsec_accel_spdadd(struct secpolicy *sp, struct inpcb *inp)
+{
+	void (*p)(struct secpolicy *sp, struct inpcb *inp);
+
+	p = atomic_load_ptr(&ipsec_accel_spdadd_p);
+	if (p != NULL)
+		p(sp, inp);
+}
+/*
+ * Call the ipsec_accel_spddel_p hook for sp; no-op when the hook pointer
+ * is NULL.
+ */
+static inline void
+ipsec_accel_spddel(struct secpolicy *sp)
+{
+	void (*p)(struct secpolicy *sp);
+
+	p = atomic_load_ptr(&ipsec_accel_spddel_p);
+	if (p != NULL)
+		p(sp);
+}
+/*
+ * Forward all arguments to the ipsec_accel_sa_lifetime_op_p hook and
+ * return its result.  Returns ENOTSUP when the hook pointer is NULL.
+ */
+static inline int
+ipsec_accel_sa_lifetime_op(struct secasvar *sav,
+    struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op,
+    struct rm_priotracker *sahtree_trackerp)
+{
+	int (*p)(struct secasvar *sav, struct seclifetime *lft_c, if_t ifp,
+	    enum IF_SA_CNT_WHICH op, struct rm_priotracker *sahtree_trackerp);
+
+	p = atomic_load_ptr(&ipsec_accel_sa_lifetime_op_p);
+	if (p != NULL)
+		return (p(sav, lft_c, ifp, op, sahtree_trackerp));
+	return (ENOTSUP);
+}
+/*
+ * Call the ipsec_accel_sync_p hook; no-op when the hook pointer is NULL.
+ */
+static inline void
+ipsec_accel_sync(void)
+{
+	void (*p)(void);
+
+	p = atomic_load_ptr(&ipsec_accel_sync_p);
+	if (p != NULL)
+		p();
+}
+/*
+ * Return the ipsec_accel_is_accel_sav_p hook's answer for sav, or false
+ * when the hook pointer is NULL.
+ */
+static inline bool
+ipsec_accel_is_accel_sav(struct secasvar *sav)
+{
+	bool (*p)(struct secasvar *sav);
+
+	p = atomic_load_ptr(&ipsec_accel_is_accel_sav_p);
+	if (p != NULL)
+		return (p(sav));
+	return (false);
+}
+/*
+ * Return the mbuf produced by the ipsec_accel_key_setaccelif_p hook for
+ * sav, or NULL when the hook pointer is NULL.
+ */
+static inline struct mbuf *
+ipsec_accel_key_setaccelif(struct secasvar *sav)
+{
+	struct mbuf *(*p)(struct secasvar *sav);
+
+	p = atomic_load_ptr(&ipsec_accel_key_setaccelif_p);
+	if (p != NULL)
+		return (p(sav));
+	return (NULL);
+}
+
+/* Without IPSEC_OFFLOAD the wrapper names below expand to nothing. */
+#else
+#define	ipsec_accel_sa_newkey(a)
+#define	ipsec_accel_forget_sav(a)
+#define	ipsec_accel_spdadd(a, b)
+#define	ipsec_accel_spddel(a)
+#define
ipsec_accel_sa_lifetime_op(a, b, c, d, e) +#define ipsec_accel_sync() +#define ipsec_accel_is_accel_sav(a) +#define ipsec_accel_key_setaccelif(a) +#endif + +void ipsec_accel_forget_sav_impl(struct secasvar *sav); +void ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp); +void ipsec_accel_spddel_impl(struct secpolicy *sp); + +#ifdef IPSEC_OFFLOAD +int ipsec_accel_input(struct mbuf *m, int offset, int proto); +bool ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, + struct inpcb *inp, struct secpolicy *sp, struct secasvar *sav, int af, + int mtu); +void ipsec_accel_forget_sav(struct secasvar *sav); +#else +#define ipsec_accel_input(a, b, c) (ENXIO) +#define ipsec_accel_output(a, b, c, d, e, f, g) (false) +#define ipsec_accel_forget_sav(a) +#endif + +struct ipsec_accel_in_tag *ipsec_accel_input_tag_lookup(const struct mbuf *); +void ipsec_accel_on_ifdown(struct ifnet *ifp); +void ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, + u_int drv_spi, uint64_t octets, uint64_t allocs); + +#endif /* _KERNEL */ + +#endif /* _NETIPSEC_IPSEC_OFFLOAD_H_ */ diff --git a/sys/netipsec/ipsec_offload.c b/sys/netipsec/ipsec_offload.c new file mode 100644 --- /dev/null +++ b/sys/netipsec/ipsec_offload.c @@ -0,0 +1,1061 @@ +/*- + * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef IPSEC_OFFLOAD + +static struct mtx ipsec_accel_sav_tmp; +static struct unrhdr *drv_spi_unr; +static struct mtx ipsec_accel_cnt_lock; + +struct ipsec_accel_install_newkey_tq { + struct secasvar *sav; + struct vnet *install_vnet; + struct task install_task; +}; + +struct ipsec_accel_forget_tq { + struct vnet *forget_vnet; + struct task forget_task; + struct secasvar *sav; +}; + +struct ifp_handle_sav { + CK_LIST_ENTRY(ifp_handle_sav) sav_link; + CK_LIST_ENTRY(ifp_handle_sav) sav_allh_link; + struct secasvar *sav; + struct ifnet *ifp; + void *ifdata; + uint64_t drv_spi; + uint32_t flags; + size_t hdr_ext_size; + uint64_t cnt_octets; + uint64_t cnt_allocs; +}; + +#define IFP_HS_HANDLED 0x00000001 +#define IFP_HS_REJECTED 0x00000002 +#define IFP_HS_INPUT 0x00000004 +#define IFP_HS_OUTPUT 0x00000008 +#define IFP_HS_MARKER 0x00000010 + 
+static CK_LIST_HEAD(, ifp_handle_sav) ipsec_accel_all_sav_handles;
+/*
+ * Record tying one security policy to one interface.  Instances are
+ * created by ipsec_accel_remember_sp().
+ */
+struct ifp_handle_sp {
+	CK_LIST_ENTRY(ifp_handle_sp) sp_link;	/* on sp->accel_ifps */
+	CK_LIST_ENTRY(ifp_handle_sp) sp_allh_link; /* on the global list */
+	struct secpolicy *sp;
+	struct ifnet *ifp;
+	void *ifdata;		/* filled in by the driver's if_spdadd */
+	uint32_t flags;		/* IFP_HP_* */
+};
+/* Values for ifp_handle_sp.flags. */
+#define	IFP_HP_HANDLED	0x00000001
+#define	IFP_HP_REJECTED	0x00000002
+#define	IFP_HP_MARKER	0x00000004
+/* Every ifp_handle_sp, linked through sp_allh_link. */
+static CK_LIST_HEAD(, ifp_handle_sp) ipsec_accel_all_sp_handles;
+/*
+ * Node allocator handed to PCTRIE_DEFINE() below.  Uses a non-sleeping,
+ * zeroing malloc() and runs pctrie_zone_init() on the new node; returns
+ * NULL when the allocation fails.
+ */
+static void *
+drvspi_sa_trie_alloc(struct pctrie *ptree)
+{
+	void *res;
+
+	res = malloc(pctrie_node_size(), M_IPSEC_MISC, M_ZERO | M_NOWAIT);
+	if (res != NULL)
+		pctrie_zone_init(res, 0, 0);
+	return (res);
+}
+/*
+ * Node release routine handed to PCTRIE_DEFINE() below; ptree is unused.
+ */
+static void
+drvspi_sa_trie_free(struct pctrie *ptree, void *node)
+{
+	free(node, M_IPSEC_MISC);
+}
+/* Trie of ifp_handle_sav records keyed by their drv_spi member. */
+PCTRIE_DEFINE(DRVSPI_SA, ifp_handle_sav, drv_spi,
+    drvspi_sa_trie_alloc, drvspi_sa_trie_free);
+static struct pctrie drv_spi_pctrie;
+/* Forward declarations for functions defined later in this file. */
+static void ipsec_accel_sa_newkey_impl(struct secasvar *sav);
+static int ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp,
+    u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires);
+static void ipsec_accel_forget_sav_clear(struct secasvar *sav);
+static struct ifp_handle_sav *ipsec_accel_is_accel_sav_ptr(struct secasvar *sav,
+    struct ifnet *ifp);
+static int ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav,
+    struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op,
+    struct rm_priotracker *sahtree_trackerp);
+static void ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m);
+static void ipsec_accel_sync_imp(void);
+static bool ipsec_accel_is_accel_sav_impl(struct secasvar *sav);
+static struct mbuf *ipsec_accel_key_setaccelif_impl(struct secasvar *sav);
+/*
+ * SYSINIT handler: set up the two mutexes and the drv_spi unit-number
+ * allocator (range IPSEC_ACCEL_DRV_SPI_MIN..IPSEC_ACCEL_DRV_SPI_MAX,
+ * protected by ipsec_accel_sav_tmp), then publish the hook pointers.
+ */
+static void
+ipsec_accel_init(void *arg)
+{
+	mtx_init(&ipsec_accel_sav_tmp, "ipasat", MTX_DEF, 0);
+	mtx_init(&ipsec_accel_cnt_lock, "ipascn", MTX_DEF, 0);
+	drv_spi_unr = new_unrhdr(IPSEC_ACCEL_DRV_SPI_MIN,
+	    IPSEC_ACCEL_DRV_SPI_MAX, &ipsec_accel_sav_tmp);
+
ipsec_accel_sa_newkey_p = ipsec_accel_sa_newkey_impl;
+	ipsec_accel_forget_sav_p = ipsec_accel_forget_sav_impl;
+	ipsec_accel_spdadd_p = ipsec_accel_spdadd_impl;
+	ipsec_accel_spddel_p = ipsec_accel_spddel_impl;
+	ipsec_accel_sa_lifetime_op_p = ipsec_accel_sa_lifetime_op_impl;
+	ipsec_accel_sync_p = ipsec_accel_sync_imp;
+	ipsec_accel_is_accel_sav_p = ipsec_accel_is_accel_sav_impl;
+	ipsec_accel_key_setaccelif_p = ipsec_accel_key_setaccelif_impl;
+	pctrie_init(&drv_spi_pctrie);
+}
+SYSINIT(ipsec_accel_init, SI_SUB_VNET_DONE, SI_ORDER_ANY,
+    ipsec_accel_init, NULL);
+
+/*
+ * SYSUNINIT handler: clear every hook pointer first, call
+ * ipsec_accel_sync_imp(), then release the drv_spi allocator and
+ * destroy the two mutexes.
+ */
+static void
+ipsec_accel_fini(void *arg)
+{
+	ipsec_accel_sa_newkey_p = NULL;
+	ipsec_accel_forget_sav_p = NULL;
+	ipsec_accel_spdadd_p = NULL;
+	ipsec_accel_spddel_p = NULL;
+	ipsec_accel_sa_lifetime_op_p = NULL;
+	ipsec_accel_sync_p = NULL;
+	ipsec_accel_is_accel_sav_p = NULL;
+	ipsec_accel_key_setaccelif_p = NULL;
+	ipsec_accel_sync_imp();
+	clean_unrhdr(drv_spi_unr);	/* avoid panic, should go later */
+	clear_unrhdr(drv_spi_unr);
+	delete_unrhdr(drv_spi_unr);
+	mtx_destroy(&ipsec_accel_sav_tmp);
+	mtx_destroy(&ipsec_accel_cnt_lock);
+}
+SYSUNINIT(ipsec_accel_fini, SI_SUB_VNET_DONE, SI_ORDER_ANY,
+    ipsec_accel_fini, NULL);
+
+/*
+ * Make sure sav->accel_forget_tq holds a preallocated
+ * struct ipsec_accel_forget_tq.  The allocation may sleep (M_WAITOK).
+ * If another thread installs its own buffer first, ours is freed.
+ */
+static void
+ipsec_accel_alloc_forget_tq(struct secasvar *sav)
+{
+	void *ftq;
+
+	if (sav->accel_forget_tq != 0)
+		return;
+
+	ftq = malloc(sizeof(struct ipsec_accel_forget_tq), M_TEMP, M_WAITOK);
+	if (!atomic_cmpset_ptr(&sav->accel_forget_tq, 0, (uintptr_t)ftq))
+		free(ftq, M_TEMP);
+}
+
+/*
+ * if_foreach_sleep() filter: accept only interfaces that have the
+ * IFCAP2_IPSEC_OFFLOAD capability enabled and provide if_sa_newkey.
+ * arg is unused.
+ */
+static bool
+ipsec_accel_sa_install_match(if_t ifp, void *arg)
+{
+	if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0)
+		return (false);
+	if (ifp->if_ipsec_accel_m->if_sa_newkey == NULL) {
+		printf("driver bug ifp %s if_sa_newkey NULL\n",
+		    if_name(ifp));
+		return (false);
+	}
+	return (true);
+}
+
+/*
+ * if_foreach_sleep() callback: offer the SA in the
+ * struct ipsec_accel_install_newkey_tq pointed to by arg to one
+ * interface and record the outcome in an ifp_handle_sav.
+ *
+ * Returns 0 on success, ENOMEM when no drv_spi can be allocated, or the
+ * error from the driver or from ipsec_accel_handle_sav().
+ *
+ * The drv_spi allocation is checked before the value is used, so the
+ * alloc_unr() failure value can never end up in a handle and later be
+ * passed to free_unr().  Whenever no handle ends up owning drv_spi, the
+ * number is given back to the allocator; if the driver had already
+ * accepted the SA, it is deinstalled as well so that the driver does
+ * not keep state under a number that may be reused.
+ */
+static int
+ipsec_accel_sa_newkey_cb(if_t ifp, void *arg)
+{
+	struct ipsec_accel_install_newkey_tq *tq;
+	void *priv;
+	u_int drv_spi;
+	int error;
+
+	tq = arg;
+
+	printf("ipsec_accel_sa_newkey_act: ifp %s h %p spi %#x "
+	    "flags %#x seq %d\n",
+	    if_name(ifp), ifp->if_ipsec_accel_m->if_sa_newkey,
+	    be32toh(tq->sav->spi), tq->sav->flags, tq->sav->seq);
+	priv = NULL;
+	drv_spi = alloc_unr(drv_spi_unr);
+	if (drv_spi == -1) {
+		/* XXXKIB */
+		printf("ipsec_accel_sa_install_newkey: cannot alloc "
+		    "drv_spi if %s spi %#x\n", if_name(ifp),
+		    be32toh(tq->sav->spi));
+		return (ENOMEM);
+	}
+	if (tq->sav->accel_ifname != NULL &&
+	    strcmp(tq->sav->accel_ifname, if_name(ifp)) != 0) {
+		/* SA is pinned to another interface: remember the refusal. */
+		error = ipsec_accel_handle_sav(tq->sav,
+		    ifp, drv_spi, priv, IFP_HS_REJECTED, NULL);
+		if (error != 0)
+			free_unr(drv_spi_unr, drv_spi);
+		goto out;
+	}
+	error = ifp->if_ipsec_accel_m->if_sa_newkey(ifp, tq->sav,
+	    drv_spi, &priv);
+	if (error != 0) {
+		if (error == EOPNOTSUPP) {
+			printf("ipsec_accel_sa_newkey: driver "
+			    "refused sa if %s spi %#x\n",
+			    if_name(ifp), be32toh(tq->sav->spi));
+			error = ipsec_accel_handle_sav(tq->sav,
+			    ifp, drv_spi, priv, IFP_HS_REJECTED, NULL);
+			if (error != 0)
+				free_unr(drv_spi_unr, drv_spi);
+			/* XXXKIB */
+		} else {
+			printf("ipsec_accel_sa_newkey: driver "
+			    "error %d if %s spi %#x\n",
+			    error, if_name(ifp), be32toh(tq->sav->spi));
+			/* No handle was created; drv_spi has no owner. */
+			free_unr(drv_spi_unr, drv_spi);
+			/* XXXKIB */
+		}
+	} else {
+		error = ipsec_accel_handle_sav(tq->sav, ifp,
+		    drv_spi, priv, IFP_HS_HANDLED, NULL);
+		if (error != 0) {
+			/* XXXKIB */
+			printf("ipsec_accel_sa_newkey: handle_sav "
+			    "err %d if %s spi %#x\n", error,
+			    if_name(ifp), be32toh(tq->sav->spi));
+			/*
+			 * The handle was never published, so nothing can
+			 * reference this installation; undo it directly.
+			 */
+			ifp->if_ipsec_accel_m->if_sa_deinstall(ifp,
+			    drv_spi, priv);
+			free_unr(drv_spi_unr, drv_spi);
+		}
+	}
+out:
+	return (error);
+}
+
+/*
+ * Task handler queued by ipsec_accel_sa_newkey_impl(); context is the
+ * struct ipsec_accel_install_newkey_tq allocated there.  Installation is
+ * attempted only when neither SADB_KEY_ACCEL_INST nor
+ * SADB_KEY_ACCEL_DEINST is set and the SA state is SADB_SASTATE_MATURE.
+ * The mutex is dropped around the interface walk.
+ */
+static void
+ipsec_accel_sa_newkey_act(void *context, int pending)
+{
+	struct ipsec_accel_install_newkey_tq *tq;
+	void *tqf;
+	struct secasvar *sav;
+
+	tq = context;
+	tqf = NULL;
+	sav = tq->sav;
+	CURVNET_SET(tq->install_vnet);
+	mtx_lock(&ipsec_accel_sav_tmp);
+	if ((sav->accel_flags & (SADB_KEY_ACCEL_INST |
+	    SADB_KEY_ACCEL_DEINST)) == 0 &&
+	    sav->state == SADB_SASTATE_MATURE) {
+		sav->accel_flags |= SADB_KEY_ACCEL_INST;
+		mtx_unlock(&ipsec_accel_sav_tmp);
+		if_foreach_sleep(ipsec_accel_sa_install_match, context,
+		    ipsec_accel_sa_newkey_cb, context);
+
ipsec_accel_alloc_forget_tq(sav);
+		mtx_lock(&ipsec_accel_sav_tmp);
+
+		/*
+		 * If ipsec_accel_forget_sav() raced with us and set
+		 * the flag, do its work.  Its task cannot execute in
+		 * parallel since taskqueue_thread is single-threaded.
+		 */
+		if ((sav->accel_flags & SADB_KEY_ACCEL_DEINST) != 0) {
+			tqf = (void *)sav->accel_forget_tq;
+			sav->accel_forget_tq = 0;
+			ipsec_accel_forget_sav_clear(sav);
+		}
+	}
+	mtx_unlock(&ipsec_accel_sav_tmp);
+	key_freesav(&tq->sav);	/* drops the ref taken when the task was queued */
+	CURVNET_RESTORE();
+	free(tq, M_TEMP);
+	free(tqf, M_TEMP);
+}
+/*
+ * Hook implementation behind ipsec_accel_sa_newkey(): queue
+ * ipsec_accel_sa_newkey_act() on taskqueue_thread for sav.
+ *
+ * Does nothing when SADB_KEY_ACCEL_INST or SADB_KEY_ACCEL_DEINST is
+ * already set.  The request is allocated with M_NOWAIT; on failure a
+ * message is printed and no task is queued.  A reference on sav is taken
+ * for the task and dropped by the task handler.
+ */
+static void
+ipsec_accel_sa_newkey_impl(struct secasvar *sav)
+{
+	struct ipsec_accel_install_newkey_tq *tq;
+
+	if ((sav->accel_flags & (SADB_KEY_ACCEL_INST |
+	    SADB_KEY_ACCEL_DEINST)) != 0)
+		return;
+
+	printf(
+	    "ipsec_accel_sa_install_newkey: spi %#x flags %#x seq %d\n",
+	    be32toh(sav->spi), sav->flags, sav->seq);
+
+	tq = malloc(sizeof(*tq), M_TEMP, M_NOWAIT);
+	if (tq == NULL) {
+		printf("ipsec_accel_sa_install_newkey: no memory for tq, "
+		    "spi %#x\n", be32toh(sav->spi));
+		/* XXXKIB */
+		return;
+	}
+
+	refcount_acquire(&sav->refcnt);
+
+	TASK_INIT(&tq->install_task, 0, ipsec_accel_sa_newkey_act, tq);
+	tq->sav = sav;
+	tq->install_vnet = curthread->td_vnet;	/* XXXKIB liveness */
+	taskqueue_enqueue(taskqueue_thread, &tq->install_task);
+}
+/*
+ * Create an ifp_handle_sav recording how ifp treats sav and link it into
+ * sav->accel_ifps, the global handle list and the drv_spi trie.
+ *
+ * flags must contain exactly one of IFP_HS_HANDLED and IFP_HS_REJECTED
+ * (checked with MPASS).  The handle is allocated with M_WAITOK, so the
+ * caller must be able to sleep.  hdr_ext_size is filled in only when
+ * IFP_HS_OUTPUT is set in flags.
+ *
+ * Returns 0 on success, EALREADY when sav already has a handle for ifp,
+ * or the error from the trie insertion.  When ires is not NULL it
+ * receives the new handle, or NULL on failure.
+ */
+static int
+ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp,
+    u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires)
+{
+	struct ifp_handle_sav *ihs, *i;
+	int error;
+
+	MPASS(__bitcount(flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == 1);
+
+	ihs = malloc(sizeof(*ihs), M_IPSEC_MISC, M_WAITOK | M_ZERO);
+	ihs->ifp = ifp;
+	ihs->sav = sav;
+	ihs->drv_spi = drv_spi;
+	ihs->ifdata = priv;
+	ihs->flags = flags;
+	if ((flags & IFP_HS_OUTPUT) != 0)
+		ihs->hdr_ext_size = esp_hdrsiz(sav);
+	mtx_lock(&ipsec_accel_sav_tmp);
+	CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
+		if (i->ifp == ifp) {
+			error = EALREADY;
+			goto errout;
+		}
+	}
+	error =
DRVSPI_SA_PCTRIE_INSERT(&drv_spi_pctrie, ihs);
+	if (error != 0)
+		goto errout;
+	if_ref(ihs->ifp);	/* released in ipsec_accel_forget_handle_sav() */
+	CK_LIST_INSERT_HEAD(&sav->accel_ifps, ihs, sav_link);
+	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, ihs, sav_allh_link);
+	mtx_unlock(&ipsec_accel_sav_tmp);
+	if (ires != NULL)
+		*ires = ihs;
+	return (0);
+errout:
+	mtx_unlock(&ipsec_accel_sav_tmp);
+	free(ihs, M_IPSEC_MISC);
+	if (ires != NULL)
+		*ires = NULL;
+	return (error);
+}
+/*
+ * Tear down one SA handle.
+ *
+ * Must be called with ipsec_accel_sav_tmp held (asserted).  The handle is
+ * unlinked from both lists and from the drv_spi trie, then the mutex is
+ * DROPPED for NET_EPOCH_WAIT() and the driver call, and taken again
+ * before returning.  Callers must therefore not rely on list state
+ * across this call.
+ *
+ * if_sa_deinstall is invoked only for handles flagged IFP_HS_HANDLED and
+ * not IFP_HS_REJECTED.  The interface reference, the drv_spi number and
+ * the handle itself are released; when freesav is true a reference on
+ * the SA is dropped too.
+ */
+static void
+ipsec_accel_forget_handle_sav(struct ifp_handle_sav *i, bool freesav)
+{
+	struct ifnet *ifp;
+	struct secasvar *sav;
+
+	mtx_assert(&ipsec_accel_sav_tmp, MA_OWNED);
+
+	CK_LIST_REMOVE(i, sav_link);
+	CK_LIST_REMOVE(i, sav_allh_link);
+	DRVSPI_SA_PCTRIE_REMOVE(&drv_spi_pctrie, i->drv_spi);
+	mtx_unlock(&ipsec_accel_sav_tmp);
+	NET_EPOCH_WAIT();
+	ifp = i->ifp;
+	sav = i->sav;
+	if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) ==
+	    IFP_HS_HANDLED) {
+		printf("sa deinstall %s %p spi %#x ifl %#x\n",
+		    if_name(ifp), sav, be32toh(sav->spi), i->flags);
+		ifp->if_ipsec_accel_m->if_sa_deinstall(ifp,
+		    i->drv_spi, i->ifdata);
+	}
+	if_rele(ifp);
+	free_unr(drv_spi_unr, i->drv_spi);
+	free(i, M_IPSEC_MISC);
+	if (freesav)
+		key_freesav(&sav);
+	mtx_lock(&ipsec_accel_sav_tmp);
+}
+/*
+ * Tear down every handle on sav->accel_ifps, one at a time, without
+ * dropping SA references.  The caller must hold ipsec_accel_sav_tmp; it
+ * is released and re-taken inside ipsec_accel_forget_handle_sav(), which
+ * is why the list head is re-read on every iteration.
+ */
+static void
+ipsec_accel_forget_sav_clear(struct secasvar *sav)
+{
+	struct ifp_handle_sav *i;
+
+	for (;;) {
+		i = CK_LIST_FIRST(&sav->accel_ifps);
+		if (i == NULL)
+			break;
+		ipsec_accel_forget_handle_sav(i, false);
+	}
+}
+/*
+ * Task handler queued by ipsec_accel_forget_sav_impl(); arg is the
+ * struct ipsec_accel_forget_tq.  Clears all handles of the SA, drops the
+ * SA reference taken when the task was queued and frees the request.
+ */
+static void
+ipsec_accel_forget_sav_act(void *arg, int pending)
+{
+	struct ipsec_accel_forget_tq *tq;
+	struct secasvar *sav;
+
+	tq = arg;
+	sav = tq->sav;
+	CURVNET_SET(tq->forget_vnet);
+	mtx_lock(&ipsec_accel_sav_tmp);
+	ipsec_accel_forget_sav_clear(sav);
+	mtx_unlock(&ipsec_accel_sav_tmp);
+	key_freesav(&sav);
+	CURVNET_RESTORE();
+	free(tq, M_TEMP);
+}
+/*
+ * Hook implementation behind ipsec_accel_forget_sav(): mark sav with
+ * SADB_KEY_ACCEL_DEINST and, if a preallocated forget request can be
+ * claimed from sav->accel_forget_tq, queue ipsec_accel_forget_sav_act()
+ * with it.  When no request is available only the flag is set.
+ */
+void
+ipsec_accel_forget_sav_impl(struct secasvar *sav)
+{
+	struct ipsec_accel_forget_tq *tq;
+
+	mtx_lock(&ipsec_accel_sav_tmp);
+	sav->accel_flags |=
SADB_KEY_ACCEL_DEINST;
+	tq = (void *)atomic_load_ptr(&sav->accel_forget_tq);
+	if (tq == NULL || !atomic_cmpset_ptr(&sav->accel_forget_tq,
+	    (uintptr_t)tq, 0)) {
+		mtx_unlock(&ipsec_accel_sav_tmp);
+		return;
+	}
+	mtx_unlock(&ipsec_accel_sav_tmp);
+
+	refcount_acquire(&sav->refcnt);	/* dropped by the task handler */
+	TASK_INIT(&tq->forget_task, 0, ipsec_accel_forget_sav_act, tq);
+	tq->forget_vnet = curthread->td_vnet;
+	tq->sav = sav;
+	taskqueue_enqueue(taskqueue_thread, &tq->forget_task);
+}
+/*
+ * Tear down every SA handle that belongs to ifp.
+ *
+ * The global handle list is walked with a zeroed marker element (flags
+ * IFP_HS_MARKER) that is moved past each entry before the entry is
+ * examined.  This keeps the position valid even though
+ * ipsec_accel_forget_handle_sav() drops and re-takes the mutex.  The
+ * marker allocation uses M_WAITOK and may sleep.
+ */
+static void
+ipsec_accel_on_ifdown_sav(struct ifnet *ifp)
+{
+	struct ifp_handle_sav *i, *marker;
+
+	marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO);
+	marker->flags = IFP_HS_MARKER;
+
+	mtx_lock(&ipsec_accel_sav_tmp);
+	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, marker,
+	    sav_allh_link);
+	for (;;) {
+		i = CK_LIST_NEXT(marker, sav_allh_link);
+		if (i == NULL)
+			break;
+		CK_LIST_REMOVE(marker, sav_allh_link);
+		CK_LIST_INSERT_AFTER(i, marker, sav_allh_link);
+		if (i->ifp == ifp) {
+			refcount_acquire(&i->sav->refcnt); /* XXXKIB wrap ?
*/
+			ipsec_accel_forget_handle_sav(i, true);
+		}
+	}
+	CK_LIST_REMOVE(marker, sav_allh_link);
+	mtx_unlock(&ipsec_accel_sav_tmp);
+	free(marker, M_IPSEC_MISC);
+}
+/*
+ * Return the handle linking sav to ifp, or NULL when ifp does not have
+ * IFCAP2_IPSEC_OFFLOAD enabled or no handle for ifp is on
+ * sav->accel_ifps.  No epoch assertion is made here.
+ */
+static struct ifp_handle_sav *
+ipsec_accel_is_accel_sav_ptr_raw(struct secasvar *sav, struct ifnet *ifp)
+{
+	struct ifp_handle_sav *i;
+
+	if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0)
+		return (NULL);
+	CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) {
+		if (i->ifp == ifp)
+			return (i);
+	}
+	return (NULL);
+}
+/*
+ * Same lookup as ipsec_accel_is_accel_sav_ptr_raw(), but asserts that
+ * the caller is inside the network epoch.
+ */
+static struct ifp_handle_sav *
+ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, struct ifnet *ifp)
+{
+	NET_EPOCH_ASSERT();
+	return (ipsec_accel_is_accel_sav_ptr_raw(sav, ifp));
+}
+/*
+ * Hook implementation behind ipsec_accel_is_accel_sav(): true when sav
+ * has at least one handle on accel_ifps (handled or rejected alike).
+ */
+static bool
+ipsec_accel_is_accel_sav_impl(struct secasvar *sav)
+{
+	return (!CK_LIST_EMPTY(&sav->accel_ifps));
+}
+/*
+ * Map a driver SPI to its SA through the drv_spi trie; NULL when the
+ * trie has no entry for drv_spi.
+ */
+static struct secasvar *
+ipsec_accel_drvspi_to_sa(u_int drv_spi)
+{
+	struct ifp_handle_sav *i;
+
+	i = DRVSPI_SA_PCTRIE_LOOKUP(&drv_spi_pctrie, drv_spi);
+	if (i == NULL)
+		return (NULL);
+	return (i->sav);
+}
+/*
+ * Return the handle linking sp to ifp, or NULL when sp->accel_ifps has
+ * no entry for ifp.
+ */
+static struct ifp_handle_sp *
+ipsec_accel_find_accel_sp(struct secpolicy *sp, if_t ifp)
+{
+	struct ifp_handle_sp *i;
+
+	CK_LIST_FOREACH(i, &sp->accel_ifps, sp_link) {
+		if (i->ifp == ifp)
+			return (i);
+	}
+	return (NULL);
+}
+/*
+ * True when sp already has a handle for ifp.
+ */
+static bool
+ipsec_accel_is_accel_sp(struct secpolicy *sp, if_t ifp)
+{
+	return (ipsec_accel_find_accel_sp(sp, ifp) != NULL);
+}
+/*
+ * Allocate a handle linking sp to ifp, flag it IFP_HP_HANDLED and insert
+ * it into sp->accel_ifps and the global policy handle list.  Takes a
+ * reference on ifp.  The allocation uses M_WAITOK and may sleep.
+ *
+ * The new handle is stored in *ip.  Always returns 0.
+ */
+static int
+ipsec_accel_remember_sp(struct secpolicy *sp, if_t ifp,
+    struct ifp_handle_sp **ip)
+{
+	struct ifp_handle_sp *i;
+
+	i = malloc(sizeof(*i), M_IPSEC_MISC, M_WAITOK | M_ZERO);
+	i->sp = sp;
+	i->ifp = ifp;
+	if_ref(ifp);
+	i->flags = IFP_HP_HANDLED;
+	mtx_lock(&ipsec_accel_sav_tmp);
+	CK_LIST_INSERT_HEAD(&sp->accel_ifps, i, sp_link);
+	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, i, sp_allh_link);
+	mtx_unlock(&ipsec_accel_sav_tmp);
+	*ip = i;
+	return (0);
+}
+/*
+ * if_foreach_sleep() filter for policy installation; arg is the
+ * struct secpolicy.  Accepts an interface only when offload is enabled,
+ * the driver provides if_spdadd, the policy is not pinned to a different
+ * interface name, and the policy has no handle for it yet.
+ */
+static bool
+ipsec_accel_spdadd_match(if_t ifp, void *arg)
+{
+	struct secpolicy *sp;
+
+	if ((ifp->if_capenable2 &
IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0 ||
+	    ifp->if_ipsec_accel_m->if_spdadd == NULL)
+		return (false);
+	sp = arg;
+	if (sp->accel_ifname != NULL &&
+	    strcmp(sp->accel_ifname, if_name(ifp)) != 0)
+		return (false);
+	if (ipsec_accel_is_accel_sp(sp, ifp))
+		return (false);
+	return (true);
+}
+/*
+ * if_foreach_sleep() callback for policy installation; arg is the
+ * struct secpolicy.  A handle is recorded first, then the driver's
+ * if_spdadd is called with the inpcb stashed in
+ * sp->ipsec_accel_add_sp_inp.  If the driver fails, the handle stays
+ * linked but gains IFP_HP_REJECTED.
+ *
+ * Returns 0 on success or the error from the handle creation or driver.
+ */
+static int
+ipsec_accel_spdadd_cb(if_t ifp, void *arg)
+{
+	struct secpolicy *sp;
+	struct inpcb *inp;
+	struct ifp_handle_sp *i;
+	int error;
+
+	sp = arg;
+	inp = sp->ipsec_accel_add_sp_inp;
+	printf("ipsec_accel_spdadd_cb: ifp %s m %p sp %p inp %p\n",
+	    if_name(ifp), ifp->if_ipsec_accel_m->if_spdadd, sp, inp);
+	error = ipsec_accel_remember_sp(sp, ifp, &i);
+	if (error != 0) {
+		printf("ipsec_accel_spdadd: %s if_spdadd %p remember res %d\n",
+		    if_name(ifp), sp, error);
+		return (error);
+	}
+	error = ifp->if_ipsec_accel_m->if_spdadd(ifp, sp, inp, &i->ifdata);
+	if (error != 0) {
+		i->flags |= IFP_HP_REJECTED;
+		printf("ipsec_accel_spdadd: %s if_spdadd %p res %d\n",
+		    if_name(ifp), sp, error);
+	}
+	return (error);
+}
+/*
+ * Task handler queued by ipsec_accel_spdadd_impl(); arg is the
+ * struct secpolicy.  Offers the policy to the matching interfaces, then
+ * releases the inpcb reference (if any) and the policy reference that
+ * were taken when the task was queued.
+ */
+static void
+ipsec_accel_spdadd_act(void *arg, int pending)
+{
+	struct secpolicy *sp;
+	struct inpcb *inp;
+
+	sp = arg;
+	CURVNET_SET(sp->accel_add_tq.adddel_vnet);
+	if_foreach_sleep(ipsec_accel_spdadd_match, arg,
+	    ipsec_accel_spdadd_cb, arg);
+	inp = sp->ipsec_accel_add_sp_inp;
+	if (inp != NULL) {
+		INP_WLOCK(inp);
+		if (!in_pcbrele_wlocked(inp))
+			INP_WUNLOCK(inp);
+		sp->ipsec_accel_add_sp_inp = NULL;
+	}
+	CURVNET_RESTORE();
+	key_freesp(&sp);
+}
+/*
+ * Hook implementation behind ipsec_accel_spdadd(): queue
+ * ipsec_accel_spdadd_act() for sp on taskqueue_thread.
+ *
+ * Ignored when sp is NULL, or when sp->tcount is 0 and inp is NULL.
+ * The adddel_scheduled flag is claimed with a compare-and-set; if it is
+ * already set the call returns without queueing.  References on inp
+ * (when given) and on sp are taken for the task.
+ */
+void
+ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp)
+{
+	struct ipsec_accel_adddel_sp_tq *tq;
+
+	if (sp == NULL)
+		return;
+	if (sp->tcount == 0 && inp == NULL)
+		return;
+	tq = &sp->accel_add_tq;
+	if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0)
+		return;
+	tq->adddel_vnet = curthread->td_vnet;
+	sp->ipsec_accel_add_sp_inp = inp;
+	if (inp != NULL)
+		in_pcbref(inp);
+	TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spdadd_act, sp);
+	key_addref(sp);
+
taskqueue_enqueue(taskqueue_thread, &tq->adddel_task);
+}
+
+/*
+ * Task handler queued by ipsec_accel_spddel_impl(); arg is the
+ * struct secpolicy.  Removes every handle from sp->accel_ifps.  For each
+ * one the mutex is dropped for NET_EPOCH_WAIT() and the driver's
+ * if_spddel, which is called only for handles flagged IFP_HP_HANDLED and
+ * not IFP_HP_REJECTED.  Finally drops the policy reference taken when
+ * the task was queued.
+ */
+static void
+ipsec_accel_spddel_act(void *arg, int pending)
+{
+	struct ifp_handle_sp *i;
+	struct secpolicy *sp;
+	int error;
+
+	sp = arg;
+	CURVNET_SET(sp->accel_del_tq.adddel_vnet);
+	mtx_lock(&ipsec_accel_sav_tmp);
+	for (;;) {
+		i = CK_LIST_FIRST(&sp->accel_ifps);
+		if (i == NULL)
+			break;
+		CK_LIST_REMOVE(i, sp_link);
+		CK_LIST_REMOVE(i, sp_allh_link);
+		mtx_unlock(&ipsec_accel_sav_tmp);
+		NET_EPOCH_WAIT();
+		if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) ==
+		    IFP_HP_HANDLED) {
+			printf("spd deinstall %s %p\n", if_name(i->ifp), sp);
+			error = i->ifp->if_ipsec_accel_m->if_spddel(i->ifp,
+			    sp, i->ifdata);
+			if (error != 0) {
+				printf(
+		    "ipsec_accel_spddel: %s if_spddel %p res %d\n",
+				    if_name(i->ifp), sp, error);
+			}
+		}
+		if_rele(i->ifp);
+		free(i, M_IPSEC_MISC);
+		mtx_lock(&ipsec_accel_sav_tmp);
+	}
+	mtx_unlock(&ipsec_accel_sav_tmp);
+	key_freesp(&sp);
+	CURVNET_RESTORE();
+}
+
+/*
+ * Hook implementation behind ipsec_accel_spddel(): queue
+ * ipsec_accel_spddel_act() for sp on taskqueue_thread.  Ignored when sp
+ * is NULL or when the delete task's adddel_scheduled flag is already
+ * set.  A policy reference is taken for the task.
+ */
+void
+ipsec_accel_spddel_impl(struct secpolicy *sp)
+{
+	struct ipsec_accel_adddel_sp_tq *tq;
+
+	if (sp == NULL)
+		return;
+
+	tq = &sp->accel_del_tq;
+	if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0)
+		return;
+	tq->adddel_vnet = curthread->td_vnet;
+	TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spddel_act, sp);
+	key_addref(sp);
+	taskqueue_enqueue(taskqueue_thread, &tq->adddel_task);
+}
+
+/*
+ * Tear down every policy handle that belongs to ifp.
+ *
+ * The global policy handle list is walked with a zeroed marker element
+ * that is moved past each entry before the entry is examined, so the
+ * position survives the mutex being dropped for NET_EPOCH_WAIT() and the
+ * driver call.  The marker allocation uses M_WAITOK and may sleep.
+ *
+ * The marker carries IFP_HP_MARKER, the flag that belongs to
+ * struct ifp_handle_sp.  The result of if_spddel is examined only inside
+ * the branch that actually calls the driver, matching
+ * ipsec_accel_spddel_act(); handles that were never installed have no
+ * result to report.
+ */
+static void
+ipsec_accel_on_ifdown_sp(struct ifnet *ifp)
+{
+	struct ifp_handle_sp *i, *marker;
+	struct secpolicy *sp;
+	int error;
+
+	marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO);
+	marker->flags = IFP_HP_MARKER;
+
+	mtx_lock(&ipsec_accel_sav_tmp);
+	CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, marker,
+	    sp_allh_link);
+	for (;;) {
+		i = CK_LIST_NEXT(marker, sp_allh_link);
+		if (i == NULL)
+			break;
+		CK_LIST_REMOVE(marker, sp_allh_link);
+		CK_LIST_INSERT_AFTER(i, marker, sp_allh_link);
+		if (i->ifp != ifp)
+			continue;
+
+		sp = i->sp;
+		key_addref(sp);	/* keep sp alive while unlocked */
+		CK_LIST_REMOVE(i, sp_link);
+		CK_LIST_REMOVE(i, sp_allh_link);
+		mtx_unlock(&ipsec_accel_sav_tmp);
+		NET_EPOCH_WAIT();
+		if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) ==
+		    IFP_HP_HANDLED) {
+			printf("spd deinstall %s %p\n", if_name(ifp), sp);
+			error = ifp->if_ipsec_accel_m->if_spddel(ifp,
+			    sp, i->ifdata);
+			if (error != 0) {
+				printf(
+		    "ipsec_accel_on_ifdown_sp: %s if_spddel %p res %d\n",
+				    if_name(ifp), sp, error);
+			}
+		}
+		key_freesp(&sp);
+		if_rele(ifp);
+		free(i, M_IPSEC_MISC);
+		mtx_lock(&ipsec_accel_sav_tmp);
+	}
+	CK_LIST_REMOVE(marker, sp_allh_link);
+	mtx_unlock(&ipsec_accel_sav_tmp);
+	free(marker, M_IPSEC_MISC);
+}
+
+/*
+ * Drop all offload state tied to ifp: policy handles first, then SA
+ * handles.
+ */
+void
+ipsec_accel_on_ifdown(struct ifnet *ifp)
+{
+	ipsec_accel_on_ifdown_sp(ifp);
+	ipsec_accel_on_ifdown_sav(ifp);
+}
+
+/*
+ * Check whether the packet in m, once the ESP header, IV, padding and
+ * authenticator for sav are added after the first skip bytes, still fits
+ * in mtu.
+ */
+static bool
+ipsec_accel_output_pad(struct mbuf *m, struct secasvar *sav, int skip, int mtu)
+{
+	int alen, blks, hlen, padding, rlen;
+
+	rlen = m->m_pkthdr.len - skip;
+	hlen = ((sav->flags & SADB_X_EXT_OLD) != 0 ? sizeof(struct esp) :
+	    sizeof(struct newesp)) + sav->ivlen;
+	blks = MAX(4, SAV_ISCTR(sav) && VNET(esp_ctr_compatibility) ?
+	    sav->tdb_encalgxform->native_blocksize :
+	    sav->tdb_encalgxform->blocksize);
+	padding = ((blks - ((rlen + 2) % blks)) % blks) + 2;
+	alen = xform_ah_authsize(sav->tdb_authalgxform);
+
+	return (skip + hlen + rlen + padding + alen <= mtu);
+}
+/*
+ * Attach a PACKET_TAG_IPSEC_ACCEL_OUT tag carrying drv_spi to m.  The
+ * tag is allocated with M_NOWAIT; returns false when that fails, true
+ * otherwise.
+ */
+static bool
+ipsec_accel_output_tag(struct mbuf *m, u_int drv_spi)
+{
+	struct ipsec_accel_out_tag *tag;
+
+	tag = (struct ipsec_accel_out_tag *)m_tag_get(
+	    PACKET_TAG_IPSEC_ACCEL_OUT, sizeof(*tag), M_NOWAIT);
+	if (tag == NULL)
+		return (false);
+	tag->drv_spi = drv_spi;
+	m_tag_prepend(m, &tag->tag);
+	return (true);
+}
+/*
+ * Try to hand an outbound packet to the offload path of ifp.
+ *
+ * Returns false when ifp is NULL, when sav has no handle for ifp, when a
+ * non-TSO packet would not fit in mtu after encapsulation, or when the
+ * output tag cannot be allocated.
+ *
+ * With sav == NULL the packet is tagged IPSEC_ACCEL_DRV_SPI_BYPASS and
+ * the result of the tagging is returned; no references are released on
+ * that path.  Otherwise, on success the packet is tagged with the
+ * handle's drv_spi, the transfer is accounted on sav, the references on
+ * sav and (if not NULL) sp are released, and true is returned.
+ *
+ * Must be called inside the network epoch (asserted).  inp is unused.
+ */
+bool
+ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp,
+    struct secpolicy *sp, struct secasvar *sav, int af, int mtu)
+{
+	struct ifp_handle_sav *i;
+	struct ip *ip;
+	u_long ip_len, skip;
+
+	if (ifp == NULL)
+		return (false);
+
+	M_ASSERTPKTHDR(m);
+	NET_EPOCH_ASSERT();
+
+	if (sav == NULL)
+		return (ipsec_accel_output_tag(m, IPSEC_ACCEL_DRV_SPI_BYPASS));
+
+	i = ipsec_accel_is_accel_sav_ptr(sav, ifp);
+	if (i == NULL)
+		return (false);
+
+	if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
+		ip_len = m->m_pkthdr.len;
+		if (ip_len + i->hdr_ext_size > mtu)
+			return (false);
+		switch (af) {
+		case AF_INET:
+			ip = mtod(m, struct ip *);
+			skip = ip->ip_hl << 2;
+			break;
+		case AF_INET6:
+			skip = sizeof(struct ip6_hdr);
+			break;
+		default:
+			__unreachable();
+		}
+		if (!ipsec_accel_output_pad(m, sav, skip, mtu))
+			return (false);
+	}
+
+	if (!ipsec_accel_output_tag(m, i->drv_spi))
+		return (false);
+
+	ipsec_accel_sa_recordxfer(sav, m);
+	key_freesav(&sav);
+	if (sp != NULL)
+		key_freesp(&sp);
+
+	return (true);
+}
+/*
+ * Return the PACKET_TAG_IPSEC_ACCEL_IN tag attached to m, or NULL when
+ * the mbuf carries none.  The const qualifier on m is cast away only to
+ * satisfy m_tag_find().
+ */
+struct ipsec_accel_in_tag *
+ipsec_accel_input_tag_lookup(const struct mbuf *m)
+{
+	struct ipsec_accel_in_tag *tag;
+	struct m_tag *xtag;
+
+	xtag = m_tag_find(__DECONST(struct mbuf *, m),
+	    PACKET_TAG_IPSEC_ACCEL_IN, NULL);
+	if (xtag == NULL)
+		return (NULL);
+	tag = __containerof(xtag, struct ipsec_accel_in_tag, tag);
+	return (tag);
+}
+
+int
+ipsec_accel_input(struct mbuf *m, int offset, int proto) +{ + struct secasvar *sav; + struct ipsec_accel_in_tag *tag; + + tag = ipsec_accel_input_tag_lookup(m); + if (tag == NULL) + return (ENXIO); /* no offload tag on this mbuf */ + + if (tag->drv_spi < IPSEC_ACCEL_DRV_SPI_MIN || + tag->drv_spi > IPSEC_ACCEL_DRV_SPI_MAX) { + printf("if %s mbuf %p drv_spi %d invalid, packet dropped\n", + (m->m_flags & M_PKTHDR) != 0 ? if_name(m->m_pkthdr.rcvif) : + "", m, tag->drv_spi); + m_freem(m); + return (EINPROGRESS); /* mbuf was freed above */ + } + + sav = ipsec_accel_drvspi_to_sa(tag->drv_spi); + if (sav != NULL) + ipsec_accel_sa_recordxfer(sav, m); + return (0); +} + +/* + * Account one packet and its length in the SA's software offload + * counters, and stamp the first-use time if it is still unset. + */ +static void +ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m) +{ + counter_u64_add(sav->accel_lft_sw, 1); + counter_u64_add(sav->accel_lft_sw + 1, m->m_pkthdr.len); + if (sav->accel_firstused == 0) + sav->accel_firstused = time_second; +} + +/* + * Fold lft_l into the accumulator lft_c. Allocations and bytes are + * summed. usetime carries a first-use timestamp where 0 means "not + * used yet", so keep the earliest non-zero value; a plain min() would + * pin the result at 0 whenever either side is still unused. + */ +static void +ipsec_accel_sa_lifetime_update(struct seclifetime *lft_c, + const struct seclifetime *lft_l) +{ + lft_c->allocations += lft_l->allocations; + lft_c->bytes += lft_l->bytes; + if (lft_l->usetime != 0 && (lft_c->usetime == 0 || + lft_l->usetime < lft_c->usetime)) + lft_c->usetime = lft_l->usetime; +} + +/* + * Take the absolute octet/packet counts reported for (ifp, drv_spi) and + * add the delta since the previous report to the SA's totals. + */ +void +ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, + u_int drv_spi, uint64_t octets, uint64_t allocs) +{ + struct epoch_tracker et; + struct ifp_handle_sav *i; + uint64_t odiff, adiff; + + NET_EPOCH_ENTER(et); + mtx_lock(&ipsec_accel_cnt_lock); + + if (allocs != 0) { + if (sav->firstused == 0) + sav->firstused = time_second; + if (sav->accel_firstused == 0) + sav->accel_firstused = time_second; + } + + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if (i->ifp == ifp && i->drv_spi == drv_spi) + break; + } + if (i == NULL) + goto out; + + odiff = octets - i->cnt_octets; + adiff = allocs - i->cnt_allocs; + + if (sav->lft_c != NULL) { + counter_u64_add(sav->lft_c_bytes, odiff); + counter_u64_add(sav->lft_c_allocations, adiff); + } + + i->cnt_octets = octets; + i->cnt_allocs = allocs; + sav->accel_hw_octets += odiff; + sav->accel_hw_allocs += adiff; +
+out: + mtx_unlock(&ipsec_accel_cnt_lock); + NET_EPOCH_EXIT(et); +} + +static void +ipsec_accel_sa_lifetime_hw(struct secasvar *sav, if_t ifp, + struct seclifetime *lft) +{ + struct ifp_handle_sav *i; + if_sa_cnt_fn_t p; + + IFNET_RLOCK_ASSERT(); + + i = ipsec_accel_is_accel_sav_ptr(sav, ifp); + if (i != NULL && (i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == + IFP_HS_HANDLED) { + p = ifp->if_ipsec_accel_m->if_sa_cnt; + if (p != NULL) + p(ifp, sav, i->drv_spi, i->ifdata, lft); + } +} + +static int +ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp) +{ + struct seclifetime lft_l, lft_s; + struct ifp_handle_sav *i; + if_t ifp1; + if_sa_cnt_fn_t p; + int error; + + error = 0; + memset(&lft_l, 0, sizeof(lft_l)); + memset(&lft_s, 0, sizeof(lft_s)); + + switch (op & ~IF_SA_CNT_UPD) { + case IF_SA_CNT_IFP_HW_VAL: + ipsec_accel_sa_lifetime_hw(sav, ifp, &lft_l); + ipsec_accel_sa_lifetime_update(&lft_l, &lft_s); + break; + + case IF_SA_CNT_TOTAL_SW_VAL: + lft_l.allocations = (uint32_t)counter_u64_fetch( + sav->accel_lft_sw); + lft_l.bytes = counter_u64_fetch(sav->accel_lft_sw + 1); + lft_l.usetime = sav->accel_firstused; + break; + + case IF_SA_CNT_TOTAL_HW_VAL: + IFNET_RLOCK_ASSERT(); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) != + IFP_HS_HANDLED) + continue; + ifp1 = i->ifp; + p = ifp1->if_ipsec_accel_m->if_sa_cnt; + if (p == NULL) + continue; + memset(&lft_s, 0, sizeof(lft_s)); + if (sahtree_trackerp != NULL) + ipsec_sahtree_runlock(sahtree_trackerp); + error = p(ifp1, sav, i->drv_spi, i->ifdata, &lft_s); + if (sahtree_trackerp != NULL) + ipsec_sahtree_rlock(sahtree_trackerp); + if (error == 0) + ipsec_accel_sa_lifetime_update(&lft_l, &lft_s); + } + break; + } + + if (error == 0) { + if ((op & IF_SA_CNT_UPD) == 0) + memset(lft_c, 0, sizeof(*lft_c)); + ipsec_accel_sa_lifetime_update(lft_c, &lft_l); + 
} + + return (error); +} + +static void +ipsec_accel_sync_imp(void) +{ + taskqueue_drain_all(taskqueue_thread); +} + +static struct mbuf * +ipsec_accel_key_setaccelif_impl(struct secasvar *sav) +{ + struct mbuf *m, *m1; + struct ifp_handle_sav *i; + struct epoch_tracker et; + + if (sav->accel_ifname != NULL) + return (key_setaccelif(sav->accel_ifname)); + + m = m1 = NULL; + + NET_EPOCH_ENTER(et); + CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { + if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == + IFP_HS_HANDLED) { + m1 = key_setaccelif(if_name(i->ifp)); + if (m == NULL) + m = m1; + else if (m1 != NULL) + m_cat(m, m1); + } + } + NET_EPOCH_EXIT(et); + return (m); +} + +#endif /* IPSEC_OFFLOAD */ diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -84,6 +84,7 @@ #include #endif #include +#include #include #include #include @@ -210,6 +211,8 @@ sav = ipsec4_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ + (void)ipsec_accel_output(ifp, m, inp, sp, NULL, + AF_INET, mtu); key_freesp(&sp); return (error); } @@ -222,6 +225,9 @@ if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; + if (ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET, mtu)) + return (EJUSTRETURN); + ip = mtod(m, struct ip *); dst = &sav->sah->saidx.dst; /* Do the appropriate encapsulation, if necessary */ @@ -597,6 +603,8 @@ sav = ipsec6_allocsa(ifp, m, sp, &idx, &error); if (sav == NULL) { if (error == EJUSTRETURN) { /* No IPsec required */ + (void)ipsec_accel_output(ifp, m, inp, sp, NULL, + AF_INET6, mtu); key_freesp(&sp); return (error); } @@ -611,6 +619,9 @@ if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0) goto bad; + if (ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET6, mtu)) + return (EJUSTRETURN); + ip6 = mtod(m, struct ip6_hdr *); /* pfil can change mbuf */ dst = &sav->sah->saidx.dst; @@ -859,6 +870,10 @@ 
struct m_tag *mtag; int error; + if (sav->state >= SADB_SASTATE_DEAD) { + error = ESRCH; + goto bad; + } saidx = &sav->sah->saidx; switch (saidx->dst.sa.sa_family) { #ifdef INET diff --git a/sys/netipsec/ipsec_pcb.c b/sys/netipsec/ipsec_pcb.c --- a/sys/netipsec/ipsec_pcb.c +++ b/sys/netipsec/ipsec_pcb.c @@ -49,6 +49,7 @@ #include #include #include +#include MALLOC_DEFINE(M_IPSEC_INPCB, "inpcbpolicy", "inpcb-resident ipsec policy"); @@ -166,18 +167,26 @@ int ipsec_delete_pcbpolicy(struct inpcb *inp) { + struct inpcbpolicy *inp_sp; - if (inp->inp_sp == NULL) + inp_sp = inp->inp_sp; + if (inp_sp == NULL) return (0); + inp->inp_sp = NULL; - if (inp->inp_sp->sp_in != NULL) - key_freesp(&inp->inp_sp->sp_in); + if (inp_sp->sp_in != NULL) { + if ((inp_sp->flags & INP_INBOUND_POLICY) != 0) + ipsec_accel_spddel(inp_sp->sp_in); + key_freesp(&inp_sp->sp_in); + } - if (inp->inp_sp->sp_out != NULL) - key_freesp(&inp->inp_sp->sp_out); + if (inp_sp->sp_out != NULL) { + if ((inp_sp->flags & INP_OUTBOUND_POLICY) != 0) + ipsec_accel_spddel(inp_sp->sp_out); + key_freesp(&inp_sp->sp_out); + } - free(inp->inp_sp, M_IPSEC_INPCB); - inp->inp_sp = NULL; + free(inp_sp, M_IPSEC_INPCB); return (0); } @@ -248,20 +257,26 @@ if (sp == NULL) return (ENOBUFS); ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_INBOUND); - if (new->inp_sp->sp_in != NULL) + if (new->inp_sp->sp_in != NULL) { + ipsec_accel_spddel(new->inp_sp->sp_in); key_freesp(&new->inp_sp->sp_in); + } new->inp_sp->sp_in = sp; new->inp_sp->flags |= INP_INBOUND_POLICY; + ipsec_accel_spdadd(sp, new); } if (old->inp_sp->flags & INP_OUTBOUND_POLICY) { sp = ipsec_deepcopy_pcbpolicy(old->inp_sp->sp_out); if (sp == NULL) return (ENOBUFS); ipsec_setspidx_inpcb(new, &sp->spidx, IPSEC_DIR_OUTBOUND); - if (new->inp_sp->sp_out != NULL) + if (new->inp_sp->sp_out != NULL) { + ipsec_accel_spddel(new->inp_sp->sp_out); key_freesp(&new->inp_sp->sp_out); + } new->inp_sp->sp_out = sp; new->inp_sp->flags |= INP_OUTBOUND_POLICY; + ipsec_accel_spdadd(sp, 
new); } return (0); } @@ -339,8 +354,10 @@ flags = INP_OUTBOUND_POLICY; } /* Clear old SP and set new SP. */ - if (*spp != NULL) + if (*spp != NULL) { + ipsec_accel_spddel(*spp); key_freesp(spp); + } *spp = newsp; KEYDBG(IPSEC_DUMP, printf("%s: new SP(%p)\n", __func__, newsp)); @@ -348,6 +365,7 @@ inp->inp_sp->flags &= ~flags; else { inp->inp_sp->flags |= flags; + ipsec_accel_spdadd(newsp, inp); KEYDBG(IPSEC_DUMP, kdebug_secpolicy(newsp)); } INP_WUNLOCK(inp); diff --git a/sys/netipsec/key.h b/sys/netipsec/key.h --- a/sys/netipsec/key.h +++ b/sys/netipsec/key.h @@ -36,6 +36,7 @@ #ifdef _KERNEL +struct mbuf; struct secpolicy; struct secpolicyindex; struct secasvar; @@ -60,6 +61,7 @@ void key_bumpspgen(void); uint32_t key_getspgen(void); uint32_t key_newreqid(void); +struct mbuf *key_setaccelif(const char *ifname); struct secasvar *key_allocsa(union sockaddr_union *, uint8_t, uint32_t); struct secasvar *key_allocsa_tunnel(union sockaddr_union *, @@ -85,6 +87,10 @@ uint16_t key_portfromsaddr(struct sockaddr *); void key_porttosaddr(struct sockaddr *, uint16_t port); +struct rm_priotracker; +void ipsec_sahtree_runlock(struct rm_priotracker *); +void ipsec_sahtree_rlock(struct rm_priotracker *); + #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IPSEC_SA); MALLOC_DECLARE(M_IPSEC_SAH); diff --git a/sys/netipsec/key.c b/sys/netipsec/key.c --- a/sys/netipsec/key.c +++ b/sys/netipsec/key.c @@ -83,6 +83,7 @@ #include #include #include +#include #include #ifdef INET6 @@ -90,12 +91,26 @@ #endif #include +#include #include #include /* randomness */ #include +#ifdef IPSEC_OFFLOAD +void (*ipsec_accel_sa_newkey_p)(struct secasvar *sav); +void (*ipsec_accel_forget_sav_p)(struct secasvar *sav); +void (*ipsec_accel_spdadd_p)(struct secpolicy *sp, struct inpcb *inp); +void (*ipsec_accel_spddel_p)(struct secpolicy *sp); +int (*ipsec_accel_sa_lifetime_op_p)(struct secasvar *sav, + struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, + struct rm_priotracker *sahtree_trackerp); +void 
(*ipsec_accel_sync_p)(void); +bool (*ipsec_accel_is_accel_sav_p)(struct secasvar *sav); +struct mbuf *(*ipsec_accel_key_setaccelif_p)(struct secasvar *sav); +#endif + #define FULLMASK 0xff #define _BITS(bytes) ((bytes) << 3) @@ -391,6 +406,9 @@ [SADB_X_EXT_SA_REPLAY] = sizeof(struct sadb_x_sa_replay), [SADB_X_EXT_NEW_ADDRESS_SRC] = sizeof(struct sadb_address), [SADB_X_EXT_NEW_ADDRESS_DST] = sizeof(struct sadb_address), + [SADB_X_EXT_LFT_CUR_SW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_LFT_CUR_HW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_IF_HW_OFFL] = sizeof(struct sadb_x_if_hw_offl), }; _Static_assert(nitems(minsize) == SADB_EXT_MAX + 1, "minsize size mismatch"); @@ -424,6 +442,9 @@ [SADB_X_EXT_SA_REPLAY] = sizeof(struct sadb_x_sa_replay), [SADB_X_EXT_NEW_ADDRESS_SRC] = 0, [SADB_X_EXT_NEW_ADDRESS_DST] = 0, + [SADB_X_EXT_LFT_CUR_SW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_LFT_CUR_HW_OFFL] = sizeof(struct sadb_lifetime), + [SADB_X_EXT_IF_HW_OFFL] = sizeof(struct sadb_x_if_hw_offl), }; _Static_assert(nitems(maxsize) == SADB_EXT_MAX + 1, "maxsize size mismatch"); @@ -661,7 +682,7 @@ const struct sadb_msghdr *, struct secasvar *, struct secasindex *); static struct mbuf *key_setdumpsa(struct secasvar *, u_int8_t, - u_int8_t, u_int32_t, u_int32_t); + u_int8_t, u_int32_t, u_int32_t, struct rm_priotracker *); static struct mbuf *key_setsadbmsg(u_int8_t, u_int16_t, u_int8_t, u_int32_t, pid_t, u_int16_t); static struct mbuf *key_setsadbsa(struct secasvar *); @@ -1227,6 +1248,11 @@ KEYDBG(IPSEC_DATA, kdebug_secpolicy(sp)); *spp = NULL; +#ifdef IPSEC_OFFLOAD + KASSERT(CK_LIST_EMPTY(&sp->accel_ifps), + ("key_freesp: sp %p still offloaded", sp)); + free(__DECONST(char *, sp->accel_ifname), M_IPSEC_MISC); +#endif while (sp->tcount > 0) ipsec_delisr(sp->req[--sp->tcount]); free(sp, M_IPSEC_SP); @@ -1240,6 +1266,7 @@ SPTREE_WUNLOCK(); if (SPDCACHE_ENABLED()) spdcache_clear(); + ipsec_accel_sync(); key_freesp(&sp); } @@ -1258,6 +1285,7 @@ return; } 
sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain); V_spd_size--; LIST_REMOVE(sp, idhash); @@ -1285,6 +1313,7 @@ newsp->state = IPSEC_SPSTATE_ALIVE; V_spd_size++; V_sp_genid++; + ipsec_accel_spdadd(newsp, NULL); } /* @@ -1329,6 +1358,7 @@ */ LIST_INSERT_HEAD(SPHASH_HASH(spp[i]->id), spp[i], idhash); spp[i]->state = IPSEC_SPSTATE_IFNET; + ipsec_accel_spdadd(spp[i], NULL); } SPTREE_WUNLOCK(); /* @@ -1357,6 +1387,7 @@ if (spp[i]->state != IPSEC_SPSTATE_IFNET) continue; spp[i]->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(spp[i]); TAILQ_REMOVE(&V_sptree_ifnet[spp[i]->spidx.dir], spp[i], chain); V_spd_size--; @@ -1365,6 +1396,7 @@ SPTREE_WUNLOCK(); if (SPDCACHE_ENABLED()) spdcache_clear(); + ipsec_accel_sync(); for (i = 0; i < count; i++) { m = key_setdumpsp(spp[i], SADB_X_SPDDELETE, 0, 0); @@ -1424,6 +1456,7 @@ /* Unlink from SPI hash */ LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sah = sav->sah; SAHTREE_WUNLOCK(); key_freesav(&sav); @@ -1821,6 +1854,9 @@ size_t xlen, ilen; caddr_t p; int error, i; +#ifdef IPSEC_OFFLOAD + struct sadb_x_if_hw_offl *xif; +#endif IPSEC_ASSERT(sp != NULL, ("null policy")); @@ -1876,6 +1912,19 @@ } } xpl->sadb_x_policy_len = PFKEY_UNIT64(xlen); +#ifdef IPSEC_OFFLOAD + if (error == 0 && sp->accel_ifname != NULL) { + /* The policy extension occupies xlen bytes; append after it. */ + xif = (struct sadb_x_if_hw_offl *)((caddr_t)xpl + xlen); + bzero(xif, sizeof(*xif)); + xif->sadb_x_if_hw_offl_len = PFKEY_UNIT64(sizeof(*xif)); + xif->sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + xif->sadb_x_if_hw_offl_flags = 0; + strncpy(xif->sadb_x_if_hw_offl_if, sp->accel_ifname, + sizeof(xif->sadb_x_if_hw_offl_if)); + xlen += sizeof(*xif); + } +#endif if (error == 0) *len = xlen; else @@ -2088,6 +2136,27 @@ newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0; newsp->validtime = lft ?
lft->sadb_lifetime_usetime : 0; bcopy(&spidx, &newsp->spidx, sizeof(spidx)); +#ifdef IPSEC_OFFLOAD + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_IF_HW_OFFL) && + !SADB_CHECKLEN(mhp, SADB_X_EXT_IF_HW_OFFL)) { + struct sadb_x_if_hw_offl *xof; + + xof = (struct sadb_x_if_hw_offl *)mhp->ext[ + SADB_X_EXT_IF_HW_OFFL]; + newsp->accel_ifname = malloc(sizeof(xof->sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (newsp->accel_ifname == NULL) { /* out of memory */ + ipseclog((LOG_DEBUG, "%s: cannot alloc accel_ifname.\n", + __func__)); + key_freesp(&newsp); + return (key_senderror(so, m, ENOBUFS)); + } + strncpy(__DECONST(char *, newsp->accel_ifname), + xof->sadb_x_if_hw_offl_if, + sizeof(xof->sadb_x_if_hw_offl_if)); + } + +#endif SPTREE_WLOCK(); if ((newsp->id = key_getnewspid()) == 0) { @@ -2095,6 +2164,7 @@ key_detach(oldsp); SPTREE_WUNLOCK(); if (oldsp != NULL) { + ipsec_accel_sync(); key_freesp(&oldsp); /* first for key_detach */ IPSEC_ASSERT(oldsp != NULL, ("null oldsp: refcount bug")); key_freesp(&oldsp); /* second for our reference */ @@ -2109,6 +2179,7 @@ key_insertsp(newsp); SPTREE_WUNLOCK(); if (oldsp != NULL) { + ipsec_accel_sync(); key_freesp(&oldsp); /* first for key_detach */ IPSEC_ASSERT(oldsp != NULL, ("null oldsp: refcount bug")); key_freesp(&oldsp); /* second for our reference */ @@ -2290,6 +2361,7 @@ KEYDBG(KEY_STAMP, printf("%s: SP(%p)\n", __func__, sp)); KEYDBG(KEY_DATA, kdebug_secpolicy(sp)); + ipsec_accel_spddel(sp); key_unlink(sp); key_freesp(&sp); @@ -2561,6 +2633,7 @@ */ TAILQ_FOREACH(sp, &drainq, chain) { sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); LIST_REMOVE(sp, idhash); } V_sp_genid++; @@ -2764,6 +2837,10 @@ tlen += PFKEY_ALIGN8(len); } +#ifdef IPSEC_OFFLOAD + if (sp->accel_ifname != NULL) + tlen += sizeof(struct sadb_x_if_hw_offl); +#endif return (tlen); } @@ -3005,6 +3082,32 @@ sav->state = SADB_SASTATE_LARVAL; sav->pid = (pid_t)mhp->msg->sadb_msg_pid; SAV_INITREF(sav); +#ifdef IPSEC_OFFLOAD + CK_LIST_INIT(&sav->accel_ifps); + sav->accel_forget_tq = 0;
+ sav->accel_lft_sw = uma_zalloc_pcpu(ipsec_key_lft_zone, + M_NOWAIT | M_ZERO); + if (sav->accel_lft_sw == NULL) { + *errp = ENOBUFS; + goto done; + } + if (!SADB_CHECKHDR(mhp, SADB_X_EXT_IF_HW_OFFL) && + !SADB_CHECKLEN(mhp, SADB_X_EXT_IF_HW_OFFL)) { + struct sadb_x_if_hw_offl *xof; + + xof = (struct sadb_x_if_hw_offl *)mhp->ext[ + SADB_X_EXT_IF_HW_OFFL]; + sav->accel_ifname = malloc(sizeof(xof->sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (sav->accel_ifname == NULL) { + *errp = ENOBUFS; + goto done; + } + strncpy(__DECONST(char *, sav->accel_ifname), + xof->sadb_x_if_hw_offl_if, + sizeof(xof->sadb_x_if_hw_offl_if)); + } +#endif again: sah = key_getsah(saidx); if (sah == NULL) { @@ -3068,9 +3171,10 @@ SAH_ADDREF(sah); } /* Link SAV with SAH */ - if (sav->state == SADB_SASTATE_MATURE) + if (sav->state == SADB_SASTATE_MATURE) { TAILQ_INSERT_HEAD(&sah->savtree_alive, sav, chain); - else + ipsec_accel_sa_newkey(sav); + } else TAILQ_INSERT_HEAD(&sah->savtree_larval, sav, chain); /* Add SAV into SPI hash */ LIST_INSERT_HEAD(SAVHASH_HASH(sav->spi), sav, spihash); @@ -3085,6 +3189,13 @@ } if (sav->lft_c != NULL) uma_zfree_pcpu(ipsec_key_lft_zone, sav->lft_c); +#ifdef IPSEC_OFFLOAD + if (sav->accel_lft_sw != NULL) + uma_zfree_pcpu(ipsec_key_lft_zone, + sav->accel_lft_sw); + free(__DECONST(char *, sav->accel_ifname), + M_IPSEC_MISC); +#endif free(sav, M_IPSEC_SA), sav = NULL; } if (sah != NULL) @@ -3153,6 +3264,10 @@ ("attempt to free non DEAD SA %p", sav)); IPSEC_ASSERT(sav->refcnt == 0, ("reference count %u > 0", sav->refcnt)); +#ifdef IPSEC_OFFLOAD + KASSERT(CK_LIST_EMPTY(&sav->accel_ifps), + ("key_unlinksav: sav %p still offloaded", sav)); +#endif /* * SA must be unlinked from the chain and hashtbl. @@ -3165,6 +3280,11 @@ free(sav->lock, M_IPSEC_MISC); uma_zfree_pcpu(ipsec_key_lft_zone, sav->lft_c); } +#ifdef IPSEC_OFFLOAD + /* XXXKIB should this be moved to key_cleansav()? 
*/ + uma_zfree_pcpu(ipsec_key_lft_zone, sav->accel_lft_sw); + free(__DECONST(char *, sav->accel_ifname), M_IPSEC_MISC); +#endif free(sav, M_IPSEC_SA); } @@ -3588,7 +3708,7 @@ */ static struct mbuf * key_setdumpsa(struct secasvar *sav, uint8_t type, uint8_t satype, - uint32_t seq, uint32_t pid) + uint32_t seq, uint32_t pid, struct rm_priotracker *sahtree_trackerp) { struct seclifetime lft_c; struct mbuf *result = NULL, *tres = NULL, *m; @@ -3604,8 +3724,15 @@ SADB_X_EXT_NAT_T_SPORT, SADB_X_EXT_NAT_T_DPORT, SADB_X_EXT_NAT_T_OAI, SADB_X_EXT_NAT_T_OAR, SADB_X_EXT_NAT_T_FRAG, +#ifdef IPSEC_OFFLOAD + SADB_X_EXT_LFT_CUR_SW_OFFL, SADB_X_EXT_LFT_CUR_HW_OFFL, + SADB_X_EXT_IF_HW_OFFL, +#endif }; uint32_t replay_count; +#ifdef IPSEC_OFFLOAD + int error; +#endif SECASVAR_RLOCK_TRACKER; @@ -3752,6 +3879,44 @@ case SADB_X_EXT_NAT_T_FRAG: /* We do not (yet) support those. */ continue; +#ifdef IPSEC_OFFLOAD + case SADB_X_EXT_LFT_CUR_SW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + SAV_ADDREF(sav); + error = ipsec_accel_sa_lifetime_op(sav, &lft_c, + NULL, IF_SA_CNT_TOTAL_SW_VAL, sahtree_trackerp); + if (error != 0) { + m = NULL; + goto fail; + } + m = key_setlifetime(&lft_c, dumporder[i]); + if (m == NULL) + goto fail; + key_freesav(&sav); + if (sav == NULL) { + m_freem(m); + goto fail; + } + break; + case SADB_X_EXT_LFT_CUR_HW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + memset(&lft_c, 0, sizeof(lft_c)); + lft_c.bytes = sav->accel_hw_octets; + lft_c.allocations = sav->accel_hw_allocs; + m = key_setlifetime(&lft_c, dumporder[i]); + if (m == NULL) + goto fail; + break; + case SADB_X_EXT_IF_HW_OFFL: + if (!ipsec_accel_is_accel_sav(sav)) + continue; + m = ipsec_accel_key_setaccelif(sav); + if (m == NULL) + continue; /* benigh */ + break; +#endif case SADB_EXT_ADDRESS_PROXY: case SADB_EXT_IDENTITY_SRC: @@ -4502,6 +4667,7 @@ V_spd_size--; LIST_REMOVE(sp, idhash); sp->state = IPSEC_SPSTATE_DEAD; + ipsec_accel_spddel(sp); sp = nextsp; } V_sp_genid++; @@ -4625,6 
+4791,7 @@ TAILQ_REMOVE(&sav->sah->savtree_larval, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sav = nextsav; } /* Unlink all SAs with expired HARD lifetime */ @@ -4641,6 +4808,7 @@ TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); sav = nextsav; } /* Mark all SAs with expired SOFT lifetime as DYING */ @@ -5239,6 +5407,31 @@ /* Clone SA's content into newsav */ SAV_INITREF(newsav); bcopy(sav, newsav, offsetof(struct secasvar, chain)); +#ifdef IPSEC_OFFLOAD + CK_LIST_INIT(&newsav->accel_ifps); + newsav->accel_forget_tq = 0; + newsav->accel_lft_sw = uma_zalloc_pcpu(ipsec_key_lft_zone, + M_NOWAIT | M_ZERO); + if (newsav->accel_lft_sw == NULL) { + error = ENOBUFS; + goto fail; + } + if (sav->accel_ifname != NULL) { + struct sadb_x_if_hw_offl xof; + + newsav->accel_ifname = malloc(sizeof(xof.sadb_x_if_hw_offl_if), + M_IPSEC_MISC, M_NOWAIT); + if (newsav->accel_ifname == NULL) { + error = ENOBUFS; + goto fail; + } + /* Copy the name from the old SA into the clone. */ + strncpy(__DECONST(char *, newsav->accel_ifname), + sav->accel_ifname, + sizeof(xof.sadb_x_if_hw_offl_if)); + } +#endif + /* * We create new NAT-T config if it is needed. * Old NAT-T config will be freed by key_cleansav() when @@ -5269,6 +5461,7 @@ TAILQ_REMOVE(&sav->sah->savtree_alive, sav, chain); LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); /* * Link new SA with SAH.
Keep SAs ordered by @@ -5326,6 +5519,10 @@ if (isnew != 0) key_freesah(&sah); if (newsav != NULL) { +#ifdef IPSEC_OFFLOAD + uma_zfree_pcpu(ipsec_key_lft_zone, newsav->accel_lft_sw); + free(__DECONST(char *, newsav->accel_ifname), M_IPSEC_MISC); +#endif if (newsav->natt != NULL) free(newsav->natt, M_IPSEC_MISC); free(newsav, M_IPSEC_SA); @@ -5540,6 +5737,7 @@ KEYDBG(KEY_STAMP, printf("%s: SA(%p)\n", __func__, sav)); KEYDBG(KEY_DATA, kdebug_secasv(sav)); + ipsec_accel_sa_newkey(sav); key_freesav(&sav); { @@ -5692,6 +5890,7 @@ KEYDBG(KEY_STAMP, printf("%s: return SA(%p)\n", __func__, sav)); KEYDBG(KEY_DATA, kdebug_secasv(sav)); + ipsec_accel_sa_newkey(sav); /* * If SADB_ADD was in response to SADB_ACQUIRE, we need to schedule * ACQ for deletion. @@ -6196,6 +6395,7 @@ /* Unlink all queued SAs from SPI hash */ TAILQ_FOREACH(sav, &drainq, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); LIST_REMOVE(sav, spihash); } SAHTREE_WUNLOCK(); @@ -6264,6 +6464,7 @@ /* Unlink all queued SAs from SPI hash */ TAILQ_FOREACH(sav, &drainq, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); LIST_REMOVE(sav, spihash); } SAHTREE_WUNLOCK(); @@ -6372,7 +6573,7 @@ /* create new sadb_msg to reply. 
*/ n = key_setdumpsa(sav, SADB_GET, satype, mhp->msg->sadb_msg_seq, - mhp->msg->sadb_msg_pid); + mhp->msg->sadb_msg_pid, NULL); key_freesav(&sav); if (!n) @@ -7614,9 +7815,11 @@ */ TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } } SAHTREE_WUNLOCK(); @@ -7638,10 +7841,12 @@ TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { LIST_REMOVE(sav, spihash); sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } /* Add SAH into flushq */ TAILQ_INSERT_HEAD(&flushq, sah, chain); @@ -7705,6 +7910,7 @@ /* count sav entries to be sent to the userland. */ cnt = 0; + IFNET_RLOCK(); SAHTREE_RLOCK(); TAILQ_FOREACH(sah, &V_sahtree, chain) { if (mhp->msg->sadb_msg_satype != SADB_SATYPE_UNSPEC && @@ -7719,6 +7925,7 @@ if (cnt == 0) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOENT); } @@ -7731,30 +7938,34 @@ /* map proto to satype */ if ((satype = key_proto2satype(sah->saidx.proto)) == 0) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); ipseclog((LOG_DEBUG, "%s: there was invalid proto in " "SAD.\n", __func__)); return key_senderror(so, m, EINVAL); } TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { n = key_setdumpsa(sav, SADB_DUMP, satype, - --cnt, mhp->msg->sadb_msg_pid); + --cnt, mhp->msg->sadb_msg_pid, &sahtree_tracker); if (n == NULL) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOBUFS); } key_sendup_mbuf(so, n, KEY_SENDUP_ONE); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { n = key_setdumpsa(sav, SADB_DUMP, satype, - --cnt, mhp->msg->sadb_msg_pid); + --cnt, mhp->msg->sadb_msg_pid, &sahtree_tracker); if (n == NULL) { SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); return key_senderror(so, m, ENOBUFS); } key_sendup_mbuf(so, 
n, KEY_SENDUP_ONE); } } SAHTREE_RUNLOCK(); + IFNET_RUNLOCK(); m_freem(m); return (0); } @@ -8175,6 +8386,11 @@ case SADB_X_EXT_SA_REPLAY: case SADB_X_EXT_NEW_ADDRESS_SRC: case SADB_X_EXT_NEW_ADDRESS_DST: +#ifdef IPSEC_OFFLOAD + case SADB_X_EXT_LFT_CUR_SW_OFFL: + case SADB_X_EXT_LFT_CUR_HW_OFFL: + case SADB_X_EXT_IF_HW_OFFL: +#endif /* duplicate check */ /* * XXX Are there duplication payloads of either @@ -8483,9 +8699,11 @@ sah->state = SADB_SASTATE_DEAD; TAILQ_FOREACH(sav, &sah->savtree_larval, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } TAILQ_FOREACH(sav, &sah->savtree_alive, chain) { sav->state = SADB_SASTATE_DEAD; + ipsec_accel_forget_sav(sav); } } SAHTREE_WUNLOCK(); @@ -8633,6 +8851,32 @@ return m; } +#ifdef IPSEC_OFFLOAD +struct mbuf * +key_setaccelif(const char *ifname) +{ + struct mbuf *m = NULL; + struct sadb_x_if_hw_offl *p; + int len = PFKEY_ALIGN8(sizeof(*p)); + + m = m_get2(len, M_NOWAIT, MT_DATA, 0); + if (m == NULL) + return (m); + m_align(m, len); + m->m_len = len; + p = mtod(m, struct sadb_x_if_hw_offl *); + + bzero(p, len); + p->sadb_x_if_hw_offl_len = PFKEY_UNIT64(len); + p->sadb_x_if_hw_offl_exttype = SADB_X_EXT_IF_HW_OFFL; + p->sadb_x_if_hw_offl_flags = 0; + strncpy(p->sadb_x_if_hw_offl_if, ifname, + sizeof(p->sadb_x_if_hw_offl_if)); + + return (m); +} +#endif + /* * Take one of the kernel's lifetime data structures and convert it * into a PF_KEY structure within an mbuf, suitable for sending up to @@ -8708,3 +8952,15 @@ return (supported_calgs[i].xform); return (NULL); } + +void +ipsec_sahtree_runlock(struct rm_priotracker *sahtree_trackerp) +{ + rm_runlock(&sahtree_lock, sahtree_trackerp); +} + +void +ipsec_sahtree_rlock(struct rm_priotracker *sahtree_trackerp) +{ + rm_rlock(&sahtree_lock, sahtree_trackerp); +} diff --git a/sys/netipsec/key_debug.c b/sys/netipsec/key_debug.c --- a/sys/netipsec/key_debug.c +++ b/sys/netipsec/key_debug.c @@ -155,6 +155,8 @@ X_NAME(SA_REPLAY); X_NAME(NEW_ADDRESS_SRC); 
X_NAME(NEW_ADDRESS_DST); + X_NAME(LFT_CUR_SW_OFFL); + X_NAME(LFT_CUR_HW_OFFL); default: return ("UNKNOWN"); }; @@ -251,6 +253,10 @@ case SADB_X_EXT_NAT_T_DPORT: kdebug_sadb_x_natt(ext); break; + case SADB_X_EXT_LFT_CUR_SW_OFFL: + case SADB_X_EXT_LFT_CUR_HW_OFFL: + kdebug_sadb_lifetime(ext); + break; /* handled; do not report as invalid */ default: printf("%s: invalid ext_type %u\n", __func__, ext->sadb_ext_type); diff --git a/sys/netipsec/keydb.h b/sys/netipsec/keydb.h --- a/sys/netipsec/keydb.h +++ b/sys/netipsec/keydb.h @@ -36,9 +36,11 @@ #ifdef _KERNEL #include +#include #include #include #include +#include #include #include @@ -125,6 +127,7 @@ struct enc_xform; struct auth_hash; struct comp_algo; +struct ifp_handle_sav; /* * Security Association @@ -185,8 +188,19 @@ uint64_t cntr; /* counter for GCM and CTR */ volatile u_int refcnt; /* reference count */ + CK_LIST_HEAD(, ifp_handle_sav) accel_ifps; + uintptr_t accel_forget_tq; + const char *accel_ifname; + uint32_t accel_flags; + counter_u64_t accel_lft_sw; + uint64_t accel_hw_allocs; + uint64_t accel_hw_octets; + uint64_t accel_firstused; }; +#define SADB_KEY_ACCEL_INST 0x00000001 +#define SADB_KEY_ACCEL_DEINST 0x00000002 + #define SECASVAR_RLOCK_TRACKER struct rm_priotracker _secas_tracker #define SECASVAR_RLOCK(_sav) rm_rlock((_sav)->lock, &_secas_tracker) #define SECASVAR_RUNLOCK(_sav) rm_runlock((_sav)->lock, &_secas_tracker)