diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 3e094dcb3cd5..579585b25dd2 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -1,757 +1,760 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NET_IF_VAR_H_ #define _NET_IF_VAR_H_ /* * Structures defining a network interface, providing a packet * transport mechanism (ala level 0 of the PUP protocols). 
* * Each interface accepts output datagrams of a specified maximum * length, and provides higher level routines with input datagrams * received from its medium. * * Output occurs when the routine if_output is called, with three parameters: * (*ifp->if_output)(ifp, m, dst, ro) * Here m is the mbuf chain to be sent and dst is the destination address. * The output routine encapsulates the supplied datagram if necessary, * and then transmits it on its medium. * * On input, each interface unwraps the data received by it, and either * places it on the input queue of an internetwork datagram routine * and posts the associated software interrupt, or passes the datagram to a raw * packet input routine. * * Routines exist for locating interfaces by their addresses * or for locating an interface on a certain network, as well as more general * routing and gateway routines maintaining information used to locate * interfaces. These routines live in the files if.c and route.c */ struct rtentry; /* ifa_rtrequest */ struct socket; struct carp_if; struct carp_softc; struct ifvlantrunk; struct route; /* if_output */ struct vnet; struct ifmedia; struct netmap_adapter; struct debugnet_methods; #ifdef _KERNEL #include #include /* ifqueue only? 
*/ #include #include #endif /* _KERNEL */ #include #include #include #include /* XXX */ #include /* struct ifqueue */ #include /* XXX */ #include /* XXX */ #include /* if_link_task */ #define IF_DUNIT_NONE -1 #include CK_STAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */ CK_STAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */ CK_STAILQ_HEAD(ifmultihead, ifmultiaddr); CK_STAILQ_HEAD(ifgrouphead, ifg_group); #ifdef _KERNEL VNET_DECLARE(struct pfil_head *, link_pfil_head); #define V_link_pfil_head VNET(link_pfil_head) #define PFIL_ETHER_NAME "ethernet" #define HHOOK_IPSEC_INET 0 #define HHOOK_IPSEC_INET6 1 #define HHOOK_IPSEC_COUNT 2 VNET_DECLARE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DECLARE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); #define V_ipsec_hhh_in VNET(ipsec_hhh_in) #define V_ipsec_hhh_out VNET(ipsec_hhh_out) #endif /* _KERNEL */ typedef enum { IFCOUNTER_IPACKETS = 0, IFCOUNTER_IERRORS, IFCOUNTER_OPACKETS, IFCOUNTER_OERRORS, IFCOUNTER_COLLISIONS, IFCOUNTER_IBYTES, IFCOUNTER_OBYTES, IFCOUNTER_IMCASTS, IFCOUNTER_OMCASTS, IFCOUNTER_IQDROPS, IFCOUNTER_OQDROPS, IFCOUNTER_NOPROTO, IFCOUNTERS /* Array size. 
*/ } ift_counter; typedef void (*if_start_fn_t)(if_t); typedef int (*if_ioctl_fn_t)(if_t, u_long, caddr_t); typedef void (*if_init_fn_t)(void *); typedef void (*if_input_fn_t)(if_t, struct mbuf *); typedef int (*if_output_fn_t)(if_t, struct mbuf *, const struct sockaddr *, struct route *); typedef void (*if_qflush_fn_t)(if_t); typedef int (*if_transmit_fn_t)(if_t, struct mbuf *); typedef uint64_t (*if_get_counter_t)(if_t, ift_counter); typedef void (*if_reassign_fn_t)(if_t, struct vnet *, char *); typedef int (*if_spdadd_fn_t)(if_t, void *sp, void *inp, void **priv); typedef int (*if_spddel_fn_t)(if_t, void *sp, void *priv); typedef int (*if_sa_newkey_fn_t)(if_t ifp, void *sav, u_int drv_spi, void **privp); typedef int (*if_sa_deinstall_fn_t)(if_t ifp, u_int drv_spi, void *priv); struct seclifetime; #define IF_SA_CNT_UPD 0x80000000 enum IF_SA_CNT_WHICH { IF_SA_CNT_IFP_HW_VAL = 1, IF_SA_CNT_TOTAL_SW_VAL, IF_SA_CNT_TOTAL_HW_VAL, IF_SA_CNT_IFP_HW_UPD = IF_SA_CNT_IFP_HW_VAL | IF_SA_CNT_UPD, IF_SA_CNT_TOTAL_SW_UPD = IF_SA_CNT_TOTAL_SW_VAL | IF_SA_CNT_UPD, IF_SA_CNT_TOTAL_HW_UPD = IF_SA_CNT_TOTAL_HW_VAL | IF_SA_CNT_UPD, }; typedef int (*if_sa_cnt_fn_t)(if_t ifp, void *sa, uint32_t drv_spi, void *priv, struct seclifetime *lt); +typedef int (*if_ipsec_hwassist_fn_t)(if_t ifp, void *sav, + u_int drv_spi,void *priv); struct ifnet_hw_tsomax { u_int tsomaxbytes; /* TSO total burst length limit in bytes */ u_int tsomaxsegcount; /* TSO maximum segment count */ u_int tsomaxsegsize; /* TSO maximum segment size in bytes */ }; /* Interface encap request types */ typedef enum { IFENCAP_LL = 1 /* pre-calculate link-layer header */ } ife_type; /* * The structure below allows to request various pre-calculated L2/L3 headers * for different media. Requests varies by type (rtype field). * * IFENCAP_LL type: pre-calculates link header based on address family * and destination lladdr. 
* * Input data fields: * buf: pointer to destination buffer * bufsize: buffer size * flags: IFENCAP_FLAG_BROADCAST if destination is broadcast * family: address family defined by AF_ constant. * lladdr: pointer to link-layer address * lladdr_len: length of link-layer address * hdata: pointer to L3 header (optional, used for ARP requests). * Output data fields: * buf: encap data is stored here * bufsize: resulting encap length is stored here * lladdr_off: offset of link-layer address from encap hdr start * hdata: L3 header may be altered if necessary */ struct if_encap_req { u_char *buf; /* Destination buffer (w) */ size_t bufsize; /* size of provided buffer (r) */ ife_type rtype; /* request type (r) */ uint32_t flags; /* Request flags (r) */ int family; /* Address family AF_* (r) */ int lladdr_off; /* offset from header start (w) */ int lladdr_len; /* lladdr length (r) */ char *lladdr; /* link-level address pointer (r) */ char *hdata; /* Upper layer header data (rw) */ }; #define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */ /* * Network interface send tag support. The storage of "struct * m_snd_tag" comes from the network driver and it is free to allocate * as much additional space as it wants for its own use. 
*/ struct ktls_session; struct m_snd_tag; #define IF_SND_TAG_TYPE_RATE_LIMIT 0 #define IF_SND_TAG_TYPE_UNLIMITED 1 #define IF_SND_TAG_TYPE_TLS 2 #define IF_SND_TAG_TYPE_TLS_RATE_LIMIT 3 #define IF_SND_TAG_TYPE_TLS_RX 4 #define IF_SND_TAG_TYPE_MAX 5 struct if_snd_tag_alloc_header { uint32_t type; /* send tag type, see IF_SND_TAG_XXX */ uint32_t flowid; /* mbuf hash value */ uint32_t flowtype; /* mbuf hash type */ uint8_t numa_domain; /* numa domain of associated inp */ }; struct if_snd_tag_alloc_rate_limit { struct if_snd_tag_alloc_header hdr; uint64_t max_rate; /* in bytes/s */ uint32_t flags; /* M_NOWAIT or M_WAITOK */ uint32_t reserved; /* alignment */ }; struct if_snd_tag_alloc_tls { struct if_snd_tag_alloc_header hdr; struct inpcb *inp; const struct ktls_session *tls; }; struct if_snd_tag_alloc_tls_rx { struct if_snd_tag_alloc_header hdr; struct inpcb *inp; const struct ktls_session *tls; uint16_t vlan_id; /* valid if non-zero */ }; struct if_snd_tag_alloc_tls_rate_limit { struct if_snd_tag_alloc_header hdr; struct inpcb *inp; const struct ktls_session *tls; uint64_t max_rate; /* in bytes/s */ }; struct if_snd_tag_rate_limit_params { uint64_t max_rate; /* in bytes/s */ uint32_t queue_level; /* 0 (empty) .. 65535 (full) */ #define IF_SND_QUEUE_LEVEL_MIN 0 #define IF_SND_QUEUE_LEVEL_MAX 65535 uint32_t flags; /* M_NOWAIT or M_WAITOK */ }; struct if_snd_tag_modify_tls_rx { /* TCP sequence number of TLS header in host endian format */ uint32_t tls_hdr_tcp_sn; /* * TLS record length, including all headers, data and trailers. * If the tls_rec_length is zero, it means HW encryption resumed. 
*/ uint32_t tls_rec_length; /* TLS sequence number in host endian format */ uint64_t tls_seq_number; }; union if_snd_tag_alloc_params { struct if_snd_tag_alloc_header hdr; struct if_snd_tag_alloc_rate_limit rate_limit; struct if_snd_tag_alloc_rate_limit unlimited; struct if_snd_tag_alloc_tls tls; struct if_snd_tag_alloc_tls_rx tls_rx; struct if_snd_tag_alloc_tls_rate_limit tls_rate_limit; }; union if_snd_tag_modify_params { struct if_snd_tag_rate_limit_params rate_limit; struct if_snd_tag_rate_limit_params unlimited; struct if_snd_tag_rate_limit_params tls_rate_limit; struct if_snd_tag_modify_tls_rx tls_rx; }; union if_snd_tag_query_params { struct if_snd_tag_rate_limit_params rate_limit; struct if_snd_tag_rate_limit_params unlimited; struct if_snd_tag_rate_limit_params tls_rate_limit; }; typedef int (if_snd_tag_alloc_t)(if_t, union if_snd_tag_alloc_params *, struct m_snd_tag **); typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *); typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *); typedef void (if_snd_tag_free_t)(struct m_snd_tag *); typedef struct m_snd_tag *(if_next_send_tag_t)(struct m_snd_tag *); struct if_snd_tag_sw { if_snd_tag_modify_t *snd_tag_modify; if_snd_tag_query_t *snd_tag_query; if_snd_tag_free_t *snd_tag_free; if_next_send_tag_t *next_snd_tag; u_int type; /* One of IF_SND_TAG_TYPE_*. */ }; /* Query return flags */ #define RT_NOSUPPORT 0x00000000 /* Not supported */ #define RT_IS_INDIRECT 0x00000001 /* * Interface like a lagg, select * the actual interface for * capabilities. */ #define RT_IS_SELECTABLE 0x00000002 /* * No rate table, you select * rates and the first * number_of_rates are created. 
*/ #define RT_IS_FIXED_TABLE 0x00000004 /* A fixed table is attached */ #define RT_IS_UNUSABLE 0x00000008 /* It is not usable for this */ #define RT_IS_SETUP_REQ 0x00000010 /* The interface setup must be called before use */ struct if_ratelimit_query_results { const uint64_t *rate_table; /* Pointer to table if present */ uint32_t flags; /* Flags indicating results */ uint32_t max_flows; /* Max flows using, 0=unlimited */ uint32_t number_of_rates; /* How many unique rates can be created */ uint32_t min_segment_burst; /* The amount the adapter bursts at each send */ }; typedef void (if_ratelimit_query_t)(if_t, struct if_ratelimit_query_results *); typedef int (if_ratelimit_setup_t)(if_t, uint64_t, uint32_t); #define IF_NODOM 255 /* * Locks for address lists on the network interface. */ #define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF) #define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock) #define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock) #define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock) #define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock)) #define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED) #ifdef _KERNEL /* interface link layer address change event */ typedef void (*iflladdr_event_handler_t)(void *, if_t); EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t); /* interface address change event */ typedef void (*ifaddr_event_handler_t)(void *, if_t); EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t); typedef void (*ifaddr_event_ext_handler_t)(void *, if_t, struct ifaddr *, int); EVENTHANDLER_DECLARE(ifaddr_event_ext, ifaddr_event_ext_handler_t); #define IFADDR_EVENT_ADD 0 #define IFADDR_EVENT_DEL 1 /* new interface arrival event */ typedef void (*ifnet_arrival_event_handler_t)(void *, if_t); EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t); /* interface departure event */ typedef void 
(*ifnet_departure_event_handler_t)(void *, if_t); EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t); /* Interface link state change event */ typedef void (*ifnet_link_event_handler_t)(void *, if_t, int); EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t); /* Interface up/down event */ #define IFNET_EVENT_UP 0 #define IFNET_EVENT_DOWN 1 #define IFNET_EVENT_PCP 2 /* priority code point, PCP */ #define IFNET_EVENT_UPDATE_BAUDRATE 3 typedef void (*ifnet_event_fn)(void *, if_t ifp, int event); EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn); /* * interface groups */ struct ifg_group { char ifg_group[IFNAMSIZ]; u_int ifg_refcnt; void *ifg_pf_kif; CK_STAILQ_HEAD(, ifg_member) ifg_members; /* (CK_) */ CK_STAILQ_ENTRY(ifg_group) ifg_next; /* (CK_) */ }; struct ifg_member { CK_STAILQ_ENTRY(ifg_member) ifgm_next; /* (CK_) */ if_t ifgm_ifp; }; struct ifg_list { struct ifg_group *ifgl_group; CK_STAILQ_ENTRY(ifg_list) ifgl_next; /* (CK_) */ }; #ifdef _SYS_EVENTHANDLER_H_ /* group attach event */ typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t); /* group detach event */ typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t); /* group change event */ typedef void (*group_change_event_handler_t)(void *, const char *); EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); #endif /* _SYS_EVENTHANDLER_H_ */ /* * 72 was chosen below because it is the size of a TCP/IP * header (40) + the minimum mss (32). */ #define IF_MINMTU 72 #define IF_MAXMTU 65535 #define TOEDEV(ifp) if_getllsoftc(ifp) #define SETTOEDEV(ifp, sc) if_setllsoftc((ifp), (sc)) /* * The ifaddr structure contains information about one address * of an interface. 
They are maintained by the different address families, * are allocated and attached when an address is set, and are linked * together so all addresses for an interface can be located. * * NOTE: a 'struct ifaddr' is always at the beginning of a larger * chunk of malloc'ed memory, where we store the three addresses * (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here. */ struct ifaddr { struct sockaddr *ifa_addr; /* address of interface */ struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ if_t ifa_ifp; /* back-pointer to interface */ struct carp_softc *ifa_carp; /* pointer to CARP data */ CK_STAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ u_short ifa_flags; /* mostly rt_flags for cloning */ #define IFA_ROUTE RTF_UP /* route installed */ #define IFA_RTSELF RTF_HOST /* loopback route to self installed */ u_int ifa_refcnt; /* references to this structure */ counter_u64_t ifa_ipackets; counter_u64_t ifa_opackets; counter_u64_t ifa_ibytes; counter_u64_t ifa_obytes; struct epoch_context ifa_epoch_ctx; }; struct ifaddr * ifa_alloc(size_t size, int flags); void ifa_free(struct ifaddr *ifa); void ifa_ref(struct ifaddr *ifa); int __result_use_check ifa_try_ref(struct ifaddr *ifa); /* * Multicast address structure. This is analogous to the ifaddr * structure except that it keeps track of multicast addresses. 
*/ #define IFMA_F_ENQUEUED 0x1 struct ifmultiaddr { CK_STAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */ struct sockaddr *ifma_addr; /* address this membership is for */ struct sockaddr *ifma_lladdr; /* link-layer translation, if any */ if_t ifma_ifp; /* back-pointer to interface */ u_int ifma_refcount; /* reference count */ int ifma_flags; void *ifma_protospec; /* protocol-specific state, if any */ struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */ struct epoch_context ifma_epoch_ctx; }; extern struct sx ifnet_sxlock; #define IFNET_WLOCK() sx_xlock(&ifnet_sxlock) #define IFNET_WUNLOCK() sx_xunlock(&ifnet_sxlock) #define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) #define IFNET_WLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_XLOCKED) #define IFNET_RLOCK() sx_slock(&ifnet_sxlock) #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock) /* * Look up an ifnet given its index. The returned value protected from * being freed by the network epoch. The _ref variant also acquires a * reference that must be freed using if_rele(). */ if_t ifnet_byindex(u_int); if_t ifnet_byindex_ref(u_int); /* * ifnet_byindexgen() looks up ifnet by index and generation count, * attempting to restore a weak pointer that had been stored across * the epoch. */ if_t ifnet_byindexgen(uint16_t idx, uint16_t gen); VNET_DECLARE(struct ifnethead, ifnet); VNET_DECLARE(struct ifgrouphead, ifg_head); VNET_DECLARE(if_t, loif); /* first loopback interface */ #define V_ifnet VNET(ifnet) #define V_ifg_head VNET(ifg_head) #define V_loif VNET(loif) #ifdef MCAST_VERBOSE #define MCDPRINTF printf #else #define MCDPRINTF(...) 
#endif int if_addgroup(if_t, const char *); int if_delgroup(if_t, const char *); int if_addmulti(if_t, struct sockaddr *, struct ifmultiaddr **); int if_allmulti(if_t, int); if_t if_alloc(u_char); if_t if_alloc_dev(u_char, device_t dev); void if_attach(if_t); void if_dead(if_t); int if_delmulti(if_t, struct sockaddr *); void if_delmulti_ifma(struct ifmultiaddr *); void if_delmulti_ifma_flags(struct ifmultiaddr *, int flags); void if_detach(if_t); void if_purgeaddrs(if_t); void if_delallmulti(if_t); void if_down(if_t); struct ifmultiaddr * if_findmulti(if_t, const struct sockaddr *); void if_freemulti(struct ifmultiaddr *ifma); void if_free(if_t); void if_initname(if_t, const char *, int); void if_link_state_change(if_t, int); int if_printf(if_t, const char *, ...) __printflike(2, 3); int if_log(if_t, int, const char *, ...) __printflike(3, 4); void if_ref(if_t); void if_rele(if_t); bool __result_use_check if_try_ref(if_t); int if_setlladdr(if_t, const u_char *, int); int if_tunnel_check_nesting(if_t, struct mbuf *, uint32_t, int); void if_up(if_t); int ifioctl(struct socket *, u_long, caddr_t, struct thread *); int ifpromisc(if_t, int); if_t ifunit(const char *); if_t ifunit_ref(const char *); int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *); struct ifaddr *ifa_ifwithaddr(const struct sockaddr *); int ifa_ifwithaddr_check(const struct sockaddr *); struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int); struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, const struct sockaddr *, u_int); struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, if_t); int ifa_preferred(struct ifaddr *, struct ifaddr *); int if_simloop(if_t ifp, struct mbuf *m, int af, int hlen); typedef void 
*if_com_alloc_t(u_char type, if_t ifp); typedef void if_com_free_t(void *com, u_char type); void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f); void if_deregister_com_alloc(u_char type); void if_data_copy(if_t, struct if_data *); uint64_t if_get_counter_default(if_t, ift_counter); void if_inc_counter(if_t, ift_counter, int64_t); uint64_t if_setbaudrate(if_t ifp, uint64_t baudrate); uint64_t if_getbaudrate(const if_t ifp); int if_setcapabilities(if_t ifp, int capabilities); int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit); int if_getcapabilities(const if_t ifp); int if_togglecapenable(if_t ifp, int togglecap); int if_setcapenable(if_t ifp, int capenable); int if_setcapenablebit(if_t ifp, int setcap, int clearcap); int if_getcapenable(const if_t ifp); int if_setcapabilities2(if_t ifp, int capabilities); int if_setcapabilities2bit(if_t ifp, int setbit, int clearbit); int if_getcapabilities2(const if_t ifp); int if_togglecapenable2(if_t ifp, int togglecap); int if_setcapenable2(if_t ifp, int capenable); int if_setcapenable2bit(if_t ifp, int setcap, int clearcap); int if_getcapenable2(const if_t ifp); int if_getdunit(const if_t ifp); int if_getindex(const if_t ifp); int if_getidxgen(const if_t ifp); const char *if_getdname(const if_t ifp); void if_setdname(if_t ifp, const char *name); const char *if_name(if_t ifp); int if_setname(if_t ifp, const char *name); int if_rename(if_t ifp, char *new_name); const char *if_getdescr(if_t ifp); void if_setdescr(if_t ifp, char *descrbuf); char *if_allocdescr(size_t sz, int malloc_flag); void if_freedescr(char *descrbuf); void if_setlastchange(if_t ifp); int if_getalloctype(const if_t ifp); int if_gettype(const if_t ifp); int if_setdev(if_t ifp, void *dev); int if_setdrvflagbits(if_t ifp, int if_setflags, int clear_flags); int if_getdrvflags(const if_t ifp); int if_setdrvflags(if_t ifp, int flags); int if_getlinkstate(if_t ifp); int if_clearhwassist(if_t ifp); int if_sethwassistbits(if_t ifp, 
int toset, int toclear); int if_sethwassist(if_t ifp, int hwassist_bit); int if_gethwassist(const if_t ifp); int if_togglehwassist(if_t ifp, int toggle_bits); int if_setsoftc(if_t ifp, void *softc); void *if_getsoftc(if_t ifp); int if_setflags(if_t ifp, int flags); void if_setllsoftc(if_t ifp, void *softc); void *if_getllsoftc(if_t ifp); u_int if_getfib(if_t ifp); uint8_t if_getaddrlen(if_t ifp); int if_gethwaddr(const if_t ifp, struct ifreq *); const uint8_t *if_getbroadcastaddr(const if_t ifp); void if_setbroadcastaddr(if_t ifp, const uint8_t *); int if_setmtu(if_t ifp, int mtu); int if_getmtu(const if_t ifp); int if_getmtu_family(const if_t ifp, int family); void if_notifymtu(if_t ifp); int if_setflagbits(if_t ifp, int set, int clear); int if_setflags(if_t ifp, int flags); int if_getflags(const if_t ifp); int if_getnumadomain(if_t ifp); int if_sendq_empty(if_t ifp); int if_setsendqready(if_t ifp); int if_setsendqlen(if_t ifp, int tx_desc_count); int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax); int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount); int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize); u_int if_gethwtsomax(const if_t ifp); u_int if_gethwtsomaxsegcount(const if_t ifp); u_int if_gethwtsomaxsegsize(const if_t ifp); void if_setnetmapadapter(if_t ifp, struct netmap_adapter *na); struct netmap_adapter *if_getnetmapadapter(if_t ifp); void if_input(if_t ifp, struct mbuf* sendmp); int if_sendq_prepend(if_t ifp, struct mbuf *m); struct mbuf *if_dequeue(if_t ifp); int if_setifheaderlen(if_t ifp, int len); void if_setrcvif(struct mbuf *m, if_t ifp); void if_setvtag(struct mbuf *m, u_int16_t tag); u_int16_t if_getvtag(struct mbuf *m); int if_vlantrunkinuse(if_t ifp); caddr_t if_getlladdr(const if_t ifp); struct vnet *if_getvnet(const if_t ifp); void *if_gethandle(u_char); void if_vlancap(if_t ifp); int if_transmit(if_t ifp, struct mbuf *m); void if_init(if_t ifp, void *ctx); int if_ioctl(if_t ifp, u_long cmd, void *data); int 
if_resolvemulti(if_t ifp, struct sockaddr **, struct sockaddr *); uint64_t if_getcounter(if_t ifp, ift_counter counter); struct label *if_getmaclabel(if_t ifp); void if_setmaclabel(if_t ifp, struct label *label); struct bpf_if *if_getbpf(if_t ifp); uint8_t if_getpcp(if_t ifp); void *if_getl2com(if_t ifp); struct ifvlantrunk *if_getvlantrunk(if_t ifp); bool if_altq_is_enabled(if_t ifp); void *if_getafdata(if_t ifp, int); int if_snd_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **mstp); /* * Traversing through interface address lists. */ struct sockaddr_dl; typedef u_int iflladdr_cb_t(void *, struct sockaddr_dl *, u_int); u_int if_foreach_lladdr(if_t, iflladdr_cb_t, void *); u_int if_foreach_llmaddr(if_t, iflladdr_cb_t, void *); u_int if_lladdr_count(if_t); u_int if_llmaddr_count(if_t); bool if_maddr_empty(if_t); int if_getamcount(const if_t ifp); struct ifaddr * if_getifaddr(const if_t ifp); typedef u_int if_addr_cb_t(void *, struct ifaddr *, u_int); u_int if_foreach_addr_type(if_t ifp, int type, if_addr_cb_t cb, void *cb_arg); typedef int (*if_foreach_cb_t)(if_t, void *); typedef bool (*if_foreach_match_t)(if_t, void *); int if_foreach(if_foreach_cb_t, void *); int if_foreach_sleep(if_foreach_match_t, void *, if_foreach_cb_t, void *); /* Opaque iterator structure for iterating over interfaces. 
*/ struct if_iter { void *context[4]; }; if_t if_iter_start(struct if_iter *); if_t if_iter_next(struct if_iter *); void if_iter_finish(struct if_iter *); struct ifa_iter { void *context[4]; }; struct ifaddr *ifa_iter_start(if_t ifp, struct ifa_iter *iter); struct ifaddr *ifa_iter_next(struct ifa_iter *iter); void ifa_iter_finish(struct ifa_iter *iter); /* Functions */ void if_setinitfn(if_t ifp, if_init_fn_t); void if_setinputfn(if_t ifp, if_input_fn_t); if_input_fn_t if_getinputfn(if_t ifp); void if_setioctlfn(if_t ifp, if_ioctl_fn_t); void if_setoutputfn(if_t ifp, if_output_fn_t); void if_setstartfn(if_t ifp, if_start_fn_t); if_start_fn_t if_getstartfn(if_t ifp); void if_settransmitfn(if_t ifp, if_transmit_fn_t); if_transmit_fn_t if_gettransmitfn(if_t ifp); void if_setqflushfn(if_t ifp, if_qflush_fn_t); void if_setgetcounterfn(if_t ifp, if_get_counter_t); void if_setsndtagallocfn(if_t ifp, if_snd_tag_alloc_t); void if_setdebugnet_methods(if_t, struct debugnet_methods *); void if_setreassignfn(if_t ifp, if_reassign_fn_t); void if_setratelimitqueryfn(if_t ifp, if_ratelimit_query_t); /* * NB: The interface is not yet stable, drivers implementing IPSEC * offload need to be prepared to adapt to changes. 
*/ struct if_ipsec_accel_methods { if_spdadd_fn_t if_spdadd; if_spddel_fn_t if_spddel; if_sa_newkey_fn_t if_sa_newkey; if_sa_deinstall_fn_t if_sa_deinstall; if_sa_cnt_fn_t if_sa_cnt; + if_ipsec_hwassist_fn_t if_hwassist; }; void if_setipsec_accel_methods(if_t ifp, const struct if_ipsec_accel_methods *); /* TSO */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *); int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *); /* accessors for struct ifreq */ void *ifr_data_get_ptr(void *ifrp); void *ifr_buffer_get_buffer(void *data); size_t ifr_buffer_get_length(void *data); int ifhwioctl(u_long, if_t, caddr_t, struct thread *); #ifdef DEVICE_POLLING enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS }; typedef int poll_handler_t(if_t ifp, enum poll_cmd cmd, int count); int ether_poll_register(poll_handler_t *h, if_t ifp); int ether_poll_deregister(if_t ifp); #endif /* DEVICE_POLLING */ #endif /* _KERNEL */ #include /* XXX: temporary until drivers converted. */ #include /* XXXAO: temporary unconditional include */ #endif /* !_NET_IF_VAR_H_ */ diff --git a/sys/netipsec/ipsec_offload.c b/sys/netipsec/ipsec_offload.c index 851bacaf4ea1..7f63a5e0ccb6 100644 --- a/sys/netipsec/ipsec_offload.c +++ b/sys/netipsec/ipsec_offload.c @@ -1,1061 +1,1064 @@ /*- * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC_OFFLOAD static struct mtx ipsec_accel_sav_tmp; static struct unrhdr *drv_spi_unr; static struct mtx ipsec_accel_cnt_lock; struct ipsec_accel_install_newkey_tq { struct secasvar *sav; struct vnet *install_vnet; struct task install_task; }; struct ipsec_accel_forget_tq { struct vnet *forget_vnet; struct task forget_task; struct secasvar *sav; }; struct ifp_handle_sav { CK_LIST_ENTRY(ifp_handle_sav) sav_link; CK_LIST_ENTRY(ifp_handle_sav) sav_allh_link; struct secasvar *sav; struct ifnet *ifp; void *ifdata; uint64_t drv_spi; uint32_t flags; size_t hdr_ext_size; uint64_t cnt_octets; uint64_t cnt_allocs; }; #define IFP_HS_HANDLED 0x00000001 #define IFP_HS_REJECTED 0x00000002 #define IFP_HS_INPUT 0x00000004 #define IFP_HS_OUTPUT 0x00000008 #define IFP_HS_MARKER 0x00000010 static CK_LIST_HEAD(, ifp_handle_sav) ipsec_accel_all_sav_handles; struct ifp_handle_sp { CK_LIST_ENTRY(ifp_handle_sp) 
sp_link; CK_LIST_ENTRY(ifp_handle_sp) sp_allh_link; struct secpolicy *sp; struct ifnet *ifp; void *ifdata; uint32_t flags; }; #define IFP_HP_HANDLED 0x00000001 #define IFP_HP_REJECTED 0x00000002 #define IFP_HP_MARKER 0x00000004 static CK_LIST_HEAD(, ifp_handle_sp) ipsec_accel_all_sp_handles; static void * drvspi_sa_trie_alloc(struct pctrie *ptree) { void *res; res = malloc(pctrie_node_size(), M_IPSEC_MISC, M_ZERO | M_NOWAIT); if (res != NULL) pctrie_zone_init(res, 0, 0); return (res); } static void drvspi_sa_trie_free(struct pctrie *ptree, void *node) { free(node, M_IPSEC_MISC); } PCTRIE_DEFINE(DRVSPI_SA, ifp_handle_sav, drv_spi, drvspi_sa_trie_alloc, drvspi_sa_trie_free); static struct pctrie drv_spi_pctrie; static void ipsec_accel_sa_newkey_impl(struct secasvar *sav); static int ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires); static void ipsec_accel_forget_sav_clear(struct secasvar *sav); static struct ifp_handle_sav *ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, struct ifnet *ifp); static int ipsec_accel_sa_lifetime_op_impl(struct secasvar *sav, struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, struct rm_priotracker *sahtree_trackerp); static void ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m); static void ipsec_accel_sync_imp(void); static bool ipsec_accel_is_accel_sav_impl(struct secasvar *sav); static struct mbuf *ipsec_accel_key_setaccelif_impl(struct secasvar *sav); static void ipsec_accel_init(void *arg) { mtx_init(&ipsec_accel_sav_tmp, "ipasat", MTX_DEF, 0); mtx_init(&ipsec_accel_cnt_lock, "ipascn", MTX_DEF, 0); drv_spi_unr = new_unrhdr(IPSEC_ACCEL_DRV_SPI_MIN, IPSEC_ACCEL_DRV_SPI_MAX, &ipsec_accel_sav_tmp); ipsec_accel_sa_newkey_p = ipsec_accel_sa_newkey_impl; ipsec_accel_forget_sav_p = ipsec_accel_forget_sav_impl; ipsec_accel_spdadd_p = ipsec_accel_spdadd_impl; ipsec_accel_spddel_p = ipsec_accel_spddel_impl; 
ipsec_accel_sa_lifetime_op_p = ipsec_accel_sa_lifetime_op_impl; ipsec_accel_sync_p = ipsec_accel_sync_imp; ipsec_accel_is_accel_sav_p = ipsec_accel_is_accel_sav_impl; ipsec_accel_key_setaccelif_p = ipsec_accel_key_setaccelif_impl; pctrie_init(&drv_spi_pctrie); } SYSINIT(ipsec_accel_init, SI_SUB_VNET_DONE, SI_ORDER_ANY, ipsec_accel_init, NULL); static void ipsec_accel_fini(void *arg) { ipsec_accel_sa_newkey_p = NULL; ipsec_accel_forget_sav_p = NULL; ipsec_accel_spdadd_p = NULL; ipsec_accel_spddel_p = NULL; ipsec_accel_sa_lifetime_op_p = NULL; ipsec_accel_sync_p = NULL; ipsec_accel_is_accel_sav_p = NULL; ipsec_accel_key_setaccelif_p = NULL; ipsec_accel_sync_imp(); clean_unrhdr(drv_spi_unr); /* avoid panic, should go later */ clear_unrhdr(drv_spi_unr); delete_unrhdr(drv_spi_unr); mtx_destroy(&ipsec_accel_sav_tmp); mtx_destroy(&ipsec_accel_cnt_lock); } SYSUNINIT(ipsec_accel_fini, SI_SUB_VNET_DONE, SI_ORDER_ANY, ipsec_accel_fini, NULL); static void ipsec_accel_alloc_forget_tq(struct secasvar *sav) { void *ftq; if (sav->accel_forget_tq != 0) return; ftq = malloc(sizeof(struct ipsec_accel_forget_tq), M_TEMP, M_WAITOK); if (!atomic_cmpset_ptr(&sav->accel_forget_tq, 0, (uintptr_t)ftq)) free(ftq, M_TEMP); } static bool ipsec_accel_sa_install_match(if_t ifp, void *arg) { if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0) return (false); if (ifp->if_ipsec_accel_m->if_sa_newkey == NULL) { printf("driver bug ifp %s if_sa_newkey NULL\n", if_name(ifp)); return (false); } return (true); } static int ipsec_accel_sa_newkey_cb(if_t ifp, void *arg) { struct ipsec_accel_install_newkey_tq *tq; void *priv; u_int drv_spi; int error; tq = arg; printf("ipsec_accel_sa_newkey_act: ifp %s h %p spi %#x " "flags %#x seq %d\n", if_name(ifp), ifp->if_ipsec_accel_m->if_sa_newkey, be32toh(tq->sav->spi), tq->sav->flags, tq->sav->seq); priv = NULL; drv_spi = alloc_unr(drv_spi_unr); if (tq->sav->accel_ifname != NULL && strcmp(tq->sav->accel_ifname, if_name(ifp)) != 0) { error = 
ipsec_accel_handle_sav(tq->sav, ifp, drv_spi, priv, IFP_HS_REJECTED, NULL); goto out; } if (drv_spi == -1) { /* XXXKIB */ printf("ipsec_accel_sa_install_newkey: cannot alloc " "drv_spi if %s spi %#x\n", if_name(ifp), be32toh(tq->sav->spi)); return (ENOMEM); } error = ifp->if_ipsec_accel_m->if_sa_newkey(ifp, tq->sav, drv_spi, &priv); if (error != 0) { if (error == EOPNOTSUPP) { printf("ipsec_accel_sa_newkey: driver " "refused sa if %s spi %#x\n", if_name(ifp), be32toh(tq->sav->spi)); error = ipsec_accel_handle_sav(tq->sav, ifp, drv_spi, priv, IFP_HS_REJECTED, NULL); /* XXXKIB */ } else { printf("ipsec_accel_sa_newkey: driver " "error %d if %s spi %#x\n", error, if_name(ifp), be32toh(tq->sav->spi)); /* XXXKIB */ } } else { error = ipsec_accel_handle_sav(tq->sav, ifp, drv_spi, priv, IFP_HS_HANDLED, NULL); if (error != 0) { /* XXXKIB */ printf("ipsec_accel_sa_newkey: handle_sav " "err %d if %s spi %#x\n", error, if_name(ifp), be32toh(tq->sav->spi)); } } out: return (error); } static void ipsec_accel_sa_newkey_act(void *context, int pending) { struct ipsec_accel_install_newkey_tq *tq; void *tqf; struct secasvar *sav; tq = context; tqf = NULL; sav = tq->sav; CURVNET_SET(tq->install_vnet); mtx_lock(&ipsec_accel_sav_tmp); if ((sav->accel_flags & (SADB_KEY_ACCEL_INST | SADB_KEY_ACCEL_DEINST)) == 0 && sav->state == SADB_SASTATE_MATURE) { sav->accel_flags |= SADB_KEY_ACCEL_INST; mtx_unlock(&ipsec_accel_sav_tmp); if_foreach_sleep(ipsec_accel_sa_install_match, context, ipsec_accel_sa_newkey_cb, context); ipsec_accel_alloc_forget_tq(sav); mtx_lock(&ipsec_accel_sav_tmp); /* * If ipsec_accel_forget_sav() raced with us and set * the flag, do its work. Its task cannot execute in * parallel since taskqueue_thread is single-threaded. 
*/ if ((sav->accel_flags & SADB_KEY_ACCEL_DEINST) != 0) { tqf = (void *)sav->accel_forget_tq; sav->accel_forget_tq = 0; ipsec_accel_forget_sav_clear(sav); } } mtx_unlock(&ipsec_accel_sav_tmp); key_freesav(&tq->sav); CURVNET_RESTORE(); free(tq, M_TEMP); free(tqf, M_TEMP); } static void ipsec_accel_sa_newkey_impl(struct secasvar *sav) { struct ipsec_accel_install_newkey_tq *tq; if ((sav->accel_flags & (SADB_KEY_ACCEL_INST | SADB_KEY_ACCEL_DEINST)) != 0) return; printf( "ipsec_accel_sa_install_newkey: spi %#x flags %#x seq %d\n", be32toh(sav->spi), sav->flags, sav->seq); tq = malloc(sizeof(*tq), M_TEMP, M_NOWAIT); if (tq == NULL) { printf("ipsec_accel_sa_install_newkey: no memory for tq, " "spi %#x\n", be32toh(sav->spi)); /* XXXKIB */ return; } refcount_acquire(&sav->refcnt); TASK_INIT(&tq->install_task, 0, ipsec_accel_sa_newkey_act, tq); tq->sav = sav; tq->install_vnet = curthread->td_vnet; /* XXXKIB liveness */ taskqueue_enqueue(taskqueue_thread, &tq->install_task); } static int ipsec_accel_handle_sav(struct secasvar *sav, struct ifnet *ifp, u_int drv_spi, void *priv, uint32_t flags, struct ifp_handle_sav **ires) { struct ifp_handle_sav *ihs, *i; int error; MPASS(__bitcount(flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == 1); ihs = malloc(sizeof(*ihs), M_IPSEC_MISC, M_WAITOK | M_ZERO); ihs->ifp = ifp; ihs->sav = sav; ihs->drv_spi = drv_spi; ihs->ifdata = priv; ihs->flags = flags; if ((flags & IFP_HS_OUTPUT) != 0) ihs->hdr_ext_size = esp_hdrsiz(sav); mtx_lock(&ipsec_accel_sav_tmp); CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { if (i->ifp == ifp) { error = EALREADY; goto errout; } } error = DRVSPI_SA_PCTRIE_INSERT(&drv_spi_pctrie, ihs); if (error != 0) goto errout; if_ref(ihs->ifp); CK_LIST_INSERT_HEAD(&sav->accel_ifps, ihs, sav_link); CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, ihs, sav_allh_link); mtx_unlock(&ipsec_accel_sav_tmp); if (ires != NULL) *ires = ihs; return (0); errout: mtx_unlock(&ipsec_accel_sav_tmp); free(ihs, M_IPSEC_MISC); if (ires != NULL) 
*ires = NULL; return (error); } static void ipsec_accel_forget_handle_sav(struct ifp_handle_sav *i, bool freesav) { struct ifnet *ifp; struct secasvar *sav; mtx_assert(&ipsec_accel_sav_tmp, MA_OWNED); CK_LIST_REMOVE(i, sav_link); CK_LIST_REMOVE(i, sav_allh_link); DRVSPI_SA_PCTRIE_REMOVE(&drv_spi_pctrie, i->drv_spi); mtx_unlock(&ipsec_accel_sav_tmp); NET_EPOCH_WAIT(); ifp = i->ifp; sav = i->sav; if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == IFP_HS_HANDLED) { printf("sa deinstall %s %p spi %#x ifl %#x\n", if_name(ifp), sav, be32toh(sav->spi), i->flags); ifp->if_ipsec_accel_m->if_sa_deinstall(ifp, i->drv_spi, i->ifdata); } if_rele(ifp); free_unr(drv_spi_unr, i->drv_spi); free(i, M_IPSEC_MISC); if (freesav) key_freesav(&sav); mtx_lock(&ipsec_accel_sav_tmp); } static void ipsec_accel_forget_sav_clear(struct secasvar *sav) { struct ifp_handle_sav *i; for (;;) { i = CK_LIST_FIRST(&sav->accel_ifps); if (i == NULL) break; ipsec_accel_forget_handle_sav(i, false); } } static void ipsec_accel_forget_sav_act(void *arg, int pending) { struct ipsec_accel_forget_tq *tq; struct secasvar *sav; tq = arg; sav = tq->sav; CURVNET_SET(tq->forget_vnet); mtx_lock(&ipsec_accel_sav_tmp); ipsec_accel_forget_sav_clear(sav); mtx_unlock(&ipsec_accel_sav_tmp); key_freesav(&sav); CURVNET_RESTORE(); free(tq, M_TEMP); } void ipsec_accel_forget_sav_impl(struct secasvar *sav) { struct ipsec_accel_forget_tq *tq; mtx_lock(&ipsec_accel_sav_tmp); sav->accel_flags |= SADB_KEY_ACCEL_DEINST; tq = (void *)atomic_load_ptr(&sav->accel_forget_tq); if (tq == NULL || !atomic_cmpset_ptr(&sav->accel_forget_tq, (uintptr_t)tq, 0)) { mtx_unlock(&ipsec_accel_sav_tmp); return; } mtx_unlock(&ipsec_accel_sav_tmp); refcount_acquire(&sav->refcnt); TASK_INIT(&tq->forget_task, 0, ipsec_accel_forget_sav_act, tq); tq->forget_vnet = curthread->td_vnet; tq->sav = sav; taskqueue_enqueue(taskqueue_thread, &tq->forget_task); } static void ipsec_accel_on_ifdown_sav(struct ifnet *ifp) { struct ifp_handle_sav *i, *marker; 
marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO); marker->flags = IFP_HS_MARKER; mtx_lock(&ipsec_accel_sav_tmp); CK_LIST_INSERT_HEAD(&ipsec_accel_all_sav_handles, marker, sav_allh_link); for (;;) { i = CK_LIST_NEXT(marker, sav_allh_link); if (i == NULL) break; CK_LIST_REMOVE(marker, sav_allh_link); CK_LIST_INSERT_AFTER(i, marker, sav_allh_link); if (i->ifp == ifp) { refcount_acquire(&i->sav->refcnt); /* XXXKIB wrap ? */ ipsec_accel_forget_handle_sav(i, true); } } CK_LIST_REMOVE(marker, sav_allh_link); mtx_unlock(&ipsec_accel_sav_tmp); free(marker, M_IPSEC_MISC); } static struct ifp_handle_sav * ipsec_accel_is_accel_sav_ptr_raw(struct secasvar *sav, struct ifnet *ifp) { struct ifp_handle_sav *i; if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0) return (NULL); CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { if (i->ifp == ifp) return (i); } return (NULL); } static struct ifp_handle_sav * ipsec_accel_is_accel_sav_ptr(struct secasvar *sav, struct ifnet *ifp) { NET_EPOCH_ASSERT(); return (ipsec_accel_is_accel_sav_ptr_raw(sav, ifp)); } static bool ipsec_accel_is_accel_sav_impl(struct secasvar *sav) { return (!CK_LIST_EMPTY(&sav->accel_ifps)); } static struct secasvar * ipsec_accel_drvspi_to_sa(u_int drv_spi) { struct ifp_handle_sav *i; i = DRVSPI_SA_PCTRIE_LOOKUP(&drv_spi_pctrie, drv_spi); if (i == NULL) return (NULL); return (i->sav); } static struct ifp_handle_sp * ipsec_accel_find_accel_sp(struct secpolicy *sp, if_t ifp) { struct ifp_handle_sp *i; CK_LIST_FOREACH(i, &sp->accel_ifps, sp_link) { if (i->ifp == ifp) return (i); } return (NULL); } static bool ipsec_accel_is_accel_sp(struct secpolicy *sp, if_t ifp) { return (ipsec_accel_find_accel_sp(sp, ifp) != NULL); } static int ipsec_accel_remember_sp(struct secpolicy *sp, if_t ifp, struct ifp_handle_sp **ip) { struct ifp_handle_sp *i; i = malloc(sizeof(*i), M_IPSEC_MISC, M_WAITOK | M_ZERO); i->sp = sp; i->ifp = ifp; if_ref(ifp); i->flags = IFP_HP_HANDLED; mtx_lock(&ipsec_accel_sav_tmp); 
CK_LIST_INSERT_HEAD(&sp->accel_ifps, i, sp_link); CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, i, sp_allh_link); mtx_unlock(&ipsec_accel_sav_tmp); *ip = i; return (0); } static bool ipsec_accel_spdadd_match(if_t ifp, void *arg) { struct secpolicy *sp; if ((ifp->if_capenable2 & IFCAP2_BIT(IFCAP2_IPSEC_OFFLOAD)) == 0 || ifp->if_ipsec_accel_m->if_spdadd == NULL) return (false); sp = arg; if (sp->accel_ifname != NULL && strcmp(sp->accel_ifname, if_name(ifp)) != 0) return (false); if (ipsec_accel_is_accel_sp(sp, ifp)) return (false); return (true); } static int ipsec_accel_spdadd_cb(if_t ifp, void *arg) { struct secpolicy *sp; struct inpcb *inp; struct ifp_handle_sp *i; int error; sp = arg; inp = sp->ipsec_accel_add_sp_inp; printf("ipsec_accel_spdadd_cb: ifp %s m %p sp %p inp %p\n", if_name(ifp), ifp->if_ipsec_accel_m->if_spdadd, sp, inp); error = ipsec_accel_remember_sp(sp, ifp, &i); if (error != 0) { printf("ipsec_accel_spdadd: %s if_spdadd %p remember res %d\n", if_name(ifp), sp, error); return (error); } error = ifp->if_ipsec_accel_m->if_spdadd(ifp, sp, inp, &i->ifdata); if (error != 0) { i->flags |= IFP_HP_REJECTED; printf("ipsec_accel_spdadd: %s if_spdadd %p res %d\n", if_name(ifp), sp, error); } return (error); } static void ipsec_accel_spdadd_act(void *arg, int pending) { struct secpolicy *sp; struct inpcb *inp; sp = arg; CURVNET_SET(sp->accel_add_tq.adddel_vnet); if_foreach_sleep(ipsec_accel_spdadd_match, arg, ipsec_accel_spdadd_cb, arg); inp = sp->ipsec_accel_add_sp_inp; if (inp != NULL) { INP_WLOCK(inp); if (!in_pcbrele_wlocked(inp)) INP_WUNLOCK(inp); sp->ipsec_accel_add_sp_inp = NULL; } CURVNET_RESTORE(); key_freesp(&sp); } void ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp) { struct ipsec_accel_adddel_sp_tq *tq; if (sp == NULL) return; if (sp->tcount == 0 && inp == NULL) return; tq = &sp->accel_add_tq; if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0) return; tq->adddel_vnet = curthread->td_vnet; sp->ipsec_accel_add_sp_inp = 
inp; if (inp != NULL) in_pcbref(inp); TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spdadd_act, sp); key_addref(sp); taskqueue_enqueue(taskqueue_thread, &tq->adddel_task); } static void ipsec_accel_spddel_act(void *arg, int pending) { struct ifp_handle_sp *i; struct secpolicy *sp; int error; sp = arg; CURVNET_SET(sp->accel_del_tq.adddel_vnet); mtx_lock(&ipsec_accel_sav_tmp); for (;;) { i = CK_LIST_FIRST(&sp->accel_ifps); if (i == NULL) break; CK_LIST_REMOVE(i, sp_link); CK_LIST_REMOVE(i, sp_allh_link); mtx_unlock(&ipsec_accel_sav_tmp); NET_EPOCH_WAIT(); if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) == IFP_HP_HANDLED) { printf("spd deinstall %s %p\n", if_name(i->ifp), sp); error = i->ifp->if_ipsec_accel_m->if_spddel(i->ifp, sp, i->ifdata); if (error != 0) { printf( "ipsec_accel_spddel: %s if_spddel %p res %d\n", if_name(i->ifp), sp, error); } } if_rele(i->ifp); free(i, M_IPSEC_MISC); mtx_lock(&ipsec_accel_sav_tmp); } mtx_unlock(&ipsec_accel_sav_tmp); key_freesp(&sp); CURVNET_RESTORE(); } void ipsec_accel_spddel_impl(struct secpolicy *sp) { struct ipsec_accel_adddel_sp_tq *tq; if (sp == NULL) return; tq = &sp->accel_del_tq; if (atomic_cmpset_int(&tq->adddel_scheduled, 0, 1) == 0) return; tq->adddel_vnet = curthread->td_vnet; TASK_INIT(&tq->adddel_task, 0, ipsec_accel_spddel_act, sp); key_addref(sp); taskqueue_enqueue(taskqueue_thread, &tq->adddel_task); } static void ipsec_accel_on_ifdown_sp(struct ifnet *ifp) { struct ifp_handle_sp *i, *marker; struct secpolicy *sp; int error; marker = malloc(sizeof(*marker), M_IPSEC_MISC, M_WAITOK | M_ZERO); marker->flags = IFP_HS_MARKER; mtx_lock(&ipsec_accel_sav_tmp); CK_LIST_INSERT_HEAD(&ipsec_accel_all_sp_handles, marker, sp_allh_link); for (;;) { i = CK_LIST_NEXT(marker, sp_allh_link); if (i == NULL) break; CK_LIST_REMOVE(marker, sp_allh_link); CK_LIST_INSERT_AFTER(i, marker, sp_allh_link); if (i->ifp != ifp) continue; sp = i->sp; key_addref(sp); CK_LIST_REMOVE(i, sp_link); CK_LIST_REMOVE(i, sp_allh_link); 
mtx_unlock(&ipsec_accel_sav_tmp); NET_EPOCH_WAIT(); if ((i->flags & (IFP_HP_HANDLED | IFP_HP_REJECTED)) == IFP_HP_HANDLED) { printf("spd deinstall %s %p\n", if_name(ifp), sp); error = ifp->if_ipsec_accel_m->if_spddel(ifp, sp, i->ifdata); } if (error != 0) { printf( "ipsec_accel_on_ifdown_sp: %s if_spddel %p res %d\n", if_name(ifp), sp, error); } key_freesp(&sp); if_rele(ifp); free(i, M_IPSEC_MISC); mtx_lock(&ipsec_accel_sav_tmp); } CK_LIST_REMOVE(marker, sp_allh_link); mtx_unlock(&ipsec_accel_sav_tmp); free(marker, M_IPSEC_MISC); } void ipsec_accel_on_ifdown(struct ifnet *ifp) { ipsec_accel_on_ifdown_sp(ifp); ipsec_accel_on_ifdown_sav(ifp); } static bool ipsec_accel_output_pad(struct mbuf *m, struct secasvar *sav, int skip, int mtu) { int alen, blks, hlen, padding, rlen; rlen = m->m_pkthdr.len - skip; hlen = ((sav->flags & SADB_X_EXT_OLD) != 0 ? sizeof(struct esp) : sizeof(struct newesp)) + sav->ivlen; blks = MAX(4, SAV_ISCTR(sav) && VNET(esp_ctr_compatibility) ? sav->tdb_encalgxform->native_blocksize : sav->tdb_encalgxform->blocksize); padding = ((blks - ((rlen + 2) % blks)) % blks) + 2; alen = xform_ah_authsize(sav->tdb_authalgxform); return (skip + hlen + rlen + padding + alen <= mtu); } static bool ipsec_accel_output_tag(struct mbuf *m, u_int drv_spi) { struct ipsec_accel_out_tag *tag; tag = (struct ipsec_accel_out_tag *)m_tag_get( PACKET_TAG_IPSEC_ACCEL_OUT, sizeof(*tag), M_NOWAIT); if (tag == NULL) return (false); tag->drv_spi = drv_spi; m_tag_prepend(m, &tag->tag); return (true); } bool ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, - struct secpolicy *sp, struct secasvar *sav, int af, int mtu) + struct secpolicy *sp, struct secasvar *sav, int af, int mtu, int *hwassist) { struct ifp_handle_sav *i; struct ip *ip; u_long ip_len, skip; + *hwassist = 0; if (ifp == NULL) return (false); M_ASSERTPKTHDR(m); NET_EPOCH_ASSERT(); if (sav == NULL) return (ipsec_accel_output_tag(m, IPSEC_ACCEL_DRV_SPI_BYPASS)); i = 
ipsec_accel_is_accel_sav_ptr(sav, ifp); if (i == NULL) return (false); if ((m->m_pkthdr.csum_flags & CSUM_TSO) == 0) { ip_len = m->m_pkthdr.len; if (ip_len + i->hdr_ext_size > mtu) return (false); switch (af) { case AF_INET: ip = mtod(m, struct ip *); skip = ip->ip_hl << 2; break; case AF_INET6: skip = sizeof(struct ip6_hdr); break; default: __unreachable(); } if (!ipsec_accel_output_pad(m, sav, skip, mtu)) return (false); } if (!ipsec_accel_output_tag(m, i->drv_spi)) return (false); ipsec_accel_sa_recordxfer(sav, m); key_freesav(&sav); if (sp != NULL) key_freesp(&sp); + *hwassist = ifp->if_ipsec_accel_m->if_hwassist(ifp, sav, + i->drv_spi, i->ifdata); return (true); } struct ipsec_accel_in_tag * ipsec_accel_input_tag_lookup(const struct mbuf *m) { struct ipsec_accel_in_tag *tag; struct m_tag *xtag; xtag = m_tag_find(__DECONST(struct mbuf *, m), PACKET_TAG_IPSEC_ACCEL_IN, NULL); if (xtag == NULL) return (NULL); tag = __containerof(xtag, struct ipsec_accel_in_tag, tag); return (tag); } int ipsec_accel_input(struct mbuf *m, int offset, int proto) { struct secasvar *sav; struct ipsec_accel_in_tag *tag; tag = ipsec_accel_input_tag_lookup(m); if (tag == NULL) return (ENXIO); if (tag->drv_spi < IPSEC_ACCEL_DRV_SPI_MIN || tag->drv_spi > IPSEC_ACCEL_DRV_SPI_MAX) { printf("if %s mbuf %p drv_spi %d invalid, packet dropped\n", (m->m_flags & M_PKTHDR) != 0 ? 
if_name(m->m_pkthdr.rcvif) : "", m, tag->drv_spi); m_freem(m); return (EINPROGRESS); } sav = ipsec_accel_drvspi_to_sa(tag->drv_spi); if (sav != NULL) ipsec_accel_sa_recordxfer(sav, m); return (0); } static void ipsec_accel_sa_recordxfer(struct secasvar *sav, struct mbuf *m) { counter_u64_add(sav->accel_lft_sw, 1); counter_u64_add(sav->accel_lft_sw + 1, m->m_pkthdr.len); if (sav->accel_firstused == 0) sav->accel_firstused = time_second; } static void ipsec_accel_sa_lifetime_update(struct seclifetime *lft_c, const struct seclifetime *lft_l) { lft_c->allocations += lft_l->allocations; lft_c->bytes += lft_l->bytes; lft_c->usetime = min(lft_c->usetime, lft_l->usetime); } void ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, u_int drv_spi, uint64_t octets, uint64_t allocs) { struct epoch_tracker et; struct ifp_handle_sav *i; uint64_t odiff, adiff; NET_EPOCH_ENTER(et); mtx_lock(&ipsec_accel_cnt_lock); if (allocs != 0) { if (sav->firstused == 0) sav->firstused = time_second; if (sav->accel_firstused == 0) sav->accel_firstused = time_second; } CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { if (i->ifp == ifp && i->drv_spi == drv_spi) break; } if (i == NULL) goto out; odiff = octets - i->cnt_octets; adiff = allocs - i->cnt_allocs; if (sav->lft_c != NULL) { counter_u64_add(sav->lft_c_bytes, odiff); counter_u64_add(sav->lft_c_allocations, adiff); } i->cnt_octets = octets; i->cnt_allocs = allocs; sav->accel_hw_octets += odiff; sav->accel_hw_allocs += adiff; out: mtx_unlock(&ipsec_accel_cnt_lock); NET_EPOCH_EXIT(et); } static void ipsec_accel_sa_lifetime_hw(struct secasvar *sav, if_t ifp, struct seclifetime *lft) { struct ifp_handle_sav *i; if_sa_cnt_fn_t p; IFNET_RLOCK_ASSERT(); i = ipsec_accel_is_accel_sav_ptr(sav, ifp); if (i != NULL && (i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == IFP_HS_HANDLED) { p = ifp->if_ipsec_accel_m->if_sa_cnt; if (p != NULL) p(ifp, sav, i->drv_spi, i->ifdata, lft); } } static int ipsec_accel_sa_lifetime_op_impl(struct 
secasvar *sav, struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, struct rm_priotracker *sahtree_trackerp) { struct seclifetime lft_l, lft_s; struct ifp_handle_sav *i; if_t ifp1; if_sa_cnt_fn_t p; int error; error = 0; memset(&lft_l, 0, sizeof(lft_l)); memset(&lft_s, 0, sizeof(lft_s)); switch (op & ~IF_SA_CNT_UPD) { case IF_SA_CNT_IFP_HW_VAL: ipsec_accel_sa_lifetime_hw(sav, ifp, &lft_l); ipsec_accel_sa_lifetime_update(&lft_l, &lft_s); break; case IF_SA_CNT_TOTAL_SW_VAL: lft_l.allocations = (uint32_t)counter_u64_fetch( sav->accel_lft_sw); lft_l.bytes = counter_u64_fetch(sav->accel_lft_sw + 1); lft_l.usetime = sav->accel_firstused; break; case IF_SA_CNT_TOTAL_HW_VAL: IFNET_RLOCK_ASSERT(); CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) != IFP_HS_HANDLED) continue; ifp1 = i->ifp; p = ifp1->if_ipsec_accel_m->if_sa_cnt; if (p == NULL) continue; memset(&lft_s, 0, sizeof(lft_s)); if (sahtree_trackerp != NULL) ipsec_sahtree_runlock(sahtree_trackerp); error = p(ifp1, sav, i->drv_spi, i->ifdata, &lft_s); if (sahtree_trackerp != NULL) ipsec_sahtree_rlock(sahtree_trackerp); if (error == 0) ipsec_accel_sa_lifetime_update(&lft_l, &lft_s); } break; } if (error == 0) { if ((op & IF_SA_CNT_UPD) == 0) memset(lft_c, 0, sizeof(*lft_c)); ipsec_accel_sa_lifetime_update(lft_c, &lft_l); } return (error); } static void ipsec_accel_sync_imp(void) { taskqueue_drain_all(taskqueue_thread); } static struct mbuf * ipsec_accel_key_setaccelif_impl(struct secasvar *sav) { struct mbuf *m, *m1; struct ifp_handle_sav *i; struct epoch_tracker et; if (sav->accel_ifname != NULL) return (key_setaccelif(sav->accel_ifname)); m = m1 = NULL; NET_EPOCH_ENTER(et); CK_LIST_FOREACH(i, &sav->accel_ifps, sav_link) { if ((i->flags & (IFP_HS_HANDLED | IFP_HS_REJECTED)) == IFP_HS_HANDLED) { m1 = key_setaccelif(if_name(i->ifp)); if (m == NULL) m = m1; else if (m1 != NULL) m_cat(m, m1); } } NET_EPOCH_EXIT(et); return (m); } #endif /* IPSEC_OFFLOAD */ diff 
--git a/sys/netipsec/ipsec_offload.h b/sys/netipsec/ipsec_offload.h index 87e2a33288be..27b9c938832e 100644 --- a/sys/netipsec/ipsec_offload.h +++ b/sys/netipsec/ipsec_offload.h @@ -1,191 +1,194 @@ /*- * Copyright (c) 2021,2022 NVIDIA CORPORATION & AFFILIATES. ALL RIGHTS RESERVED. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _NETIPSEC_IPSEC_OFFLOAD_H_ #define _NETIPSEC_IPSEC_OFFLOAD_H_ #ifdef _KERNEL #include #include #include struct secpolicy; struct secasvar; struct inpcb; struct ipsec_accel_out_tag { struct m_tag tag; uint16_t drv_spi; }; struct ipsec_accel_in_tag { struct m_tag tag; uint16_t drv_spi; }; #define IPSEC_ACCEL_DRV_SPI_BYPASS 2 #define IPSEC_ACCEL_DRV_SPI_MIN 3 #define IPSEC_ACCEL_DRV_SPI_MAX 0xffff extern void (*ipsec_accel_sa_newkey_p)(struct secasvar *sav); extern void (*ipsec_accel_sa_install_input_p)(struct secasvar *sav, const union sockaddr_union *dst_address, int sproto, uint32_t spi); extern void (*ipsec_accel_forget_sav_p)(struct secasvar *sav); extern void (*ipsec_accel_spdadd_p)(struct secpolicy *sp, struct inpcb *inp); extern void (*ipsec_accel_spddel_p)(struct secpolicy *sp); extern int (*ipsec_accel_sa_lifetime_op_p)(struct secasvar *sav, struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, struct rm_priotracker *sahtree_trackerp); extern void (*ipsec_accel_sync_p)(void); extern bool (*ipsec_accel_is_accel_sav_p)(struct secasvar *sav); extern struct mbuf *(*ipsec_accel_key_setaccelif_p)(struct secasvar *sav); #ifdef IPSEC_OFFLOAD /* * Have to use ipsec_accel_sa_install_input_p indirection because * key.c is unconditionally included into the static kernel. 
*/ static inline void ipsec_accel_sa_newkey(struct secasvar *sav) { void (*p)(struct secasvar *sav); p = atomic_load_ptr(&ipsec_accel_sa_newkey_p); if (p != NULL) p(sav); } static inline void ipsec_accel_forget_sav(struct secasvar *sav) { void (*p)(struct secasvar *sav); p = atomic_load_ptr(&ipsec_accel_forget_sav_p); if (p != NULL) p(sav); } static inline void ipsec_accel_spdadd(struct secpolicy *sp, struct inpcb *inp) { void (*p)(struct secpolicy *sp, struct inpcb *inp); p = atomic_load_ptr(&ipsec_accel_spdadd_p); if (p != NULL) p(sp, inp); } static inline void ipsec_accel_spddel(struct secpolicy *sp) { void (*p)(struct secpolicy *sp); p = atomic_load_ptr(&ipsec_accel_spddel_p); if (p != NULL) p(sp); } static inline int ipsec_accel_sa_lifetime_op(struct secasvar *sav, struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, struct rm_priotracker *sahtree_trackerp) { int (*p)(struct secasvar *sav, struct seclifetime *lft_c, if_t ifp, enum IF_SA_CNT_WHICH op, struct rm_priotracker *sahtree_trackerp); p = atomic_load_ptr(&ipsec_accel_sa_lifetime_op_p); if (p != NULL) return (p(sav, lft_c, ifp, op, sahtree_trackerp)); return (ENOTSUP); } static inline void ipsec_accel_sync(void) { void (*p)(void); p = atomic_load_ptr(&ipsec_accel_sync_p); if (p != NULL) p(); } static inline bool ipsec_accel_is_accel_sav(struct secasvar *sav) { bool (*p)(struct secasvar *sav); p = atomic_load_ptr(&ipsec_accel_is_accel_sav_p); if (p != NULL) return (p(sav)); return (false); } static inline struct mbuf * ipsec_accel_key_setaccelif(struct secasvar *sav) { struct mbuf *(*p)(struct secasvar *sav); p = atomic_load_ptr(&ipsec_accel_key_setaccelif_p); if (p != NULL) return (p(sav)); return (NULL); } #else #define ipsec_accel_sa_newkey(a) #define ipsec_accel_forget_sav(a) #define ipsec_accel_spdadd(a, b) #define ipsec_accel_spddel(a) #define ipsec_accel_sa_lifetime_op(a, b, c, d, e) #define ipsec_accel_sync() #define ipsec_accel_is_accel_sav(a) #define ipsec_accel_key_setaccelif(a) #endif 
void ipsec_accel_forget_sav_impl(struct secasvar *sav); void ipsec_accel_spdadd_impl(struct secpolicy *sp, struct inpcb *inp); void ipsec_accel_spddel_impl(struct secpolicy *sp); #ifdef IPSEC_OFFLOAD int ipsec_accel_input(struct mbuf *m, int offset, int proto); bool ipsec_accel_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp, struct secpolicy *sp, struct secasvar *sav, int af, - int mtu); + int mtu, int *hwassist); void ipsec_accel_forget_sav(struct secasvar *sav); #else #define ipsec_accel_input(a, b, c) (ENXIO) -#define ipsec_accel_output(a, b, c, d, e, f, g) (false) +#define ipsec_accel_output(a, b, c, d, e, f, g, h) ({ \ + *h = 0; \ + false; \ +}) #define ipsec_accel_forget_sav(a) #endif struct ipsec_accel_in_tag *ipsec_accel_input_tag_lookup(const struct mbuf *); void ipsec_accel_on_ifdown(struct ifnet *ifp); void ipsec_accel_drv_sa_lifetime_update(struct secasvar *sav, if_t ifp, u_int drv_spi, uint64_t octets, uint64_t allocs); #endif /* _KERNEL */ #endif /* _NETIPSEC_IPSEC_OFFLOAD_H_ */ diff --git a/sys/netipsec/ipsec_output.c b/sys/netipsec/ipsec_output.c index 8f49bc8fce24..10f1728f72ac 100644 --- a/sys/netipsec/ipsec_output.c +++ b/sys/netipsec/ipsec_output.c @@ -1,1203 +1,1209 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting * Copyright (c) 2016 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * IPsec output processing. */ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include #ifdef INET6 #include #include #endif #include #ifdef INET6 #include #endif #if defined(SCTP) || defined(SCTP_SUPPORT) #include #endif #include #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include #include #include #define IPSEC_OSTAT_INC(proto, name) do { \ if ((proto) == IPPROTO_ESP) \ ESPSTAT_INC(esps_##name); \ else if ((proto) == IPPROTO_AH)\ AHSTAT_INC(ahs_##name); \ else \ IPCOMPSTAT_INC(ipcomps_##name); \ } while (0) static int ipsec_encap(struct mbuf **mp, struct secasindex *saidx); static size_t ipsec_get_pmtu(struct secasvar *sav); #ifdef INET static struct secasvar * ipsec4_allocsa(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp, u_int *pidx, int *error) { struct secasindex *saidx, tmpsaidx; struct ipsecrequest *isr; struct sockaddr_in *sin; struct secasvar *sav; struct 
ip *ip; /* * Check system global policy controls. */ next: isr = sp->req[*pidx]; if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) || (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) || (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) { DPRINTF(("%s: IPsec outbound packet dropped due" " to policy (check your sysctls)\n", __func__)); IPSEC_OSTAT_INC(isr->saidx.proto, pdrops); *error = EHOSTUNREACH; return (NULL); } /* * Craft SA index to search for proper SA. Note that * we only initialize unspecified SA peers for transport * mode; for tunnel mode they must already be filled in. */ if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) { saidx = &tmpsaidx; *saidx = isr->saidx; ip = mtod(m, struct ip *); if (saidx->src.sa.sa_len == 0) { sin = &saidx->src.sin; sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; sin->sin_addr = ip->ip_src; } if (saidx->dst.sa.sa_len == 0) { sin = &saidx->dst.sin; sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_port = IPSEC_PORT_ANY; sin->sin_addr = ip->ip_dst; } } else saidx = &sp->req[*pidx]->saidx; /* * Lookup SA and validate it. */ sav = key_allocsa_policy(sp, saidx, error); if (sav == NULL) { IPSECSTAT_INC(ips_out_nosa); if (*error != 0) return (NULL); if (ipsec_get_reqlevel(sp, *pidx) != IPSEC_LEVEL_REQUIRE) { /* * We have no SA and policy that doesn't require * this IPsec transform, thus we can continue w/o * IPsec processing, i.e. return EJUSTRETURN. * But first check if there is some bundled transform. */ if (sp->tcount > ++(*pidx)) goto next; *error = EJUSTRETURN; } return (NULL); } IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform")); return (sav); } /* * IPsec output logic for IPv4. 
 */
/*
 * Look up the SA for request 'idx' of policy 'sp', run the BEFORE hhooks,
 * try hardware IPsec offload (ipsec_accel_output), perform any delayed
 * software checksums not covered by hardware assist, encapsulate for
 * tunnel mode, and dispatch the mbuf to the xform output routine.
 */
static int
ipsec4_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
    struct inpcb *inp, u_int idx, u_long mtu)
{
	struct ipsec_ctx_data ctx;
	union sockaddr_union *dst;
	struct secasvar *sav;
	struct ip *ip;
-	int error, i, off;
+	int error, hwassist, i, off;
+	bool accel;

	IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx));

	/*
	 * We hold the reference to SP. Content of SP couldn't be changed.
	 * Craft secasindex and do lookup for suitable SA.
	 * Then do encapsulation if needed and call xform's output.
	 * We need to store SP in the xform callback parameters.
	 * In xform callback we will extract SP and it can be used to
	 * determine next transform. At the end of transform we can
	 * release reference to SP.
	 */
	sav = ipsec4_allocsa(ifp, m, sp, &idx, &error);
	if (sav == NULL) {
		if (error == EJUSTRETURN) { /* No IPsec required */
			/*
			 * NOTE(review): hwassist is passed uninitialized
			 * here; its value is never read on this path —
			 * confirm ipsec_accel_output() treats it as a pure
			 * output parameter.
			 */
			(void)ipsec_accel_output(ifp, m, inp, sp, NULL,
-			    AF_INET, mtu);
+			    AF_INET, mtu, &hwassist);
			key_freesp(&sp);
			return (error);
		}
		goto bad;
	}
	/*
	 * XXXAE: most likely ip_sum at this point is wrong.
	 */
	IPSEC_INIT_CTX(&ctx, &m, inp, sav, AF_INET, IPSEC_ENC_BEFORE);
	if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0)
		goto bad;
-	if (ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET, mtu))
+	hwassist = 0;
+	accel = ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET, mtu,
+	    &hwassist);
+
+	/*
+	 * Do delayed checksums now because we send before
+	 * this is done in the normal processing path.
+	 */
+	if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA & ~hwassist) != 0) {
+		in_delayed_cksum(m);
+		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
+	}
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+	if ((m->m_pkthdr.csum_flags & CSUM_SCTP & ~hwassist) != 0) {
+		struct ip *ip;
+
+		ip = mtod(m, struct ip *);
+		sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
+		m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
+	}
+#endif
+	if (accel)
		return (EJUSTRETURN);

	ip = mtod(m, struct ip *);
	dst = &sav->sah->saidx.dst;
	/* Do the appropriate encapsulation, if necessary */
	if (sp->req[idx]->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */
	    dst->sa.sa_family != AF_INET ||	/* PF mismatch */
	    (dst->sa.sa_family == AF_INET &&	/* Proxy */
	     dst->sin.sin_addr.s_addr != INADDR_ANY &&
	     dst->sin.sin_addr.s_addr != ip->ip_dst.s_addr)) {
		/* Fix IPv4 header checksum and length */
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_sum = 0;
		ip->ip_sum = in_cksum(m, ip->ip_hl << 2);
		error = ipsec_encap(&m, &sav->sah->saidx);
		if (error != 0) {
			DPRINTF(("%s: encapsulation for SPI 0x%08x failed "
			    "with error %d\n", __func__, ntohl(sav->spi),
			    error));
			/* XXXAE: IPSEC_OSTAT_INC(tunnel); */
			goto bad;
		}
		inp = NULL;
	}

	IPSEC_INIT_CTX(&ctx, &m, inp, sav, dst->sa.sa_family, IPSEC_ENC_AFTER);
	if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0)
		goto bad;

	/*
	 * Dispatch to the appropriate IPsec transform logic. The
	 * packet will be returned for transmission after crypto
	 * processing, etc. are completed.
	 *
	 * NB: m & sav are ``passed to caller'' who's responsible for
	 * reclaiming their resources.
	 */
	switch(dst->sa.sa_family) {
	case AF_INET:
		ip = mtod(m, struct ip *);
		i = ip->ip_hl << 2;
		off = offsetof(struct ip, ip_p);
		break;
#ifdef INET6
	case AF_INET6:
		i = sizeof(struct ip6_hdr);
		off = offsetof(struct ip6_hdr, ip6_nxt);
		break;
#endif /* INET6 */
	default:
		DPRINTF(("%s: unsupported protocol family %u\n",
		    __func__, dst->sa.sa_family));
		error = EPFNOSUPPORT;
		IPSEC_OSTAT_INC(sav->sah->saidx.proto, nopf);
		goto bad;
	}
	error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off);
	return (error);
bad:
	IPSECSTAT_INC(ips_out_inval);
	if (m != NULL)
		m_freem(m);
	if (sav != NULL)
		key_freesav(&sav);
	key_freesp(&sp);
	return (error);
}

/* Entry point for IPv4 IPsec output: start at the first request (idx 0). */
int
ipsec4_process_packet(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
    struct inpcb *inp, u_long mtu)
{

	return (ipsec4_perform_request(ifp, m, sp, inp, 0, mtu));
}

/*
 * Check whether the packet (plus IPsec headers) would exceed the path MTU,
 * honoring the net.inet.ipsec DF-bit sysctl.  On overflow, either send an
 * ICMP needfrag (forwarding) or return EMSGSIZE (local).  Consumes 'm' and
 * the 'sp' reference on the error paths.
 */
int
ipsec4_check_pmtu(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
    int forwarding)
{
	struct secasvar *sav;
	struct ip *ip;
	size_t hlen, pmtu;
	uint32_t idx;
	int error;

	/* Don't check PMTU if the frame won't have DF bit set. */
	if (!V_ip4_ipsec_dfbit)
		return (0);
	if (V_ip4_ipsec_dfbit == 1)
		goto setdf;

	/* V_ip4_ipsec_dfbit > 1 - we will copy it from inner header. */
	ip = mtod(m, struct ip *);
	if (!(ip->ip_off & htons(IP_DF)))
		return (0);

setdf:
	/* Use the last request of the bundle: it determines the outer SA. */
	idx = sp->tcount - 1;
	sav = ipsec4_allocsa(ifp, m, sp, &idx, &error);
	if (sav == NULL) {
		key_freesp(&sp);
		/*
		 * No matching SA was found and SADB_ACQUIRE message was generated.
		 * Since we have matched a SP to this packet drop it silently.
		 */
		if (error == 0)
			error = EINPROGRESS;
		if (error != EJUSTRETURN)
			m_freem(m);

		return (error);
	}

	pmtu = ipsec_get_pmtu(sav);
	if (pmtu == 0) {
		key_freesav(&sav);
		return (0);
	}

	hlen = ipsec_hdrsiz_internal(sp);
	key_freesav(&sav);

	if (m_length(m, NULL) + hlen > pmtu) {
		/*
		 * If we're forwarding generate ICMP message here,
		 * so that it contains pmtu subtracted by header size.
		 * Set error to EINPROGRESS, in order for the frame
		 * to be dropped silently.
		 */
		if (forwarding) {
			if (pmtu > hlen)
				icmp_error(m, ICMP_UNREACH,
				    ICMP_UNREACH_NEEDFRAG, 0, pmtu - hlen);
			else
				m_freem(m);

			key_freesp(&sp);
			return (EINPROGRESS); /* Pretend that we consumed it. */
		} else {
			m_freem(m);
			key_freesp(&sp);
			return (EMSGSIZE);
		}
	}

	return (0);
}

/*
 * Common IPv4 output path for locally generated (forwarding == 0) and
 * forwarded (forwarding == 1) packets: policy lookup, PMTU check, then
 * IPsec processing.  Returns 0 when no IPsec handling was needed,
 * EINPROGRESS/other when the mbuf was consumed.
 */
static int
ipsec4_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp,
    int forwarding, u_long mtu)
{
	struct secpolicy *sp;
	int error;

	/* Lookup for the corresponding outbound security policy */
	sp = ipsec4_checkpolicy(m, inp, &error, !forwarding);
	if (sp == NULL) {
		if (error == -EINVAL) {
			/* Discarded by policy. */
			m_freem(m);
			return (EACCES);
		}
		return (0); /* No IPsec required. */
	}

	/*
	 * Usually we have to have tunnel mode IPsec security policy
	 * when we are forwarding a packet. Otherwise we could not handle
	 * encrypted replies, because they are not destined for us. But
	 * some users are doing source address translation for forwarded
	 * packets, and thus, even if they are forwarded, the replies will
	 * return back to us.
	 */
-	if (!forwarding) {
-		/*
-		 * Do delayed checksums now because we send before
-		 * this is done in the normal processing path.
-		 */
-		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
-			in_delayed_cksum(m);
-			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
-		}
-#if defined(SCTP) || defined(SCTP_SUPPORT)
-		if (m->m_pkthdr.csum_flags & CSUM_SCTP) {
-			struct ip *ip;
-			ip = mtod(m, struct ip *);
-			sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2));
-			m->m_pkthdr.csum_flags &= ~CSUM_SCTP;
-		}
-#endif
-	}

	/* NB: callee frees mbuf and releases reference to SP */
	error = ipsec4_check_pmtu(ifp, m, sp, forwarding);
	if (error != 0) {
		if (error == EJUSTRETURN)
			return (0);

		return (error);
	}

	error = ipsec4_process_packet(ifp, m, sp, inp, mtu);
	if (error == EJUSTRETURN) {
		/*
		 * We had a SP with a level of 'use' and no SA. We
		 * will just continue to process the packet without
		 * IPsec processing and return without error.
		 */
		return (0);
	}
	if (error == 0)
		return (EINPROGRESS); /* consumed by IPsec */
	return (error);
}

/*
 * IPSEC_OUTPUT() method implementation for IPv4.
 *   0 - no IPsec handling needed
 *   other values - mbuf consumed by IPsec.
 */
int
ipsec4_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp,
    u_long mtu)
{

	/*
	 * If the packet is resubmitted to ip_output (e.g. after
	 * AH, ESP, etc. processing), there will be a tag to bypass
	 * the lookup and related policy checking.
	 */
	if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL)
		return (0);

	return (ipsec4_common_output(ifp, m, inp, 0, mtu));
}

/*
 * IPSEC_FORWARD() method implementation for IPv4.
 *   0 - no IPsec handling needed
 *   other values - mbuf consumed by IPsec.
 */
int
ipsec4_forward(struct mbuf *m)
{

	/*
	 * Check if this packet has an active inbound SP and needs to be
	 * dropped instead of forwarded.
	 */
	if (ipsec4_in_reject(m, NULL) != 0) {
		m_freem(m);
		return (EACCES);
	}
	return (ipsec4_common_output(NULL /* XXXKIB */, m, NULL, 1, 0));
}
#endif

#ifdef INET6
/*
 * Compare an IPv6 sockaddr against an address, embedding the sockaddr's
 * scope id into the address bytes for link-local addresses so the
 * comparison works on kernel-internal (scope-embedded) form.
 */
static int
in6_sa_equal_addrwithscope(const struct sockaddr_in6 *sa,
    const struct in6_addr *ia)
{
	struct in6_addr ia2;

	if (IN6_IS_SCOPE_LINKLOCAL(&sa->sin6_addr)) {
		memcpy(&ia2, &sa->sin6_addr, sizeof(ia2));
		ia2.s6_addr16[1] = htons(sa->sin6_scope_id);
		return (IN6_ARE_ADDR_EQUAL(ia, &ia2));
	}
	return (IN6_ARE_ADDR_EQUAL(&sa->sin6_addr, ia));
}

/*
 * IPv6 counterpart of ipsec4_allocsa(): find the SA for request '*pidx'
 * of 'sp', advancing *pidx past optional (non-REQUIRE) transforms that
 * have no SA.  Returns NULL with *error set, or EJUSTRETURN semantics
 * when IPsec processing may be skipped entirely.
 */
static struct secasvar *
ipsec6_allocsa(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
    u_int *pidx, int *error)
{
	struct secasindex *saidx, tmpsaidx;
	struct ipsecrequest *isr;
	struct sockaddr_in6 *sin6;
	struct secasvar *sav;
	struct ip6_hdr *ip6;

	/*
	 * Check system global policy controls.
	 */
next:
	isr = sp->req[*pidx];
	if ((isr->saidx.proto == IPPROTO_ESP && !V_esp_enable) ||
	    (isr->saidx.proto == IPPROTO_AH && !V_ah_enable) ||
	    (isr->saidx.proto == IPPROTO_IPCOMP && !V_ipcomp_enable)) {
		DPRINTF(("%s: IPsec outbound packet dropped due"
		    " to policy (check your sysctls)\n", __func__));
		IPSEC_OSTAT_INC(isr->saidx.proto, pdrops);
		*error = EHOSTUNREACH;
		return (NULL);
	}

	/*
	 * Craft SA index to search for proper SA. Note that
	 * we only fill in unspecified SA peers for transport
	 * mode; for tunnel mode they must already be filled in.
	 */
	if (isr->saidx.mode == IPSEC_MODE_TRANSPORT) {
		saidx = &tmpsaidx;
		*saidx = isr->saidx;
		ip6 = mtod(m, struct ip6_hdr *);
		if (saidx->src.sin6.sin6_len == 0) {
			sin6 = (struct sockaddr_in6 *)&saidx->src;
			sin6->sin6_len = sizeof(*sin6);
			sin6->sin6_family = AF_INET6;
			sin6->sin6_port = IPSEC_PORT_ANY;
			sin6->sin6_addr = ip6->ip6_src;
			if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
				/* fix scope id for comparing SPD */
				sin6->sin6_addr.s6_addr16[1] = 0;
				sin6->sin6_scope_id =
				    ntohs(ip6->ip6_src.s6_addr16[1]);
			}
		}
		if (saidx->dst.sin6.sin6_len == 0) {
			sin6 = (struct sockaddr_in6 *)&saidx->dst;
			sin6->sin6_len = sizeof(*sin6);
			sin6->sin6_family = AF_INET6;
			sin6->sin6_port = IPSEC_PORT_ANY;
			sin6->sin6_addr = ip6->ip6_dst;
			if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) {
				/* fix scope id for comparing SPD */
				sin6->sin6_addr.s6_addr16[1] = 0;
				sin6->sin6_scope_id =
				    ntohs(ip6->ip6_dst.s6_addr16[1]);
			}
		}
	} else
		saidx = &sp->req[*pidx]->saidx;

	/*
	 * Lookup SA and validate it.
	 */
	sav = key_allocsa_policy(sp, saidx, error);
	if (sav == NULL) {
		IPSEC6STAT_INC(ips_out_nosa);
		if (*error != 0)
			return (NULL);
		if (ipsec_get_reqlevel(sp, *pidx) != IPSEC_LEVEL_REQUIRE) {
			/*
			 * We have no SA and policy that doesn't require
			 * this IPsec transform, thus we can continue w/o
			 * IPsec processing, i.e. return EJUSTRETURN.
			 * But first check if there is some bundled transform.
			 */
			if (sp->tcount > ++(*pidx))
				goto next;
			*error = EJUSTRETURN;
		}
		return (NULL);
	}
	IPSEC_ASSERT(sav->tdb_xform != NULL, ("SA with NULL tdb_xform"));
	return (sav);
}

/*
 * IPsec output logic for IPv6.
 */
static int
ipsec6_perform_request(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
    struct inpcb *inp, u_int idx, u_long mtu)
{
	struct ipsec_ctx_data ctx;
	union sockaddr_union *dst;
	struct secasvar *sav;
	struct ip6_hdr *ip6;
-	int error, i, off;
+	int error, hwassist, i, off;
+	bool accel;

	IPSEC_ASSERT(idx < sp->tcount, ("Wrong IPsec request index %d", idx));

	sav = ipsec6_allocsa(ifp, m, sp, &idx, &error);
	if (sav == NULL) {
		if (error == EJUSTRETURN) { /* No IPsec required */
			/*
			 * NOTE(review): as in the IPv4 path, hwassist is
			 * passed uninitialized and its value is never read
			 * here — confirm ipsec_accel_output() treats it as
			 * a pure output parameter.
			 */
			(void)ipsec_accel_output(ifp, m, inp, sp, NULL,
-			    AF_INET6, mtu);
+			    AF_INET6, mtu, &hwassist);
			key_freesp(&sp);
			return (error);
		}
		goto bad;
	}

	/* Fix IP length in case if it is not set yet. */
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));

	IPSEC_INIT_CTX(&ctx, &m, inp, sav, AF_INET6, IPSEC_ENC_BEFORE);
	if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0)
		goto bad;
-	if (ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET6, mtu))
+	hwassist = 0;
+	accel = ipsec_accel_output(ifp, m, inp, sp, sav, AF_INET6, mtu,
+	    &hwassist);
+
+	/*
+	 * Do delayed checksums now because we send before
+	 * this is done in the normal processing path.
+	 */
+	if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6 & ~hwassist) != 0) {
+		in6_delayed_cksum(m, m->m_pkthdr.len -
+		    sizeof(struct ip6_hdr), sizeof(struct ip6_hdr));
+		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
+	}
+#if defined(SCTP) || defined(SCTP_SUPPORT)
+	if ((m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6 & ~hwassist) != 0) {
+		sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
+		m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
+	}
+#endif
+	if (accel)
		return (EJUSTRETURN);

	ip6 = mtod(m, struct ip6_hdr *); /* pfil can change mbuf */
	dst = &sav->sah->saidx.dst;

	/* Do the appropriate encapsulation, if necessary */
	if (sp->req[idx]->saidx.mode == IPSEC_MODE_TUNNEL || /* Tunnel requ'd */
	    dst->sa.sa_family != AF_INET6 ||	/* PF mismatch */
	    ((dst->sa.sa_family == AF_INET6) &&
	     (!IN6_IS_ADDR_UNSPECIFIED(&dst->sin6.sin6_addr)) &&
	     (!in6_sa_equal_addrwithscope(&dst->sin6, &ip6->ip6_dst)))) {
		if (m->m_pkthdr.len - sizeof(*ip6) > IPV6_MAXPACKET) {
			/* No jumbogram support. */
			error = ENXIO;	/*XXX*/
			goto bad;
		}
		error = ipsec_encap(&m, &sav->sah->saidx);
		if (error != 0) {
			DPRINTF(("%s: encapsulation for SPI 0x%08x failed "
			    "with error %d\n", __func__, ntohl(sav->spi),
			    error));
			/* XXXAE: IPSEC_OSTAT_INC(tunnel); */
			goto bad;
		}
		inp = NULL;
	}

	IPSEC_INIT_CTX(&ctx, &m, inp, sav, dst->sa.sa_family, IPSEC_ENC_AFTER);
	if ((error = ipsec_run_hhooks(&ctx, HHOOK_TYPE_IPSEC_OUT)) != 0)
		goto bad;

	switch(dst->sa.sa_family) {
#ifdef INET
	case AF_INET:
		{
		struct ip *ip;
		ip = mtod(m, struct ip *);
		i = ip->ip_hl << 2;
		off = offsetof(struct ip, ip_p);
		}
		break;
#endif /* AF_INET */
	case AF_INET6:
		i = sizeof(struct ip6_hdr);
		off = offsetof(struct ip6_hdr, ip6_nxt);
		break;
	default:
		DPRINTF(("%s: unsupported protocol family %u\n",
		    __func__, dst->sa.sa_family));
		error = EPFNOSUPPORT;
		IPSEC_OSTAT_INC(sav->sah->saidx.proto, nopf);
		goto bad;
	}
	error = (*sav->tdb_xform->xf_output)(m, sp, sav, idx, i, off);
	return (error);
bad:
	IPSEC6STAT_INC(ips_out_inval);
	if (m != NULL)
		m_freem(m);
	if (sav != NULL)
		key_freesav(&sav);
	key_freesp(&sp);
	return (error);
}

/* Entry point for IPv6 IPsec output: start at the first request (idx 0). */
int
ipsec6_process_packet(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
    struct inpcb *inp, u_long mtu)
{

	return (ipsec6_perform_request(ifp, m, sp, inp, 0, mtu));
}

/*
 * IPv6 implementation is based on IPv4 implementation.
 */
int
ipsec6_check_pmtu(struct ifnet *ifp, struct mbuf *m, struct secpolicy *sp,
    int forwarding)
{
	struct secasvar *sav;
	size_t hlen, pmtu;
	uint32_t idx;
	int error;

	/*
	 * According to RFC8200 L3 fragmentation is supposed to be done only on
	 * locally generated packets. During L3 forwarding packets that are too
	 * big are always supposed to be dropped, with an ICMPv6 packet being
	 * sent back.
	 */
	if (!forwarding)
		return (0);

	/* Use the last request of the bundle: it determines the outer SA. */
	idx = sp->tcount - 1;
	sav = ipsec6_allocsa(ifp, m, sp, &idx, &error);
	if (sav == NULL) {
		key_freesp(&sp);
		/*
		 * No matching SA was found and SADB_ACQUIRE message was generated.
		 * Since we have matched a SP to this packet drop it silently.
		 */
		if (error == 0)
			error = EINPROGRESS;
		if (error != EJUSTRETURN)
			m_freem(m);

		return (error);
	}

	pmtu = ipsec_get_pmtu(sav);
	if (pmtu == 0) {
		key_freesav(&sav);
		return (0);
	}

	hlen = ipsec_hdrsiz_internal(sp);
	key_freesav(&sav);

	if (m_length(m, NULL) + hlen > pmtu) {
		/*
		 * If we're forwarding generate ICMPv6 message here,
		 * so that it contains pmtu subtracted by header size.
		 * Set error to EINPROGRESS, in order for the frame
		 * to be dropped silently.
		 */
		if (forwarding) {
			if (pmtu > hlen)
				icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0,
				    pmtu - hlen);
			else
				m_freem(m);

			key_freesp(&sp);
			return (EINPROGRESS); /* Pretend that we consumed it. */
		}
	}

	return (0);
}

/*
 * Common IPv6 output path for locally generated and forwarded packets:
 * policy lookup, PMTU check, then IPsec processing.  Returns 0 when no
 * IPsec handling was needed, EINPROGRESS/other when the mbuf was consumed.
 */
static int
ipsec6_common_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp,
    int forwarding, u_long mtu)
{
	struct secpolicy *sp;
	int error;

	/* Lookup for the corresponding outbound security policy */
	sp = ipsec6_checkpolicy(m, inp, &error, !forwarding);
	if (sp == NULL) {
		if (error == -EINVAL) {
			/* Discarded by policy. */
			m_freem(m);
			return (EACCES);
		}
		return (0); /* No IPsec required. */
	}

-	if (!forwarding) {
-		/*
-		 * Do delayed checksums now because we send before
-		 * this is done in the normal processing path.
-		 */
-		if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA_IPV6) {
-			in6_delayed_cksum(m, m->m_pkthdr.len -
-			    sizeof(struct ip6_hdr), sizeof(struct ip6_hdr));
-			m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA_IPV6;
-		}
-#if defined(SCTP) || defined(SCTP_SUPPORT)
-		if (m->m_pkthdr.csum_flags & CSUM_SCTP_IPV6) {
-			sctp_delayed_cksum(m, sizeof(struct ip6_hdr));
-			m->m_pkthdr.csum_flags &= ~CSUM_SCTP_IPV6;
-		}
-#endif
-	}
-
	error = ipsec6_check_pmtu(ifp, m, sp, forwarding);
	if (error != 0) {
		if (error == EJUSTRETURN)
			return (0);

		return (error);
	}

	/* NB: callee frees mbuf and releases reference to SP */
	error = ipsec6_process_packet(ifp, m, sp, inp, mtu);
	if (error == EJUSTRETURN) {
		/*
		 * We had a SP with a level of 'use' and no SA. We
		 * will just continue to process the packet without
		 * IPsec processing and return without error.
		 */
		return (0);
	}
	if (error == 0)
		return (EINPROGRESS); /* consumed by IPsec */
	return (error);
}

/*
 * IPSEC_OUTPUT() method implementation for IPv6.
 *   0 - no IPsec handling needed
 *   other values - mbuf consumed by IPsec.
 */
int
ipsec6_output(struct ifnet *ifp, struct mbuf *m, struct inpcb *inp,
    u_long mtu)
{

	/*
	 * If the packet is resubmitted to ip_output (e.g. after
	 * AH, ESP, etc. processing), there will be a tag to bypass
	 * the lookup and related policy checking.
	 */
	if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL)
		return (0);

	return (ipsec6_common_output(ifp, m, inp, 0, mtu));
}

/*
 * IPSEC_FORWARD() method implementation for IPv6.
 *   0 - no IPsec handling needed
 *   other values - mbuf consumed by IPsec.
 */
int
ipsec6_forward(struct mbuf *m)
{

	/*
	 * Check if this packet has an active inbound SP and needs to be
	 * dropped instead of forwarded.
	 */
	if (ipsec6_in_reject(m, NULL) != 0) {
		m_freem(m);
		return (EACCES);
	}
	return (ipsec6_common_output(NULL /* XXXKIB */, m, NULL, 1, 0));
}
#endif /* INET6 */

/*
 * Called by a transform when its processing is complete: fix up the outer
 * header length, tag the packet as IPsec-processed, apply the next bundled
 * SA if any, otherwise perform NAT-T UDP encapsulation if required and
 * re-inject the packet into ip_output()/ip6_output().  Consumes 'm' and
 * the references to 'sp' and 'sav' on all paths.
 */
int
ipsec_process_done(struct mbuf *m, struct secpolicy *sp, struct secasvar *sav,
    u_int idx)
{
	struct epoch_tracker et;
	struct xform_history *xh;
	struct secasindex *saidx;
	struct m_tag *mtag;
	int error;

	if (sav->state >= SADB_SASTATE_DEAD) {
		error = ESRCH;
		goto bad;
	}
	saidx = &sav->sah->saidx;
	switch (saidx->dst.sa.sa_family) {
#ifdef INET
	case AF_INET:
		/* Fix the header length, for AH processing. */
		mtod(m, struct ip *)->ip_len = htons(m->m_pkthdr.len);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		/* Fix the header length, for AH processing. */
		if (m->m_pkthdr.len < sizeof (struct ip6_hdr)) {
			error = ENXIO;
			goto bad;
		}
		if (m->m_pkthdr.len - sizeof (struct ip6_hdr) > IPV6_MAXPACKET) {
			/* No jumbogram support. */
			error = ENXIO;	/*?*/
			goto bad;
		}
		mtod(m, struct ip6_hdr *)->ip6_plen =
		    htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
		break;
#endif /* INET6 */
	default:
		DPRINTF(("%s: unknown protocol family %u\n", __func__,
		    saidx->dst.sa.sa_family));
		error = ENXIO;
		goto bad;
	}

	/*
	 * Add a record of what we've done to the packet.
	 */
	mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE, sizeof(*xh), M_NOWAIT);
	if (mtag == NULL) {
		DPRINTF(("%s: could not get packet tag\n", __func__));
		error = ENOMEM;
		goto bad;
	}
	xh = (struct xform_history *)(mtag + 1);
	xh->dst = saidx->dst;
	xh->proto = saidx->proto;
	xh->mode = saidx->mode;
	xh->spi = sav->spi;
	m_tag_prepend(m, mtag);

	key_sa_recordxfer(sav, m);	/* record data transfer */

	/*
	 * If there's another (bundled) SA to apply, do so.
	 * Note that this puts a burden on the kernel stack size.
	 * If this is a problem we'll need to introduce a queue
	 * to set the packet on so we can unwind the stack before
	 * doing further processing.
	 */
	if (++idx < sp->tcount) {
		switch (saidx->dst.sa.sa_family) {
#ifdef INET
		case AF_INET:
			key_freesav(&sav);
			IPSECSTAT_INC(ips_out_bundlesa);
			return (ipsec4_perform_request(NULL, m, sp, NULL,
			    idx, 0));
			/* NOTREACHED */
#endif
#ifdef INET6
		case AF_INET6:
			key_freesav(&sav);
			IPSEC6STAT_INC(ips_out_bundlesa);
			return (ipsec6_perform_request(NULL, m, sp, NULL,
			    idx, 0));
			/* NOTREACHED */
#endif /* INET6 */
		default:
			DPRINTF(("%s: unknown protocol family %u\n", __func__,
			    saidx->dst.sa.sa_family));
			error = EPFNOSUPPORT;
			goto bad;
		}
	}

	key_freesp(&sp), sp = NULL;	/* Release reference to SP */
#if defined(INET) || defined(INET6)
	/*
	 * Do UDP encapsulation if SA requires it.
	 */
	if (sav->natt != NULL) {
		error = udp_ipsec_output(m, sav);
		if (error != 0)
			goto bad;
	}
#endif /* INET || INET6 */

	/*
	 * We're done with IPsec processing, transmit the packet using the
	 * appropriate network protocol (IP or IPv6).
	 */
	NET_EPOCH_ENTER(et);
	switch (saidx->dst.sa.sa_family) {
#ifdef INET
	case AF_INET:
		key_freesav(&sav);
		error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		key_freesav(&sav);
		error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
		break;
#endif /* INET6 */
	default:
		panic("ipsec_process_done");
	}
	NET_EPOCH_EXIT(et);
	return (error);
bad:
	m_freem(m);
	key_freesav(&sav);
	if (sp != NULL)
		key_freesp(&sp);
	return (error);
}

/*
 * ipsec_prepend() is optimized version of M_PREPEND().
 * ipsec_encap() is called by IPsec output routine for tunnel mode SA.
 * It is expected that after IP encapsulation some IPsec transform will
 * be performed. Each IPsec transform inserts its variable length header
 * just after outer IP header using m_makespace(). If given mbuf has not
 * enough free space at the beginning, we allocate new mbuf and reserve
 * some space at the beginning and at the end.
* This helps avoid allocating of new mbuf and data copying in m_makespace(), * we place outer header in the middle of mbuf's data with reserved leading * and trailing space: * [ LEADINGSPACE ][ Outer IP header ][ TRAILINGSPACE ] * LEADINGSPACE will be used to add ethernet header, TRAILINGSPACE will * be used to inject AH/ESP/IPCOMP header. */ #define IPSEC_TRAILINGSPACE (sizeof(struct udphdr) +/* NAT-T */ \ max(sizeof(struct newesp) + EALG_MAX_BLOCK_LEN, /* ESP + IV */ \ sizeof(struct newah) + HASH_MAX_LEN /* AH + ICV */)) static struct mbuf * ipsec_prepend(struct mbuf *m, int len, int how) { struct mbuf *n; M_ASSERTPKTHDR(m); IPSEC_ASSERT(len < MHLEN, ("wrong length")); if (M_LEADINGSPACE(m) >= len) { /* No need to allocate new mbuf. */ m->m_data -= len; m->m_len += len; m->m_pkthdr.len += len; return (m); } n = m_gethdr(how, m->m_type); if (n == NULL) { m_freem(m); return (NULL); } m_move_pkthdr(n, m); n->m_next = m; if (len + IPSEC_TRAILINGSPACE < M_SIZE(n)) m_align(n, len + IPSEC_TRAILINGSPACE); n->m_len = len; n->m_pkthdr.len += len; return (n); } static size_t ipsec_get_pmtu(struct secasvar *sav) { union sockaddr_union *dst; struct in_conninfo inc; size_t pmtu; dst = &sav->sah->saidx.dst; memset(&inc, 0, sizeof(inc)); switch (dst->sa.sa_family) { #ifdef INET case AF_INET: inc.inc_faddr = satosin(&dst->sa)->sin_addr; break; #endif #ifdef INET6 case AF_INET6: inc.inc6_faddr = satosin6(&dst->sa)->sin6_addr; inc.inc_flags |= INC_ISIPV6; break; #endif default: return (0); } pmtu = tcp_hc_getmtu(&inc); if (pmtu != 0) return (pmtu); /* No entry in hostcache. 
Assume that PMTU is equal to link's MTU */ switch (dst->sa.sa_family) { #ifdef INET case AF_INET: pmtu = tcp_maxmtu(&inc, NULL); break; #endif #ifdef INET6 case AF_INET6: pmtu = tcp_maxmtu6(&inc, NULL); break; #endif default: return (0); } if (pmtu == 0) return (0); tcp_hc_updatemtu(&inc, pmtu); return (pmtu); } static int ipsec_encap(struct mbuf **mp, struct secasindex *saidx) { #ifdef INET6 struct ip6_hdr *ip6; #endif struct ip *ip; #ifdef INET int setdf; #endif uint8_t itos, proto; ip = mtod(*mp, struct ip *); switch (ip->ip_v) { #ifdef INET case IPVERSION: proto = IPPROTO_IPIP; /* * Collect IP_DF state from the inner header * and honor system-wide control of how to handle it. */ switch (V_ip4_ipsec_dfbit) { case 0: /* clear in outer header */ case 1: /* set in outer header */ setdf = V_ip4_ipsec_dfbit; break; default:/* propagate to outer header */ setdf = (ip->ip_off & htons(IP_DF)) != 0; } itos = ip->ip_tos; break; #endif #ifdef INET6 case (IPV6_VERSION >> 4): proto = IPPROTO_IPV6; ip6 = mtod(*mp, struct ip6_hdr *); itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; /* scoped address handling */ in6_clearscope(&ip6->ip6_src); in6_clearscope(&ip6->ip6_dst); break; #endif default: return (EAFNOSUPPORT); } switch (saidx->dst.sa.sa_family) { #ifdef INET case AF_INET: if (saidx->src.sa.sa_family != AF_INET || saidx->src.sin.sin_addr.s_addr == INADDR_ANY || saidx->dst.sin.sin_addr.s_addr == INADDR_ANY) return (EINVAL); *mp = ipsec_prepend(*mp, sizeof(struct ip), M_NOWAIT); if (*mp == NULL) return (ENOBUFS); ip = mtod(*mp, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(struct ip) >> 2; ip->ip_p = proto; ip->ip_len = htons((*mp)->m_pkthdr.len); ip->ip_ttl = V_ip_defttl; ip->ip_sum = 0; ip->ip_off = setdf ? 
htons(IP_DF): 0; ip->ip_src = saidx->src.sin.sin_addr; ip->ip_dst = saidx->dst.sin.sin_addr; ip_ecn_ingress(V_ip4_ipsec_ecn, &ip->ip_tos, &itos); ip_fillid(ip); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (saidx->src.sa.sa_family != AF_INET6 || IN6_IS_ADDR_UNSPECIFIED(&saidx->src.sin6.sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&saidx->dst.sin6.sin6_addr)) return (EINVAL); *mp = ipsec_prepend(*mp, sizeof(struct ip6_hdr), M_NOWAIT); if (*mp == NULL) return (ENOBUFS); ip6 = mtod(*mp, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_hlim = V_ip6_defhlim; ip6->ip6_nxt = proto; ip6->ip6_dst = saidx->dst.sin6.sin6_addr; /* For link-local address embed scope zone id */ if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_dst)) ip6->ip6_dst.s6_addr16[1] = htons(saidx->dst.sin6.sin6_scope_id & 0xffff); ip6->ip6_src = saidx->src.sin6.sin6_addr; if (IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) ip6->ip6_src.s6_addr16[1] = htons(saidx->src.sin6.sin6_scope_id & 0xffff); ip6->ip6_plen = htons((*mp)->m_pkthdr.len - sizeof(*ip6)); ip_ecn_ingress(V_ip6_ipsec_ecn, &proto, &itos); ip6->ip6_flow |= htonl((uint32_t)proto << 20); break; #endif /* INET6 */ default: return (EAFNOSUPPORT); } (*mp)->m_flags &= ~(M_BCAST | M_MCAST); return (0); }