Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -1422,6 +1422,8 @@ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/common/t4vf_hw.c optional cxgbev pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" +dev/cxgbe/crypto/t4_kern_tls.c optional cxgbe pci kern_tls \ + compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/cudbg/cudbg_common.c optional cxgbe \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/cudbg/cudbg_flash_utils.c optional cxgbe \ Index: sys/dev/cxgbe/adapter.h =================================================================== --- sys/dev/cxgbe/adapter.h +++ sys/dev/cxgbe/adapter.h @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -158,6 +159,7 @@ ADAP_ERR = (1 << 5), BUF_PACKING_OK = (1 << 6), IS_VF = (1 << 7), + KERN_TLS_OK = (1 << 8), CXGBE_BUSY = (1 << 9), @@ -380,7 +382,7 @@ CPL_COOKIE_TOM, CPL_COOKIE_HASHFILTER, CPL_COOKIE_ETHOFLD, - CPL_COOKIE_AVAILABLE3, + CPL_COOKIE_KERN_TLS, NUM_CPL_COOKIES = 8 /* Limited by M_COOKIE. Do not increase. */ }; @@ -582,8 +584,25 @@ uint64_t txpkts0_pkts; /* # of frames in type0 coalesced tx WRs */ uint64_t txpkts1_pkts; /* # of frames in type1 coalesced tx WRs */ uint64_t raw_wrs; /* # of raw work requests (alloc_wr_mbuf) */ + uint64_t tls_wrs; /* # of TLS work requests */ + + uint64_t kern_tls_records; + uint64_t kern_tls_short; + uint64_t kern_tls_partial; + uint64_t kern_tls_full; + uint64_t kern_tls_octets; + uint64_t kern_tls_waste; + uint64_t kern_tls_options; + uint64_t kern_tls_header; + uint64_t kern_tls_fin; + uint64_t kern_tls_fin_short; + uint64_t kern_tls_cbc; + uint64_t kern_tls_gcm; /* stats for not-that-common events */ + + /* Optional scratch space for constructing work requests. 
*/ + uint8_t ss[SGE_MAX_WR_LEN] __aligned(16); } __aligned(CACHE_LINE_SIZE); /* rxq: SGE ingress queue + SGE free list + miscellaneous items */ @@ -840,6 +859,7 @@ struct smt_data *smt; /* Source MAC Table */ struct tid_info tids; vmem_t *key_map; + struct tls_tunables tlst; uint8_t doorbells; int offload_map; /* ports with IFCAP_TOE enabled */ @@ -897,6 +917,8 @@ int last_op_flags; int swintr; + + struct callout ktls_tick; }; #define ADAPTER_LOCK(sc) mtx_lock(&(sc)->sc_lock) @@ -1155,13 +1177,12 @@ int adapter_full_init(struct adapter *); int adapter_full_uninit(struct adapter *); uint64_t cxgbe_get_counter(struct ifnet *, ift_counter); +void cxgbe_snd_tag_init(struct cxgbe_snd_tag *, struct ifnet *, int); int vi_full_init(struct vi_info *); int vi_full_uninit(struct vi_info *); void vi_sysctls(struct vi_info *); void vi_tick(void *); int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int); -int alloc_atid_tab(struct tid_info *, int); -void free_atid_tab(struct tid_info *); int alloc_atid(struct adapter *, void *); void *lookup_atid(struct adapter *, int); void free_atid(struct adapter *, int); @@ -1171,6 +1192,18 @@ bool t4_os_dump_cimla(struct adapter *, int, bool); void t4_os_dump_devlog(struct adapter *); +#ifdef KERN_TLS +/* t4_kern_tls.c */ +int cxgbe_tls_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, + struct m_snd_tag **); +void cxgbe_tls_tag_free(struct m_snd_tag *); +void t6_ktls_modload(void); +void t6_ktls_modunload(void); +int t6_ktls_try(struct ifnet *, struct socket *, struct ktls_session *); +int t6_ktls_parse_pkt(struct mbuf *, int *, int *); +int t6_ktls_write_wr(struct sge_txq *, void *, struct mbuf *, u_int, u_int); +#endif + #ifdef DEV_NETMAP /* t4_netmap.c */ struct sge_nm_rxq; @@ -1214,7 +1247,7 @@ void t4_register_shared_cpl_handler(int, cpl_handler_t, int); #ifdef RATELIMIT int ethofld_transmit(struct ifnet *, struct mbuf *); -void send_etid_flush_wr(struct cxgbe_snd_tag *); +void send_etid_flush_wr(struct cxgbe_rate_tag *); #endif /* t4_tracer.c */ @@ -1240,13 +1273,13 @@ #ifdef RATELIMIT void t4_init_etid_table(struct adapter *); void t4_free_etid_table(struct adapter *); -struct cxgbe_snd_tag *lookup_etid(struct adapter *, int); -int cxgbe_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, +struct cxgbe_rate_tag *lookup_etid(struct adapter *, int); +int cxgbe_rate_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); -int cxgbe_snd_tag_modify(struct m_snd_tag *, union if_snd_tag_modify_params *); -int cxgbe_snd_tag_query(struct m_snd_tag *, union if_snd_tag_query_params *); -void cxgbe_snd_tag_free(struct m_snd_tag *); -void cxgbe_snd_tag_free_locked(struct cxgbe_snd_tag *); +int cxgbe_rate_tag_modify(struct m_snd_tag *, union if_snd_tag_modify_params *); +int cxgbe_rate_tag_query(struct m_snd_tag *, union if_snd_tag_query_params *); +void cxgbe_rate_tag_free(struct m_snd_tag *); +void cxgbe_rate_tag_free_locked(struct cxgbe_rate_tag *); void cxgbe_ratelimit_query(struct ifnet *, struct if_ratelimit_query_results *); #endif Index: sys/dev/cxgbe/common/t4_msg.h =================================================================== --- sys/dev/cxgbe/common/t4_msg.h +++ sys/dev/cxgbe/common/t4_msg.h @@ -1158,6 +1158,17 @@ __be32 flags; }; +/* cpl_tx_data.len fields */ +#define S_TX_DATA_MSS 16 +#define M_TX_DATA_MSS 0xFFFF +#define V_TX_DATA_MSS(x) ((x) << S_TX_DATA_MSS) +#define G_TX_DATA_MSS(x) (((x) >> S_TX_DATA_MSS) & M_TX_DATA_MSS) + +#define S_TX_LENGTH 0 +#define M_TX_LENGTH 0xFFFF +#define 
V_TX_LENGTH(x) ((x) << S_TX_LENGTH) +#define G_TX_LENGTH(x) (((x) >> S_TX_LENGTH) & M_TX_LENGTH) + /* cpl_tx_data.flags fields */ #define S_TX_PROXY 5 #define V_TX_PROXY(x) ((x) << S_TX_PROXY) @@ -1205,6 +1216,14 @@ #define V_T6_TX_FORCE(x) ((x) << S_T6_TX_FORCE) #define F_T6_TX_FORCE V_T6_TX_FORCE(1U) +#define S_TX_BYPASS 21 +#define V_TX_BYPASS(x) ((x) << S_TX_BYPASS) +#define F_TX_BYPASS V_TX_BYPASS(1U) + +#define S_TX_PUSH 22 +#define V_TX_PUSH(x) ((x) << S_TX_PUSH) +#define F_TX_PUSH V_TX_PUSH(1U) + /* additional tx_data_wr.flags fields */ #define S_TX_CPU_IDX 0 #define M_TX_CPU_IDX 0x3F Index: sys/dev/cxgbe/common/t4_regs.h =================================================================== --- sys/dev/cxgbe/common/t4_regs.h +++ sys/dev/cxgbe/common/t4_regs.h @@ -22617,6 +22617,10 @@ #define V_TXPDUSIZEADJ(x) ((x) << S_TXPDUSIZEADJ) #define G_TXPDUSIZEADJ(x) (((x) >> S_TXPDUSIZEADJ) & M_TXPDUSIZEADJ) +#define S_ENABLECBYP 21 +#define V_ENABLECBYP(x) ((x) << S_ENABLECBYP) +#define F_ENABLECBYP V_ENABLECBYP(1U) + #define S_LIMITEDTRANSMIT 20 #define M_LIMITEDTRANSMIT 0xfU #define V_LIMITEDTRANSMIT(x) ((x) << S_LIMITEDTRANSMIT) Index: sys/dev/cxgbe/common/t4_tcb.h =================================================================== --- sys/dev/cxgbe/common/t4_tcb.h +++ sys/dev/cxgbe/common/t4_tcb.h @@ -753,6 +753,9 @@ #define S_TF_CCTRL_RFR 62 #define V_TF_CCTRL_RFR(x) ((__u64)(x) << S_TF_CCTRL_RFR) +#define S_TF_CORE_BYPASS 63 +#define V_TF_CORE_BYPASS(x) ((__u64)(x) << S_TF_CORE_BYPASS) + #define S_TF_DDP_INDICATE_OUT 16 #define V_TF_DDP_INDICATE_OUT(x) ((x) << S_TF_DDP_INDICATE_OUT) Index: sys/dev/cxgbe/crypto/t4_kern_tls.c =================================================================== --- /dev/null +++ sys/dev/cxgbe/crypto/t4_kern_tls.c @@ -0,0 +1,2480 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2018-2019 Chelsio Communications, Inc. + * All rights reserved. + * Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_kern_tls.h" + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/common.h" +#include "common/t4_regs.h" +#include "common/t4_regs_values.h" +#include "common/t4_tcb.h" +#include "t4_l2t.h" +#include "t4_clip.h" +#include "t4_mp_ring.h" +#include "crypto/t4_crypto.h" + +#if defined(INET) || defined(INET6) + +#define SALT_SIZE 4 + +#define GCM_TAG_SIZE 16 +#define TLS_HEADER_LENGTH 5 + +#define TLS_KEY_CONTEXT_SZ roundup2(sizeof(struct tls_keyctx), 32) + +struct tls_scmd { + __be32 seqno_numivs; + __be32 ivgen_hdrlen; +}; + +struct tls_key_req { + /* FW_ULPTX_WR */ + __be32 wr_hi; + __be32 wr_mid; + __be32 ftid; + __u8 reneg_to_write_rx; + __u8 protocol; + __be16 mfs; + /* master command */ + __be32 cmd; + __be32 len16; /* command length */ + __be32 dlen; /* data length in 32-byte units */ + __be32 kaddr; + /* sub-command */ + __be32 sc_more; + __be32 sc_len; +}__packed; + +struct tls_keyctx { + struct tx_keyctx_hdr { + __u8 ctxlen; + __u8 r2; + __be16 dualck_to_txvalid; + __u8 txsalt[4]; + __be64 r5; + } txhdr; + struct keys { + __u8 edkey[32]; + __u8 ipad[64]; + __u8 opad[64]; + } keys; +}; + +#define S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT 11 +#define M_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT 0x1 +#define V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(x) \ + ((x) << S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT) +#define G_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(x) \ + (((x) >> S_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT) & \ + M_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT) +#define F_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT \ + V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(1U) + +#define S_TLS_KEYCTX_TX_WR_SALT_PRESENT 10 +#define M_TLS_KEYCTX_TX_WR_SALT_PRESENT 0x1 +#define V_TLS_KEYCTX_TX_WR_SALT_PRESENT(x) \ + ((x) << S_TLS_KEYCTX_TX_WR_SALT_PRESENT) +#define G_TLS_KEYCTX_TX_WR_SALT_PRESENT(x) \ + (((x) >> S_TLS_KEYCTX_TX_WR_SALT_PRESENT) & \ + M_TLS_KEYCTX_TX_WR_SALT_PRESENT) +#define F_TLS_KEYCTX_TX_WR_SALT_PRESENT \ + V_TLS_KEYCTX_TX_WR_SALT_PRESENT(1U) + +#define S_TLS_KEYCTX_TX_WR_TXCK_SIZE 6 +#define M_TLS_KEYCTX_TX_WR_TXCK_SIZE 0xf +#define V_TLS_KEYCTX_TX_WR_TXCK_SIZE(x) \ + ((x) << S_TLS_KEYCTX_TX_WR_TXCK_SIZE) +#define G_TLS_KEYCTX_TX_WR_TXCK_SIZE(x) \ + (((x) >> S_TLS_KEYCTX_TX_WR_TXCK_SIZE) & \ + M_TLS_KEYCTX_TX_WR_TXCK_SIZE) + +#define S_TLS_KEYCTX_TX_WR_TXMK_SIZE 2 +#define M_TLS_KEYCTX_TX_WR_TXMK_SIZE 0xf +#define V_TLS_KEYCTX_TX_WR_TXMK_SIZE(x) \ + ((x) << S_TLS_KEYCTX_TX_WR_TXMK_SIZE) +#define G_TLS_KEYCTX_TX_WR_TXMK_SIZE(x) \ + (((x) >> S_TLS_KEYCTX_TX_WR_TXMK_SIZE) & \ + M_TLS_KEYCTX_TX_WR_TXMK_SIZE) + +#define S_TLS_KEYCTX_TX_WR_TXVALID 0 +#define M_TLS_KEYCTX_TX_WR_TXVALID 0x1 +#define V_TLS_KEYCTX_TX_WR_TXVALID(x) \ + ((x) << S_TLS_KEYCTX_TX_WR_TXVALID) +#define G_TLS_KEYCTX_TX_WR_TXVALID(x) \ + (((x) >> S_TLS_KEYCTX_TX_WR_TXVALID) & M_TLS_KEYCTX_TX_WR_TXVALID) +#define F_TLS_KEYCTX_TX_WR_TXVALID V_TLS_KEYCTX_TX_WR_TXVALID(1U) + +/* Key Context Programming Operation type */ +#define KEY_WRITE_RX 0x1 +#define KEY_WRITE_TX 0x2 +#define KEY_DELETE_RX 0x4 +#define KEY_DELETE_TX 0x8 + +struct tlspcb { + struct cxgbe_snd_tag com; + struct vi_info *vi; /* virtual interface */ + struct adapter *sc; + struct l2t_entry *l2te; /* L2 table entry used by this connection */ + int tid; /* Connection identifier */ + + int tx_key_addr; + bool inline_key; + bool using_timestamps; + unsigned char enc_mode; + + struct tls_scmd scmd0; + struct tls_scmd 
scmd0_short; + + unsigned int tx_key_info_size; + + uint32_t prev_seq; + uint32_t prev_ack; + uint32_t prev_tsecr; + uint16_t prev_win; + uint16_t prev_mss; + + /* Only used outside of setup and teardown when using inline keys. */ + struct tls_keyctx keyctx; + + /* Fields only used during setup and teardown. */ + struct inpcb *inp; /* backpointer to host stack's PCB */ + struct sge_txq *txq; + struct sge_wrq *ctrlq; + struct clip_entry *ce; /* CLIP table entry used by this tid */ + + unsigned char auth_mode; + unsigned char hmac_ctrl; + unsigned char mac_first; + unsigned char iv_size; + + unsigned int frag_size; + unsigned int cipher_secret_size; + int proto_ver; + + bool open_pending; +}; + +static int ktls_setup_keys(struct tlspcb *tlsp, + const struct ktls_session *tls, struct sge_txq *txq); + +static inline struct tlspcb * +mst_to_tls(struct m_snd_tag *t) +{ + return ((struct tlspcb *)mst_to_cst(t)); +} + +/* XXX: There are similar versions of these two in tom/t4_tls.c. */ +static int +get_new_keyid(struct tlspcb *tlsp) +{ + vmem_addr_t addr; + + if (vmem_alloc(tlsp->sc->key_map, TLS_KEY_CONTEXT_SZ, + M_NOWAIT | M_FIRSTFIT, &addr) != 0) + return (-1); + + return (addr); +} + +static void +free_keyid(struct tlspcb *tlsp, int keyid) +{ + + CTR3(KTR_CXGBE, "%s: tid %d key addr %#x", __func__, tlsp->tid, keyid); + vmem_free(tlsp->sc->key_map, keyid, TLS_KEY_CONTEXT_SZ); +} + +static struct tlspcb * +alloc_tlspcb(struct ifnet *ifp, struct vi_info *vi, int flags) +{ + struct port_info *pi = vi->pi; + struct adapter *sc = pi->adapter; + struct tlspcb *tlsp; + + tlsp = malloc(sizeof(*tlsp), M_CXGBE, M_ZERO | flags); + if (tlsp == NULL) + return (NULL); + + cxgbe_snd_tag_init(&tlsp->com, ifp, IF_SND_TAG_TYPE_TLS); + tlsp->vi = vi; + tlsp->sc = sc; + tlsp->ctrlq = &sc->sge.ctrlq[pi->port_id]; + tlsp->tid = -1; + tlsp->tx_key_addr = -1; + + return (tlsp); +} + +static void +init_ktls_key_params(struct tlspcb *tlsp, const struct ktls_session *tls) +{ + int mac_key_size; + + if (tls->params.tls_vminor == TLS_MINOR_VER_ONE) + tlsp->proto_ver = SCMD_PROTO_VERSION_TLS_1_1; + else + tlsp->proto_ver = SCMD_PROTO_VERSION_TLS_1_2; + tlsp->cipher_secret_size = tls->params.cipher_key_len; + tlsp->tx_key_info_size = sizeof(struct tx_keyctx_hdr) + + tlsp->cipher_secret_size; + if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) { + tlsp->auth_mode = SCMD_AUTH_MODE_GHASH; + tlsp->enc_mode = SCMD_CIPH_MODE_AES_GCM; + tlsp->iv_size = 4; + tlsp->mac_first = 0; + tlsp->hmac_ctrl = SCMD_HMAC_CTRL_NOP; + tlsp->tx_key_info_size += GMAC_BLOCK_LEN; + } else { + switch (tls->params.auth_algorithm) { + case CRYPTO_SHA1_HMAC: + mac_key_size = roundup2(SHA1_HASH_LEN, 16); + tlsp->auth_mode = SCMD_AUTH_MODE_SHA1; + break; + case CRYPTO_SHA2_256_HMAC: + mac_key_size = SHA2_256_HASH_LEN; + tlsp->auth_mode = SCMD_AUTH_MODE_SHA256; + break; + case CRYPTO_SHA2_384_HMAC: + mac_key_size = SHA2_512_HASH_LEN; + tlsp->auth_mode = SCMD_AUTH_MODE_SHA512_384; + break; + } + tlsp->enc_mode = SCMD_CIPH_MODE_AES_CBC; + tlsp->iv_size = 8; /* for CBC, iv is 16B, unit of 2B */ + tlsp->mac_first = 1; + tlsp->hmac_ctrl = SCMD_HMAC_CTRL_NO_TRUNC; + tlsp->tx_key_info_size += mac_key_size * 2; + } + + tlsp->frag_size = tls->params.max_frame_len; +} + +static int +ktls_act_open_cpl_size(bool isipv6) +{ + + if (isipv6) + return (sizeof(struct cpl_t6_act_open_req6)); + else + return (sizeof(struct cpl_t6_act_open_req)); +} + +static void +mk_ktls_act_open_req(struct adapter *sc, struct vi_info *vi, struct inpcb *inp, + struct tlspcb *tlsp, 
int atid, void *dst) +{ + struct tcpcb *tp = intotcpcb(inp); + struct cpl_t6_act_open_req *cpl6; + struct cpl_act_open_req *cpl; + uint64_t options; + int qid_atid; + + cpl6 = dst; + cpl = (struct cpl_act_open_req *)cpl6; + INIT_TP_WR(cpl6, 0); + qid_atid = V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) | + V_TID_COOKIE(CPL_COOKIE_KERN_TLS); + OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, + qid_atid)); + inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, + &cpl->peer_ip, &cpl->peer_port); + + options = F_TCAM_BYPASS | V_ULP_MODE(ULP_MODE_NONE); + options |= V_SMAC_SEL(vi->smt_idx) | V_TX_CHAN(vi->pi->tx_chan); + options |= F_NON_OFFLOAD; + cpl->opt0 = htobe64(options); + + options = V_TX_QUEUE(sc->params.tp.tx_modq[vi->pi->tx_chan]); + if (tp->t_flags & TF_REQ_TSTMP) + options |= F_TSTAMPS_EN; + cpl->opt2 = htobe32(options); +} + +static void +mk_ktls_act_open_req6(struct adapter *sc, struct vi_info *vi, + struct inpcb *inp, struct tlspcb *tlsp, int atid, void *dst) +{ + struct tcpcb *tp = intotcpcb(inp); + struct cpl_t6_act_open_req6 *cpl6; + struct cpl_act_open_req6 *cpl; + uint64_t options; + int qid_atid; + + cpl6 = dst; + cpl = (struct cpl_act_open_req6 *)cpl6; + INIT_TP_WR(cpl6, 0); + qid_atid = V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) | + V_TID_COOKIE(CPL_COOKIE_KERN_TLS); + OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, + qid_atid)); + cpl->local_port = inp->inp_lport; + cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; + cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; + cpl->peer_port = inp->inp_fport; + cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0]; + cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8]; + + options = F_TCAM_BYPASS | V_ULP_MODE(ULP_MODE_NONE); + options |= V_SMAC_SEL(vi->smt_idx) | V_TX_CHAN(vi->pi->tx_chan); + options |= F_NON_OFFLOAD; + cpl->opt0 = htobe64(options); + + options = V_TX_QUEUE(sc->params.tp.tx_modq[vi->pi->tx_chan]); + if (tp->t_flags & TF_REQ_TSTMP) + options |= F_TSTAMPS_EN; + cpl->opt2 = htobe32(options); +} + +static int +send_ktls_act_open_req(struct adapter *sc, struct vi_info *vi, + struct inpcb *inp, struct tlspcb *tlsp, int atid) +{ + struct wrqe *wr; + bool isipv6; + + isipv6 = (inp->inp_vflag & INP_IPV6) != 0; + if (isipv6) { + tlsp->ce = t4_hold_lip(sc, &inp->in6p_laddr, NULL); + if (tlsp->ce == NULL) + return (ENOENT); + } + + /* XXX: Use start/commit? 
*/ + wr = alloc_wrqe(ktls_act_open_cpl_size(isipv6), tlsp->ctrlq); + if (wr == NULL) { + CTR2(KTR_CXGBE, "%s: atid %d failed to alloc WR", __func__, + atid); + return (ENOMEM); + } + + if (isipv6) + mk_ktls_act_open_req6(sc, vi, inp, tlsp, atid, wrtod(wr)); + else + mk_ktls_act_open_req(sc, vi, inp, tlsp, atid, wrtod(wr)); + + tlsp->open_pending = true; + t4_wrq_tx(sc, wr); + return (0); +}; + +static int +ktls_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, + struct mbuf *m) +{ + struct adapter *sc = iq->adapter; + const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); + u_int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status))); + u_int status = G_AOPEN_STATUS(be32toh(cpl->atid_status)); + struct tlspcb *tlsp = lookup_atid(sc, atid); + struct inpcb *inp = tlsp->inp; + + CTR3(KTR_CXGBE, "%s: atid %d status %d", __func__, atid, status); + free_atid(sc, atid); + if (status == 0) + tlsp->tid = GET_TID(cpl); + + INP_WLOCK(inp); + tlsp->open_pending = false; + wakeup(tlsp); + INP_WUNLOCK(inp); + return (0); +} + +/* SET_TCB_FIELD sent as a ULP command looks like this */ +#define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \ + sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core)) + +_Static_assert((LEN__SET_TCB_FIELD_ULP + sizeof(struct ulptx_idata)) % 16 == 0, + "CPL_SET_TCB_FIELD ULP command not 16-byte aligned"); + +static void +write_set_tcb_field_ulp(struct tlspcb *tlsp, void *dst, struct sge_txq *txq, + uint16_t word, uint64_t mask, uint64_t val) +{ + struct ulp_txpkt *txpkt; + struct ulptx_idata *idata; + struct cpl_set_tcb_field_core *cpl; + + /* ULP_TXPKT */ + txpkt = dst; + txpkt->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | + V_ULP_TXPKT_DATAMODIFY(0) | + V_ULP_TXPKT_CHANNELID(tlsp->vi->pi->port_id) | V_ULP_TXPKT_DEST(0) | + V_ULP_TXPKT_FID(txq->eq.cntxt_id) | V_ULP_TXPKT_RO(1)); + txpkt->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16)); + + /* ULPTX_IDATA sub-command */ + idata = (struct ulptx_idata *)(txpkt + 1); + idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); + idata->len = htobe32(sizeof(*cpl)); + + /* CPL_SET_TCB_FIELD */ + cpl = (struct cpl_set_tcb_field_core *)(idata + 1); + OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tlsp->tid)); + cpl->reply_ctrl = htobe16(F_NO_REPLY); + cpl->word_cookie = htobe16(V_WORD(word)); + cpl->mask = htobe64(mask); + cpl->val = htobe64(val); + + /* ULPTX_NOOP */ + idata = (struct ulptx_idata *)(cpl + 1); + idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); + idata->len = htobe32(0); +} + +static int +ktls_set_tcb_fields(struct tlspcb *tlsp, struct tcpcb *tp, struct sge_txq *txq) +{ + struct fw_ulptx_wr *wr; + struct mbuf *m; + char *dst; + void *items[1]; + int error, len; + + len = sizeof(*wr) + 3 * roundup2(LEN__SET_TCB_FIELD_ULP, 16); + if (tp->t_flags & TF_REQ_TSTMP) + len += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + m = alloc_wr_mbuf(len, M_NOWAIT); + if (m == NULL) { + CTR2(KTR_CXGBE, "%s: tid %d failed to alloc WR mbuf", __func__, + tlsp->tid); + return (ENOMEM); + } + m->m_pkthdr.snd_tag = m_snd_tag_ref(&tlsp->com.com); + m->m_pkthdr.csum_flags |= CSUM_SND_TAG; + + /* FW_ULPTX_WR */ + wr = mtod(m, void *); + wr->op_to_compl = htobe32(V_FW_WR_OP(FW_ULPTX_WR)); + wr->flowid_len16 = htobe32(F_FW_ULPTX_WR_DATA | + V_FW_WR_LEN16(len / 16)); + wr->cookie = 0; + dst = (char *)(wr + 1); + + /* Clear TF_NON_OFFLOAD and set TF_CORE_BYPASS */ + write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_T_FLAGS, + V_TCB_T_FLAGS(V_TF_CORE_BYPASS(1) | V_TF_NON_OFFLOAD(1)), + 
V_TCB_T_FLAGS(V_TF_CORE_BYPASS(1))); + dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + + /* Clear the SND_UNA_RAW, SND_NXT_RAW, and SND_MAX_RAW offsets. */ + write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_SND_UNA_RAW, + V_TCB_SND_NXT_RAW(M_TCB_SND_NXT_RAW) | + V_TCB_SND_UNA_RAW(M_TCB_SND_UNA_RAW), + V_TCB_SND_NXT_RAW(0) | V_TCB_SND_UNA_RAW(0)); + dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + + write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_SND_MAX_RAW, + V_TCB_SND_MAX_RAW(M_TCB_SND_MAX_RAW), V_TCB_SND_MAX_RAW(0)); + dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + + if (tp->t_flags & TF_REQ_TSTMP) { + write_set_tcb_field_ulp(tlsp, dst, txq, W_TCB_TIMESTAMP_OFFSET, + V_TCB_TIMESTAMP_OFFSET(M_TCB_TIMESTAMP_OFFSET), + V_TCB_TIMESTAMP_OFFSET(tp->ts_offset >> 28)); + dst += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + } + + KASSERT(dst - (char *)wr == len, ("%s: length mismatch", __func__)); + + items[0] = m; + error = mp_ring_enqueue(txq->r, items, 1, 1); + if (error) + m_free(m); + return (error); +} + +int +cxgbe_tls_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, + struct m_snd_tag **pt) +{ + const struct ktls_session *tls; + struct tlspcb *tlsp; + struct adapter *sc; + struct vi_info *vi; + struct inpcb *inp; + struct tcpcb *tp; + struct sge_txq *txq; + int atid, error, keyid; + + /* Sanity check values in *tls. */ + tls = params->tls.tls; + switch (tls->params.cipher_algorithm) { + case CRYPTO_AES_CBC: + /* XXX: Explicitly ignore any provided IV. */ + switch (tls->params.cipher_key_len) { + case 128 / 8: + case 192 / 8: + case 256 / 8: + break; + default: + return (EINVAL); + } + switch (tls->params.auth_algorithm) { + case CRYPTO_SHA1_HMAC: + case CRYPTO_SHA2_256_HMAC: + case CRYPTO_SHA2_384_HMAC: + break; + default: + return (EPROTONOSUPPORT); + } + break; + case CRYPTO_AES_NIST_GCM_16: + if (tls->params.iv_len != SALT_SIZE) + return (EINVAL); + switch (tls->params.cipher_key_len) { + case 128 / 8: + case 192 / 8: + case 256 / 8: + break; + default: + return (EINVAL); + } + break; + default: + return (EPROTONOSUPPORT); + } + + /* Only TLS 1.1 and TLS 1.2 are currently supported. */ + if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE || + tls->params.tls_vminor < TLS_MINOR_VER_ONE || + tls->params.tls_vminor > TLS_MINOR_VER_TWO) + return (EPROTONOSUPPORT); + + vi = ifp->if_softc; + sc = vi->pi->adapter; + + tlsp = alloc_tlspcb(ifp, vi, M_WAITOK); + + atid = alloc_atid(sc, tlsp); + if (atid < 0) { + error = ENOMEM; + goto failed; + } + + if (sc->tlst.inline_keys) + keyid = -1; + else + keyid = get_new_keyid(tlsp); + if (keyid < 0) { + CTR2(KTR_CXGBE, "%s: atid %d using immediate key ctx", __func__, + atid); + tlsp->inline_key = true; + } else { + tlsp->tx_key_addr = keyid; + CTR3(KTR_CXGBE, "%s: atid %d allocated TX key addr %#x", + __func__, + atid, tlsp->tx_key_addr); + } + + inp = params->tls.inp; + INP_RLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_RUNLOCK(inp); + error = ECONNRESET; + goto failed; + } + tlsp->inp = inp; + + tp = inp->inp_ppcb; + if (tp->t_flags & TF_REQ_TSTMP) { + tlsp->using_timestamps = true; + if ((tp->ts_offset & 0xfffffff) != 0) { + INP_RUNLOCK(inp); + error = EINVAL; + goto failed; + } + } else + tlsp->using_timestamps = false; + + error = send_ktls_act_open_req(sc, vi, inp, tlsp, atid); + if (error) { + INP_RUNLOCK(inp); + goto failed; + } + + /* Wait for reply to active open. */ + CTR2(KTR_CXGBE, "%s: atid %d sent CPL_ACT_OPEN_REQ", __func__, + atid); + while (tlsp->open_pending) { + /* + * XXX: PCATCH? 
We would then have to discard the PCB + * when the completion CPL arrived. + */ + error = rw_sleep(tlsp, &inp->inp_lock, 0, "t6tlsop", 0); + } + + atid = -1; + if (tlsp->tid < 0) { + INP_RUNLOCK(inp); + error = ENOMEM; + goto failed; + } + + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_RUNLOCK(inp); + error = ECONNRESET; + goto failed; + } + + txq = &sc->sge.txq[vi->first_txq]; + if (inp->inp_flowtype != M_HASHTYPE_NONE) + txq += ((inp->inp_flowid % (vi->ntxq - vi->rsrv_noflowq)) + + vi->rsrv_noflowq); + tlsp->txq = txq; + + error = ktls_set_tcb_fields(tlsp, tp, txq); + INP_RUNLOCK(inp); + if (error) + goto failed; + + init_ktls_key_params(tlsp, tls); + + error = ktls_setup_keys(tlsp, tls, txq); + if (error) + goto failed; + + /* The SCMD fields used when encrypting a full TLS record. */ + tlsp->scmd0.seqno_numivs = htobe32(V_SCMD_SEQ_NO_CTRL(3) | + V_SCMD_PROTO_VERSION(tlsp->proto_ver) | + V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) | + V_SCMD_CIPH_AUTH_SEQ_CTRL((tlsp->mac_first == 0)) | + V_SCMD_CIPH_MODE(tlsp->enc_mode) | + V_SCMD_AUTH_MODE(tlsp->auth_mode) | + V_SCMD_HMAC_CTRL(tlsp->hmac_ctrl) | + V_SCMD_IV_SIZE(tlsp->iv_size) | V_SCMD_NUM_IVS(1)); + + tlsp->scmd0.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) | + V_SCMD_TLS_FRAG_ENABLE(0); + if (tlsp->inline_key) + tlsp->scmd0.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1); + tlsp->scmd0.ivgen_hdrlen = htobe32(tlsp->scmd0.ivgen_hdrlen); + + /* + * The SCMD fields used when encrypting a partial TLS record + * (no trailer and possibly a truncated payload). + */ + tlsp->scmd0_short.seqno_numivs = V_SCMD_SEQ_NO_CTRL(0) | + V_SCMD_PROTO_VERSION(SCMD_PROTO_VERSION_GENERIC) | + V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) | + V_SCMD_CIPH_AUTH_SEQ_CTRL((tlsp->mac_first == 0)) | + V_SCMD_AUTH_MODE(SCMD_AUTH_MODE_NOP) | + V_SCMD_HMAC_CTRL(SCMD_HMAC_CTRL_NOP) | + V_SCMD_IV_SIZE(AES_BLOCK_LEN / 2) | V_SCMD_NUM_IVS(0); + if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) + tlsp->scmd0_short.seqno_numivs |= + V_SCMD_CIPH_MODE(SCMD_CIPH_MODE_AES_CTR); + else + tlsp->scmd0_short.seqno_numivs |= + V_SCMD_CIPH_MODE(tlsp->enc_mode); + tlsp->scmd0_short.seqno_numivs = + htobe32(tlsp->scmd0_short.seqno_numivs); + + tlsp->scmd0_short.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) | + V_SCMD_TLS_FRAG_ENABLE(0) | + V_SCMD_AADIVDROP(1); + if (tlsp->inline_key) + tlsp->scmd0_short.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1); + + TXQ_LOCK(txq); + if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) + txq->kern_tls_gcm++; + else + txq->kern_tls_cbc++; + TXQ_UNLOCK(txq); + *pt = &tlsp->com.com; + return (0); + +failed: + if (atid >= 0) + free_atid(sc, atid); + m_snd_tag_rele(&tlsp->com.com); + return (error); +} + +/* XXX: Should share this with ccr(4) eventually. */ +static void +init_ktls_gmac_hash(const char *key, int klen, char *ghash) +{ + static char zeroes[GMAC_BLOCK_LEN]; + uint32_t keysched[4 * (RIJNDAEL_MAXNR + 1)]; + int rounds; + + rounds = rijndaelKeySetupEnc(keysched, key, klen); + rijndaelEncrypt(keysched, rounds, zeroes, ghash); +} + +/* XXX: Should share this with ccr(4) eventually. 
*/ +static void +ktls_copy_partial_hash(void *dst, int cri_alg, union authctx *auth_ctx) +{ + uint32_t *u32; + uint64_t *u64; + u_int i; + + u32 = (uint32_t *)dst; + u64 = (uint64_t *)dst; + switch (cri_alg) { + case CRYPTO_SHA1_HMAC: + for (i = 0; i < SHA1_HASH_LEN / 4; i++) + u32[i] = htobe32(auth_ctx->sha1ctx.h.b32[i]); + break; + case CRYPTO_SHA2_256_HMAC: + for (i = 0; i < SHA2_256_HASH_LEN / 4; i++) + u32[i] = htobe32(auth_ctx->sha256ctx.state[i]); + break; + case CRYPTO_SHA2_384_HMAC: + for (i = 0; i < SHA2_512_HASH_LEN / 8; i++) + u64[i] = htobe64(auth_ctx->sha384ctx.state[i]); + break; + } +} + +static void +init_ktls_hmac_digest(struct auth_hash *axf, u_int partial_digest_len, + char *key, int klen, char *dst) +{ + union authctx auth_ctx; + char ipad[SHA2_512_BLOCK_LEN], opad[SHA2_512_BLOCK_LEN]; + u_int i; + + /* + * If the key is larger than the block size, use the digest of + * the key as the key instead. + */ + klen /= 8; + if (klen > axf->blocksize) { + axf->Init(&auth_ctx); + axf->Update(&auth_ctx, key, klen); + axf->Final(ipad, &auth_ctx); + klen = axf->hashsize; + } else + memcpy(ipad, key, klen); + + memset(ipad + klen, 0, axf->blocksize - klen); + memcpy(opad, ipad, axf->blocksize); + + for (i = 0; i < axf->blocksize; i++) { + ipad[i] ^= HMAC_IPAD_VAL; + opad[i] ^= HMAC_OPAD_VAL; + } + + /* + * Hash the raw ipad and opad and store the partial results in + * the key context. + */ + axf->Init(&auth_ctx); + axf->Update(&auth_ctx, ipad, axf->blocksize); + ktls_copy_partial_hash(dst, axf->type, &auth_ctx); + + dst += roundup2(partial_digest_len, 16); + axf->Init(&auth_ctx); + axf->Update(&auth_ctx, opad, axf->blocksize); + ktls_copy_partial_hash(dst, axf->type, &auth_ctx); +} + +static int +ktls_setup_keys(struct tlspcb *tlsp, const struct ktls_session *tls, + struct sge_txq *txq) +{ + struct auth_hash *axf; + int error, keyid, kwrlen, kctxlen, len; + struct tls_key_req *kwr; + struct tls_keyctx *kctx; + void *items[1], *key; + struct tx_keyctx_hdr *khdr; + unsigned int ck_size, mk_size, partial_digest_len; + struct mbuf *m; + + /* + * Store the salt and keys in the key context. For + * connections with an inline key, this key context is passed + * as immediate data in each work request. For connections + * storing the key in DDR, a work request is used to store a + * copy of the key context in DDR. 
+ */ + kctx = &tlsp->keyctx; + khdr = &kctx->txhdr; + + switch (tlsp->cipher_secret_size) { + case 128 / 8: + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + break; + case 192 / 8: + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + break; + case 256 / 8: + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + break; + default: + panic("bad key size"); + } + axf = NULL; + partial_digest_len = 0; + if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) + mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512; + else { + switch (tlsp->auth_mode) { + case SCMD_AUTH_MODE_SHA1: + axf = &auth_hash_hmac_sha1; + mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_160; + partial_digest_len = SHA1_HASH_LEN; + break; + case SCMD_AUTH_MODE_SHA256: + axf = &auth_hash_hmac_sha2_256; + mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; + partial_digest_len = SHA2_256_HASH_LEN; + break; + case SCMD_AUTH_MODE_SHA512_384: + axf = &auth_hash_hmac_sha2_384; + mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512; + partial_digest_len = SHA2_512_HASH_LEN; + break; + default: + panic("bad auth mode"); + } + } + + khdr->ctxlen = (tlsp->tx_key_info_size >> 4); + khdr->dualck_to_txvalid = V_TLS_KEYCTX_TX_WR_SALT_PRESENT(1) | + V_TLS_KEYCTX_TX_WR_TXCK_SIZE(ck_size) | + V_TLS_KEYCTX_TX_WR_TXMK_SIZE(mk_size) | + V_TLS_KEYCTX_TX_WR_TXVALID(1); + if (tlsp->enc_mode != SCMD_CIPH_MODE_AES_GCM) + khdr->dualck_to_txvalid |= V_TLS_KEYCTX_TX_WR_TXOPAD_PRESENT(1); + khdr->dualck_to_txvalid = htobe16(khdr->dualck_to_txvalid); + key = kctx->keys.edkey; + memcpy(key, tls->params.cipher_key, tls->params.cipher_key_len); + if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) { + memcpy(khdr->txsalt, tls->params.iv, SALT_SIZE); + init_ktls_gmac_hash(tls->params.cipher_key, + tls->params.cipher_key_len * 8, + (char *)key + tls->params.cipher_key_len); + } else { + init_ktls_hmac_digest(axf, partial_digest_len, + tls->params.auth_key, tls->params.auth_key_len * 8, + (char *)key + tls->params.cipher_key_len); + } + + if (tlsp->inline_key) + return (0); + + keyid = tlsp->tx_key_addr; + + /* Populate key work request. */ + kwrlen = sizeof(*kwr); + kctxlen = roundup2(sizeof(*kctx), 32); + len = kwrlen + kctxlen; + + m = alloc_wr_mbuf(len, M_NOWAIT); + if (m == NULL) { + CTR2(KTR_CXGBE, "%s: tid %d failed to alloc WR mbuf", __func__, + tlsp->tid); + return (ENOMEM); + } + m->m_pkthdr.snd_tag = m_snd_tag_ref(&tlsp->com.com); + m->m_pkthdr.csum_flags |= CSUM_SND_TAG; + kwr = mtod(m, void *); + memset(kwr, 0, len); + + kwr->wr_hi = htobe32(V_FW_WR_OP(FW_ULPTX_WR) | + F_FW_WR_ATOMIC); + kwr->wr_mid = htobe32(V_FW_WR_LEN16(DIV_ROUND_UP(len, 16))); + kwr->protocol = tlsp->proto_ver; + kwr->mfs = htons(tlsp->frag_size); + kwr->reneg_to_write_rx = KEY_WRITE_TX; + + /* master command */ + kwr->cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE) | + V_T5_ULP_MEMIO_ORDER(1) | V_T5_ULP_MEMIO_IMM(1)); + kwr->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(kctxlen >> 5)); + kwr->len16 = htobe32((tlsp->tid << 8) | + DIV_ROUND_UP(len - sizeof(struct work_request_hdr), 16)); + kwr->kaddr = htobe32(V_ULP_MEMIO_ADDR(keyid >> 5)); + + /* sub command */ + kwr->sc_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); + kwr->sc_len = htobe32(kctxlen); + + kctx = (struct tls_keyctx *)(kwr + 1); + memcpy(kctx, &tlsp->keyctx, sizeof(*kctx)); + + /* + * Place the key work request in the transmit queue. It + * should be sent to the NIC before any TLS packets using this + * session. 
+ */ + items[0] = m; + error = mp_ring_enqueue(txq->r, items, 1, 1); + if (error) + m_free(m); + else + CTR2(KTR_CXGBE, "%s: tid %d sent key WR", __func__, tlsp->tid); + return (error); +} + +static u_int +ktls_base_wr_size(struct tlspcb *tlsp) +{ + u_int wr_len; + + wr_len = sizeof(struct fw_ulptx_wr); // 16 + wr_len += sizeof(struct ulp_txpkt); // 8 + wr_len += sizeof(struct ulptx_idata); // 8 + wr_len += sizeof(struct cpl_tx_sec_pdu);// 32 + if (tlsp->inline_key) + wr_len += tlsp->tx_key_info_size; + else { + wr_len += sizeof(struct ulptx_sc_memrd);// 8 + wr_len += sizeof(struct ulptx_idata); // 8 + } + wr_len += sizeof(struct cpl_tx_data); // 16 + return (wr_len); +} + +/* How many bytes of TCP payload to send for a given TLS record. */ +static u_int +ktls_tcp_payload_length(struct tlspcb *tlsp, struct mbuf *m_tls) +{ + struct mbuf_ext_pgs *ext_pgs; + struct tls_record_layer *hdr; + u_int plen, mlen; + + MBUF_EXT_PGS_ASSERT(m_tls); + ext_pgs = m_tls->m_ext.ext_pgs; + hdr = (void *)ext_pgs->hdr; + plen = ntohs(hdr->tls_length); + + /* + * What range of the TLS record is the mbuf requesting to be + * sent. + */ + mlen = mtod(m_tls, vm_offset_t) + m_tls->m_len; + + /* Always send complete records. */ + if (mlen == TLS_HEADER_LENGTH + plen) + return (mlen); + + /* + * If the host stack has asked to send part of the trailer, + * trim the length to avoid sending any of the trailer. There + * is no way to send a partial trailer currently. + */ + if (mlen > TLS_HEADER_LENGTH + plen - ext_pgs->trail_len) + mlen = TLS_HEADER_LENGTH + plen - ext_pgs->trail_len; + + + /* + * For AES-CBC adjust the ciphertext length for the block + * size. + */ + if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_CBC && + mlen > TLS_HEADER_LENGTH) { + mlen = TLS_HEADER_LENGTH + rounddown(mlen - TLS_HEADER_LENGTH, + AES_BLOCK_LEN); + } + +#ifdef VERBOSE_TRACES + CTR4(KTR_CXGBE, "%s: tid %d short TLS record (%u vs %u)", + __func__, tlsp->tid, mlen, TLS_HEADER_LENGTH + plen); +#endif + return (mlen); +} + +/* + * For a "short" TLS record, determine the offset into the TLS record + * payload to send. This offset does not include the TLS header, but + * a non-zero offset implies that a header will not be sent. + */ +static u_int +ktls_payload_offset(struct tlspcb *tlsp, struct mbuf *m_tls) +{ + struct mbuf_ext_pgs *ext_pgs; + struct tls_record_layer *hdr; + u_int offset, plen; +#ifdef INVARIANTS + u_int mlen; +#endif + + MBUF_EXT_PGS_ASSERT(m_tls); + ext_pgs = m_tls->m_ext.ext_pgs; + hdr = (void *)ext_pgs->hdr; + plen = ntohs(hdr->tls_length); +#ifdef INVARIANTS + mlen = mtod(m_tls, vm_offset_t) + m_tls->m_len; + MPASS(mlen < TLS_HEADER_LENGTH + plen); +#endif + if (mtod(m_tls, vm_offset_t) <= ext_pgs->hdr_len) + return (0); + if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) { + /* + * Always send something. This function is only called + * if we aren't sending the tag at all, but if the + * request starts in the tag then we are in an odd + * state where would effectively send nothing. Cap + * the offset at the last byte of the record payload + * to send the last cipher block. + */ + offset = min(mtod(m_tls, vm_offset_t) - ext_pgs->hdr_len, + (plen - TLS_HEADER_LENGTH - ext_pgs->trail_len) - 1); + return (rounddown(offset, AES_BLOCK_LEN)); + } + return (0); +} + +static u_int +ktls_sgl_size(u_int nsegs) +{ + u_int wr_len; + + /* First segment is part of ulptx_sgl. 
*/ + nsegs--; + + wr_len = sizeof(struct ulptx_sgl); + wr_len += 8 * ((3 * nsegs) / 2 + (nsegs & 1)); + return (wr_len); +} + +static int +ktls_wr_len(struct tlspcb *tlsp, struct mbuf *m, struct mbuf *m_tls, + int *nsegsp) +{ + struct mbuf_ext_pgs *ext_pgs; + struct tls_record_layer *hdr; + u_int imm_len, offset, plen, wr_len, tlen; + + MBUF_EXT_PGS_ASSERT(m_tls); + ext_pgs = m_tls->m_ext.ext_pgs; + + /* + * Determine the size of the TLS record payload to send + * excluding header and trailer. + */ + tlen = ktls_tcp_payload_length(tlsp, m_tls); + if (tlen <= ext_pgs->hdr_len) { + /* + * For requests that only want to send the TLS header, + * send a tunnelled packet as immediate data. + */ + wr_len = sizeof(struct fw_eth_tx_pkt_wr) + + sizeof(struct cpl_tx_pkt_core) + + roundup2(m->m_len + m_tls->m_len, 16); + if (wr_len > SGE_MAX_WR_LEN) { + CTR3(KTR_CXGBE, + "%s: tid %d TLS header-only packet too long (len %d)", + __func__, tlsp->tid, m->m_len + m_tls->m_len); + } + + /* This should always be the last TLS record in a chain. */ + MPASS(m_tls->m_next == NULL); + + /* + * XXX: Set a bogus 'nsegs' value to avoid tripping an + * assertion in mbuf_nsegs() in t4_sge.c. + */ + *nsegsp = 1; + return (wr_len); + } + + hdr = (void *)ext_pgs->hdr; + plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - ext_pgs->trail_len; + if (tlen < plen) { + plen = tlen; + offset = ktls_payload_offset(tlsp, m_tls); + } else + offset = 0; + + /* Calculate the size of the work request. */ + wr_len = ktls_base_wr_size(tlsp); + + /* + * Full records and short records with an offset of 0 include + * the TLS header as immediate data. Short records include a + * raw AES IV as immediate data. + */ + imm_len = 0; + if (offset == 0) + imm_len += ext_pgs->hdr_len; + if (plen == tlen) + imm_len += AES_BLOCK_LEN; + wr_len += roundup2(imm_len, 16); + + /* TLS record payload via DSGL. */ + *nsegsp = sglist_count_ext_pgs(ext_pgs, ext_pgs->hdr_len + offset, + plen - (ext_pgs->hdr_len + offset)); + wr_len += ktls_sgl_size(*nsegsp); + + wr_len = roundup2(wr_len, 16); + return (wr_len); +} + +/* + * See if we have any TCP options requiring a dedicated options-only + * packet. + */ +static int +ktls_has_tcp_options(struct tcphdr *tcp) +{ + u_char *cp; + int cnt, opt, optlen; + + cp = (u_char *)(tcp + 1); + cnt = tcp->th_off * 4 - sizeof(struct tcphdr); + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + if (cnt < 2) + break; + optlen = cp[1]; + if (optlen < 2 || optlen > cnt) + break; + } + switch (opt) { + case TCPOPT_NOP: + case TCPOPT_TIMESTAMP: + break; + default: + return (1); + } + } + return (0); +} + +/* + * Find the TCP timestamp option. + */ +static void * +ktls_find_tcp_timestamps(struct tcphdr *tcp) +{ + u_char *cp; + int cnt, opt, optlen; + + cp = (u_char *)(tcp + 1); + cnt = tcp->th_off * 4 - sizeof(struct tcphdr); + for (; cnt > 0; cnt -= optlen, cp += optlen) { + opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + if (cnt < 2) + break; + optlen = cp[1]; + if (optlen < 2 || optlen > cnt) + break; + } + if (opt == TCPOPT_TIMESTAMP && optlen == TCPOLEN_TIMESTAMP) + return (cp + 2); + } + return (NULL); +} + +int +t6_ktls_parse_pkt(struct mbuf *m, int *nsegsp, int *len16p) +{ + struct tlspcb *tlsp; + struct ether_header *eh; + struct ip *ip; + struct ip6_hdr *ip6; + struct tcphdr *tcp; + struct mbuf *m_tls; + int nsegs; + u_int wr_len, tot_len; + + /* + * Locate headers in initial mbuf. 
+ * XXX: This assumes all of the headers are in the initial mbuf. + * Could perhaps use m_advance() like parse_pkt() if that turns + * out to not be true. + */ + M_ASSERTPKTHDR(m); + MPASS(m->m_pkthdr.snd_tag != NULL); + tlsp = mst_to_tls(m->m_pkthdr.snd_tag); + + if (m->m_len <= sizeof(*eh) + sizeof(*ip)) { + CTR2(KTR_CXGBE, "%s: tid %d header mbuf too short", __func__, + tlsp->tid); + return (EINVAL); + } + eh = mtod(m, struct ether_header *); + if (ntohs(eh->ether_type) != ETHERTYPE_IP && + ntohs(eh->ether_type) != ETHERTYPE_IPV6) { + CTR2(KTR_CXGBE, "%s: tid %d mbuf not ETHERTYPE_IP{,V6}", + __func__, tlsp->tid); + return (EINVAL); + } + m->m_pkthdr.l2hlen = sizeof(*eh); + + /* XXX: Reject unsupported IP options? */ + if (ntohs(eh->ether_type) == ETHERTYPE_IP) { + ip = (struct ip *)(eh + 1); + if (ip->ip_p != IPPROTO_TCP) { + CTR2(KTR_CXGBE, "%s: tid %d mbuf not IPPROTO_TCP", + __func__, tlsp->tid); + return (EINVAL); + } + m->m_pkthdr.l3hlen = ip->ip_hl * 4; + } else { + ip6 = (struct ip6_hdr *)(eh + 1); + if (ip6->ip6_nxt != IPPROTO_TCP) { + CTR3(KTR_CXGBE, "%s: tid %d mbuf not IPPROTO_TCP (%u)", + __func__, tlsp->tid, ip6->ip6_nxt); + return (EINVAL); + } + m->m_pkthdr.l3hlen = sizeof(struct ip6_hdr); + } + if (m->m_len < m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + + sizeof(*tcp)) { + CTR2(KTR_CXGBE, "%s: tid %d header mbuf too short (2)", + __func__, tlsp->tid); + return (EINVAL); + } + tcp = (struct tcphdr *)((char *)(eh + 1) + m->m_pkthdr.l3hlen); + m->m_pkthdr.l4hlen = tcp->th_off * 4; + + /* Bail if there is TCP payload before the TLS record. */ + if (m->m_len != m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + + m->m_pkthdr.l4hlen) { + CTR6(KTR_CXGBE, + "%s: tid %d header mbuf bad length (%d + %d + %d != %d)", + __func__, tlsp->tid, m->m_pkthdr.l2hlen, + m->m_pkthdr.l3hlen, m->m_pkthdr.l4hlen, m->m_len); + return (EINVAL); + } + + /* Assume all headers are in 'm' for now. */ + MPASS(m->m_next != NULL); + MPASS(m->m_next->m_flags & M_NOMAP); + + tot_len = 0; + + /* + * Each of the remaining mbufs in the chain should reference a + * TLS record. + */ + *nsegsp = 0; + for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) { + MPASS(m_tls->m_flags & M_NOMAP); + + wr_len = ktls_wr_len(tlsp, m, m_tls, &nsegs); +#ifdef VERBOSE_TRACES + CTR4(KTR_CXGBE, "%s: tid %d wr_len %d nsegs %d", __func__, + tlsp->tid, wr_len, nsegs); +#endif + if (wr_len > SGE_MAX_WR_LEN || nsegs > TX_SGL_SEGS) + return (EFBIG); + tot_len += roundup2(wr_len, EQ_ESIZE); + + /* + * Store 'nsegs' for the first TLS record in the + * header mbuf's metadata. + */ + if (*nsegsp == 0) + *nsegsp = nsegs; + } + + MPASS(tot_len != 0); + + /* + * See if we have any TCP options or a FIN requiring a + * dedicated packet. + */ + if ((tcp->th_flags & TH_FIN) != 0 || ktls_has_tcp_options(tcp)) { + wr_len = sizeof(struct fw_eth_tx_pkt_wr) + + sizeof(struct cpl_tx_pkt_core) + roundup2(m->m_len, 16); + if (wr_len > SGE_MAX_WR_LEN) { + CTR3(KTR_CXGBE, + "%s: tid %d options-only packet too long (len %d)", + __func__, tlsp->tid, m->m_len); + return (EINVAL); + } + tot_len += roundup2(wr_len, EQ_ESIZE); + } + + /* Include room for a TP work request to program an L2T entry. */ + tot_len += EQ_ESIZE; + + /* + * Include room for a ULPTX work request including up to 5 + * CPL_SET_TCB_FIELD commands before the first TLS work + * request. + */ + wr_len = sizeof(struct fw_ulptx_wr) + + 5 * roundup2(LEN__SET_TCB_FIELD_ULP, 16); + + /* + * If timestamps are present, reserve 1 more command for + * setting the echoed timestamp. 
+ */ + if (tlsp->using_timestamps) + wr_len += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + + tot_len += roundup2(wr_len, EQ_ESIZE); + + *len16p = tot_len / 16; +#ifdef VERBOSE_TRACES + CTR4(KTR_CXGBE, "%s: tid %d len16 %d nsegs %d", __func__, + tlsp->tid, *len16p, *nsegsp); +#endif + return (0); +} + +/* + * If the SGL ends on an address that is not 16 byte aligned, this function will + * add a 0 filled flit at the end. + */ +static void +write_gl_to_buf(struct sglist *gl, caddr_t to) +{ + struct sglist_seg *seg; + __be64 *flitp; + struct ulptx_sgl *usgl; + int i, nflits, nsegs; + + KASSERT(((uintptr_t)to & 0xf) == 0, + ("%s: SGL must start at a 16 byte boundary: %p", __func__, to)); + + nsegs = gl->sg_nseg; + MPASS(nsegs > 0); + + nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2; + flitp = (__be64 *)to; + seg = &gl->sg_segs[0]; + usgl = (void *)flitp; + + usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | + V_ULPTX_NSGE(nsegs)); + usgl->len0 = htobe32(seg->ss_len); + usgl->addr0 = htobe64(seg->ss_paddr); + seg++; + + for (i = 0; i < nsegs - 1; i++, seg++) { + usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len); + usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr); + } + if (i & 1) + usgl->sge[i / 2].len[1] = htobe32(0); + flitp += nflits; + + if (nflits & 1) { + MPASS(((uintptr_t)flitp) & 0xf); + *flitp++ = 0; + } + + MPASS((((uintptr_t)flitp) & 0xf) == 0); +} + +static inline void +copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len) +{ + + MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]); + MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]); + + if (__predict_true((uintptr_t)(*to) + len <= + (uintptr_t)&eq->desc[eq->sidx])) { + bcopy(from, *to, len); + (*to) += len; + if ((uintptr_t)(*to) == (uintptr_t)&eq->desc[eq->sidx]) + (*to) = (caddr_t)eq->desc; + } else { + int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to); + + bcopy(from, *to, portion); + from += portion; + portion = len - portion; /* remaining */ + bcopy(from, (void *)eq->desc, portion); + (*to) = (caddr_t)eq->desc + portion; + } +} + +static int +ktls_write_tcp_options(struct sge_txq *txq, void *dst, struct mbuf *m, + u_int available, u_int pidx) +{ + struct tx_sdesc *txsd; + struct fw_eth_tx_pkt_wr *wr; + struct cpl_tx_pkt_core *cpl; + uint32_t ctrl; + uint64_t ctrl1; + int len16, ndesc, pktlen; + struct ether_header *eh; + struct ip *ip, newip; + struct ip6_hdr *ip6, newip6; + struct tcphdr *tcp, newtcp; + caddr_t out; + + TXQ_LOCK_ASSERT_OWNED(txq); + M_ASSERTPKTHDR(m); + + wr = dst; + pktlen = m->m_len; + ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen; + len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16); + ndesc = howmany(len16, EQ_ESIZE / 16); + MPASS(ndesc <= available); + + /* Firmware work request header */ + wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | + V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); + + ctrl = V_FW_WR_LEN16(len16); + wr->equiq_to_len16 = htobe32(ctrl); + wr->r3 = 0; + + cpl = (void *)(wr + 1); + + /* Checksum offload */ + ctrl1 = 0; + txq->txcsum++; + + /* CPL header */ + cpl->ctrl0 = txq->cpl_ctrl0; + cpl->pack = 0; + cpl->len = htobe16(pktlen); + cpl->ctrl1 = htobe64(ctrl1); + + out = (void *)(cpl + 1); + + /* Copy over Ethernet header. */ + eh = mtod(m, struct ether_header *); + copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen); + + /* Fixup length in IP header and copy out. 
*/ + if (ntohs(eh->ether_type) == ETHERTYPE_IP) { + ip = (void *)((char *)eh + m->m_pkthdr.l2hlen); + newip = *ip; + newip.ip_len = htons(pktlen - m->m_pkthdr.l2hlen); + copy_to_txd(&txq->eq, (caddr_t)&newip, &out, sizeof(newip)); + if (m->m_pkthdr.l3hlen > sizeof(*ip)) + copy_to_txd(&txq->eq, (caddr_t)(ip + 1), &out, + m->m_pkthdr.l3hlen - sizeof(*ip)); + } else { + ip6 = (void *)((char *)eh + m->m_pkthdr.l2hlen); + newip6 = *ip6; + newip6.ip6_plen = htons(pktlen - m->m_pkthdr.l2hlen); + copy_to_txd(&txq->eq, (caddr_t)&newip6, &out, sizeof(newip6)); + MPASS(m->m_pkthdr.l3hlen == sizeof(*ip6)); + } + + /* Clear PUSH and FIN in the TCP header if present. */ + tcp = (void *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen); + newtcp = *tcp; + newtcp.th_flags &= ~(TH_PUSH | TH_FIN); + copy_to_txd(&txq->eq, (caddr_t)&newtcp, &out, sizeof(newtcp)); + + /* Copy rest of packet. */ + copy_to_txd(&txq->eq, (caddr_t)(tcp + 1), &out, pktlen - + (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp))); + txq->imm_wrs++; + + txq->txpkt_wrs++; + + txq->kern_tls_options++; + + txsd = &txq->sdesc[pidx]; + txsd->m = NULL; + txsd->desc_used = ndesc; + + return (ndesc); +} + +static int +ktls_write_tunnel_packet(struct sge_txq *txq, void *dst, struct mbuf *m, + struct mbuf *m_tls, u_int available, tcp_seq tcp_seqno, u_int pidx) +{ + struct tx_sdesc *txsd; + struct fw_eth_tx_pkt_wr *wr; + struct cpl_tx_pkt_core *cpl; + uint32_t ctrl; + uint64_t ctrl1; + int len16, ndesc, pktlen; + struct ether_header *eh; + struct ip *ip, newip; + struct ip6_hdr *ip6, newip6; + struct tcphdr *tcp, newtcp; + struct mbuf_ext_pgs *ext_pgs; + caddr_t out; + + TXQ_LOCK_ASSERT_OWNED(txq); + M_ASSERTPKTHDR(m); + + /* Locate the template TLS header. */ + MBUF_EXT_PGS_ASSERT(m_tls); + ext_pgs = m_tls->m_ext.ext_pgs; + + /* This should always be the last TLS record in a chain. */ + MPASS(m_tls->m_next == NULL); + + wr = dst; + pktlen = m->m_len + m_tls->m_len; + ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen; + len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16); + ndesc = howmany(len16, EQ_ESIZE / 16); + MPASS(ndesc <= available); + + /* Firmware work request header */ + wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | + V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); + + ctrl = V_FW_WR_LEN16(len16); + wr->equiq_to_len16 = htobe32(ctrl); + wr->r3 = 0; + + cpl = (void *)(wr + 1); + + /* Checksum offload */ + ctrl1 = 0; + txq->txcsum++; + + /* CPL header */ + cpl->ctrl0 = txq->cpl_ctrl0; + cpl->pack = 0; + cpl->len = htobe16(pktlen); + cpl->ctrl1 = htobe64(ctrl1); + + out = (void *)(cpl + 1); + + /* Copy over Ethernet header. */ + eh = mtod(m, struct ether_header *); + copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen); + + /* Fixup length in IP header and copy out. */ + if (ntohs(eh->ether_type) == ETHERTYPE_IP) { + ip = (void *)((char *)eh + m->m_pkthdr.l2hlen); + newip = *ip; + newip.ip_len = htons(pktlen - m->m_pkthdr.l2hlen); + copy_to_txd(&txq->eq, (caddr_t)&newip, &out, sizeof(newip)); + if (m->m_pkthdr.l3hlen > sizeof(*ip)) + copy_to_txd(&txq->eq, (caddr_t)(ip + 1), &out, + m->m_pkthdr.l3hlen - sizeof(*ip)); + } else { + ip6 = (void *)((char *)eh + m->m_pkthdr.l2hlen); + newip6 = *ip6; + newip6.ip6_plen = htons(pktlen - m->m_pkthdr.l2hlen); + copy_to_txd(&txq->eq, (caddr_t)&newip6, &out, sizeof(newip6)); + MPASS(m->m_pkthdr.l3hlen == sizeof(*ip6)); + } + + /* Set sequence number in TCP header. 
*/ + tcp = (void *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen); + newtcp = *tcp; + newtcp.th_seq = htonl(tcp_seqno + mtod(m_tls, vm_offset_t)); + copy_to_txd(&txq->eq, (caddr_t)&newtcp, &out, sizeof(newtcp)); + + /* Copy rest of TCP header. */ + copy_to_txd(&txq->eq, (caddr_t)(tcp + 1), &out, m->m_len - + (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp))); + + /* Copy the subset of the TLS header requested. */ + copy_to_txd(&txq->eq, (char *)ext_pgs->hdr + mtod(m_tls, vm_offset_t), + &out, m_tls->m_len); + txq->imm_wrs++; + + txq->txpkt_wrs++; + + txq->kern_tls_header++; + + txsd = &txq->sdesc[pidx]; + txsd->m = m; + txsd->desc_used = ndesc; + + return (ndesc); +} + +_Static_assert(sizeof(struct cpl_set_tcb_field) <= EQ_ESIZE, + "CPL_SET_TCB_FIELD must be smaller than a single TX descriptor"); +_Static_assert(W_TCB_SND_UNA_RAW == W_TCB_SND_NXT_RAW, + "SND_NXT_RAW and SND_UNA_RAW are in different words"); + +static int +ktls_write_tls_wr(struct tlspcb *tlsp, struct sge_txq *txq, + void *dst, struct mbuf *m, struct tcphdr *tcp, struct mbuf *m_tls, + u_int nsegs, u_int available, tcp_seq tcp_seqno, uint32_t *tsopt, + u_int pidx, bool set_l2t_idx) +{ + struct sge_eq *eq = &txq->eq; + struct tx_sdesc *txsd; + struct fw_ulptx_wr *wr; + struct ulp_txpkt *txpkt; + struct ulptx_sc_memrd *memrd; + struct ulptx_idata *idata; + struct cpl_tx_sec_pdu *sec_pdu; + struct cpl_tx_data *tx_data; + struct mbuf_ext_pgs *ext_pgs; + struct tls_record_layer *hdr; + char *iv, *out; + u_int aad_start, aad_stop; + u_int auth_start, auth_stop, auth_insert; + u_int cipher_start, cipher_stop, iv_offset; + u_int imm_len, mss, ndesc, offset, plen, tlen, twr_len, wr_len; + u_int fields, tx_max_offset, tx_max; + bool first_wr, last_wr, using_scratch; + + ndesc = 0; + MPASS(tlsp->txq == txq); + + first_wr = (tlsp->prev_seq == 0 && tlsp->prev_ack == 0 && + tlsp->prev_win == 0); + + /* + * Use the per-txq scratch pad if near the end of the ring to + * simplify handling of wrap-around. This uses a simple but + * not quite perfect test of using the scratch buffer if we + * can't fit a maximal work request in without wrapping. + */ + using_scratch = (eq->sidx - pidx < SGE_MAX_WR_LEN / EQ_ESIZE); + + /* Locate the TLS header. */ + MBUF_EXT_PGS_ASSERT(m_tls); + ext_pgs = m_tls->m_ext.ext_pgs; + hdr = (void *)ext_pgs->hdr; + plen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - ext_pgs->trail_len; + + /* Determine how much of the TLS record to send. */ + tlen = ktls_tcp_payload_length(tlsp, m_tls); + if (tlen <= ext_pgs->hdr_len) { + /* + * For requests that only want to send the TLS header, + * send a tunnelled packet as immediate data. + */ +#ifdef VERBOSE_TRACES + CTR3(KTR_CXGBE, "%s: tid %d header-only TLS record %u", + __func__, tlsp->tid, (u_int)ext_pgs->seqno); +#endif + return (ktls_write_tunnel_packet(txq, dst, m, m_tls, available, + tcp_seqno, pidx)); + } + if (tlen < plen) { + plen = tlen; + offset = ktls_payload_offset(tlsp, m_tls); +#ifdef VERBOSE_TRACES + CTR4(KTR_CXGBE, "%s: tid %d short TLS record %u with offset %u", + __func__, tlsp->tid, (u_int)ext_pgs->seqno, offset); +#endif + if (m_tls->m_next == NULL && (tcp->th_flags & TH_FIN) != 0) { + txq->kern_tls_fin_short++; +#ifdef INVARIANTS + panic("%s: FIN on short TLS record", __func__); +#endif + } + } else + offset = 0; + + /* + * This is the last work request for a given TLS mbuf chain if + * it is the last mbuf in the chain and FIN is not set. If + * FIN is set, then ktls_write_tcp_fin() will write out the + * last work request. 
+ */ + last_wr = m_tls->m_next == NULL && (tcp->th_flags & TH_FIN) == 0; + + /* + * The host stack may ask us to not send part of the start of + * a TLS record. (For example, the stack might have + * previously sent a "short" TLS record and might later send + * down an mbuf that requests to send the remainder of the TLS + * record.) The crypto engine must process a TLS record from + * the beginning if computing a GCM tag or HMAC, so we always + * send the TLS record from the beginning as input to the + * crypto engine and via CPL_TX_DATA to TP. However, TP will + * drop individual packets after they have been chopped up + * into MSS-sized chunks if the entire sequence range of those + * packets is less than SND_UNA. SND_UNA is computed as + * TX_MAX - SND_UNA_RAW. Thus, use the offset stored in + * m_data to set TX_MAX to the first byte in the TCP sequence + * space the host actually wants us to send and set + * SND_UNA_RAW to 0. + * + * If the host sends us back to back requests that span the + * trailer of a single TLS record (first request ends "in" the + * trailer and second request starts at the next byte but + * still "in" the trailer), the initial bytes of the trailer + * that the first request drops will not be retransmitted. If + * the host uses the same requests when retransmitting the + * connection will hang. To handle this, always transmit the + * full trailer for a request that begins "in" the trailer + * (the second request in the example above). This should + * also help to avoid retransmits for the common case. + * + * A similar condition exists when using CBC for back to back + * requests that span a single AES block. The first request + * will be truncated to end at the end of the previous AES + * block. To handle this, always begin transmission at the + * start of the current AES block. + */ + tx_max_offset = mtod(m_tls, vm_offset_t); + if (tx_max_offset > TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - + ext_pgs->trail_len) { + /* Always send the full trailer. */ + tx_max_offset = TLS_HEADER_LENGTH + ntohs(hdr->tls_length) - + ext_pgs->trail_len; + } + if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_CBC && + tx_max_offset > TLS_HEADER_LENGTH) { + /* Always send all of the first AES block. */ + tx_max_offset = TLS_HEADER_LENGTH + + rounddown(tx_max_offset - TLS_HEADER_LENGTH, + AES_BLOCK_LEN); + } + tx_max = tcp_seqno + tx_max_offset; + + /* + * Update TCB fields. Reserve space for the FW_ULPTX_WR header + * but don't populate it until we know how many field updates + * are required. 
+ */ + if (using_scratch) + wr = (void *)txq->ss; + else + wr = dst; + out = (void *)(wr + 1); + fields = 0; + if (set_l2t_idx) { + KASSERT(nsegs != 0, + ("trying to set L2T_IX for subsequent TLS WR")); +#ifdef VERBOSE_TRACES + CTR3(KTR_CXGBE, "%s: tid %d set L2T_IX to %d", __func__, + tlsp->tid, tlsp->l2te->idx); +#endif + write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_L2T_IX, + V_TCB_L2T_IX(M_TCB_L2T_IX), V_TCB_L2T_IX(tlsp->l2te->idx)); + out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + fields++; + } + if (tsopt != NULL && tlsp->prev_tsecr != ntohl(tsopt[1])) { + KASSERT(nsegs != 0, + ("trying to set T_RTSEQ_RECENT for subsequent TLS WR")); +#ifdef VERBOSE_TRACES + CTR2(KTR_CXGBE, "%s: tid %d wrote updated T_RTSEQ_RECENT", + __func__, tlsp->tid); +#endif + write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_T_RTSEQ_RECENT, + V_TCB_T_RTSEQ_RECENT(M_TCB_T_RTSEQ_RECENT), + V_TCB_T_RTSEQ_RECENT(ntohl(tsopt[1]))); + out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + fields++; + + tlsp->prev_tsecr = ntohl(tsopt[1]); + } + + if (first_wr || tlsp->prev_seq != tx_max) { + KASSERT(nsegs != 0, + ("trying to set TX_MAX for subsequent TLS WR")); +#ifdef VERBOSE_TRACES + CTR4(KTR_CXGBE, + "%s: tid %d setting TX_MAX to %u (tcp_seqno %u)", + __func__, tlsp->tid, tx_max, tcp_seqno); +#endif + write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_TX_MAX, + V_TCB_TX_MAX(M_TCB_TX_MAX), V_TCB_TX_MAX(tx_max)); + out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + fields++; + } + + /* + * If there is data to drop at the beginning of this TLS + * record or if this is a retransmit, + * reset SND_UNA_RAW to 0 so that SND_UNA == TX_MAX. + */ + if (tlsp->prev_seq != tx_max || mtod(m_tls, vm_offset_t) != 0) { + KASSERT(nsegs != 0, + ("trying to clear SND_UNA_RAW for subsequent TLS WR")); +#ifdef VERBOSE_TRACES + CTR2(KTR_CXGBE, "%s: tid %d clearing SND_UNA_RAW", __func__, + tlsp->tid); +#endif + write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_SND_UNA_RAW, + V_TCB_SND_UNA_RAW(M_TCB_SND_UNA_RAW), + V_TCB_SND_UNA_RAW(0)); + out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + fields++; + } + + /* + * Store the expected sequence number of the next byte after + * this record. + */ + tlsp->prev_seq = tcp_seqno + tlen; + + if (first_wr || tlsp->prev_ack != ntohl(tcp->th_ack)) { + KASSERT(nsegs != 0, + ("trying to set RCV_NXT for subsequent TLS WR")); + write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_RCV_NXT, + V_TCB_RCV_NXT(M_TCB_RCV_NXT), + V_TCB_RCV_NXT(ntohl(tcp->th_ack))); + out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + fields++; + + tlsp->prev_ack = ntohl(tcp->th_ack); + } + + if (first_wr || tlsp->prev_win != ntohs(tcp->th_win)) { + KASSERT(nsegs != 0, + ("trying to set RCV_WND for subsequent TLS WR")); + write_set_tcb_field_ulp(tlsp, out, txq, W_TCB_RCV_WND, + V_TCB_RCV_WND(M_TCB_RCV_WND), + V_TCB_RCV_WND(ntohs(tcp->th_win))); + out += roundup2(LEN__SET_TCB_FIELD_ULP, 16); + fields++; + + tlsp->prev_win = ntohs(tcp->th_win); + } + + /* Recalculate 'nsegs' if cached value is not available. */ + if (nsegs == 0) + nsegs = sglist_count_ext_pgs(ext_pgs, ext_pgs->hdr_len + + offset, plen - (ext_pgs->hdr_len + offset)); + + /* Calculate the size of the TLS work request. */ + twr_len = ktls_base_wr_size(tlsp); + + imm_len = 0; + if (offset == 0) + imm_len += ext_pgs->hdr_len; + if (plen == tlen) + imm_len += AES_BLOCK_LEN; + twr_len += roundup2(imm_len, 16); + twr_len += ktls_sgl_size(nsegs); + + /* + * If any field updates were required, determine if they can + * be included in the TLS work request. 
If not, use the + * FW_ULPTX_WR work request header at 'wr' as a dedicated work + * request for the field updates and start a new work request + * for the TLS work request afterward. + */ + if (fields != 0) { + wr_len = fields * roundup2(LEN__SET_TCB_FIELD_ULP, 16); + if (twr_len + wr_len <= SGE_MAX_WR_LEN && + tlsp->sc->tlst.combo_wrs) { + wr_len += twr_len; + txpkt = (void *)out; + } else { + wr_len += sizeof(*wr); + wr->op_to_compl = htobe32(V_FW_WR_OP(FW_ULPTX_WR)); + wr->flowid_len16 = htobe32(F_FW_ULPTX_WR_DATA | + V_FW_WR_LEN16(wr_len / 16)); + wr->cookie = 0; + + /* + * If we were using scratch space, copy the + * field updates work request to the ring. + */ + if (using_scratch) { + out = dst; + copy_to_txd(eq, txq->ss, &out, wr_len); + } + + ndesc = howmany(wr_len, EQ_ESIZE); + MPASS(ndesc <= available); + + txq->raw_wrs++; + txsd = &txq->sdesc[pidx]; + txsd->m = NULL; + txsd->desc_used = ndesc; + IDXINCR(pidx, ndesc, eq->sidx); + dst = &eq->desc[pidx]; + + /* + * Determine if we should use scratch space + * for the TLS work request based on the + * available space after advancing pidx for + * the field updates work request. + */ + wr_len = twr_len; + using_scratch = (eq->sidx - pidx < + howmany(wr_len, EQ_ESIZE)); + if (using_scratch) + wr = (void *)txq->ss; + else + wr = dst; + txpkt = (void *)(wr + 1); + } + } else { + wr_len = twr_len; + txpkt = (void *)out; + } + + wr_len = roundup2(wr_len, 16); + MPASS(ndesc + howmany(wr_len, EQ_ESIZE) <= available); + + /* FW_ULPTX_WR */ + wr->op_to_compl = htobe32(V_FW_WR_OP(FW_ULPTX_WR)); + wr->flowid_len16 = htobe32(F_FW_ULPTX_WR_DATA | + V_FW_WR_LEN16(wr_len / 16)); + wr->cookie = 0; + + /* ULP_TXPKT */ + txpkt->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) | + V_ULP_TXPKT_DATAMODIFY(0) | + V_ULP_TXPKT_CHANNELID(tlsp->vi->pi->port_id) | V_ULP_TXPKT_DEST(0) | + V_ULP_TXPKT_FID(txq->eq.cntxt_id) | V_ULP_TXPKT_RO(1)); + txpkt->len = htobe32(howmany(twr_len - sizeof(*wr), 16)); + + /* ULPTX_IDATA sub-command */ + idata = (void *)(txpkt + 1); + idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | + V_ULP_TX_SC_MORE(1)); + idata->len = sizeof(struct cpl_tx_sec_pdu); + + /* + * The key context, CPL_TX_DATA, and immediate data are part + * of this ULPTX_IDATA when using an inline key. When reading + * the key from memory, the CPL_TX_DATA and immediate data are + * part of a separate ULPTX_IDATA. + */ + if (tlsp->inline_key) + idata->len += tlsp->tx_key_info_size + + sizeof(struct cpl_tx_data) + imm_len; + idata->len = htobe32(idata->len); + + /* CPL_TX_SEC_PDU */ + sec_pdu = (void *)(idata + 1); + + /* + * For short records, AAD is counted as header data in SCMD0, + * the IV is next followed by a cipher region for the payload. + */ + if (plen == tlen) { + aad_start = 0; + aad_stop = 0; + iv_offset = 1; + auth_start = 0; + auth_stop = 0; + auth_insert = 0; + cipher_start = AES_BLOCK_LEN + 1; + cipher_stop = 0; + + sec_pdu->pldlen = htobe32(16 + plen - + (ext_pgs->hdr_len + offset)); + + /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */ + sec_pdu->seqno_numivs = tlsp->scmd0_short.seqno_numivs; + sec_pdu->ivgen_hdrlen = htobe32( + tlsp->scmd0_short.ivgen_hdrlen | + V_SCMD_HDR_LEN(offset == 0 ? ext_pgs->hdr_len : 0)); + + txq->kern_tls_short++; + } else { + /* + * AAD is TLS header. IV is after AAD. The cipher region + * starts after the IV. See comments in ccr_authenc() and + * ccr_gmac() in t4_crypto.c regarding cipher and auth + * start/stop values. 
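+ *
+ * (Illustrative offsets, assuming a TLS 1.2 AES-GCM record with
+ * the 5-byte record header plus an 8-byte explicit nonce, i.e.
+ * hdr_len == 13: the AAD spans bytes 1-5, the IV is inserted at
+ * offset 6, and cipher_start == auth_start == 14.)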
+ */ + aad_start = 1; + aad_stop = TLS_HEADER_LENGTH; + iv_offset = TLS_HEADER_LENGTH + 1; + cipher_start = ext_pgs->hdr_len + 1; + if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) { + cipher_stop = 0; + auth_start = cipher_start; + auth_stop = 0; + auth_insert = 0; + } else { + cipher_stop = 0; + auth_start = cipher_start; + auth_stop = 0; + auth_insert = 0; + } + + sec_pdu->pldlen = htobe32(plen); + + /* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */ + sec_pdu->seqno_numivs = tlsp->scmd0.seqno_numivs; + sec_pdu->ivgen_hdrlen = tlsp->scmd0.ivgen_hdrlen; + + if (mtod(m_tls, vm_offset_t) == 0) + txq->kern_tls_full++; + else + txq->kern_tls_partial++; + } + sec_pdu->op_ivinsrtofst = htobe32( + V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) | + V_CPL_TX_SEC_PDU_CPLLEN(2) | V_CPL_TX_SEC_PDU_PLACEHOLDER(0) | + V_CPL_TX_SEC_PDU_IVINSRTOFST(iv_offset)); + sec_pdu->aadstart_cipherstop_hi = htobe32( + V_CPL_TX_SEC_PDU_AADSTART(aad_start) | + V_CPL_TX_SEC_PDU_AADSTOP(aad_stop) | + V_CPL_TX_SEC_PDU_CIPHERSTART(cipher_start) | + V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(cipher_stop >> 4)); + sec_pdu->cipherstop_lo_authinsert = htobe32( + V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(cipher_stop & 0xf) | + V_CPL_TX_SEC_PDU_AUTHSTART(auth_start) | + V_CPL_TX_SEC_PDU_AUTHSTOP(auth_stop) | + V_CPL_TX_SEC_PDU_AUTHINSERT(auth_insert)); + + sec_pdu->scmd1 = htobe64(ext_pgs->seqno); + + /* Key context */ + out = (void *)(sec_pdu + 1); + if (tlsp->inline_key) { + memcpy(out, &tlsp->keyctx, tlsp->tx_key_info_size); + out += tlsp->tx_key_info_size; + } else { + /* ULPTX_SC_MEMRD to read key context. */ + memrd = (void *)out; + memrd->cmd_to_len = htobe32(V_ULPTX_CMD(ULP_TX_SC_MEMRD) | + V_ULP_TX_SC_MORE(1) | + V_ULPTX_LEN16(tlsp->tx_key_info_size >> 4)); + memrd->addr = htobe32(tlsp->tx_key_addr >> 5); + + /* ULPTX_IDATA for CPL_TX_DATA and TLS header. */ + idata = (void *)(memrd + 1); + idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) | + V_ULP_TX_SC_MORE(1)); + idata->len = htobe32(sizeof(struct cpl_tx_data) + imm_len); + + out = (void *)(idata + 1); + } + + /* CPL_TX_DATA */ + tx_data = (void *)out; + OPCODE_TID(tx_data) = htonl(MK_OPCODE_TID(CPL_TX_DATA, tlsp->tid)); + if (m->m_pkthdr.csum_flags & CSUM_TSO) { + mss = m->m_pkthdr.tso_segsz; + tlsp->prev_mss = mss; + } else if (tlsp->prev_mss != 0) + mss = tlsp->prev_mss; + else + mss = tlsp->vi->ifp->if_mtu - + (m->m_pkthdr.l3hlen + m->m_pkthdr.l4hlen); + if (offset == 0) { + tx_data->len = htobe32(V_TX_DATA_MSS(mss) | V_TX_LENGTH(tlen)); + tx_data->rsvd = htobe32(tcp_seqno); + } else { + tx_data->len = htobe32(V_TX_DATA_MSS(mss) | + V_TX_LENGTH(tlen - (ext_pgs->hdr_len + offset))); + tx_data->rsvd = htobe32(tcp_seqno + ext_pgs->hdr_len + offset); + } + tx_data->flags = htobe32(F_TX_BYPASS); + if (last_wr && tcp->th_flags & TH_PUSH) + tx_data->flags |= htobe32(F_TX_PUSH | F_TX_SHOVE); + + /* Populate the TLS header */ + out = (void *)(tx_data + 1); + if (offset == 0) { + memcpy(out, ext_pgs->hdr, ext_pgs->hdr_len); + out += ext_pgs->hdr_len; + } + + /* AES IV for a short record. */ + if (plen == tlen) { + iv = out; + if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) { + memcpy(iv, tlsp->keyctx.txhdr.txsalt, SALT_SIZE); + memcpy(iv + 4, hdr + 1, 8); + *(uint32_t *)(iv + 12) = htobe32(2 + + offset / AES_BLOCK_LEN); + } else + memcpy(iv, hdr + 1, AES_BLOCK_LEN); + out += AES_BLOCK_LEN; + } + + if (imm_len % 16 != 0) { + /* Zero pad to an 8-byte boundary. 
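(For instance, a 13-byte GCM header sent as the only immediate data is padded with 3 bytes to reach 16, and the NOOP below is then unnecessary; an illustrative case only.)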
*/ + memset(out, 0, 8 - (imm_len % 8)); + out += 8 - (imm_len % 8); + + /* + * Insert a ULP_TX_SC_NOOP if needed so the SGL is + * 16-byte aligned. + */ + if (imm_len % 16 <= 8) { + idata = (void *)out; + idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); + idata->len = htobe32(0); + out = (void *)(idata + 1); + } + } + + /* SGL for record payload */ + sglist_reset(txq->gl); + if (sglist_append_ext_pgs(txq->gl, ext_pgs, ext_pgs->hdr_len + offset, + plen - (ext_pgs->hdr_len + offset)) != 0) { +#ifdef INVARIANTS + panic("%s: failed to append sglist", __func__); +#endif + } + write_gl_to_buf(txq->gl, out); + + if (using_scratch) { + out = dst; + copy_to_txd(eq, txq->ss, &out, wr_len); + } + + ndesc += howmany(wr_len, EQ_ESIZE); + MPASS(ndesc <= available); + txq->tls_wrs++; + + txq->kern_tls_records++; + txq->kern_tls_octets += tlen - mtod(m_tls, vm_offset_t); + if (mtod(m_tls, vm_offset_t) != 0) { + if (offset == 0) + txq->kern_tls_waste += mtod(m_tls, vm_offset_t); + else + txq->kern_tls_waste += mtod(m_tls, vm_offset_t) - + (ext_pgs->hdr_len + offset); + } + + txsd = &txq->sdesc[pidx]; + if (last_wr) + txsd->m = m; + else + txsd->m = NULL; + txsd->desc_used = howmany(wr_len, EQ_ESIZE); + + return (ndesc); +} + +static int +ktls_write_tcp_fin(struct sge_txq *txq, void *dst, struct mbuf *m, + u_int available, tcp_seq tcp_seqno, u_int pidx) +{ + struct tx_sdesc *txsd; + struct fw_eth_tx_pkt_wr *wr; + struct cpl_tx_pkt_core *cpl; + uint32_t ctrl; + uint64_t ctrl1; + int len16, ndesc, pktlen; + struct ether_header *eh; + struct ip *ip, newip; + struct ip6_hdr *ip6, newip6; + struct tcphdr *tcp, newtcp; + caddr_t out; + + TXQ_LOCK_ASSERT_OWNED(txq); + M_ASSERTPKTHDR(m); + + wr = dst; + pktlen = m->m_len; + ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen; + len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16); + ndesc = howmany(len16, EQ_ESIZE / 16); + MPASS(ndesc <= available); + + /* Firmware work request header */ + wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) | + V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); + + ctrl = V_FW_WR_LEN16(len16); + wr->equiq_to_len16 = htobe32(ctrl); + wr->r3 = 0; + + cpl = (void *)(wr + 1); + + /* Checksum offload */ + ctrl1 = 0; + txq->txcsum++; + + /* CPL header */ + cpl->ctrl0 = txq->cpl_ctrl0; + cpl->pack = 0; + cpl->len = htobe16(pktlen); + cpl->ctrl1 = htobe64(ctrl1); + + out = (void *)(cpl + 1); + + /* Copy over Ethernet header. */ + eh = mtod(m, struct ether_header *); + copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen); + + /* Fixup length in IP header and copy out. */ + if (ntohs(eh->ether_type) == ETHERTYPE_IP) { + ip = (void *)((char *)eh + m->m_pkthdr.l2hlen); + newip = *ip; + newip.ip_len = htons(pktlen - m->m_pkthdr.l2hlen); + copy_to_txd(&txq->eq, (caddr_t)&newip, &out, sizeof(newip)); + if (m->m_pkthdr.l3hlen > sizeof(*ip)) + copy_to_txd(&txq->eq, (caddr_t)(ip + 1), &out, + m->m_pkthdr.l3hlen - sizeof(*ip)); + } else { + ip6 = (void *)((char *)eh + m->m_pkthdr.l2hlen); + newip6 = *ip6; + newip6.ip6_plen = htons(pktlen - m->m_pkthdr.l2hlen); + copy_to_txd(&txq->eq, (caddr_t)&newip6, &out, sizeof(newip6)); + MPASS(m->m_pkthdr.l3hlen == sizeof(*ip6)); + } + + /* Set sequence number in TCP header. */ + tcp = (void *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen); + newtcp = *tcp; + newtcp.th_seq = htonl(tcp_seqno); + copy_to_txd(&txq->eq, (caddr_t)&newtcp, &out, sizeof(newtcp)); + + /* Copy rest of packet. 
*/ + copy_to_txd(&txq->eq, (caddr_t)(tcp + 1), &out, m->m_len - + (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp))); + txq->imm_wrs++; + + txq->txpkt_wrs++; + + txq->kern_tls_fin++; + + txsd = &txq->sdesc[pidx]; + txsd->m = m; + txsd->desc_used = ndesc; + + return (ndesc); +} + +int +t6_ktls_write_wr(struct sge_txq *txq, void *dst, struct mbuf *m, u_int nsegs, + u_int available) +{ + struct sge_eq *eq = &txq->eq; + struct tx_sdesc *txsd; + struct tlspcb *tlsp; + struct tcphdr *tcp; + struct mbuf *m_tls; + struct ether_header *eh; + tcp_seq tcp_seqno; + u_int ndesc, pidx, totdesc; + uint16_t vlan_tag; + bool has_fin, set_l2t_idx; + void *tsopt; + + M_ASSERTPKTHDR(m); + MPASS(m->m_pkthdr.snd_tag != NULL); + tlsp = mst_to_tls(m->m_pkthdr.snd_tag); + + totdesc = 0; + eh = mtod(m, struct ether_header *); + tcp = (struct tcphdr *)((char *)eh + m->m_pkthdr.l2hlen + + m->m_pkthdr.l3hlen); + pidx = eq->pidx; + has_fin = (tcp->th_flags & TH_FIN) != 0; + + /* + * If this TLS record has a FIN, then we will send any + * requested options as part of the FIN packet. + */ + if (!has_fin && ktls_has_tcp_options(tcp)) { + ndesc = ktls_write_tcp_options(txq, dst, m, available, pidx); + totdesc += ndesc; + IDXINCR(pidx, ndesc, eq->sidx); + dst = &eq->desc[pidx]; +#ifdef VERBOSE_TRACES + CTR2(KTR_CXGBE, "%s: tid %d wrote TCP options packet", __func__, + tlsp->tid); +#endif + } + + /* + * Allocate a new L2T entry if necessary. This may write out + * a work request to the txq. + */ + if (m->m_flags & M_VLANTAG) + vlan_tag = m->m_pkthdr.ether_vtag; + else + vlan_tag = 0xfff; + set_l2t_idx = false; + if (tlsp->l2te == NULL || tlsp->l2te->vlan != vlan_tag || + memcmp(tlsp->l2te->dmac, eh->ether_dhost, ETHER_ADDR_LEN) != 0) { + set_l2t_idx = true; + if (tlsp->l2te) + t4_l2t_release(tlsp->l2te); + tlsp->l2te = t4_l2t_alloc_tls(tlsp->sc, txq, dst, &ndesc, + vlan_tag, tlsp->vi->pi->lport, eh->ether_dhost); + if (tlsp->l2te == NULL) + CXGBE_UNIMPLEMENTED("failed to allocate TLS L2TE"); + if (ndesc != 0) { + MPASS(ndesc <= available - totdesc); + + txq->raw_wrs++; + txsd = &txq->sdesc[pidx]; + txsd->m = NULL; + txsd->desc_used = ndesc; + totdesc += ndesc; + IDXINCR(pidx, ndesc, eq->sidx); + dst = &eq->desc[pidx]; + } + } + + /* + * Iterate over each TLS record constructing a work request + * for that record. + */ + for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) { + MPASS(m_tls->m_flags & M_NOMAP); + + /* + * Determine the initial TCP sequence number for this + * record. + */ + tsopt = NULL; + if (m_tls == m->m_next) { + tcp_seqno = ntohl(tcp->th_seq) - + mtod(m_tls, vm_offset_t); + if (tlsp->using_timestamps) + tsopt = ktls_find_tcp_timestamps(tcp); + } else { + MPASS(mtod(m_tls, vm_offset_t) == 0); + tcp_seqno = tlsp->prev_seq; + } + + ndesc = ktls_write_tls_wr(tlsp, txq, dst, m, tcp, m_tls, + nsegs, available - totdesc, tcp_seqno, tsopt, pidx, + set_l2t_idx); + totdesc += ndesc; + IDXINCR(pidx, ndesc, eq->sidx); + dst = &eq->desc[pidx]; + + /* + * The value of nsegs from the header mbuf's metadata + * is only valid for the first TLS record. + */ + nsegs = 0; + + /* Only need to set the L2T index once. */ + set_l2t_idx = false; + } + + if (has_fin) { + /* + * If the TCP header for this chain has FIN sent, then + * explicitly send a packet that has FIN set. This + * will also have PUSH set if requested. This assumes + * we sent at least one TLS record work request and + * uses the TCP sequence number after that reqeust as + * the sequence number for the FIN packet. 
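+ *
+ * (That sequence number is tlsp->prev_seq, which each record's
+ * work request above advanced to tcp_seqno + tlen.)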
+ */ + ndesc = ktls_write_tcp_fin(txq, dst, m, available, + tlsp->prev_seq, pidx); + totdesc += ndesc; + } + + MPASS(totdesc <= available); + return (totdesc); +} + +void +cxgbe_tls_tag_free(struct m_snd_tag *mst) +{ + struct adapter *sc; + struct tlspcb *tlsp; + + tlsp = mst_to_tls(mst); + sc = tlsp->sc; + + CTR2(KTR_CXGBE, "%s: tid %d", __func__, tlsp->tid); + + if (tlsp->l2te) + t4_l2t_release(tlsp->l2te); + if (tlsp->tid >= 0) + release_tid(sc, tlsp->tid, tlsp->ctrlq); + if (tlsp->ce) + t4_release_lip(sc, tlsp->ce); + if (tlsp->tx_key_addr >= 0) + free_keyid(tlsp, tlsp->tx_key_addr); + + explicit_bzero(&tlsp->keyctx, sizeof(&tlsp->keyctx)); + free(tlsp, M_CXGBE); +} + +void +t6_ktls_modload(void) +{ + + t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, ktls_act_open_rpl, + CPL_COOKIE_KERN_TLS); +} + +void +t6_ktls_modunload(void) +{ + + t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, NULL, + CPL_COOKIE_KERN_TLS); +} + +#else + +int +cxgbe_tls_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, + struct m_snd_tag **pt) +{ + return (ENXIO); +} + +int +t6_ktls_parse_pkt(struct mbuf *m, int *nsegsp, int *len16p) +{ + return (EINVAL); +} + +int +t6_ktls_write_wr(struct sge_txq *txq, void *dst, struct mbuf *m, u_int nsegs, + u_int available) +{ + panic("can't happen"); +} + +void +cxgbe_tls_tag_free(struct m_snd_tag *mst) +{ + panic("can't happen"); +} + +void +t6_ktls_modload(void) +{ +} + +void +t6_ktls_modunload(void) +{ +} + +#endif Index: sys/dev/cxgbe/firmware/t6fw_cfg_kern_tls.txt =================================================================== --- /dev/null +++ sys/dev/cxgbe/firmware/t6fw_cfg_kern_tls.txt @@ -0,0 +1,278 @@ +# Firmware configuration file. +# +# Global limits (some are hardware limits, others are due to the firmware). +# nvi = 128 virtual interfaces +# niqflint = 1023 ingress queues with freelists and/or interrupts +# nethctrl = 64K Ethernet or ctrl egress queues +# neq = 64K egress queues of all kinds, including freelists +# nexactf = 512 MPS TCAM entries, can oversubscribe. + +[global] + rss_glb_config_mode = basicvirtual + rss_glb_config_options = tnlmapen,hashtoeplitz,tnlalllkp + + # PL_TIMEOUT register + pl_timeout_value = 200 # the timeout value in units of us + + sge_timer_value = 1, 5, 10, 50, 100, 200 # SGE_TIMER_VALUE* in usecs + + reg[0x10c4] = 0x20000000/0x20000000 # GK_CONTROL, enable 5th thread + + reg[0x7dc0] = 0x0e2f8849 # TP_SHIFT_CNT + + #Tick granularities in kbps + tsch_ticks = 100000, 10000, 1000, 10 + + filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe + filterMask = protocol + + tp_pmrx = 10, 512 + tp_pmrx_pagesize = 64K + + # TP number of RX channels (0 = auto) + tp_nrxch = 0 + + tp_pmtx = 10, 512 + tp_pmtx_pagesize = 64K + + # TP number of TX channels (0 = auto) + tp_ntxch = 0 + + # TP OFLD MTUs + tp_mtus = 88, 256, 512, 576, 808, 1024, 1280, 1488, 1500, 2002, 2048, 4096, 4352, 8192, 9000, 9600 + + # enable TP_OUT_CONFIG.IPIDSPLITMODE and CRXPKTENC + reg[0x7d04] = 0x00010008/0x00010008 + + # TP_GLOBAL_CONFIG + reg[0x7d08] = 0x00000800/0x00000800 # set IssFromCplEnable + + # TP_PC_CONFIG + reg[0x7d48] = 0x00000000/0x00000400 # clear EnableFLMError + + # TP_PARA_REG0 + reg[0x7d60] = 0x06000000/0x07000000 # set InitCWND to 6 + + # cluster, lan, or wan. 
+ tp_tcptuning = lan + + # LE_DB_CONFIG + reg[0x19c04] = 0x00000000/0x00440000 # LE Server SRAM disabled + # LE IPv4 compression disabled + # LE_DB_HASH_CONFIG + reg[0x19c28] = 0x00800000/0x01f00000 # LE Hash bucket size 8, + + # ULP_TX_CONFIG + reg[0x8dc0] = 0x00000104/0x00000104 # Enable ITT on PI err + # Enable more error msg for ... + # TPT error. + + # ULP_RX_MISC_FEATURE_ENABLE + #reg[0x1925c] = 0x01003400/0x01003400 # iscsi tag pi bit + # Enable offset decrement after ... + # PI extraction and before DDP + # ulp insert pi source info in DIF + # iscsi_eff_offset_en + + #Enable iscsi completion moderation feature + reg[0x1925c] = 0x000041c0/0x000031c0 # Enable offset decrement after + # PI extraction and before DDP. + # ulp insert pi source info in + # DIF. + # Enable iscsi hdr cmd mode. + # iscsi force cmd mode. + # Enable iscsi cmp mode. + # MC configuration + #mc_mode_brc[0] = 1 # mc0 - 1: enable BRC, 0: enable RBC + +# PFs 0-3. These get 8 MSI/8 MSI-X vectors each. VFs are supported by +# these 4 PFs only. +[function "0"] + wx_caps = all + r_caps = all + nvi = 1 + rssnvi = 0 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x1 + +[function "1"] + wx_caps = all + r_caps = all + nvi = 1 + rssnvi = 0 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x2 + +[function "2"] + wx_caps = all + r_caps = all + nvi = 1 + rssnvi = 0 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x4 + +[function "3"] + wx_caps = all + r_caps = all + nvi = 1 + rssnvi = 0 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x8 + +# PF4 is the resource-rich PF that the bus/nexus driver attaches to. +# It gets 32 MSI/128 MSI-X vectors. +[function "4"] + wx_caps = all + r_caps = all + nvi = 32 + rssnvi = 32 + niqflint = 512 + nethctrl = 1024 + neq = 2048 + nqpcq = 8192 + nexactf = 456 + cmask = all + pmask = all + ncrypto_lookaside = 16 + nclip = 320 + nethofld = 8192 + + # TCAM has 6K cells; each region must start at a multiple of 128 cell. + # Each entry in these categories takes 2 cells each. nhash will use the + # TCAM iff there is room left (that is, the rest don't add up to 3072). + nfilter = 48 + nserver = 64 + nhpfilter = 0 + nhash = 524288 + protocol = ofld, tlskeys, crypto_lookaside + tp_l2t = 4096 + tp_ddp = 2 + tp_ddp_iscsi = 2 + tp_tls_key = 3 + tp_tls_mxrxsize = 17408 # 16384 + 1024, governs max rx data, pm max xfer len, rx coalesce sizes + tp_stag = 2 + tp_pbl = 5 + tp_rq = 7 + tp_srq = 128 + +# PF5 is the SCSI Controller PF. It gets 32 MSI/40 MSI-X vectors. +# Not used right now. +[function "5"] + nvi = 1 + rssnvi = 0 + +# PF6 is the FCoE Controller PF. It gets 32 MSI/40 MSI-X vectors. +# Not used right now. +[function "6"] + nvi = 1 + rssnvi = 0 + +# The following function, 1023, is not an actual PCIE function but is used to +# configure and reserve firmware internal resources that come from the global +# resource pool. +# +[function "1023"] + wx_caps = all + r_caps = all + nvi = 4 + rssnvi = 0 + cmask = all + pmask = all + nexactf = 8 + nfilter = 16 + + +# For Virtual functions, we only allow NIC functionality and we only allow +# access to one port (1 << PF). Note that because of limitations in the +# Scatter Gather Engine (SGE) hardware which checks writes to VF KDOORBELL +# and GTS registers, the number of Ingress and Egress Queues must be a power +# of 2. 
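+# (Illustrative check: the VF functions below use niqflint = 2 and neq = 4,
+# both powers of 2, so they satisfy this constraint.)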
+# +[function "0/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 0 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x1 + +[function "1/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 0 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x2 + +[function "2/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 0 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x1 + +[function "3/*"] + wx_caps = 0x82 + r_caps = 0x86 + nvi = 1 + rssnvi = 0 + niqflint = 2 + nethctrl = 2 + neq = 4 + nexactf = 2 + cmask = all + pmask = 0x2 + +# MPS has 192K buffer space for ingress packets from the wire as well as +# loopback path of the L2 switch. +[port "0"] + dcb = none + #bg_mem = 25 + #lpbk_mem = 25 + hwm = 60 + lwm = 15 + dwm = 30 + +[port "1"] + dcb = none + #bg_mem = 25 + #lpbk_mem = 25 + hwm = 60 + lwm = 15 + dwm = 30 + +[fini] + version = 0x1 + checksum = 0xa737b06f +# +# $FreeBSD$ +# Index: sys/dev/cxgbe/offload.h =================================================================== --- sys/dev/cxgbe/offload.h +++ sys/dev/cxgbe/offload.h @@ -79,7 +79,7 @@ union aopen_entry *next; }; -/* cxgbe_snd_tag flags */ +/* cxgbe_rate_tag flags */ enum { EO_FLOWC_PENDING = (1 << 0), /* flowc needs to be sent */ EO_FLOWC_RPL_PENDING = (1 << 1), /* flowc credits due back */ @@ -89,6 +89,11 @@ struct cxgbe_snd_tag { struct m_snd_tag com; + int type; +}; + +struct cxgbe_rate_tag { + struct cxgbe_snd_tag com; struct adapter *adapter; u_int flags; struct mtx lock; @@ -114,8 +119,14 @@ return (__containerof(t, struct cxgbe_snd_tag, com)); } +static inline struct cxgbe_rate_tag * +mst_to_crt(struct m_snd_tag *t) +{ + return ((struct cxgbe_rate_tag *)mst_to_cst(t)); +} + union etid_entry { - struct cxgbe_snd_tag *cst; + struct cxgbe_rate_tag *cst; union etid_entry *next; }; @@ -232,10 +243,17 @@ int cop_managed_offloading; int autorcvbuf_inc; }; + /* iWARP driver tunables */ struct iw_tunables { int wc_en; }; + +struct tls_tunables { + int inline_keys; + int combo_wrs; +}; + #ifdef TCP_OFFLOAD int t4_register_uld(struct uld_info *); int t4_unregister_uld(struct uld_info *); Index: sys/dev/cxgbe/t4_filter.c =================================================================== --- sys/dev/cxgbe/t4_filter.c +++ sys/dev/cxgbe/t4_filter.c @@ -891,11 +891,6 @@ if (rc != 0) goto done; } - if (__predict_false(sc->tids.atid_tab == NULL)) { - rc = alloc_atid_tab(&sc->tids, M_NOWAIT); - if (rc != 0) - goto done; - } } else if (separate_hpfilter_region(sc) && t->fs.prio && __predict_false(ti->hpftid_tab == NULL)) { MPASS(ti->nhpftids != 0); Index: sys/dev/cxgbe/t4_l2t.h =================================================================== --- sys/dev/cxgbe/t4_l2t.h +++ sys/dev/cxgbe/t4_l2t.h @@ -48,6 +48,7 @@ /* when state is one of the below the entry is not hashed */ L2T_STATE_SWITCHING, /* entry is being used by a switching filter */ + L2T_STATE_TLS, /* entry is being used by TLS sessions */ L2T_STATE_UNUSED /* entry not in use */ }; @@ -93,6 +94,8 @@ struct l2t_entry *t4_alloc_l2e(struct l2t_data *); struct l2t_entry *t4_l2t_alloc_switching(struct adapter *, uint16_t, uint8_t, uint8_t *); +struct l2t_entry *t4_l2t_alloc_tls(struct adapter *, struct sge_txq *, + void *, int *, uint16_t, uint8_t, uint8_t *); int t4_l2t_set_switching(struct adapter *, struct l2t_entry *, uint16_t, uint8_t, uint8_t *); int t4_write_l2e(struct l2t_entry *, int); Index: sys/dev/cxgbe/t4_l2t.c 
=================================================================== --- sys/dev/cxgbe/t4_l2t.c +++ sys/dev/cxgbe/t4_l2t.c @@ -145,6 +145,23 @@ return (e); } +static void +mk_write_l2e(struct adapter *sc, struct l2t_entry *e, int sync, int reply, + void *dst) +{ + struct cpl_l2t_write_req *req; + int idx; + + req = dst; + idx = e->idx + sc->vres.l2t.start; + INIT_TP_WR(req, 0); + OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx | + V_SYNC_WR(sync) | V_TID_QID(e->iqid))); + req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!reply)); + req->l2t_idx = htons(idx); + req->vlan = htons(e->vlan); + memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); +} /* * Write an L2T entry. Must be called with the entry locked. @@ -155,9 +172,8 @@ { struct sge_wrq *wrq; struct adapter *sc; + struct cpl_l2t_write_req *req; struct wrq_cookie cookie; - struct cpl_l2t_write_req *req; - int idx; mtx_assert(&e->lock, MA_OWNED); MPASS(e->wrq != NULL); @@ -169,14 +185,7 @@ if (req == NULL) return (ENOMEM); - idx = e->idx + sc->vres.l2t.start; - INIT_TP_WR(req, 0); - OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx | - V_SYNC_WR(sync) | V_TID_QID(e->iqid))); - req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync)); - req->l2t_idx = htons(idx); - req->vlan = htons(e->vlan); - memcpy(req->dst_mac, e->dmac, sizeof(req->dst_mac)); + mk_write_l2e(sc, e, sync, sync, req); commit_wrq_wr(wrq, req, &cookie); @@ -186,6 +195,90 @@ return (0); } +/* + * Allocate an L2T entry for use by a TLS connection. These entries are + * associated with a specific VLAN and destination MAC that never changes. + * However, multiple TLS connections might share a single entry. + * + * If a new L2T entry is allocated, a work request to initialize it is + * written to 'txq' and 'ndesc' will be set to 1. Otherwise, 'ndesc' + * will be set to 0. + * + * To avoid races, separate L2T entries are reserved for individual + * queues since the L2T entry update is written to a txq just prior to + * TLS work requests that will depend on it being written. + */ +struct l2t_entry * +t4_l2t_alloc_tls(struct adapter *sc, struct sge_txq *txq, void *dst, + int *ndesc, uint16_t vlan, uint8_t port, uint8_t *eth_addr) +{ + struct l2t_data *d; + struct l2t_entry *e; + int i; + + TXQ_LOCK_ASSERT_OWNED(txq); + + d = sc->l2t; + *ndesc = 0; + + rw_rlock(&d->lock); + + /* First, try to find an existing entry. */ + for (i = 0; i < d->l2t_size; i++) { + e = &d->l2tab[i]; + if (e->state != L2T_STATE_TLS) + continue; + if (e->vlan == vlan && e->lport == port && + e->wrq == (struct sge_wrq *)txq && + memcmp(e->dmac, eth_addr, ETHER_ADDR_LEN) == 0) { + if (atomic_fetchadd_int(&e->refcnt, 1) == 0) { + /* + * This entry wasn't held but is still + * valid, so decrement nfree. + */ + atomic_subtract_int(&d->nfree, 1); + } + KASSERT(e->refcnt > 0, + ("%s: refcount overflow", __func__)); + rw_runlock(&d->lock); + return (e); + } + } + + /* + * Don't bother rechecking if the upgrade fails since the txq is + * already locked. + */ + if (!rw_try_upgrade(&d->lock)) { + rw_runlock(&d->lock); + rw_wlock(&d->lock); + } + + /* Match not found, allocate a new entry. */ + e = t4_alloc_l2e(d); + if (e == NULL) { + rw_wunlock(&d->lock); + return (e); + } + + /* Initialize the entry. 
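(The CPL_L2T_WRITE_REQ built below lands in the caller's txq ring just ahead of the TLS work requests that will reference this index; this is a descriptive note, not new behaviour.)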
*/ + e->state = L2T_STATE_TLS; + e->vlan = vlan; + e->lport = port; + e->iqid = sc->sge.fwq.abs_id; + e->wrq = (struct sge_wrq *)txq; + memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN); + atomic_store_rel_int(&e->refcnt, 1); + rw_wunlock(&d->lock); + + /* Write out the work request. */ + *ndesc = howmany(sizeof(struct cpl_l2t_write_req), EQ_ESIZE); + MPASS(*ndesc == 1); + mk_write_l2e(sc, e, 1, 0, dst); + + return (e); +} + /* * Allocate an L2T entry for use by a switching rule. Such need to be * explicitly freed and while busy they are not on any hash chain, so normal @@ -307,6 +400,7 @@ case L2T_STATE_SYNC_WRITE: return 'W'; case L2T_STATE_RESOLVING: return STAILQ_EMPTY(&e->wr_list) ? 'R' : 'A'; case L2T_STATE_SWITCHING: return 'X'; + case L2T_STATE_TLS: return 'T'; default: return 'U'; } } @@ -343,7 +437,7 @@ "Ethernet address VLAN/P LP State Users Port"); header = 1; } - if (e->state == L2T_STATE_SWITCHING) + if (e->state >= L2T_STATE_SWITCHING) ip[0] = 0; else { inet_ntop(e->ipv6 ? AF_INET6 : AF_INET, &e->addr[0], Index: sys/dev/cxgbe/t4_main.c =================================================================== --- sys/dev/cxgbe/t4_main.c +++ sys/dev/cxgbe/t4_main.c @@ -33,6 +33,7 @@ #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_kern_tls.h" #include "opt_ratelimit.h" #include "opt_rss.h" @@ -65,6 +66,9 @@ #endif #include #include +#ifdef KERN_TLS +#include +#endif #if defined(__i386__) || defined(__amd64__) #include #include @@ -230,6 +234,15 @@ static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgbe_transmit(struct ifnet *, struct mbuf *); static void cxgbe_qflush(struct ifnet *); +#if defined(KERN_TLS) || defined(RATELIMIT) +static int cxgbe_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, + struct m_snd_tag **); +static int cxgbe_snd_tag_modify(struct m_snd_tag *, + union if_snd_tag_modify_params *); +static int cxgbe_snd_tag_query(struct m_snd_tag *, + union if_snd_tag_query_params *); +static void cxgbe_snd_tag_free(struct m_snd_tag *); +#endif MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services"); @@ -568,6 +581,28 @@ "COP (Connection Offload Policy) controls all TOE offload"); #endif +#ifdef KERN_TLS +/* + * This enables KERN_TLS for all adapters if set. + */ +static int t4_kern_tls = 0; +SYSCTL_INT(_hw_cxgbe, OID_AUTO, kern_tls, CTLFLAG_RDTUN, &t4_kern_tls, 0, + "Enable KERN_TLS mode for all supported adapters"); + +SYSCTL_NODE(_hw_cxgbe, OID_AUTO, tls, CTLFLAG_RD, 0, + "cxgbe(4) KERN_TLS parameters"); + +static int t4_tls_inline_keys = 0; +SYSCTL_INT(_hw_cxgbe_tls, OID_AUTO, inline_keys, CTLFLAG_RDTUN, + &t4_tls_inline_keys, 0, + "Always pass TLS keys in work requests (1) or attempt to store TLS keys " + "in card memory."); + +static int t4_tls_combo_wrs = 0; +SYSCTL_INT(_hw_cxgbe_tls, OID_AUTO, combo_wrs, CTLFLAG_RDTUN, &t4_tls_combo_wrs, + 0, "Attempt to combine TCB field updates with TLS record work requests."); +#endif + /* Functions used by VIs to obtain unique MAC addresses for each VI. 
*/ static int vi_mac_funcs[] = { FW_VI_FUNC_ETH, @@ -626,6 +661,8 @@ static int t4_alloc_irq(struct adapter *, struct irq *, int rid, driver_intr_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); +static void t4_init_atid_table(struct adapter *); +static void t4_free_atid_table(struct adapter *); static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void vi_refresh_stats(struct adapter *, struct vi_info *); static void cxgbe_refresh_stats(struct adapter *, struct port_info *); @@ -1000,6 +1037,8 @@ sc->policy = NULL; rw_init(&sc->policy_lock, "connection offload policy"); + callout_init(&sc->ktls_tick, 1); + rc = t4_map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ @@ -1236,6 +1275,7 @@ t4_init_l2t(sc, M_WAITOK); t4_init_smt(sc, M_WAITOK); t4_init_tx_sched(sc); + t4_init_atid_table(sc); #ifdef RATELIMIT t4_init_etid_table(sc); #endif @@ -1536,6 +1576,7 @@ t4_free_l2t(sc->l2t); if (sc->smt) t4_free_smt(sc->smt); + t4_free_atid_table(sc); #ifdef RATELIMIT t4_free_etid_table(sc); #endif @@ -1564,7 +1605,6 @@ free(sc->tids.ftid_tab, M_CXGBE); free(sc->tids.hpftid_tab, M_CXGBE); free_hftid_hash(&sc->tids); - free(sc->tids.atid_tab, M_CXGBE); free(sc->tids.tid_tab, M_CXGBE); free(sc->tt.tls_rx_ports, M_CXGBE); t4_destroy_dma_tag(sc); @@ -1575,6 +1615,7 @@ mtx_destroy(&sc->sc_lock); } + callout_drain(&sc->ktls_tick); callout_drain(&sc->sfl_callout); if (mtx_initialized(&sc->tids.ftid_lock)) { mtx_destroy(&sc->tids.ftid_lock); @@ -1653,7 +1694,7 @@ ifp->if_transmit = cxgbe_transmit; ifp->if_qflush = cxgbe_qflush; ifp->if_get_counter = cxgbe_get_counter; -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) ifp->if_snd_tag_alloc = cxgbe_snd_tag_alloc; ifp->if_snd_tag_modify = cxgbe_snd_tag_modify; ifp->if_snd_tag_query = cxgbe_snd_tag_query; @@ -1664,7 +1705,7 @@ ifp->if_capabilities = T4_CAP; ifp->if_capenable = T4_CAP_ENABLE; #ifdef TCP_OFFLOAD - if (vi->nofldrxq != 0) + if (vi->nofldrxq != 0 && (vi->pi->adapter->flags & KERN_TLS_OK) == 0) ifp->if_capabilities |= IFCAP_TOE; #endif #ifdef RATELIMIT @@ -1683,6 +1724,12 @@ ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO; #endif ifp->if_hw_tsomaxsegsize = 65536; +#ifdef KERN_TLS + if (vi->pi->adapter->flags & KERN_TLS_OK) { + ifp->if_capabilities |= IFCAP_TXTLS; + ifp->if_capenable |= IFCAP_TXTLS; + } +#endif ether_ifattach(ifp, vi->hw_addr); #ifdef DEV_NETMAP @@ -1909,6 +1956,8 @@ if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; + if (mask & IFCAP_NOMAP) + ifp->if_capenable ^= IFCAP_NOMAP; /* * Note that we leave CSUM_TSO alone (it is always set). 
The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before @@ -1991,6 +2040,11 @@ if (mask & IFCAP_NOMAP) ifp->if_capenable ^= IFCAP_NOMAP; +#ifdef KERN_TLS + if (mask & IFCAP_TXTLS) + ifp->if_capenable ^= (mask & IFCAP_TXTLS); +#endif + #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif @@ -2043,11 +2097,18 @@ struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_txq *txq; +#ifdef RATELIMIT + struct cxgbe_snd_tag *cst; +#endif void *items[1]; int rc; M_ASSERTPKTHDR(m); MPASS(m->m_nextpkt == NULL); /* not quite ready for this yet */ +#if defined(KERN_TLS) || defined(RATELIMIT) + if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) + MPASS(m->m_pkthdr.snd_tag->ifp == ifp); +#endif if (__predict_false(pi->link_cfg.link_ok == false)) { m_freem(m); @@ -2062,8 +2123,9 @@ } #ifdef RATELIMIT if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { - MPASS(m->m_pkthdr.snd_tag->ifp == ifp); - return (ethofld_transmit(ifp, m)); + cst = mst_to_cst(m->m_pkthdr.snd_tag); + if (cst->type == IF_SND_TAG_TYPE_RATE_LIMIT) + return (ethofld_transmit(ifp, m)); } #endif @@ -2221,6 +2283,97 @@ } } +#if defined(KERN_TLS) || defined(RATELIMIT) +void +cxgbe_snd_tag_init(struct cxgbe_snd_tag *cst, struct ifnet *ifp, int type) +{ + + m_snd_tag_init(&cst->com, ifp); + cst->type = type; +} + +static int +cxgbe_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, + struct m_snd_tag **pt) +{ + int error; + + switch (params->hdr.type) { +#ifdef RATELIMIT + case IF_SND_TAG_TYPE_RATE_LIMIT: + error = cxgbe_rate_tag_alloc(ifp, params, pt); + break; +#endif +#ifdef KERN_TLS + case IF_SND_TAG_TYPE_TLS: + error = cxgbe_tls_tag_alloc(ifp, params, pt); + break; +#endif + default: + error = EOPNOTSUPP; + } + if (error == 0) + MPASS(mst_to_cst(*pt)->type == params->hdr.type); + return (error); +} + +static int +cxgbe_snd_tag_modify(struct m_snd_tag *mst, + union if_snd_tag_modify_params *params) +{ + struct cxgbe_snd_tag *cst; + + cst = mst_to_cst(mst); + switch (cst->type) { +#ifdef RATELIMIT + case IF_SND_TAG_TYPE_RATE_LIMIT: + return (cxgbe_rate_tag_modify(mst, params)); +#endif + default: + return (EOPNOTSUPP); + } +} + +static int +cxgbe_snd_tag_query(struct m_snd_tag *mst, + union if_snd_tag_query_params *params) +{ + struct cxgbe_snd_tag *cst; + + cst = mst_to_cst(mst); + switch (cst->type) { +#ifdef RATELIMIT + case IF_SND_TAG_TYPE_RATE_LIMIT: + return (cxgbe_rate_tag_query(mst, params)); +#endif + default: + return (EOPNOTSUPP); + } +} + +static void +cxgbe_snd_tag_free(struct m_snd_tag *mst) +{ + struct cxgbe_snd_tag *cst; + + cst = mst_to_cst(mst); + switch (cst->type) { +#ifdef RATELIMIT + case IF_SND_TAG_TYPE_RATE_LIMIT: + cxgbe_rate_tag_free(mst); + return; +#endif +#ifdef KERN_TLS + case IF_SND_TAG_TYPE_TLS: + cxgbe_tls_tag_free(mst); + return; +#endif + default: + panic("shouldn't get here"); + } +} +#endif + /* * The kernel picks a media from the list we had provided but we still validate * the requeste. 
@@ -2831,31 +2984,34 @@ return (0); } -int -alloc_atid_tab(struct tid_info *t, int flags) +static void +t4_init_atid_table(struct adapter *sc) { + struct tid_info *t; int i; - MPASS(t->natids > 0); + t = &sc->tids; + if (t->natids == 0) + return; + MPASS(t->atid_tab == NULL); t->atid_tab = malloc(t->natids * sizeof(*t->atid_tab), M_CXGBE, - M_ZERO | flags); - if (t->atid_tab == NULL) - return (ENOMEM); + M_ZERO | M_WAITOK); mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF); t->afree = t->atid_tab; t->atids_in_use = 0; for (i = 1; i < t->natids; i++) t->atid_tab[i - 1].next = &t->atid_tab[i]; t->atid_tab[t->natids - 1].next = NULL; - - return (0); } -void -free_atid_tab(struct tid_info *t) +static void +t4_free_atid_table(struct adapter *sc) { + struct tid_info *t; + + t = &sc->tids; KASSERT(t->atids_in_use == 0, ("%s: %d atids still in use.", __func__, t->atids_in_use)); @@ -4421,6 +4577,58 @@ return (rc); } +#ifdef KERN_TLS +static void +ktls_tick(void *arg) +{ + struct adapter *sc; + uint32_t tstamp; + + sc = arg; + + tstamp = tcp_ts_getticks(); + t4_write_reg(sc, A_TP_SYNC_TIME_HI, tstamp >> 1); + t4_write_reg(sc, A_TP_SYNC_TIME_LO, tstamp << 31); + + callout_schedule_sbt(&sc->ktls_tick, SBT_1MS, 0, C_HARDCLOCK); +} + +static void +t4_enable_kern_tls(struct adapter *sc) +{ + uint32_t m, v; + + m = F_ENABLECBYP; + v = F_ENABLECBYP; + t4_set_reg_field(sc, A_TP_PARA_REG6, m, v); + + m = F_CPL_FLAGS_UPDATE_EN | F_SEQ_UPDATE_EN; + v = F_CPL_FLAGS_UPDATE_EN | F_SEQ_UPDATE_EN; + t4_set_reg_field(sc, A_ULP_TX_CONFIG, m, v); + + m = F_NICMODE; + v = F_NICMODE; + t4_set_reg_field(sc, A_TP_IN_CONFIG, m, v); + + m = F_LOOKUPEVERYPKT; + v = 0; + t4_set_reg_field(sc, A_TP_INGRESS_CONFIG, m, v); + + m = F_TXDEFERENABLE | F_DISABLEWINDOWPSH | F_DISABLESEPPSHFLAG; + v = F_DISABLEWINDOWPSH; + t4_set_reg_field(sc, A_TP_PC_CONFIG, m, v); + + m = V_TIMESTAMPRESOLUTION(M_TIMESTAMPRESOLUTION); + v = V_TIMESTAMPRESOLUTION(0x1f); + t4_set_reg_field(sc, A_TP_TIMER_RESOLUTION, m, v); + + sc->flags |= KERN_TLS_OK; + + sc->tlst.inline_keys = t4_tls_inline_keys; + sc->tlst.combo_wrs = t4_tls_combo_wrs; +} +#endif + static int set_params__post_init(struct adapter *sc) { @@ -4500,6 +4708,12 @@ } } #endif + +#ifdef KERN_TLS + if (t4_kern_tls != 0 && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS && + sc->toecaps & FW_CAPS_CONFIG_TOE) + t4_enable_kern_tls(sc); +#endif return (0); } @@ -5344,6 +5558,11 @@ if (!(sc->flags & IS_VF)) t4_intr_enable(sc); +#ifdef KERN_TLS + if (sc->flags & KERN_TLS_OK) + callout_reset_sbt(&sc->ktls_tick, SBT_1MS, 0, ktls_tick, sc, + C_HARDCLOCK); +#endif sc->flags |= FULL_INIT_DONE; done: if (rc != 0) @@ -6211,6 +6430,26 @@ sysctl_wcwr_stats, "A", "write combined work requests"); } +#ifdef KERN_TLS + if (sc->flags & KERN_TLS_OK) { + + /* + * dev.t4nex.0.tls. 
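+ *
+ * (These start out mirroring the hw.cxgbe.tls.* loader
+ * tunables and stay writable at runtime, e.g.
+ * "sysctl dev.t4nex.0.tls.combo_wrs=1"; an illustrative
+ * invocation, adjust the unit number as needed.)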
+ */ + oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "tls", CTLFLAG_RD, + NULL, "KERN_TLS parameters"); + children = SYSCTL_CHILDREN(oid); + + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "inline_keys", + CTLFLAG_RW, &sc->tlst.inline_keys, 0, "Always pass TLS " + "keys in work requests (1) or attempt to store TLS keys " + "in card memory."); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "combo_wrs", + CTLFLAG_RW, &sc->tlst.combo_wrs, 0, "Attempt to combine " + "TCB field updates with TLS record work requests."); + } +#endif + #ifdef TCP_OFFLOAD if (is_offload(sc)) { int i; @@ -6681,16 +6920,16 @@ SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_records", CTLFLAG_RD, &pi->tx_tls_records, - "# of TLS records transmitted"); + "# of TOE TLS records transmitted"); SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_octets", CTLFLAG_RD, &pi->tx_tls_octets, - "# of payload octets in transmitted TLS records"); + "# of payload octets in transmitted TOE TLS records"); SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_records", CTLFLAG_RD, &pi->rx_tls_records, - "# of TLS records received"); + "# of TOE TLS records received"); SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_octets", CTLFLAG_RD, &pi->rx_tls_octets, - "# of payload octets in received TLS records"); + "# of payload octets in received TOE TLS records"); } static int @@ -9915,6 +10154,19 @@ txq->txpkts0_pkts = 0; txq->txpkts1_pkts = 0; txq->raw_wrs = 0; + txq->tls_wrs = 0; + txq->kern_tls_records = 0; + txq->kern_tls_short = 0; + txq->kern_tls_partial = 0; + txq->kern_tls_full = 0; + txq->kern_tls_octets = 0; + txq->kern_tls_waste = 0; + txq->kern_tls_options = 0; + txq->kern_tls_header = 0; + txq->kern_tls_fin = 0; + txq->kern_tls_fin_short = 0; + txq->kern_tls_cbc = 0; + txq->kern_tls_gcm = 0; mp_ring_reset_stats(txq->r); } @@ -10440,10 +10692,17 @@ #ifdef TCP_OFFLOAD calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ); calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI); +#endif +#if defined(TCP_OFFLOAD) || defined(KERN_TLS) if (t4_toecaps_allowed == -1) t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; +#else + if (t4_toecaps_allowed == -1) + t4_toecaps_allowed = 0; +#endif +#ifdef TCP_OFFLOAD if (t4_rdmacaps_allowed == -1) { t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP | FW_CAPS_CONFIG_RDMA_RDMAC; @@ -10461,9 +10720,6 @@ if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS) t4_pktc_idx_ofld = PKTC_IDX_OFLD; #else - if (t4_toecaps_allowed == -1) - t4_toecaps_allowed = 0; - if (t4_rdmacaps_allowed == -1) t4_rdmacaps_allowed = 0; @@ -10765,6 +11021,9 @@ #endif #ifdef INET6 t4_clip_modload(); +#endif +#ifdef KERN_TLS + t6_ktls_modload(); #endif t4_tracer_modload(); tweak_tunables(); @@ -10805,6 +11064,9 @@ if (t4_sge_extfree_refs() == 0) { t4_tracer_modunload(); +#ifdef KERN_TLS + t6_ktls_modunload(); +#endif #ifdef INET6 t4_clip_modunload(); #endif Index: sys/dev/cxgbe/t4_sched.c =================================================================== --- sys/dev/cxgbe/t4_sched.c +++ sys/dev/cxgbe/t4_sched.c @@ -711,11 +711,11 @@ } /* etid services */ -static int alloc_etid(struct adapter *, struct cxgbe_snd_tag *); +static int alloc_etid(struct adapter *, struct cxgbe_rate_tag *); static void free_etid(struct adapter *, int); static int -alloc_etid(struct adapter *sc, struct cxgbe_snd_tag *cst) +alloc_etid(struct adapter *sc, struct cxgbe_rate_tag *cst) { struct tid_info *t = &sc->tids; int etid = -1; @@ -733,7 +733,7 @@ return (etid); } -struct cxgbe_snd_tag * +struct cxgbe_rate_tag * lookup_etid(struct adapter *sc, int etid) { struct tid_info *t = 
&sc->tids; @@ -755,17 +755,16 @@ } int -cxgbe_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, +cxgbe_rate_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **pt) { int rc, schedcl; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; - struct cxgbe_snd_tag *cst; + struct cxgbe_rate_tag *cst; - if (params->hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT) - return (ENOTSUP); + MPASS(params->hdr.type == IF_SND_TAG_TYPE_RATE_LIMIT); rc = t4_reserve_cl_rl_kbps(sc, pi->port_id, (params->rate_limit.max_rate * 8ULL / 1000), &schedcl); @@ -789,7 +788,7 @@ mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF); mbufq_init(&cst->pending_tx, INT_MAX); mbufq_init(&cst->pending_fwack, INT_MAX); - m_snd_tag_init(&cst->com, ifp); + cxgbe_snd_tag_init(&cst->com, ifp, IF_SND_TAG_TYPE_RATE_LIMIT); cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF; cst->adapter = sc; cst->port_id = pi->port_id; @@ -806,7 +805,7 @@ * Queues will be selected later when the connection flowid is available. */ - *pt = &cst->com; + *pt = &cst->com.com; return (0); } @@ -814,11 +813,11 @@ * Change in parameters, no change in ifp. */ int -cxgbe_snd_tag_modify(struct m_snd_tag *mst, +cxgbe_rate_tag_modify(struct m_snd_tag *mst, union if_snd_tag_modify_params *params) { int rc, schedcl; - struct cxgbe_snd_tag *cst = mst_to_cst(mst); + struct cxgbe_rate_tag *cst = mst_to_crt(mst); struct adapter *sc = cst->adapter; /* XXX: is schedcl -1 ok here? */ @@ -840,10 +839,10 @@ } int -cxgbe_snd_tag_query(struct m_snd_tag *mst, +cxgbe_rate_tag_query(struct m_snd_tag *mst, union if_snd_tag_query_params *params) { - struct cxgbe_snd_tag *cst = mst_to_cst(mst); + struct cxgbe_rate_tag *cst = mst_to_crt(mst); params->rate_limit.max_rate = cst->max_rate; @@ -858,7 +857,7 @@ * Unlocks cst and frees it. */ void -cxgbe_snd_tag_free_locked(struct cxgbe_snd_tag *cst) +cxgbe_rate_tag_free_locked(struct cxgbe_rate_tag *cst) { struct adapter *sc = cst->adapter; @@ -879,9 +878,9 @@ } void -cxgbe_snd_tag_free(struct m_snd_tag *mst) +cxgbe_rate_tag_free(struct m_snd_tag *mst) { - struct cxgbe_snd_tag *cst = mst_to_cst(mst); + struct cxgbe_rate_tag *cst = mst_to_crt(mst); mtx_lock(&cst->lock); @@ -896,7 +895,7 @@ * credits for the etid otherwise. */ if (cst->tx_credits == cst->tx_total) { - cxgbe_snd_tag_free_locked(cst); + cxgbe_rate_tag_free_locked(cst); return; /* cst is gone. */ } send_etid_flush_wr(cst); Index: sys/dev/cxgbe/t4_sge.c =================================================================== --- sys/dev/cxgbe/t4_sge.c +++ sys/dev/cxgbe/t4_sge.c @@ -32,6 +32,7 @@ #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_kern_tls.h" #include "opt_ratelimit.h" #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -85,6 +88,7 @@ /* Internal mbuf flags stored in PH_loc.eight[1]. */ #define MC_NOMAP 0x01 #define MC_RAW_WR 0x02 +#define MC_TLS 0x04 /* * Ethernet frames are DMA'd at this byte offset into the freelist buffer. 
@@ -2240,7 +2244,8 @@ M_ASSERTPKTHDR(m); n = m->m_pkthdr.PH_loc.eight[0]; - MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); + if (!(mbuf_cflags(m) & MC_TLS)) + MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); return (n); } @@ -2307,10 +2312,10 @@ } static inline int -needs_eo(struct mbuf *m) +needs_eo(struct cxgbe_snd_tag *cst) { - return (m->m_pkthdr.csum_flags & CSUM_SND_TAG); + return (cst != NULL && cst->type == IF_SND_TAG_TYPE_RATE_LIMIT); } #endif @@ -2541,6 +2546,9 @@ void *l3hdr; #if defined(INET) || defined(INET6) struct tcphdr *tcp; +#endif +#if defined(KERN_TLS) || defined(RATELIMIT) + struct cxgbe_snd_tag *cst; #endif uint16_t eh_type; uint8_t cflags; @@ -2562,6 +2570,26 @@ M_ASSERTPKTHDR(m0); MPASS(m0->m_pkthdr.len > 0); nsegs = count_mbuf_nsegs(m0, 0, &cflags); +#if defined(KERN_TLS) || defined(RATELIMIT) + if (m0->m_pkthdr.csum_flags & CSUM_SND_TAG) + cst = mst_to_cst(m0->m_pkthdr.snd_tag); + else + cst = NULL; +#endif +#ifdef KERN_TLS + if (cst != NULL && cst->type == IF_SND_TAG_TYPE_TLS) { + int len16; + + cflags |= MC_TLS; + set_mbuf_cflags(m0, cflags); + rc = t6_ktls_parse_pkt(m0, &nsegs, &len16); + if (rc != 0) + goto fail; + set_mbuf_nsegs(m0, nsegs); + set_mbuf_len16(m0, len16); + return (0); + } +#endif if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) { if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) { rc = EFBIG; @@ -2595,16 +2623,17 @@ * checksumming is enabled. needs_l4_csum happens to check for all the * right things. */ - if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0))) { + if (__predict_false(needs_eo(cst) && !needs_l4_csum(m0))) { m_snd_tag_rele(m0->m_pkthdr.snd_tag); m0->m_pkthdr.snd_tag = NULL; m0->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; + cst = NULL; } #endif if (!needs_tso(m0) && #ifdef RATELIMIT - !needs_eo(m0) && + !needs_eo(cst) && #endif !(sc->flags & IS_VF && (needs_l3_csum(m0) || needs_l4_csum(m0)))) return (0); @@ -2666,7 +2695,7 @@ #endif } #ifdef RATELIMIT - if (needs_eo(m0)) { + if (needs_eo(cst)) { u_int immhdrs; /* EO WRs have the headers in the WR and not the GL. */ @@ -2831,7 +2860,7 @@ { /* maybe put a GL limit too, to avoid silliness? 
*/ - return (needs_tso(m) || (mbuf_cflags(m) & MC_RAW_WR) != 0); + return (needs_tso(m) || (mbuf_cflags(m) & (MC_RAW_WR | MC_TLS)) != 0); } static inline int @@ -2906,7 +2935,8 @@ M_ASSERTPKTHDR(m0); MPASS(m0->m_nextpkt == NULL); - if (available < SGE_MAX_WR_NDESC) { + if (available < howmany(mbuf_len16(m0), EQ_ESIZE / 16)) { + MPASS(howmany(mbuf_len16(m0), EQ_ESIZE / 16) <= 64); available += reclaim_tx_descs(txq, 64); if (available < howmany(mbuf_len16(m0), EQ_ESIZE / 16)) break; /* out of descriptors */ @@ -2917,7 +2947,19 @@ next_cidx = 0; wr = (void *)&eq->desc[eq->pidx]; - if (sc->flags & IS_VF) { + if (mbuf_cflags(m0) & MC_RAW_WR) { + total++; + remaining--; + n = write_raw_wr(txq, (void *)wr, m0, available); +#ifdef KERN_TLS + } else if (mbuf_cflags(m0) & MC_TLS) { + total++; + remaining--; + ETHER_BPF_MTAP(ifp, m0); + n = t6_ktls_write_wr(txq,(void *)wr, m0, + mbuf_nsegs(m0), available); +#endif + } else if (sc->flags & IS_VF) { total++; remaining--; ETHER_BPF_MTAP(ifp, m0); @@ -2951,17 +2993,15 @@ n = write_txpkts_wr(txq, wr, m0, &txp, available); total += txp.npkt; remaining -= txp.npkt; - } else if (mbuf_cflags(m0) & MC_RAW_WR) { - total++; - remaining--; - n = write_raw_wr(txq, (void *)wr, m0, available); } else { total++; remaining--; ETHER_BPF_MTAP(ifp, m0); n = write_txpkt_wr(txq, (void *)wr, m0, available); } - MPASS(n >= 1 && n <= available && n <= SGE_MAX_WR_NDESC); + MPASS(n >= 1 && n <= available); + if (!(mbuf_cflags(m0) & MC_TLS)) + MPASS(n <= SGE_MAX_WR_NDESC); available -= n; dbdiff += n; @@ -4173,6 +4213,49 @@ "# of frames tx'd using type1 txpkts work requests"); SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD, &txq->raw_wrs, "# of raw work requests (non-packets)"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "tls_wrs", CTLFLAG_RD, + &txq->tls_wrs, "# of TLS work requests (TLS records)"); + +#ifdef KERN_TLS + if (sc->flags & KERN_TLS_OK) { + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_records", CTLFLAG_RD, &txq->kern_tls_records, + "# of NIC TLS records transmitted"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_short", CTLFLAG_RD, &txq->kern_tls_short, + "# of short NIC TLS records transmitted"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_partial", CTLFLAG_RD, &txq->kern_tls_partial, + "# of partial NIC TLS records transmitted"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_full", CTLFLAG_RD, &txq->kern_tls_full, + "# of full NIC TLS records transmitted"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_octets", CTLFLAG_RD, &txq->kern_tls_octets, + "# of payload octets in transmitted NIC TLS records"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_waste", CTLFLAG_RD, &txq->kern_tls_waste, + "# of octets DMAd but not transmitted in NIC TLS records"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_options", CTLFLAG_RD, &txq->kern_tls_options, + "# of NIC TLS options-only packets transmitted"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_header", CTLFLAG_RD, &txq->kern_tls_header, + "# of NIC TLS header-only packets transmitted"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_fin", CTLFLAG_RD, &txq->kern_tls_fin, + "# of NIC TLS FIN-only packets transmitted"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_fin_short", CTLFLAG_RD, &txq->kern_tls_fin_short, + "# of NIC TLS padded FIN packets on short TLS records"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_cbc", CTLFLAG_RD, 
&txq->kern_tls_cbc, + "# of NIC TLS sessions using AES-CBC"); + SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, + "kern_tls_gcm", CTLFLAG_RD, &txq->kern_tls_gcm, + "# of NIC TLS sessions using AES-GCM"); + } +#endif SYSCTL_ADD_COUNTER_U64(&vi->ctx, children, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->r->enqueues, @@ -5722,7 +5805,7 @@ #define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16)) static int -send_etid_flowc_wr(struct cxgbe_snd_tag *cst, struct port_info *pi, +send_etid_flowc_wr(struct cxgbe_rate_tag *cst, struct port_info *pi, struct vi_info *vi) { struct wrq_cookie cookie; @@ -5768,7 +5851,7 @@ #define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16)) void -send_etid_flush_wr(struct cxgbe_snd_tag *cst) +send_etid_flush_wr(struct cxgbe_rate_tag *cst) { struct fw_flowc_wr *flowc; struct wrq_cookie cookie; @@ -5794,7 +5877,7 @@ } static void -write_ethofld_wr(struct cxgbe_snd_tag *cst, struct fw_eth_tx_eo_wr *wr, +write_ethofld_wr(struct cxgbe_rate_tag *cst, struct fw_eth_tx_eo_wr *wr, struct mbuf *m0, int compl) { struct cpl_tx_pkt_core *cpl; @@ -5943,7 +6026,7 @@ } static void -ethofld_tx(struct cxgbe_snd_tag *cst) +ethofld_tx(struct cxgbe_rate_tag *cst) { struct mbuf *m; struct wrq_cookie cookie; @@ -5976,7 +6059,7 @@ cst->tx_credits -= next_credits; cst->tx_nocompl += next_credits; compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2; - ETHER_BPF_MTAP(cst->com.ifp, m); + ETHER_BPF_MTAP(cst->com.com.ifp, m); write_ethofld_wr(cst, wr, m, compl); commit_wrq_wr(cst->eo_txq, wr, &cookie); if (compl) { @@ -5988,7 +6071,7 @@ /* * Drop the mbuf's reference on the tag now rather * than waiting until m_freem(). This ensures that - * cxgbe_snd_tag_free gets called when the inp drops + * cxgbe_rate_tag_free gets called when the inp drops * its reference on the tag and there are no more * mbufs in the pending_tx queue and can flush any * pending requests. Otherwise if the last mbuf @@ -5997,7 +6080,7 @@ */ m->m_pkthdr.snd_tag = NULL; m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG; - m_snd_tag_rele(&cst->com); + m_snd_tag_rele(&cst->com.com); mbufq_enqueue(&cst->pending_fwack, m); } @@ -6006,13 +6089,13 @@ int ethofld_transmit(struct ifnet *ifp, struct mbuf *m0) { - struct cxgbe_snd_tag *cst; + struct cxgbe_rate_tag *cst; int rc; MPASS(m0->m_nextpkt == NULL); MPASS(m0->m_pkthdr.csum_flags & CSUM_SND_TAG); MPASS(m0->m_pkthdr.snd_tag != NULL); - cst = mst_to_cst(m0->m_pkthdr.snd_tag); + cst = mst_to_crt(m0->m_pkthdr.snd_tag); mtx_lock(&cst->lock); MPASS(cst->flags & EO_SND_TAG_REF); @@ -6051,10 +6134,10 @@ * ethofld_tx() in case we are sending the final mbuf after * the inp was freed. */ - m_snd_tag_ref(&cst->com); + m_snd_tag_ref(&cst->com.com); ethofld_tx(cst); mtx_unlock(&cst->lock); - m_snd_tag_rele(&cst->com); + m_snd_tag_rele(&cst->com.com); return (0); done: @@ -6071,7 +6154,7 @@ const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); struct mbuf *m; u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); - struct cxgbe_snd_tag *cst; + struct cxgbe_rate_tag *cst; uint8_t credits = cpl->credits; cst = lookup_etid(sc, etid); @@ -6103,7 +6186,7 @@ cst->flags &= ~EO_FLUSH_RPL_PENDING; cst->tx_credits += cpl->credits; - cxgbe_snd_tag_free_locked(cst); + cxgbe_rate_tag_free_locked(cst); return (0); /* cst is gone. */ } KASSERT(m != NULL, @@ -6125,12 +6208,12 @@ * As with ethofld_transmit(), hold an extra reference * so that the tag is stable across ethold_tx(). 
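 *
 * (After this change the m_snd_tag lives at cst->com.com: struct
 * cxgbe_rate_tag embeds the generic cxgbe_snd_tag as 'com', which
 * in turn embeds the m_snd_tag, hence the two-level references
 * below.)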
*/ - m_snd_tag_ref(&cst->com); + m_snd_tag_ref(&cst->com.com); m = mbufq_first(&cst->pending_tx); if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m)) ethofld_tx(cst); mtx_unlock(&cst->lock); - m_snd_tag_rele(&cst->com); + m_snd_tag_rele(&cst->com.com); } else { /* * There shouldn't be any pending packets if the tag Index: sys/dev/cxgbe/tom/t4_connect.c =================================================================== --- sys/dev/cxgbe/tom/t4_connect.c +++ sys/dev/cxgbe/tom/t4_connect.c @@ -255,6 +255,8 @@ DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */ else DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); + if (sc->flags & KERN_TLS_OK) + DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); rw_rlock(&sc->policy_lock); settings = *lookup_offload_policy(sc, OPEN_TYPE_ACTIVE, NULL, Index: sys/dev/cxgbe/tom/t4_listen.c =================================================================== --- sys/dev/cxgbe/tom/t4_listen.c +++ sys/dev/cxgbe/tom/t4_listen.c @@ -524,6 +524,8 @@ if (!(inp->inp_vflag & INP_IPV6) && IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr))) return (0); + if (sc->flags & KERN_TLS_OK) + return (0); #if 0 ADAPTER_LOCK(sc); if (IS_BUSY(sc)) { Index: sys/dev/cxgbe/tom/t4_tls.c =================================================================== --- sys/dev/cxgbe/tom/t4_tls.c +++ sys/dev/cxgbe/tom/t4_tls.c @@ -432,7 +432,7 @@ /* TLS Key memory management */ static int -get_new_keyid(struct toepcb *toep, struct tls_key_context *k_ctx) +get_new_keyid(struct toepcb *toep) { struct adapter *sc = td_adapter(toep->td); vmem_addr_t addr; @@ -503,7 +503,7 @@ /* Dont initialize key for re-neg */ if (!G_KEY_CLR_LOC(k_ctx->l_p_key)) { - if ((keyid = get_new_keyid(toep, k_ctx)) < 0) { + if ((keyid = get_new_keyid(toep)) < 0) { return (ENOSPC); } } else { Index: sys/dev/cxgbe/tom/t4_tom.h =================================================================== --- sys/dev/cxgbe/tom/t4_tom.h +++ sys/dev/cxgbe/tom/t4_tom.h @@ -370,6 +370,8 @@ int t4_connect(struct toedev *, struct socket *, struct rtentry *, struct sockaddr *); void act_open_failure_cleanup(struct adapter *, u_int, u_int); +int alloc_atid(struct adapter *, void *); +void free_atid(struct adapter *, int); /* t4_listen.c */ void t4_init_listen_cpl_handlers(void); Index: sys/dev/cxgbe/tom/t4_tom.c =================================================================== --- sys/dev/cxgbe/tom/t4_tom.c +++ sys/dev/cxgbe/tom/t4_tom.c @@ -1353,7 +1353,6 @@ { free_tid_tab(t); - free_atid_tab(t); free_stid_tab(t); } @@ -1366,10 +1365,6 @@ if (rc != 0) goto failed; - rc = alloc_atid_tab(t, M_NOWAIT); - if (rc != 0) - goto failed; - rc = alloc_stid_tab(t, M_NOWAIT); if (rc != 0) goto failed; Index: sys/modules/cxgbe/if_cxgbe/Makefile =================================================================== --- sys/modules/cxgbe/if_cxgbe/Makefile +++ sys/modules/cxgbe/if_cxgbe/Makefile @@ -3,7 +3,7 @@ # CXGBE= ${SRCTOP}/sys/dev/cxgbe -.PATH: ${CXGBE} ${CXGBE}/common ${CXGBE}/cudbg +.PATH: ${CXGBE} ${CXGBE}/common ${CXGBE}/crypto ${CXGBE}/cudbg KMOD= if_cxgbe SRCS= bus_if.h @@ -11,6 +11,7 @@ SRCS+= opt_ddb.h SRCS+= opt_inet.h SRCS+= opt_inet6.h +SRCS+= opt_kern_tls.h SRCS+= opt_ofed.h SRCS+= opt_ratelimit.h SRCS+= opt_rss.h @@ -20,6 +21,9 @@ SRCS+= t4_hw.c SRCS+= t4_if.c t4_if.h SRCS+= t4_iov.c +.if ${KERN_OPTS:MKERN_TLS} != "" +SRCS+= t4_kern_tls.c +.endif SRCS+= t4_l2t.c SRCS+= t4_main.c SRCS+= t4_mp_ring.c Index: sys/modules/cxgbe/tom/Makefile =================================================================== --- sys/modules/cxgbe/tom/Makefile +++ sys/modules/cxgbe/tom/Makefile 
@@ -10,6 +10,7 @@ SRCS+= device_if.h SRCS+= opt_inet.h SRCS+= opt_inet6.h +SRCS+= opt_kern_tls.h SRCS+= opt_ratelimit.h SRCS+= pci_if.h SRCS+= t4_connect.c
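
A minimal sketch of how this support would be exercised once the patch is applied. The KERN_TLS kernel option, the cxgbe device, and the kern.ipc.tls.enable sysctl are existing FreeBSD facilities; the hw.cxgbe.kern_tls tunable and the exact sysctl OID path for the per-txq counters registered above are assumptions not shown in these hunks:

	# Kernel configuration: the new code only builds with the KERN_TLS option.
	options 	KERN_TLS
	device		cxgbe

	# /boot/loader.conf: opt the adapter in to NIC TLS (tunable name assumed).
	hw.cxgbe.kern_tls="1"

	# Runtime: enable kernel TLS in the socket layer, then inspect the
	# per-txq counters added above (OID path depends on the port device name).
	sysctl kern.ipc.tls.enable=1
	sysctl dev.cc | grep kern_tls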