Index: head/sys/dev/cxgbe/tom/t4_connect.c =================================================================== --- head/sys/dev/cxgbe/tom/t4_connect.c (revision 360190) +++ head/sys/dev/cxgbe/tom/t4_connect.c (revision 360191) @@ -1,405 +1,406 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #define TCPSTATES #include #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "t4_clip.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" /* * Active open succeeded. */ static int do_act_establish(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_establish *cpl = (const void *)(rss + 1); u_int tid = GET_TID(cpl); u_int atid = G_TID_TID(ntohl(cpl->tos_atid)); struct toepcb *toep = lookup_atid(sc, atid); struct inpcb *inp = toep->inp; KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__)); CTR3(KTR_CXGBE, "%s: atid %u, tid %u", __func__, atid, tid); free_atid(sc, atid); CURVNET_SET(toep->vnet); INP_WLOCK(inp); toep->tid = tid; insert_tid(sc, tid, toep, inp->inp_vflag & INP_IPV6 ? 
2 : 1); if (inp->inp_flags & INP_DROPPED) { /* socket closed by the kernel before hw told us it connected */ send_flowc_wr(toep, NULL); send_reset(sc, toep, be32toh(cpl->snd_isn)); goto done; } make_established(toep, be32toh(cpl->snd_isn) - 1, be32toh(cpl->rcv_isn) - 1, cpl->tcp_opt); inp->inp_flowtype = M_HASHTYPE_OPAQUE; inp->inp_flowid = tid; if (ulp_mode(toep) == ULP_MODE_TLS) tls_establish(toep); done: INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } void act_open_failure_cleanup(struct adapter *sc, u_int atid, u_int status) { struct toepcb *toep = lookup_atid(sc, atid); struct inpcb *inp = toep->inp; struct toedev *tod = &toep->td->tod; struct epoch_tracker et; free_atid(sc, atid); toep->tid = -1; CURVNET_SET(toep->vnet); if (status != EAGAIN) NET_EPOCH_ENTER(et); INP_WLOCK(inp); toe_connect_failed(tod, inp, status); final_cpl_received(toep); /* unlocks inp */ if (status != EAGAIN) NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } /* * Active open failed. */ static int do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); u_int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status))); u_int status = G_AOPEN_STATUS(be32toh(cpl->atid_status)); struct toepcb *toep = lookup_atid(sc, atid); int rc; KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__)); CTR3(KTR_CXGBE, "%s: atid %u, status %u ", __func__, atid, status); /* Ignore negative advice */ if (negative_advice(status)) return (0); if (status && act_open_has_tid(status)) release_tid(sc, GET_TID(cpl), toep->ctrlq); rc = act_open_rpl_status_to_errno(status); act_open_failure_cleanup(sc, atid, rc); return (0); } void t4_init_connect_cpl_handlers(void) { t4_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish); t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl, CPL_COOKIE_TOM); } void 
t4_uninit_connect_cpl_handlers(void) { t4_register_cpl_handler(CPL_ACT_ESTABLISH, NULL); t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, NULL, CPL_COOKIE_TOM); } #define DONT_OFFLOAD_ACTIVE_OPEN(x) do { \ reason = __LINE__; \ rc = (x); \ goto failed; \ } while (0) static inline int act_open_cpl_size(struct adapter *sc, int isipv6) { int idx; static const int sz_table[3][2] = { { sizeof (struct cpl_act_open_req), sizeof (struct cpl_act_open_req6) }, { sizeof (struct cpl_t5_act_open_req), sizeof (struct cpl_t5_act_open_req6) }, { sizeof (struct cpl_t6_act_open_req), sizeof (struct cpl_t6_act_open_req6) }, }; MPASS(chip_id(sc) >= CHELSIO_T4); idx = min(chip_id(sc) - CHELSIO_T4, 2); return (sz_table[idx][!!isipv6]); } /* * active open (soconnect). * * State of affairs on entry: * soisconnecting (so_state |= SS_ISCONNECTING) * tcbinfo not locked (This has changed - used to be WLOCKed) * inp WLOCKed * tp->t_state = TCPS_SYN_SENT * rtalloc1, RT_UNLOCK on rt. */ int -t4_connect(struct toedev *tod, struct socket *so, struct rtentry *rt, +t4_connect(struct toedev *tod, struct socket *so, struct nhop_object *nh, struct sockaddr *nam) { struct adapter *sc = tod->tod_softc; struct toepcb *toep = NULL; struct wrqe *wr = NULL; - struct ifnet *rt_ifp = rt->rt_ifp; + struct ifnet *rt_ifp = nh->nh_ifp; struct vi_info *vi; int qid_atid, rc, isipv6; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); int reason; struct offload_settings settings; struct epoch_tracker et; uint16_t vid = 0xfff, pcp = 0; INP_WLOCK_ASSERT(inp); KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6, ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family)); if (rt_ifp->if_type == IFT_ETHER) vi = rt_ifp->if_softc; else if (rt_ifp->if_type == IFT_L2VLAN) { struct ifnet *ifp = VLAN_TRUNKDEV(rt_ifp); vi = ifp->if_softc; VLAN_TAG(rt_ifp, &vid); VLAN_PCP(rt_ifp, &pcp); } else if (rt_ifp->if_type == IFT_IEEE8023ADLAG) DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement 
lagg+TOE */ else DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); if (sc->flags & KERN_TLS_OK) DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); rw_rlock(&sc->policy_lock); settings = *lookup_offload_policy(sc, OPEN_TYPE_ACTIVE, NULL, EVL_MAKETAG(vid, pcp, 0), inp); rw_runlock(&sc->policy_lock); if (!settings.offload) DONT_OFFLOAD_ACTIVE_OPEN(EPERM); toep = alloc_toepcb(vi, M_NOWAIT); if (toep == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->tid = alloc_atid(sc, toep); if (toep->tid < 0) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->l2te = t4_l2t_get(vi->pi, rt_ifp, - rt->rt_flags & RTF_GATEWAY ? rt->rt_gateway : nam); + nh->nh_flags & NHF_GATEWAY ? &nh->gw_sa : nam); if (toep->l2te == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->vnet = so->so_vnet; init_conn_params(vi, &settings, &inp->inp_inc, so, NULL, toep->l2te->idx, &toep->params); init_toepcb(vi, toep); isipv6 = nam->sa_family == AF_INET6; wr = alloc_wrqe(act_open_cpl_size(sc, isipv6), toep->ctrlq); if (wr == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); qid_atid = V_TID_QID(toep->ofld_rxq->iq.abs_id) | V_TID_TID(toep->tid) | V_TID_COOKIE(CPL_COOKIE_TOM); if (isipv6) { struct cpl_act_open_req6 *cpl = wrtod(wr); struct cpl_t5_act_open_req6 *cpl5 = (void *)cpl; struct cpl_t6_act_open_req6 *cpl6 = (void *)cpl; if ((inp->inp_vflag & INP_IPV6) == 0) DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); toep->ce = t4_hold_lip(sc, &inp->in6p_laddr, NULL); if (toep->ce == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOENT); switch (chip_id(sc)) { case CHELSIO_T4: INIT_TP_WR(cpl, 0); cpl->params = select_ntuple(vi, toep->l2te); break; case CHELSIO_T5: INIT_TP_WR(cpl5, 0); cpl5->iss = htobe32(tp->iss); cpl5->params = select_ntuple(vi, toep->l2te); break; case CHELSIO_T6: default: INIT_TP_WR(cpl6, 0); cpl6->iss = htobe32(tp->iss); cpl6->params = select_ntuple(vi, toep->l2te); break; } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_atid)); cpl->local_port = inp->inp_lport; cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; cpl->local_ip_lo = *(uint64_t 
*)&inp->in6p_laddr.s6_addr[8]; cpl->peer_port = inp->inp_fport; cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0]; cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8]; cpl->opt0 = calc_options0(vi, &toep->params); cpl->opt2 = calc_options2(vi, &toep->params); CTR6(KTR_CXGBE, "%s: atid %u, toep %p, inp %p, opt0 %#016lx, opt2 %#08x", __func__, toep->tid, toep, inp, be64toh(cpl->opt0), be32toh(cpl->opt2)); } else { struct cpl_act_open_req *cpl = wrtod(wr); struct cpl_t5_act_open_req *cpl5 = (void *)cpl; struct cpl_t6_act_open_req *cpl6 = (void *)cpl; switch (chip_id(sc)) { case CHELSIO_T4: INIT_TP_WR(cpl, 0); cpl->params = select_ntuple(vi, toep->l2te); break; case CHELSIO_T5: INIT_TP_WR(cpl5, 0); cpl5->iss = htobe32(tp->iss); cpl5->params = select_ntuple(vi, toep->l2te); break; case CHELSIO_T6: default: INIT_TP_WR(cpl6, 0); cpl6->iss = htobe32(tp->iss); cpl6->params = select_ntuple(vi, toep->l2te); break; } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip, &cpl->peer_port); cpl->opt0 = calc_options0(vi, &toep->params); cpl->opt2 = calc_options2(vi, &toep->params); CTR6(KTR_CXGBE, "%s: atid %u, toep %p, inp %p, opt0 %#016lx, opt2 %#08x", __func__, toep->tid, toep, inp, be64toh(cpl->opt0), be32toh(cpl->opt2)); } offload_socket(so, toep); NET_EPOCH_ENTER(et); rc = t4_l2t_send(sc, wr, toep->l2te); NET_EPOCH_EXIT(et); if (rc == 0) { toep->flags |= TPF_CPL_PENDING; return (0); } undo_offload_socket(so); reason = __LINE__; failed: CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc); if (wr) free_wrqe(wr); if (toep) { if (toep->tid >= 0) free_atid(sc, toep->tid); if (toep->l2te) t4_l2t_release(toep->l2te); if (toep->ce) t4_release_lip(sc, toep->ce); free_toepcb(toep); } return (rc); } #endif Index: head/sys/dev/cxgbe/tom/t4_tom.h =================================================================== --- head/sys/dev/cxgbe/tom/t4_tom.h (revision 360190) 
+++ head/sys/dev/cxgbe/tom/t4_tom.h (revision 360191) @@ -1,457 +1,457 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012, 2015 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #ifndef __T4_TOM_H__ #define __T4_TOM_H__ #include #include "common/t4_hw.h" #include "common/t4_msg.h" #include "tom/t4_tls.h" #define LISTEN_HASH_SIZE 32 /* * Min receive window. We want it to be large enough to accommodate receive * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. */ #define MIN_RCV_WND (24 * 1024U) /* * Max receive window supported by HW in bytes. 
Only a small part of it can * be set through option0, the rest needs to be set through RX_DATA_ACK. */ #define MAX_RCV_WND ((1U << 27) - 1) #define DDP_RSVD_WIN (16 * 1024U) #define SB_DDP_INDICATE SB_IN_TOE /* soreceive must respond to indicate */ #define USE_DDP_RX_FLOW_CONTROL #define PPOD_SZ(n) ((n) * sizeof(struct pagepod)) #define PPOD_SIZE (PPOD_SZ(1)) /* TOE PCB flags */ enum { TPF_ATTACHED = (1 << 0), /* a tcpcb refers to this toepcb */ TPF_FLOWC_WR_SENT = (1 << 1), /* firmware flow context WR sent */ TPF_TX_DATA_SENT = (1 << 2), /* some data sent */ TPF_TX_SUSPENDED = (1 << 3), /* tx suspended for lack of resources */ TPF_SEND_FIN = (1 << 4), /* send FIN after all pending data */ TPF_FIN_SENT = (1 << 5), /* FIN has been sent */ TPF_ABORT_SHUTDOWN = (1 << 6), /* connection abort is in progress */ TPF_CPL_PENDING = (1 << 7), /* haven't received the last CPL */ TPF_SYNQE = (1 << 8), /* synq_entry, not really a toepcb */ TPF_SYNQE_EXPANDED = (1 << 9), /* toepcb ready, tid context updated */ TPF_FORCE_CREDITS = (1 << 10), /* always send credits */ TPF_KTLS = (1 << 11), /* send TLS records from KTLS */ TPF_INITIALIZED = (1 << 12), /* init_toepcb has been called */ }; enum { DDP_OK = (1 << 0), /* OK to turn on DDP */ DDP_SC_REQ = (1 << 1), /* state change (on/off) requested */ DDP_ON = (1 << 2), /* DDP is turned on */ DDP_BUF0_ACTIVE = (1 << 3), /* buffer 0 in use (not invalidated) */ DDP_BUF1_ACTIVE = (1 << 4), /* buffer 1 in use (not invalidated) */ DDP_TASK_ACTIVE = (1 << 5), /* requeue task is queued / running */ DDP_DEAD = (1 << 6), /* toepcb is shutting down */ }; struct sockopt; struct offload_settings; /* * Connection parameters for an offloaded connection. These are mostly (but not * all) hardware TOE parameters. 
*/ struct conn_params { int8_t rx_coalesce; int8_t cong_algo; int8_t tc_idx; int8_t tstamp; int8_t sack; int8_t nagle; int8_t keepalive; int8_t wscale; int8_t ecn; int8_t mtu_idx; int8_t ulp_mode; int8_t tx_align; int16_t txq_idx; /* ofld_txq = &sc->sge.ofld_txq[txq_idx] */ int16_t rxq_idx; /* ofld_rxq = &sc->sge.ofld_rxq[rxq_idx] */ int16_t l2t_idx; uint16_t emss; uint16_t opt0_bufsize; u_int sndbuf; /* controls TP tx pages */ }; struct ofld_tx_sdesc { uint32_t plen; /* payload length */ uint8_t tx_credits; /* firmware tx credits (unit is 16B) */ void *iv_buffer; /* optional buffer holding IVs for TLS */ }; struct ppod_region { u_int pr_start; u_int pr_len; u_int pr_page_shift[4]; uint32_t pr_tag_mask; /* hardware tagmask for this region. */ uint32_t pr_invalid_bit; /* OR with this to invalidate tag. */ uint32_t pr_alias_mask; /* AND with tag to get alias bits. */ u_int pr_alias_shift; /* shift this much for first alias bit. */ vmem_t *pr_arena; }; struct ppod_reservation { struct ppod_region *prsv_pr; uint32_t prsv_tag; /* Full tag: pgsz, alias, tag, color */ u_int prsv_nppods; }; struct pageset { TAILQ_ENTRY(pageset) link; vm_page_t *pages; int npages; int flags; int offset; /* offset in first page */ int len; struct ppod_reservation prsv; struct vmspace *vm; vm_offset_t start; u_int vm_timestamp; }; TAILQ_HEAD(pagesetq, pageset); #define PS_PPODS_WRITTEN 0x0001 /* Page pods written to the card. 
*/ struct ddp_buffer { struct pageset *ps; struct kaiocb *job; int cancel_pending; }; struct ddp_pcb { u_int flags; struct ddp_buffer db[2]; TAILQ_HEAD(, pageset) cached_pagesets; TAILQ_HEAD(, kaiocb) aiojobq; u_int waiting_count; u_int active_count; u_int cached_count; int active_id; /* the currently active DDP buffer */ struct task requeue_task; struct kaiocb *queueing; struct mtx lock; }; struct toepcb { struct tom_data *td; struct inpcb *inp; /* backpointer to host stack's PCB */ u_int flags; /* miscellaneous flags */ TAILQ_ENTRY(toepcb) link; /* toep_list */ int refcount; struct vnet *vnet; struct vi_info *vi; /* virtual interface */ struct sge_wrq *ofld_txq; struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ctrlq; struct l2t_entry *l2te; /* L2 table entry used by this connection */ struct clip_entry *ce; /* CLIP table entry used by this tid */ int tid; /* Connection identifier */ /* tx credit handling */ u_int tx_total; /* total tx WR credits (in 16B units) */ u_int tx_credits; /* tx WR credits (in 16B units) available */ u_int tx_nocompl; /* tx WR credits since last compl request */ u_int plen_nocompl; /* payload since last compl request */ struct conn_params params; void *ulpcb; void *ulpcb2; struct mbufq ulp_pduq; /* PDUs waiting to be sent out. */ struct mbufq ulp_pdu_reclaimq; struct ddp_pcb ddp; struct tls_ofld_info tls; TAILQ_HEAD(, kaiocb) aiotx_jobq; struct task aiotx_task; struct socket *aiotx_so; /* Tx software descriptor */ uint8_t txsd_total; uint8_t txsd_pidx; uint8_t txsd_cidx; uint8_t txsd_avail; struct ofld_tx_sdesc txsd[]; }; static inline int ulp_mode(struct toepcb *toep) { return (toep->params.ulp_mode); } #define DDP_LOCK(toep) mtx_lock(&(toep)->ddp.lock) #define DDP_UNLOCK(toep) mtx_unlock(&(toep)->ddp.lock) #define DDP_ASSERT_LOCKED(toep) mtx_assert(&(toep)->ddp.lock, MA_OWNED) /* * Compressed state for embryonic connections for a listener. 
*/ struct synq_entry { struct listen_ctx *lctx; /* backpointer to listen ctx */ struct mbuf *syn; int flags; /* same as toepcb's tp_flags */ volatile int ok_to_respond; volatile u_int refcnt; int tid; uint32_t iss; uint32_t irs; uint32_t ts; uint32_t rss_hash; __be16 tcp_opt; /* from cpl_pass_establish */ struct toepcb *toep; struct conn_params params; }; /* listen_ctx flags */ #define LCTX_RPL_PENDING 1 /* waiting for a CPL_PASS_OPEN_RPL */ struct listen_ctx { LIST_ENTRY(listen_ctx) link; /* listen hash linkage */ volatile int refcount; int stid; struct stid_region stid_region; int flags; struct inpcb *inp; /* listening socket's inp */ struct vnet *vnet; struct sge_wrq *ctrlq; struct sge_ofld_rxq *ofld_rxq; struct clip_entry *ce; }; /* tcb_histent flags */ #define TE_RPL_PENDING 1 #define TE_ACTIVE 2 /* bits in one 8b tcb_histent sample. */ #define TS_RTO (1 << 0) #define TS_DUPACKS (1 << 1) #define TS_FASTREXMT (1 << 2) #define TS_SND_BACKLOGGED (1 << 3) #define TS_CWND_LIMITED (1 << 4) #define TS_ECN_ECE (1 << 5) #define TS_ECN_CWR (1 << 6) #define TS_RESERVED (1 << 7) /* Unused. 
*/ struct tcb_histent { struct mtx te_lock; struct callout te_callout; uint64_t te_tcb[TCB_SIZE / sizeof(uint64_t)]; struct adapter *te_adapter; u_int te_flags; u_int te_tid; uint8_t te_pidx; uint8_t te_sample[100]; }; struct tom_data { struct toedev tod; /* toepcb's associated with this TOE device */ struct mtx toep_list_lock; TAILQ_HEAD(, toepcb) toep_list; struct mtx lctx_hash_lock; LIST_HEAD(, listen_ctx) *listen_hash; u_long listen_mask; int lctx_count; /* # of lctx in the hash table */ struct ppod_region pr; struct rwlock tcb_history_lock __aligned(CACHE_LINE_SIZE); struct tcb_histent **tcb_history; int dupack_threshold; /* WRs that will not be sent to the chip because L2 resolution failed */ struct mtx unsent_wr_lock; STAILQ_HEAD(, wrqe) unsent_wr_list; struct task reclaim_wr_resources; }; static inline struct tom_data * tod_td(struct toedev *tod) { return (__containerof(tod, struct tom_data, tod)); } static inline struct adapter * td_adapter(struct tom_data *td) { return (td->tod.tod_softc); } static inline void set_mbuf_ulp_submode(struct mbuf *m, uint8_t ulp_submode) { M_ASSERTPKTHDR(m); m->m_pkthdr.PH_per.eight[0] = ulp_submode; } static inline uint8_t mbuf_ulp_submode(struct mbuf *m) { M_ASSERTPKTHDR(m); return (m->m_pkthdr.PH_per.eight[0]); } /* t4_tom.c */ struct toepcb *alloc_toepcb(struct vi_info *, int); int init_toepcb(struct vi_info *, struct toepcb *); struct toepcb *hold_toepcb(struct toepcb *); void free_toepcb(struct toepcb *); void offload_socket(struct socket *, struct toepcb *); void undo_offload_socket(struct socket *); void final_cpl_received(struct toepcb *); void insert_tid(struct adapter *, int, void *, int); void *lookup_tid(struct adapter *, int); void update_tid(struct adapter *, int, void *); void remove_tid(struct adapter *, int, int); u_long select_rcv_wnd(struct socket *); int select_rcv_wscale(void); void init_conn_params(struct vi_info *, struct offload_settings *, struct in_conninfo *, struct socket *, const struct 
tcp_options *, int16_t, struct conn_params *cp); __be64 calc_options0(struct vi_info *, struct conn_params *); __be32 calc_options2(struct vi_info *, struct conn_params *); uint64_t select_ntuple(struct vi_info *, struct l2t_entry *); int negative_advice(int); int add_tid_to_history(struct adapter *, u_int); /* t4_connect.c */ void t4_init_connect_cpl_handlers(void); void t4_uninit_connect_cpl_handlers(void); -int t4_connect(struct toedev *, struct socket *, struct rtentry *, +int t4_connect(struct toedev *, struct socket *, struct nhop_object *, struct sockaddr *); void act_open_failure_cleanup(struct adapter *, u_int, u_int); /* t4_listen.c */ void t4_init_listen_cpl_handlers(void); void t4_uninit_listen_cpl_handlers(void); int t4_listen_start(struct toedev *, struct tcpcb *); int t4_listen_stop(struct toedev *, struct tcpcb *); void t4_syncache_added(struct toedev *, void *); void t4_syncache_removed(struct toedev *, void *); int t4_syncache_respond(struct toedev *, void *, struct mbuf *); int do_abort_req_synqe(struct sge_iq *, const struct rss_header *, struct mbuf *); int do_abort_rpl_synqe(struct sge_iq *, const struct rss_header *, struct mbuf *); void t4_offload_socket(struct toedev *, void *, struct socket *); void synack_failure_cleanup(struct adapter *, int); /* t4_cpl_io.c */ void aiotx_init_toep(struct toepcb *); int t4_aio_queue_aiotx(struct socket *, struct kaiocb *); void t4_init_cpl_io_handlers(void); void t4_uninit_cpl_io_handlers(void); void send_abort_rpl(struct adapter *, struct sge_wrq *, int , int); void send_flowc_wr(struct toepcb *, struct tcpcb *); void send_reset(struct adapter *, struct toepcb *, uint32_t); int send_rx_credits(struct adapter *, struct toepcb *, int); void send_rx_modulate(struct adapter *, struct toepcb *); void make_established(struct toepcb *, uint32_t, uint32_t, uint16_t); int t4_close_conn(struct adapter *, struct toepcb *); void t4_rcvd(struct toedev *, struct tcpcb *); void t4_rcvd_locked(struct toedev *, struct 
tcpcb *); int t4_tod_output(struct toedev *, struct tcpcb *); int t4_send_fin(struct toedev *, struct tcpcb *); int t4_send_rst(struct toedev *, struct tcpcb *); void t4_set_tcb_field(struct adapter *, struct sge_wrq *, struct toepcb *, uint16_t, uint64_t, uint64_t, int, int); void t4_push_frames(struct adapter *, struct toepcb *, int); void t4_push_pdus(struct adapter *, struct toepcb *, int); /* t4_ddp.c */ int t4_init_ppod_region(struct ppod_region *, struct t4_range *, u_int, const char *); void t4_free_ppod_region(struct ppod_region *); int t4_alloc_page_pods_for_ps(struct ppod_region *, struct pageset *); int t4_alloc_page_pods_for_buf(struct ppod_region *, vm_offset_t, int, struct ppod_reservation *); int t4_write_page_pods_for_ps(struct adapter *, struct sge_wrq *, int, struct pageset *); int t4_write_page_pods_for_buf(struct adapter *, struct sge_wrq *, int tid, struct ppod_reservation *, vm_offset_t, int); void t4_free_page_pods(struct ppod_reservation *); int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *, struct mbuf **, struct mbuf **, int *); int t4_aio_queue_ddp(struct socket *, struct kaiocb *); void t4_ddp_mod_load(void); void t4_ddp_mod_unload(void); void ddp_assert_empty(struct toepcb *); void ddp_init_toep(struct toepcb *); void ddp_uninit_toep(struct toepcb *); void ddp_queue_toep(struct toepcb *); void release_ddp_resources(struct toepcb *toep); void handle_ddp_close(struct toepcb *, struct tcpcb *, uint32_t); void handle_ddp_indicate(struct toepcb *); void insert_ddp_data(struct toepcb *, uint32_t); const struct offload_settings *lookup_offload_policy(struct adapter *, int, struct mbuf *, uint16_t, struct inpcb *); /* t4_tls.c */ bool can_tls_offload(struct adapter *); int t4_ctloutput_tls(struct socket *, struct sockopt *); void t4_push_tls_records(struct adapter *, struct toepcb *, int); void t4_push_ktls(struct adapter *, struct toepcb *, int); void t4_tls_mod_load(void); void t4_tls_mod_unload(void); void 
tls_establish(struct toepcb *); void tls_init_toep(struct toepcb *); int tls_rx_key(struct toepcb *); void tls_stop_handshake_timer(struct toepcb *); int tls_tx_key(struct toepcb *); void tls_uninit_toep(struct toepcb *); int tls_alloc_ktls(struct toepcb *, struct ktls_session *); #endif Index: head/sys/netinet/tcp_offload.c =================================================================== --- head/sys/netinet/tcp_offload.c (revision 360190) +++ head/sys/netinet/tcp_offload.c (revision 360191) @@ -1,201 +1,212 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include +#include #include #include +#include +#include #include #include #define TCPOUTFLAGS #include #include #include int registered_toedevs; /* * Provide an opportunity for a TOE driver to offload. */ int tcp_offload_connect(struct socket *so, struct sockaddr *nam) { struct ifnet *ifp; struct toedev *tod; - struct rtentry *rt; + struct nhop_object *nh; + struct epoch_tracker et; int error = EOPNOTSUPP; INP_WLOCK_ASSERT(sotoinpcb(so)); KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6, ("%s: called with sa_family %d", __func__, nam->sa_family)); if (registered_toedevs == 0) return (error); - rt = rtalloc1(nam, 0, 0); - if (rt) - RT_UNLOCK(rt); - else + NET_EPOCH_ENTER(et); + nh = NULL; + if (nam->sa_family == AF_INET) + nh = fib4_lookup(0, ((struct sockaddr_in *)nam)->sin_addr, + NHR_NONE, 0, 0); + else if (nam->sa_family == AF_INET6) + nh = fib6_lookup(0, &((struct sockaddr_in6 *)nam)->sin6_addr, + NHR_NONE, 0, 0); + if (nh == NULL) { + NET_EPOCH_EXIT(et); return (EHOSTUNREACH); + } - ifp = rt->rt_ifp; + ifp = nh->nh_ifp; if (nam->sa_family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) goto done; if (nam->sa_family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)) goto done; tod = TOEDEV(ifp); if (tod != NULL) - error = tod->tod_connect(tod, so, rt, nam); + error = tod->tod_connect(tod, so, nh, nam); done: - RTFREE(rt); + NET_EPOCH_EXIT(et); return (error); } void tcp_offload_listen_start(struct tcpcb *tp) { INP_WLOCK_ASSERT(tp->t_inpcb); EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp); } void tcp_offload_listen_stop(struct tcpcb *tp) { INP_WLOCK_ASSERT(tp->t_inpcb); EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp); } void tcp_offload_input(struct tcpcb *tp, struct mbuf *m) { struct toedev *tod = tp->tod; KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp)); INP_WLOCK_ASSERT(tp->t_inpcb); 
tod->tod_input(tod, tp, m); } int tcp_offload_output(struct tcpcb *tp) { struct toedev *tod = tp->tod; int error, flags; KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp)); INP_WLOCK_ASSERT(tp->t_inpcb); flags = tcp_outflags[tp->t_state]; if (flags & TH_RST) { /* XXX: avoid repeated calls like we do for FIN */ error = tod->tod_send_rst(tod, tp); } else if ((flags & TH_FIN || tp->t_flags & TF_NEEDFIN) && (tp->t_flags & TF_SENTFIN) == 0) { error = tod->tod_send_fin(tod, tp); if (error == 0) tp->t_flags |= TF_SENTFIN; } else error = tod->tod_output(tod, tp); return (error); } void tcp_offload_rcvd(struct tcpcb *tp) { struct toedev *tod = tp->tod; KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp)); INP_WLOCK_ASSERT(tp->t_inpcb); tod->tod_rcvd(tod, tp); } void tcp_offload_ctloutput(struct tcpcb *tp, int sopt_dir, int sopt_name) { struct toedev *tod = tp->tod; KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp)); INP_WLOCK_ASSERT(tp->t_inpcb); tod->tod_ctloutput(tod, tp, sopt_dir, sopt_name); } void tcp_offload_tcp_info(struct tcpcb *tp, struct tcp_info *ti) { struct toedev *tod = tp->tod; KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp)); INP_WLOCK_ASSERT(tp->t_inpcb); tod->tod_tcp_info(tod, tp, ti); } int tcp_offload_alloc_tls_session(struct tcpcb *tp, struct ktls_session *tls) { struct toedev *tod = tp->tod; KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp)); INP_WLOCK_ASSERT(tp->t_inpcb); return (tod->tod_alloc_tls_session(tod, tp, tls)); } void tcp_offload_detach(struct tcpcb *tp) { struct toedev *tod = tp->tod; KASSERT(tod != NULL, ("%s: tp->tod is NULL, tp %p", __func__, tp)); INP_WLOCK_ASSERT(tp->t_inpcb); tod->tod_pcb_detach(tod, tp); } Index: head/sys/netinet/toecore.c =================================================================== --- head/sys/netinet/toecore.c (revision 360190) +++ head/sys/netinet/toecore.c (revision 360191) @@ -1,596 +1,596 @@ /*- * SPDX-License-Identifier: 
BSD-2-Clause-FreeBSD * * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #include static struct mtx toedev_lock; static TAILQ_HEAD(, toedev) toedev_list; static eventhandler_tag listen_start_eh; static eventhandler_tag listen_stop_eh; static eventhandler_tag lle_event_eh; static int toedev_connect(struct toedev *tod __unused, struct socket *so __unused, - struct rtentry *rt __unused, struct sockaddr *nam __unused) + struct nhop_object *nh __unused, struct sockaddr *nam __unused) { return (ENOTSUP); } static int toedev_listen_start(struct toedev *tod __unused, struct tcpcb *tp __unused) { return (ENOTSUP); } static int toedev_listen_stop(struct toedev *tod __unused, struct tcpcb *tp __unused) { return (ENOTSUP); } static void toedev_input(struct toedev *tod __unused, struct tcpcb *tp __unused, struct mbuf *m) { m_freem(m); return; } static void toedev_rcvd(struct toedev *tod __unused, struct tcpcb *tp __unused) { return; } static int toedev_output(struct toedev *tod __unused, struct tcpcb *tp __unused) { return (ENOTSUP); } static void toedev_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp __unused) { return; } static void toedev_l2_update(struct toedev *tod __unused, struct ifnet *ifp __unused, struct sockaddr *sa __unused, uint8_t *lladdr __unused, uint16_t vtag __unused) { return; } static void toedev_route_redirect(struct toedev *tod __unused, struct ifnet *ifp __unused, - struct rtentry *rt0 __unused, struct rtentry *rt1 __unused) + struct nhop_object *nh0 __unused, struct nhop_object *nh1 __unused) { return; } static void toedev_syncache_added(struct toedev *tod __unused, void *ctx __unused) { return; } static void toedev_syncache_removed(struct toedev 
*tod __unused, void *ctx __unused) { return; } static int toedev_syncache_respond(struct toedev *tod __unused, void *ctx __unused, struct mbuf *m) { m_freem(m); return (0); } static void toedev_offload_socket(struct toedev *tod __unused, void *ctx __unused, struct socket *so __unused) { return; } static void toedev_ctloutput(struct toedev *tod __unused, struct tcpcb *tp __unused, int sopt_dir __unused, int sopt_name __unused) { return; } static void toedev_tcp_info(struct toedev *tod __unused, struct tcpcb *tp __unused, struct tcp_info *ti __unused) { return; } static int toedev_alloc_tls_session(struct toedev *tod __unused, struct tcpcb *tp __unused, struct ktls_session *tls __unused) { return (EINVAL); } /* * Inform one or more TOE devices about a listening socket. */ static void toe_listen_start(struct inpcb *inp, void *arg) { struct toedev *t, *tod; struct tcpcb *tp; INP_WLOCK_ASSERT(inp); KASSERT(inp->inp_pcbinfo == &V_tcbinfo, ("%s: inp is not a TCP inp", __func__)); if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) return; tp = intotcpcb(inp); if (tp->t_state != TCPS_LISTEN) return; t = arg; mtx_lock(&toedev_lock); TAILQ_FOREACH(tod, &toedev_list, link) { if (t == NULL || t == tod) tod->tod_listen_start(tod, tp); } mtx_unlock(&toedev_lock); } static void toe_listen_start_event(void *arg __unused, struct tcpcb *tp) { struct inpcb *inp = tp->t_inpcb; INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_LISTEN, ("%s: t_state %s", __func__, tcpstates[tp->t_state])); toe_listen_start(inp, NULL); } static void toe_listen_stop_event(void *arg __unused, struct tcpcb *tp) { struct toedev *tod; #ifdef INVARIANTS struct inpcb *inp = tp->t_inpcb; #endif INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_LISTEN, ("%s: t_state %s", __func__, tcpstates[tp->t_state])); mtx_lock(&toedev_lock); TAILQ_FOREACH(tod, &toedev_list, link) tod->tod_listen_stop(tod, tp); mtx_unlock(&toedev_lock); } /* * Fill up a freshly allocated toedev struct with reasonable defaults. 
*/ void init_toedev(struct toedev *tod) { tod->tod_softc = NULL; /* * Provide no-op defaults so that the kernel can call any toedev * function without having to check whether the TOE driver supplied one * or not. */ tod->tod_connect = toedev_connect; tod->tod_listen_start = toedev_listen_start; tod->tod_listen_stop = toedev_listen_stop; tod->tod_input = toedev_input; tod->tod_rcvd = toedev_rcvd; tod->tod_output = toedev_output; tod->tod_send_rst = toedev_output; tod->tod_send_fin = toedev_output; tod->tod_pcb_detach = toedev_pcb_detach; tod->tod_l2_update = toedev_l2_update; tod->tod_route_redirect = toedev_route_redirect; tod->tod_syncache_added = toedev_syncache_added; tod->tod_syncache_removed = toedev_syncache_removed; tod->tod_syncache_respond = toedev_syncache_respond; tod->tod_offload_socket = toedev_offload_socket; tod->tod_ctloutput = toedev_ctloutput; tod->tod_tcp_info = toedev_tcp_info; tod->tod_alloc_tls_session = toedev_alloc_tls_session; } /* * Register an active TOE device with the system. This allows it to receive * notifications from the kernel. */ int register_toedev(struct toedev *tod) { struct toedev *t; mtx_lock(&toedev_lock); TAILQ_FOREACH(t, &toedev_list, link) { if (t == tod) { mtx_unlock(&toedev_lock); return (EEXIST); } } TAILQ_INSERT_TAIL(&toedev_list, tod, link); registered_toedevs++; mtx_unlock(&toedev_lock); inp_apply_all(toe_listen_start, tod); return (0); } /* * Remove the TOE device from the global list of active TOE devices. It is the * caller's responsibility to ensure that the TOE device is quiesced prior to * this call. 
*/ int unregister_toedev(struct toedev *tod) { struct toedev *t, *t2; int rc = ENODEV; mtx_lock(&toedev_lock); TAILQ_FOREACH_SAFE(t, &toedev_list, link, t2) { if (t == tod) { TAILQ_REMOVE(&toedev_list, tod, link); registered_toedevs--; rc = 0; break; } } KASSERT(registered_toedevs >= 0, ("%s: registered_toedevs (%d) < 0", __func__, registered_toedevs)); mtx_unlock(&toedev_lock); return (rc); } void toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct inpcb *inp, void *tod, void *todctx, uint8_t iptos) { struct socket *lso = inp->inp_socket; INP_WLOCK_ASSERT(inp); syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx, iptos); } int toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct socket **lsop) { NET_EPOCH_ASSERT(); return (syncache_expand(inc, to, th, lsop, NULL)); } /* * General purpose check to see if a 4-tuple is in use by the kernel. If a TCP * header (presumably for an incoming SYN) is also provided, an existing 4-tuple * in TIME_WAIT may be assassinated freeing it up for re-use. * * Note that the TCP header must have been run through tcp_fields_to_host() or * equivalent. 
*/
int
toe_4tuple_check(struct in_conninfo *inc, struct tcphdr *th, struct ifnet *ifp)
{
	struct inpcb *inp;

	/*
	 * Look the 4-tuple up in the TCP pcb table with the IPv6 or IPv4
	 * lookup, as selected by INC_ISIPV6.  INPLOOKUP_WLOCKPCB is passed and
	 * a hit is asserted to be write-locked below.
	 */
	if (inc->inc_flags & INC_ISIPV6) {
		inp = in6_pcblookup(&V_tcbinfo, &inc->inc6_faddr,
		    inc->inc_fport, &inc->inc6_laddr, inc->inc_lport,
		    INPLOOKUP_WLOCKPCB, ifp);
	} else {
		inp = in_pcblookup(&V_tcbinfo, inc->inc_faddr, inc->inc_fport,
		    inc->inc_laddr, inc->inc_lport, INPLOOKUP_WLOCKPCB, ifp);
	}
	if (inp != NULL) {
		INP_WLOCK_ASSERT(inp);

		if ((inp->inp_flags & INP_TIMEWAIT) && th != NULL) {
			/*
			 * TIME_WAIT entry and a header to judge it by: a zero
			 * return from tcp_twcheck() is reported as EADDRINUSE,
			 * anything else falls through to the final return 0.
			 * NOTE(review): the inp is not unlocked on either path
			 * here -- presumably tcp_twcheck() releases the lock
			 * itself; confirm against its definition.
			 */
			if (!tcp_twcheck(inp, NULL, th, NULL, 0))
				return (EADDRINUSE);
		} else {
			/*
			 * Not in TIME_WAIT, or no header was supplied: drop
			 * the lock and report the 4-tuple as in use.
			 */
			INP_WUNLOCK(inp);
			return (EADDRINUSE);
		}
	}

	/* No matching inp, or the TIME_WAIT check above did not object. */
	return (0);
}

/*
 * lle_event handler (registered in toecore_load).  Forwards a link-layer entry
 * event to the TOE device returned by TOEDEV(ifp) through tod_l2_update,
 * provided the entry's table is AF_INET or AF_INET6 and the matching
 * IFCAP_TOE4/IFCAP_TOE6 capability is enabled on the interface.  The lle must
 * be write-locked (asserted).
 */
static void
toe_lle_event(void *arg __unused, struct llentry *lle, int evt)
{
	struct toedev *tod;
	struct ifnet *ifp;
	struct sockaddr *sa;
	uint8_t *lladdr;
	uint16_t vid, pcp;
	int family;
	struct sockaddr_in6 sin6;	/* backing storage for sa, both families */

	LLE_WLOCK_ASSERT(lle);

	ifp = lltable_get_ifp(lle->lle_tbl);
	family = lltable_get_af(lle->lle_tbl);

	if (family != AF_INET && family != AF_INET6)
		return;
	/*
	 * Not interested if the interface's TOE capability is not enabled.
	 */
	if ((family == AF_INET && !(ifp->if_capenable & IFCAP_TOE4)) ||
	    (family == AF_INET6 && !(ifp->if_capenable & IFCAP_TOE6)))
		return;

	tod = TOEDEV(ifp);
	if (tod == NULL)
		return;

	/* The address handed to the driver is filled in from the lle. */
	sa = (struct sockaddr *)&sin6;
	lltable_fill_sa_entry(lle, sa);

	/* Values reported when no VLAN information is filled in below. */
	vid = 0xfff;
	pcp = 0;
	if (evt != LLENTRY_RESOLVED) {

		/*
		 * LLENTRY_TIMEDOUT, LLENTRY_DELETED, LLENTRY_EXPIRED all mean
		 * this entry is going to be deleted.
		 */

		lladdr = NULL;
	} else {

		KASSERT(lle->la_flags & LLE_VALID,
		    ("%s: %p resolved but not valid?", __func__, lle));

		lladdr = (uint8_t *)lle->ll_addr;
		/*
		 * NOTE(review): unlike toe_l2_resolve, there is no if_type or
		 * if_pcp check around these two calls -- confirm that the
		 * difference is intentional.
		 */
		VLAN_TAG(ifp, &vid);
		VLAN_PCP(ifp, &pcp);
	}

	/* A NULL lladdr tells the driver the entry did not resolve. */
	tod->tod_l2_update(tod, ifp, sa, lladdr, EVL_MAKETAG(vid, pcp, 0));
}

/*
 * Returns 0 or EWOULDBLOCK on success (any other value is an error). 0 means
 * lladdr and vtag are valid on return, EWOULDBLOCK means the TOE driver's
 * tod_l2_update will be called later, when the entry is resolved or times out.
*/
int
toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
    uint8_t *lladdr, uint16_t *vtag)
{
	int rc;
	uint16_t vid, pcp;

	/*
	 * Resolve through ARP or ND depending on the address family.  A family
	 * whose support is compiled out (INET/INET6 not defined) ends up in
	 * the default case like any other unsupported family.  Note that tod
	 * is not referenced anywhere in this function.
	 */
	switch (sa->sa_family) {
#ifdef INET
	case AF_INET:
		rc = arpresolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
		break;
#endif
#ifdef INET6
	case AF_INET6:
		rc = nd6_resolve(ifp, 0, NULL, sa, lladdr, NULL, NULL);
		break;
#endif
	default:
		return (EPROTONOSUPPORT);
	}

	if (rc == 0) {
		/*
		 * *vtag is written only when resolution returned 0.  A VLAN
		 * interface supplies both vid and pcp; any other interface
		 * with a PCP configured yields vid 0 with that PCP; otherwise
		 * the defaults (vid 0xfff, pcp 0) are used.
		 */
		vid = 0xfff;
		pcp = 0;
		if (ifp->if_type == IFT_L2VLAN) {
			VLAN_TAG(ifp, &vid);
			VLAN_PCP(ifp, &pcp);
		} else if (ifp->if_pcp != IFNET_PCP_NONE) {
			vid = 0;
			pcp = ifp->if_pcp;
		}
		*vtag = EVL_MAKETAG(vid, pcp, 0);
	}

	return (rc);
}

/*
 * Deal with a failed offloaded active open.  Must be called inside the
 * network epoch with the inp write-locked, and returns with the inp
 * write-locked (both asserted).  Nothing is done for an inp already marked
 * INP_DROPPED.  With err == EAGAIN the connection is detached from the TOE
 * driver and output is started through the TCP function block; any other
 * error drops the connection with that error.
 */
void
toe_connect_failed(struct toedev *tod, struct inpcb *inp, int err)
{

	NET_EPOCH_ASSERT();
	INP_WLOCK_ASSERT(inp);

	if (!(inp->inp_flags & INP_DROPPED)) {
		struct tcpcb *tp = intotcpcb(inp);

		KASSERT(tp->t_flags & TF_TOE,
		    ("%s: tp %p not offloaded.", __func__, tp));

		if (err == EAGAIN) {

			/*
			 * Temporary failure during offload, take this PCB back.
			 * Detach from the TOE driver and do the rest of what
			 * TCP's pru_connect would have done if the connection
			 * wasn't offloaded.
			 */

			tod->tod_pcb_detach(tod, tp);
			/* The driver's detach is expected to clear TF_TOE. */
			KASSERT(!(tp->t_flags & TF_TOE),
			    ("%s: tp %p still offloaded.", __func__, tp));
			tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
			/* The output routine's return value is ignored. */
			(void) tp->t_fb->tfb_tcp_output(tp);
		} else {

			/*
			 * NOTE(review): presumably tcp_drop() returns NULL
			 * after having released the inp lock, hence the
			 * re-lock so the exit assertion holds -- confirm.
			 */
			tp = tcp_drop(tp, err);
			if (tp == NULL)
				INP_WLOCK(inp);	/* re-acquire */
		}
	}
	INP_WLOCK_ASSERT(inp);
}

/*
 * MOD_LOAD: set up the toedev list and its lock, then register the listen
 * start/stop and lle_event handlers, saving the tags for toecore_unload.
 * Always returns 0.
 */
static int
toecore_load(void)
{

	mtx_init(&toedev_lock, "toedev lock", NULL, MTX_DEF);
	TAILQ_INIT(&toedev_list);

	listen_start_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_start,
	    toe_listen_start_event, NULL, EVENTHANDLER_PRI_ANY);
	listen_stop_eh = EVENTHANDLER_REGISTER(tcp_offload_listen_stop,
	    toe_listen_stop_event, NULL, EVENTHANDLER_PRI_ANY);
	lle_event_eh = EVENTHANDLER_REGISTER(lle_event, toe_lle_event, NULL,
	    EVENTHANDLER_PRI_ANY);

	return (0);
}

/*
 * MOD_UNLOAD: refuse with EBUSY while any TOE device is still on the list;
 * otherwise deregister the three event handlers and destroy the lock.
 */
static int
toecore_unload(void)
{

	mtx_lock(&toedev_lock);
	if (!TAILQ_EMPTY(&toedev_list)) {
		mtx_unlock(&toedev_lock);
		return (EBUSY);
	}

	/* The handlers are deregistered while toedev_lock is still held. */
	EVENTHANDLER_DEREGISTER(tcp_offload_listen_start, listen_start_eh);
	EVENTHANDLER_DEREGISTER(tcp_offload_listen_stop, listen_stop_eh);
	EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh);

	/* The mutex is released before it is destroyed. */
	mtx_unlock(&toedev_lock);
	mtx_destroy(&toedev_lock);

	return (0);
}

/*
 * Module event handler: dispatches MOD_LOAD and MOD_UNLOAD to the routines
 * above and answers EOPNOTSUPP to every other command.
 */
static int
toecore_mod_handler(module_t mod, int cmd, void *arg)
{

	if (cmd == MOD_LOAD)
		return (toecore_load());

	if (cmd == MOD_UNLOAD)
		return (toecore_unload());

	return (EOPNOTSUPP);
}

/* Module glue: name, event handler and (unused) handler argument. */
static moduledata_t mod_data= {
	"toecore",
	toecore_mod_handler,
	0
};

MODULE_VERSION(toecore, 1);
DECLARE_MODULE(toecore, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);
Index: head/sys/netinet/toecore.h
===================================================================
--- head/sys/netinet/toecore.h (revision 360190)
+++ head/sys/netinet/toecore.h (revision 360191)
@@ -1,143 +1,144 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NETINET_TOE_H_ #define _NETINET_TOE_H_ #ifndef _KERNEL #error "no user-serviceable parts inside" #endif #include struct tcpopt; struct tcphdr; struct in_conninfo; struct tcp_info; +struct nhop_object; struct ktls_session; struct toedev { TAILQ_ENTRY(toedev) link; /* glue for toedev_list */ void *tod_softc; /* TOE driver private data */ /* * Active open. If a failure occurs, it is reported back by the driver * via toe_connect_failed. */ - int (*tod_connect)(struct toedev *, struct socket *, struct rtentry *, + int (*tod_connect)(struct toedev *, struct socket *, struct nhop_object *, struct sockaddr *); /* Passive open. */ int (*tod_listen_start)(struct toedev *, struct tcpcb *); int (*tod_listen_stop)(struct toedev *, struct tcpcb *); /* * The kernel uses this routine to pass on any frame it receives for an * offloaded connection to the TOE driver. 
This is an unusual event. */ void (*tod_input)(struct toedev *, struct tcpcb *, struct mbuf *); /* * This is called by the kernel during pru_rcvd for an offloaded TCP * connection and provides an opportunity for the TOE driver to manage * its rx window and credits. */ void (*tod_rcvd)(struct toedev *, struct tcpcb *); /* * Transmit routine. The kernel calls this to have the TOE driver * evaluate whether there is data to be transmitted, and transmit it. */ int (*tod_output)(struct toedev *, struct tcpcb *); /* Immediate teardown: send RST to peer. */ int (*tod_send_rst)(struct toedev *, struct tcpcb *); /* Initiate orderly disconnect by sending FIN to the peer. */ int (*tod_send_fin)(struct toedev *, struct tcpcb *); /* Called to indicate that the kernel is done with this TCP PCB. */ void (*tod_pcb_detach)(struct toedev *, struct tcpcb *); /* * The kernel calls this once it has information about an L2 entry that * the TOE driver enquired about previously (via toe_l2_resolve). */ void (*tod_l2_update)(struct toedev *, struct ifnet *, struct sockaddr *, uint8_t *, uint16_t); /* XXX. Route has been redirected. */ void (*tod_route_redirect)(struct toedev *, struct ifnet *, - struct rtentry *, struct rtentry *); + struct nhop_object *, struct nhop_object *); /* Syncache interaction. 
*/ void (*tod_syncache_added)(struct toedev *, void *); void (*tod_syncache_removed)(struct toedev *, void *); int (*tod_syncache_respond)(struct toedev *, void *, struct mbuf *); void (*tod_offload_socket)(struct toedev *, void *, struct socket *); /* TCP socket option */ void (*tod_ctloutput)(struct toedev *, struct tcpcb *, int, int); /* Update software state */ void (*tod_tcp_info)(struct toedev *, struct tcpcb *, struct tcp_info *); /* Create a TLS session */ int (*tod_alloc_tls_session)(struct toedev *, struct tcpcb *, struct ktls_session *); }; typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *); typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *); EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn); EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn); void init_toedev(struct toedev *); int register_toedev(struct toedev *); int unregister_toedev(struct toedev *); /* * General interface for looking up L2 information for an IP address. If an * answer is not available right away then the TOE driver's tod_l2_update will * be called later. */ int toe_l2_resolve(struct toedev *, struct ifnet *, struct sockaddr *, uint8_t *, uint16_t *); void toe_connect_failed(struct toedev *, struct inpcb *, int); void toe_syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *, struct inpcb *, void *, void *, uint8_t); int toe_syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *, struct socket **); int toe_4tuple_check(struct in_conninfo *, struct tcphdr *, struct ifnet *); #endif