diff --git a/sys/dev/cxgbe/t4_filter.c b/sys/dev/cxgbe/t4_filter.c index 8d4552116d96..92f882dd38cf 100644 --- a/sys/dev/cxgbe/t4_filter.c +++ b/sys/dev/cxgbe/t4_filter.c @@ -1,2030 +1,2236 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2018 Chelsio Communications, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_tcb.h" #include "t4_l2t.h" #include "t4_smt.h" struct filter_entry { LIST_ENTRY(filter_entry) link_4t; LIST_ENTRY(filter_entry) link_tid; uint32_t valid:1; /* filter allocated and valid */ uint32_t locked:1; /* filter is administratively locked or busy */ uint32_t pending:1; /* filter action is pending firmware reply */ int tid; /* tid of the filter TCB */ struct l2t_entry *l2te; /* L2 table entry for DMAC rewrite */ struct smt_entry *smt; /* SMT entry for SMAC rewrite */ struct t4_filter_specification fs; }; static void free_filter_resources(struct filter_entry *); static int get_tcamfilter(struct adapter *, struct t4_filter *); static int get_hashfilter(struct adapter *, struct t4_filter *); static int set_hashfilter(struct adapter *, struct t4_filter *, uint64_t, struct l2t_entry *, struct smt_entry *); static int del_hashfilter(struct adapter *, struct t4_filter *); static int configure_hashfilter_tcb(struct adapter *, struct filter_entry *); static inline bool separate_hpfilter_region(struct adapter *sc) { return (chip_id(sc) >= CHELSIO_T6); } static inline uint32_t hf_hashfn_4t(struct t4_filter_specification *fs) { struct t4_filter_tuple *ft = &fs->val; uint32_t hash; if (fs->type) { /* IPv6 */ hash = fnv_32_buf(&ft->sip[0], 16, FNV1_32_INIT); hash = fnv_32_buf(&ft->dip[0], 16, hash); } else { hash = fnv_32_buf(&ft->sip[0], 4, FNV1_32_INIT); hash = fnv_32_buf(&ft->dip[0], 4, hash); } hash = fnv_32_buf(&ft->sport, sizeof(ft->sport), hash); hash = fnv_32_buf(&ft->dport, sizeof(ft->dport), hash); return (hash); } static inline uint32_t hf_hashfn_tid(int tid) { return (fnv_32_buf(&tid, sizeof(tid), FNV1_32_INIT)); } static int 
alloc_hftid_hash(struct tid_info *t, int flags) { int n; MPASS(t->ntids > 0); MPASS(t->hftid_hash_4t == NULL); MPASS(t->hftid_hash_tid == NULL); n = max(t->ntids / 1024, 16); t->hftid_hash_4t = hashinit_flags(n, M_CXGBE, &t->hftid_4t_mask, flags); if (t->hftid_hash_4t == NULL) return (ENOMEM); t->hftid_hash_tid = hashinit_flags(n, M_CXGBE, &t->hftid_tid_mask, flags); if (t->hftid_hash_tid == NULL) { hashdestroy(t->hftid_hash_4t, M_CXGBE, t->hftid_4t_mask); t->hftid_hash_4t = NULL; return (ENOMEM); } mtx_init(&t->hftid_lock, "T4 hashfilters", 0, MTX_DEF); cv_init(&t->hftid_cv, "t4hfcv"); return (0); } void free_hftid_hash(struct tid_info *t) { struct filter_entry *f, *ftmp; LIST_HEAD(, filter_entry) *head; int i; #ifdef INVARIANTS int n = 0; #endif if (t->tids_in_use > 0) { /* Remove everything from the tid hash. */ head = t->hftid_hash_tid; for (i = 0; i <= t->hftid_tid_mask; i++) { LIST_FOREACH_SAFE(f, &head[i], link_tid, ftmp) { LIST_REMOVE(f, link_tid); } } /* Remove and then free each filter in the 4t hash. */ head = t->hftid_hash_4t; for (i = 0; i <= t->hftid_4t_mask; i++) { LIST_FOREACH_SAFE(f, &head[i], link_4t, ftmp) { #ifdef INVARIANTS n += f->fs.type ? 2 : 1; #endif LIST_REMOVE(f, link_4t); free(f, M_CXGBE); } } MPASS(t->tids_in_use == n); t->tids_in_use = 0; } if (t->hftid_hash_4t) { hashdestroy(t->hftid_hash_4t, M_CXGBE, t->hftid_4t_mask); t->hftid_hash_4t = NULL; } if (t->hftid_hash_tid) { hashdestroy(t->hftid_hash_tid, M_CXGBE, t->hftid_tid_mask); t->hftid_hash_tid = NULL; } if (mtx_initialized(&t->hftid_lock)) { mtx_destroy(&t->hftid_lock); cv_destroy(&t->hftid_cv); } } static void insert_hf(struct adapter *sc, struct filter_entry *f, uint32_t hash) { struct tid_info *t = &sc->tids; LIST_HEAD(, filter_entry) *head = t->hftid_hash_4t; MPASS(head != NULL); if (hash == 0) hash = hf_hashfn_4t(&f->fs); LIST_INSERT_HEAD(&head[hash & t->hftid_4t_mask], f, link_4t); atomic_add_int(&t->tids_in_use, f->fs.type ? 2 : 1); } static void insert_hftid(struct adapter *sc, struct filter_entry *f) { struct tid_info *t = &sc->tids; LIST_HEAD(, filter_entry) *head = t->hftid_hash_tid; uint32_t hash; MPASS(f->tid >= t->tid_base); MPASS(f->tid - t->tid_base < t->ntids); mtx_assert(&t->hftid_lock, MA_OWNED); hash = hf_hashfn_tid(f->tid); LIST_INSERT_HEAD(&head[hash & t->hftid_tid_mask], f, link_tid); } static bool filter_eq(struct t4_filter_specification *fs1, struct t4_filter_specification *fs2) { int n; MPASS(fs1->hash && fs2->hash); if (fs1->type != fs2->type) return (false); n = fs1->type ? 16 : 4; if (bcmp(&fs1->val.sip[0], &fs2->val.sip[0], n) || bcmp(&fs1->val.dip[0], &fs2->val.dip[0], n) || fs1->val.sport != fs2->val.sport || fs1->val.dport != fs2->val.dport) return (false); /* * We know the masks are the same because all hashfilters conform to the * global tp->filter_mask and the driver has verified that already. 
*/ if ((fs1->mask.pfvf_vld || fs1->mask.ovlan_vld) && fs1->val.vnic != fs2->val.vnic) return (false); if (fs1->mask.vlan_vld && fs1->val.vlan != fs2->val.vlan) return (false); if (fs1->mask.macidx && fs1->val.macidx != fs2->val.macidx) return (false); if (fs1->mask.frag && fs1->val.frag != fs2->val.frag) return (false); if (fs1->mask.matchtype && fs1->val.matchtype != fs2->val.matchtype) return (false); if (fs1->mask.iport && fs1->val.iport != fs2->val.iport) return (false); if (fs1->mask.fcoe && fs1->val.fcoe != fs2->val.fcoe) return (false); if (fs1->mask.proto && fs1->val.proto != fs2->val.proto) return (false); if (fs1->mask.tos && fs1->val.tos != fs2->val.tos) return (false); if (fs1->mask.ethtype && fs1->val.ethtype != fs2->val.ethtype) return (false); return (true); } static struct filter_entry * lookup_hf(struct adapter *sc, struct t4_filter_specification *fs, uint32_t hash) { struct tid_info *t = &sc->tids; LIST_HEAD(, filter_entry) *head = t->hftid_hash_4t; struct filter_entry *f; mtx_assert(&t->hftid_lock, MA_OWNED); MPASS(head != NULL); if (hash == 0) hash = hf_hashfn_4t(fs); LIST_FOREACH(f, &head[hash & t->hftid_4t_mask], link_4t) { if (filter_eq(&f->fs, fs)) return (f); } return (NULL); } static struct filter_entry * lookup_hftid(struct adapter *sc, int tid) { struct tid_info *t = &sc->tids; LIST_HEAD(, filter_entry) *head = t->hftid_hash_tid; struct filter_entry *f; uint32_t hash; mtx_assert(&t->hftid_lock, MA_OWNED); MPASS(head != NULL); hash = hf_hashfn_tid(tid); LIST_FOREACH(f, &head[hash & t->hftid_tid_mask], link_tid) { if (f->tid == tid) return (f); } return (NULL); } static void remove_hf(struct adapter *sc, struct filter_entry *f) { struct tid_info *t = &sc->tids; mtx_assert(&t->hftid_lock, MA_OWNED); LIST_REMOVE(f, link_4t); atomic_subtract_int(&t->tids_in_use, f->fs.type ? 2 : 1); } static void remove_hftid(struct adapter *sc, struct filter_entry *f) { #ifdef INVARIANTS struct tid_info *t = &sc->tids; mtx_assert(&t->hftid_lock, MA_OWNED); #endif LIST_REMOVE(f, link_tid); } -/* - * Input: driver's 32b filter mode. - * Returns: hardware filter mode (bits to set in vlan_pri_map) for the input. 
- */ static uint16_t -mode_to_fconf(uint32_t mode) +mode_to_fconf_t4(uint32_t mode) { uint32_t fconf = 0; if (mode & T4_FILTER_IP_FRAGMENT) fconf |= F_FRAGMENTATION; - if (mode & T4_FILTER_MPS_HIT_TYPE) fconf |= F_MPSHITTYPE; - if (mode & T4_FILTER_MAC_IDX) fconf |= F_MACMATCH; - if (mode & T4_FILTER_ETH_TYPE) fconf |= F_ETHERTYPE; - if (mode & T4_FILTER_IP_PROTO) fconf |= F_PROTOCOL; - if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; - if (mode & T4_FILTER_VLAN) fconf |= F_VLAN; - if (mode & T4_FILTER_VNIC) fconf |= F_VNIC_ID; - if (mode & T4_FILTER_PORT) fconf |= F_PORT; - if (mode & T4_FILTER_FCoE) fconf |= F_FCOE; return (fconf); } +static uint16_t +mode_to_fconf_t7(uint32_t mode) +{ + uint32_t fconf = 0; + + if (mode & T4_FILTER_TCPFLAGS) + fconf |= F_TCPFLAGS; + if (mode & T4_FILTER_SYNONLY) + fconf |= F_SYNONLY; + if (mode & T4_FILTER_ROCE) + fconf |= F_ROCE; + if (mode & T4_FILTER_IP_FRAGMENT) + fconf |= F_T7_FRAGMENTATION; + if (mode & T4_FILTER_MPS_HIT_TYPE) + fconf |= F_T7_MPSHITTYPE; + if (mode & T4_FILTER_MAC_IDX) + fconf |= F_T7_MACMATCH; + if (mode & T4_FILTER_ETH_TYPE) + fconf |= F_T7_ETHERTYPE; + if (mode & T4_FILTER_IP_PROTO) + fconf |= F_T7_PROTOCOL; + if (mode & T4_FILTER_IP_TOS) + fconf |= F_T7_TOS; + if (mode & T4_FILTER_VLAN) + fconf |= F_T7_VLAN; + if (mode & T4_FILTER_VNIC) + fconf |= F_T7_VNIC_ID; + if (mode & T4_FILTER_PORT) + fconf |= F_T7_PORT; + if (mode & T4_FILTER_FCoE) + fconf |= F_T7_FCOE; + if (mode & T4_FILTER_IPSECIDX) + fconf |= F_IPSECIDX; + + return (fconf); +} + +/* + * Input: driver's 32b filter mode. + * Returns: hardware filter mode (bits to set in vlan_pri_map) for the input. + */ +static uint16_t +mode_to_fconf(struct adapter *sc, uint32_t mode) +{ + if (chip_id(sc) >= CHELSIO_T7) + return (mode_to_fconf_t7(mode)); + else + return (mode_to_fconf_t4(mode)); +} + /* * Input: driver's 32b filter mode. * Returns: hardware vnic mode (ingress config) matching the input. */ static int mode_to_iconf(uint32_t mode) { if ((mode & T4_FILTER_VNIC) == 0) return (-1); /* ingress config doesn't matter. 
*/ if (mode & T4_FILTER_IC_VNIC) return (FW_VNIC_MODE_PF_VF); else if (mode & T4_FILTER_IC_ENCAP) return (FW_VNIC_MODE_ENCAP_EN); else return (FW_VNIC_MODE_OUTER_VLAN); } static int check_fspec_against_fconf_iconf(struct adapter *sc, struct t4_filter_specification *fs) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf = 0; - if (fs->val.frag || fs->mask.frag) - fconf |= F_FRAGMENTATION; - - if (fs->val.matchtype || fs->mask.matchtype) - fconf |= F_MPSHITTYPE; - - if (fs->val.macidx || fs->mask.macidx) - fconf |= F_MACMATCH; - - if (fs->val.ethtype || fs->mask.ethtype) - fconf |= F_ETHERTYPE; - - if (fs->val.proto || fs->mask.proto) - fconf |= F_PROTOCOL; - - if (fs->val.tos || fs->mask.tos) - fconf |= F_TOS; - - if (fs->val.vlan_vld || fs->mask.vlan_vld) - fconf |= F_VLAN; - - if (fs->val.ovlan_vld || fs->mask.ovlan_vld) { - if (tpp->vnic_mode != FW_VNIC_MODE_OUTER_VLAN) - return (EINVAL); - fconf |= F_VNIC_ID; - } - - if (fs->val.pfvf_vld || fs->mask.pfvf_vld) { - if (tpp->vnic_mode != FW_VNIC_MODE_PF_VF) - return (EINVAL); - fconf |= F_VNIC_ID; - } - + if (chip_id(sc) >= CHELSIO_T7) { + if (fs->val.tcpflags || fs->mask.tcpflags) + fconf |= F_TCPFLAGS; + if (fs->val.synonly || fs->mask.synonly) + fconf |= F_SYNONLY; + if (fs->val.roce || fs->mask.roce) + fconf |= F_ROCE; + if (fs->val.frag || fs->mask.frag) + fconf |= F_T7_FRAGMENTATION; + if (fs->val.matchtype || fs->mask.matchtype) + fconf |= F_T7_MPSHITTYPE; + if (fs->val.macidx || fs->mask.macidx) + fconf |= F_T7_MACMATCH; + if (fs->val.ethtype || fs->mask.ethtype) + fconf |= F_T7_ETHERTYPE; + if (fs->val.proto || fs->mask.proto) + fconf |= F_T7_PROTOCOL; + if (fs->val.tos || fs->mask.tos) + fconf |= F_T7_TOS; + if (fs->val.vlan_vld || fs->mask.vlan_vld) + fconf |= F_T7_VLAN; + if (fs->val.ovlan_vld || fs->mask.ovlan_vld) { + if (tpp->vnic_mode != FW_VNIC_MODE_OUTER_VLAN) + return (EINVAL); + fconf |= F_T7_VNIC_ID; + } + if (fs->val.pfvf_vld || fs->mask.pfvf_vld) { + if (tpp->vnic_mode != FW_VNIC_MODE_PF_VF) + return (EINVAL); + fconf |= F_T7_VNIC_ID; + } #ifdef notyet - if (fs->val.encap_vld || fs->mask.encap_vld) { - if (tpp->vnic_mode != FW_VNIC_MODE_ENCAP_EN); + if (fs->val.encap_vld || fs->mask.encap_vld) { + if (tpp->vnic_mode != FW_VNIC_MODE_ENCAP_EN); + return (EINVAL); + fconf |= F_T7_VNIC_ID; + } +#endif + if (fs->val.iport || fs->mask.iport) + fconf |= F_T7_PORT; + if (fs->val.fcoe || fs->mask.fcoe) + fconf |= F_T7_FCOE; + if (fs->val.ipsecidx || fs->mask.ipsecidx) + fconf |= F_IPSECIDX; + } else { + if (fs->val.tcpflags || fs->mask.tcpflags || + fs->val.synonly || fs->mask.synonly || + fs->val.roce || fs->mask.roce || + fs->val.ipsecidx || fs->mask.ipsecidx) return (EINVAL); - fconf |= F_VNIC_ID; - } + if (fs->val.frag || fs->mask.frag) + fconf |= F_FRAGMENTATION; + if (fs->val.matchtype || fs->mask.matchtype) + fconf |= F_MPSHITTYPE; + if (fs->val.macidx || fs->mask.macidx) + fconf |= F_MACMATCH; + if (fs->val.ethtype || fs->mask.ethtype) + fconf |= F_ETHERTYPE; + if (fs->val.proto || fs->mask.proto) + fconf |= F_PROTOCOL; + if (fs->val.tos || fs->mask.tos) + fconf |= F_TOS; + if (fs->val.vlan_vld || fs->mask.vlan_vld) + fconf |= F_VLAN; + if (fs->val.ovlan_vld || fs->mask.ovlan_vld) { + if (tpp->vnic_mode != FW_VNIC_MODE_OUTER_VLAN) + return (EINVAL); + fconf |= F_VNIC_ID; + } + if (fs->val.pfvf_vld || fs->mask.pfvf_vld) { + if (tpp->vnic_mode != FW_VNIC_MODE_PF_VF) + return (EINVAL); + fconf |= F_VNIC_ID; + } +#ifdef notyet + if (fs->val.encap_vld || fs->mask.encap_vld) { + if (tpp->vnic_mode != 
FW_VNIC_MODE_ENCAP_EN); + return (EINVAL); + fconf |= F_VNIC_ID; + } #endif - - if (fs->val.iport || fs->mask.iport) - fconf |= F_PORT; - - if (fs->val.fcoe || fs->mask.fcoe) - fconf |= F_FCOE; - + if (fs->val.iport || fs->mask.iport) + fconf |= F_PORT; + if (fs->val.fcoe || fs->mask.fcoe) + fconf |= F_FCOE; + } if ((tpp->filter_mode | fconf) != tpp->filter_mode) return (E2BIG); return (0); } -/* - * Input: hardware filter configuration (filter mode/mask, ingress config). - * Input: driver's 32b filter mode matching the input. - */ static uint32_t -fconf_to_mode(uint16_t hwmode, int vnic_mode) +fconf_to_mode_t4(uint16_t hwmode, int vnic_mode) { uint32_t mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; if (hwmode & F_FRAGMENTATION) mode |= T4_FILTER_IP_FRAGMENT; if (hwmode & F_MPSHITTYPE) mode |= T4_FILTER_MPS_HIT_TYPE; if (hwmode & F_MACMATCH) mode |= T4_FILTER_MAC_IDX; if (hwmode & F_ETHERTYPE) mode |= T4_FILTER_ETH_TYPE; if (hwmode & F_PROTOCOL) mode |= T4_FILTER_IP_PROTO; if (hwmode & F_TOS) mode |= T4_FILTER_IP_TOS; if (hwmode & F_VLAN) mode |= T4_FILTER_VLAN; if (hwmode & F_VNIC_ID) mode |= T4_FILTER_VNIC; /* real meaning depends on vnic_mode. */ if (hwmode & F_PORT) mode |= T4_FILTER_PORT; if (hwmode & F_FCOE) mode |= T4_FILTER_FCoE; switch (vnic_mode) { case FW_VNIC_MODE_PF_VF: mode |= T4_FILTER_IC_VNIC; break; case FW_VNIC_MODE_ENCAP_EN: mode |= T4_FILTER_IC_ENCAP; break; case FW_VNIC_MODE_OUTER_VLAN: default: break; } return (mode); } +static uint32_t +fconf_to_mode_t7(uint16_t hwmode, int vnic_mode) +{ + uint32_t mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | + T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; + + if (hwmode & F_TCPFLAGS) + mode |= T4_FILTER_TCPFLAGS; + if (hwmode & F_SYNONLY) + mode |= T4_FILTER_SYNONLY; + if (hwmode & F_ROCE) + mode |= T4_FILTER_ROCE; + if (hwmode & F_T7_FRAGMENTATION) + mode |= T4_FILTER_IP_FRAGMENT; + if (hwmode & F_T7_MPSHITTYPE) + mode |= T4_FILTER_MPS_HIT_TYPE; + if (hwmode & F_T7_MACMATCH) + mode |= T4_FILTER_MAC_IDX; + if (hwmode & F_T7_ETHERTYPE) + mode |= T4_FILTER_ETH_TYPE; + if (hwmode & F_T7_PROTOCOL) + mode |= T4_FILTER_IP_PROTO; + if (hwmode & F_T7_TOS) + mode |= T4_FILTER_IP_TOS; + if (hwmode & F_T7_VLAN) + mode |= T4_FILTER_VLAN; + if (hwmode & F_T7_VNIC_ID) + mode |= T4_FILTER_VNIC; /* real meaning depends on vnic_mode. */ + if (hwmode & F_T7_PORT) + mode |= T4_FILTER_PORT; + if (hwmode & F_T7_FCOE) + mode |= T4_FILTER_FCoE; + if (hwmode & F_IPSECIDX) + mode |= T4_FILTER_IPSECIDX; + + switch (vnic_mode) { + case FW_VNIC_MODE_PF_VF: + mode |= T4_FILTER_IC_VNIC; + break; + case FW_VNIC_MODE_ENCAP_EN: + mode |= T4_FILTER_IC_ENCAP; + break; + case FW_VNIC_MODE_OUTER_VLAN: + default: + break; + } + + return (mode); +} + +/* + * Input: hardware filter configuration (filter mode/mask, ingress config). + * Output: driver's 32b filter mode matching the input. + */ +static inline uint32_t +fconf_to_mode(struct adapter *sc, uint16_t hwmode, int vnic_mode) +{ + if (chip_id(sc) >= CHELSIO_T7) + return (fconf_to_mode_t7(hwmode, vnic_mode)); + else + return (fconf_to_mode_t4(hwmode, vnic_mode)); +} + int get_filter_mode(struct adapter *sc, uint32_t *mode) { struct tp_params *tp = &sc->params.tp; uint16_t filter_mode; /* Filter mask must comply with the global filter mode. */ MPASS((tp->filter_mode | tp->filter_mask) == tp->filter_mode); /* Non-zero incoming value in mode means "hashfilter mode". */ filter_mode = *mode ? 
tp->filter_mask : tp->filter_mode; - *mode = fconf_to_mode(filter_mode, tp->vnic_mode); + *mode = fconf_to_mode(sc, filter_mode, tp->vnic_mode); return (0); } int set_filter_mode(struct adapter *sc, uint32_t mode) { struct tp_params *tp = &sc->params.tp; int rc, iconf; uint16_t fconf; iconf = mode_to_iconf(mode); - fconf = mode_to_fconf(mode); + fconf = mode_to_fconf(sc, mode); if ((iconf == -1 || iconf == tp->vnic_mode) && fconf == tp->filter_mode) return (0); /* Nothing to do */ rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setfm"); if (rc) return (rc); if (!hw_all_ok(sc)) { rc = ENXIO; goto done; } if (sc->tids.ftids_in_use > 0 || /* TCAM filters active */ sc->tids.hpftids_in_use > 0 || /* hi-pri TCAM filters active */ sc->tids.tids_in_use > 0) { /* TOE or hashfilters active */ rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (uld_active(sc, ULD_TOM)) { rc = EBUSY; goto done; } #endif /* Note that filter mask will get clipped to the new filter mode. */ rc = -t4_set_filter_cfg(sc, fconf, -1, iconf); done: end_synchronized_op(sc, 0); return (rc); } int set_filter_mask(struct adapter *sc, uint32_t mode) { struct tp_params *tp = &sc->params.tp; int rc, iconf; uint16_t fmask; iconf = mode_to_iconf(mode); - fmask = mode_to_fconf(mode); + fmask = mode_to_fconf(sc, mode); if ((iconf == -1 || iconf == tp->vnic_mode) && fmask == tp->filter_mask) return (0); /* Nothing to do */ /* * We aren't going to change the global filter mode or VNIC mode here. * The given filter mask must conform to them. */ if ((fmask | tp->filter_mode) != tp->filter_mode) return (EINVAL); if (iconf != -1 && iconf != tp->vnic_mode) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sethfm"); if (rc) return (rc); if (!hw_all_ok(sc)) { rc = ENXIO; goto done; } if (sc->tids.tids_in_use > 0) { /* TOE or hashfilters active */ rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (uld_active(sc, ULD_TOM)) { rc = EBUSY; goto done; } #endif rc = -t4_set_filter_cfg(sc, -1, fmask, -1); done: end_synchronized_op(sc, 0); return (rc); } static inline uint64_t get_filter_hits(struct adapter *sc, uint32_t tid) { uint32_t tcb_addr; uint64_t hits; tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + tid * TCB_SIZE; mtx_lock(&sc->reg_lock); if (!hw_all_ok(sc)) hits = 0; else if (is_t4(sc)) { uint64_t t; read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&t, 8); hits = be64toh(t); } else { uint32_t t; read_via_memwin(sc, 0, tcb_addr + 24, &t, 4); hits = be32toh(t); } mtx_unlock(&sc->reg_lock); return (hits); } int get_filter(struct adapter *sc, struct t4_filter *t) { if (t->fs.hash) return (get_hashfilter(sc, t)); else return (get_tcamfilter(sc, t)); } static int set_tcamfilter(struct adapter *sc, struct t4_filter *t, struct l2t_entry *l2te, struct smt_entry *smt) { struct filter_entry *f; struct fw_filter2_wr *fwr; u_int vnic_vld, vnic_vld_mask; struct wrq_cookie cookie; int i, rc, busy, locked; u_int tid; const int ntids = t->fs.type ? 
4 : 1; MPASS(!t->fs.hash); /* Already validated against fconf, iconf */ MPASS((t->fs.val.pfvf_vld & t->fs.val.ovlan_vld) == 0); MPASS((t->fs.mask.pfvf_vld & t->fs.mask.ovlan_vld) == 0); if (separate_hpfilter_region(sc) && t->fs.prio) { MPASS(t->idx < sc->tids.nhpftids); f = &sc->tids.hpftid_tab[t->idx]; tid = sc->tids.hpftid_base + t->idx; } else { MPASS(t->idx < sc->tids.nftids); f = &sc->tids.ftid_tab[t->idx]; tid = sc->tids.ftid_base + t->idx; } rc = busy = locked = 0; mtx_lock(&sc->tids.ftid_lock); for (i = 0; i < ntids; i++) { busy += f[i].pending + f[i].valid; locked += f[i].locked; } if (locked > 0) rc = EPERM; else if (busy > 0) rc = EBUSY; else { int len16; if (sc->params.filter2_wr_support) len16 = howmany(sizeof(struct fw_filter2_wr), 16); else len16 = howmany(sizeof(struct fw_filter_wr), 16); fwr = start_wrq_wr(&sc->sge.ctrlq[0], len16, &cookie); if (__predict_false(fwr == NULL)) rc = ENOMEM; else { f->pending = 1; if (separate_hpfilter_region(sc) && t->fs.prio) sc->tids.hpftids_in_use++; else sc->tids.ftids_in_use++; } } mtx_unlock(&sc->tids.ftid_lock); if (rc != 0) return (rc); /* * Can't fail now. A set-filter WR will definitely be sent. */ f->tid = tid; f->fs = t->fs; f->l2te = l2te; f->smt = smt; if (t->fs.val.pfvf_vld || t->fs.val.ovlan_vld) vnic_vld = 1; else vnic_vld = 0; if (t->fs.mask.pfvf_vld || t->fs.mask.ovlan_vld) vnic_vld_mask = 1; else vnic_vld_mask = 0; bzero(fwr, sizeof(*fwr)); if (sc->params.filter2_wr_support) fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER2_WR)); else fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR)); fwr->len16_pkd = htobe32(FW_LEN16(*fwr)); fwr->tid_to_iq = htobe32(V_FW_FILTER_WR_TID(f->tid) | V_FW_FILTER_WR_RQTYPE(f->fs.type) | V_FW_FILTER_WR_NOREPLY(0) | V_FW_FILTER_WR_IQ(f->fs.iq)); fwr->del_filter_to_l2tix = htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | V_FW_FILTER_WR_DMAC(f->fs.newdmac) | V_FW_FILTER_WR_SMAC(f->fs.newsmac) | V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | V_FW_FILTER_WR_L2TIX(f->l2te ? 
f->l2te->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) | V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | V_FW_FILTER_WR_PORT(f->fs.val.iport) | V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); fwr->ptcl = f->fs.val.proto; fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; fwr->ivlan = htobe16(f->fs.val.vlan); fwr->ivlanm = htobe16(f->fs.mask.vlan); fwr->ovlan = htobe16(f->fs.val.vnic); fwr->ovlanm = htobe16(f->fs.mask.vnic); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm)); fwr->lp = htobe16(f->fs.val.dport); fwr->lpm = htobe16(f->fs.mask.dport); fwr->fp = htobe16(f->fs.val.sport); fwr->fpm = htobe16(f->fs.mask.sport); /* sma = 0 tells the fw to use SMAC_SEL for source MAC address */ bzero(fwr->sma, sizeof (fwr->sma)); if (sc->params.filter2_wr_support) { fwr->filter_type_swapmac = V_FW_FILTER2_WR_SWAPMAC(f->fs.swapmac); fwr->natmode_to_ulp_type = V_FW_FILTER2_WR_ULP_TYPE(f->fs.nat_mode ? ULP_MODE_TCPDDP : ULP_MODE_NONE) | V_FW_FILTER2_WR_NATFLAGCHECK(f->fs.nat_flag_chk) | V_FW_FILTER2_WR_NATMODE(f->fs.nat_mode); memcpy(fwr->newlip, f->fs.nat_dip, sizeof(fwr->newlip)); memcpy(fwr->newfip, f->fs.nat_sip, sizeof(fwr->newfip)); fwr->newlport = htobe16(f->fs.nat_dport); fwr->newfport = htobe16(f->fs.nat_sport); fwr->natseqcheck = htobe32(f->fs.nat_seq_chk); } commit_wrq_wr(&sc->sge.ctrlq[0], fwr, &cookie); /* Wait for response. */ mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? 0 : EIO; break; } if (cv_wait_sig(&sc->tids.ftid_cv, &sc->tids.ftid_lock) != 0) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); return (rc); } static int hashfilter_ntuple(struct adapter *sc, const struct t4_filter_specification *fs, uint64_t *ftuple) { struct tp_params *tp = &sc->params.tp; uint16_t fmask; - *ftuple = fmask = 0; - /* * Initialize each of the fields which we care about which are present * in the Compressed Filter Tuple. */ - if (tp->vlan_shift >= 0 && fs->mask.vlan) { - *ftuple |= (uint64_t)(F_FT_VLAN_VLD | fs->val.vlan) << - tp->vlan_shift; - fmask |= F_VLAN; - } - - if (tp->port_shift >= 0 && fs->mask.iport) { - *ftuple |= (uint64_t)fs->val.iport << tp->port_shift; - fmask |= F_PORT; - } - - if (tp->protocol_shift >= 0 && fs->mask.proto) { - *ftuple |= (uint64_t)fs->val.proto << tp->protocol_shift; - fmask |= F_PROTOCOL; - } - - if (tp->tos_shift >= 0 && fs->mask.tos) { - *ftuple |= (uint64_t)(fs->val.tos) << tp->tos_shift; - fmask |= F_TOS; - } - - if (tp->vnic_shift >= 0 && fs->mask.vnic) { - /* vnic_mode was already validated. 
*/
-        if (tp->vnic_mode == FW_VNIC_MODE_PF_VF)
-            MPASS(fs->mask.pfvf_vld);
-        else if (tp->vnic_mode == FW_VNIC_MODE_OUTER_VLAN)
-            MPASS(fs->mask.ovlan_vld);
+#define SFF(V, S) ((uint64_t)(V) << S) /* Shifted Filter Field. */
+    *ftuple = fmask = 0;
+    if (chip_id(sc) >= CHELSIO_T7) {
+        if (tp->ipsecidx_shift >= 0 && fs->mask.ipsecidx) {
+            *ftuple |= SFF(fs->val.ipsecidx, tp->ipsecidx_shift);
+            fmask |= F_IPSECIDX;
+        }
+        if (tp->fcoe_shift >= 0 && fs->mask.fcoe) {
+            *ftuple |= SFF(fs->val.fcoe, tp->fcoe_shift);
+            fmask |= F_T7_FCOE;
+        }
+        if (tp->port_shift >= 0 && fs->mask.iport) {
+            *ftuple |= (uint64_t)fs->val.iport << tp->port_shift;
+            fmask |= F_T7_PORT;
+        }
+        if (tp->vnic_shift >= 0 && fs->mask.vnic) {
+            /* vnic_mode was already validated. */
+            if (tp->vnic_mode == FW_VNIC_MODE_PF_VF)
+                MPASS(fs->mask.pfvf_vld);
+            else if (tp->vnic_mode == FW_VNIC_MODE_OUTER_VLAN)
+                MPASS(fs->mask.ovlan_vld);
 #ifdef notyet
-        else if (tp->vnic_mode == FW_VNIC_MODE_ENCAP_EN)
-            MPASS(fs->mask.encap_vld);
+            else if (tp->vnic_mode == FW_VNIC_MODE_ENCAP_EN)
+                MPASS(fs->mask.encap_vld);
 #endif
-        *ftuple |= ((1ULL << 16) | fs->val.vnic) << tp->vnic_shift;
-        fmask |= F_VNIC_ID;
-    }
-
-    if (tp->macmatch_shift >= 0 && fs->mask.macidx) {
-        *ftuple |= (uint64_t)(fs->val.macidx) << tp->macmatch_shift;
-        fmask |= F_MACMATCH;
-    }
-
-    if (tp->ethertype_shift >= 0 && fs->mask.ethtype) {
-        *ftuple |= (uint64_t)(fs->val.ethtype) << tp->ethertype_shift;
-        fmask |= F_ETHERTYPE;
-    }
-
-    if (tp->matchtype_shift >= 0 && fs->mask.matchtype) {
-        *ftuple |= (uint64_t)(fs->val.matchtype) << tp->matchtype_shift;
-        fmask |= F_MPSHITTYPE;
-    }
-
-    if (tp->frag_shift >= 0 && fs->mask.frag) {
-        *ftuple |= (uint64_t)(fs->val.frag) << tp->frag_shift;
-        fmask |= F_FRAGMENTATION;
-    }
-
-    if (tp->fcoe_shift >= 0 && fs->mask.fcoe) {
-        *ftuple |= (uint64_t)(fs->val.fcoe) << tp->fcoe_shift;
-        fmask |= F_FCOE;
+            *ftuple |= SFF(F_FT_VNID_ID_VLD | fs->val.vnic, tp->vnic_shift);
+            fmask |= F_T7_VNIC_ID;
+        }
+        if (tp->vlan_shift >= 0 && fs->mask.vlan) {
+            *ftuple |= SFF(F_FT_VLAN_VLD | fs->val.vlan, tp->vlan_shift);
+            fmask |= F_T7_VLAN;
+        }
+        if (tp->tos_shift >= 0 && fs->mask.tos) {
+            *ftuple |= SFF(fs->val.tos, tp->tos_shift);
+            fmask |= F_T7_TOS;
+        }
+        if (tp->protocol_shift >= 0 && fs->mask.proto) {
+            *ftuple |= SFF(fs->val.proto, tp->protocol_shift);
+            fmask |= F_T7_PROTOCOL;
+        }
+        if (tp->ethertype_shift >= 0 && fs->mask.ethtype) {
+            *ftuple |= SFF(fs->val.ethtype, tp->ethertype_shift);
+            fmask |= F_T7_ETHERTYPE;
+        }
+        if (tp->macmatch_shift >= 0 && fs->mask.macidx) {
+            *ftuple |= SFF(fs->val.macidx, tp->macmatch_shift);
+            fmask |= F_T7_MACMATCH;
+        }
+        if (tp->matchtype_shift >= 0 && fs->mask.matchtype) {
+            *ftuple |= SFF(fs->val.matchtype, tp->matchtype_shift);
+            fmask |= F_T7_MPSHITTYPE;
+        }
+        if (tp->frag_shift >= 0 && fs->mask.frag) {
+            *ftuple |= SFF(fs->val.frag, tp->frag_shift);
+            fmask |= F_T7_FRAGMENTATION;
+        }
+        if (tp->roce_shift >= 0 && fs->mask.roce) {
+            *ftuple |= SFF(fs->val.roce, tp->roce_shift);
+            fmask |= F_ROCE;
+        }
+        if (tp->synonly_shift >= 0 && fs->mask.synonly) {
+            *ftuple |= SFF(fs->val.synonly, tp->synonly_shift);
+            fmask |= F_SYNONLY;
+        }
+        if (tp->tcpflags_shift >= 0 && fs->mask.tcpflags) {
+            *ftuple |= SFF(fs->val.tcpflags, tp->tcpflags_shift);
+            fmask |= F_TCPFLAGS;
+        }
+    } else {
+        if (fs->mask.ipsecidx || fs->mask.roce || fs->mask.synonly ||
+            fs->mask.tcpflags) {
+            MPASS(tp->ipsecidx_shift == -1);
+            MPASS(tp->roce_shift == -1);
+            MPASS(tp->synonly_shift == -1);
+            MPASS(tp->tcpflags_shift == -1);
+            return (EINVAL);
+ } + if (tp->fcoe_shift >= 0 && fs->mask.fcoe) { + *ftuple |= SFF(fs->val.fcoe, tp->fcoe_shift); + fmask |= F_FCOE; + } + if (tp->port_shift >= 0 && fs->mask.iport) { + *ftuple |= (uint64_t)fs->val.iport << tp->port_shift; + fmask |= F_PORT; + } + if (tp->vnic_shift >= 0 && fs->mask.vnic) { + /* vnic_mode was already validated. */ + if (tp->vnic_mode == FW_VNIC_MODE_PF_VF) + MPASS(fs->mask.pfvf_vld); + else if (tp->vnic_mode == FW_VNIC_MODE_OUTER_VLAN) + MPASS(fs->mask.ovlan_vld); +#ifdef notyet + else if (tp->vnic_mode == FW_VNIC_MODE_ENCAP_EN) + MPASS(fs->mask.encap_vld); +#endif + *ftuple |= SFF(F_FT_VNID_ID_VLD | fs->val.vnic, tp->vnic_shift); + fmask |= F_VNIC_ID; + } + if (tp->vlan_shift >= 0 && fs->mask.vlan) { + *ftuple |= SFF(F_FT_VLAN_VLD | fs->val.vlan, tp->vlan_shift); + fmask |= F_VLAN; + } + if (tp->tos_shift >= 0 && fs->mask.tos) { + *ftuple |= SFF(fs->val.tos, tp->tos_shift); + fmask |= F_TOS; + } + if (tp->protocol_shift >= 0 && fs->mask.proto) { + *ftuple |= SFF(fs->val.proto, tp->protocol_shift); + fmask |= F_PROTOCOL; + } + if (tp->ethertype_shift >= 0 && fs->mask.ethtype) { + *ftuple |= SFF(fs->val.ethtype, tp->ethertype_shift); + fmask |= F_ETHERTYPE; + } + if (tp->macmatch_shift >= 0 && fs->mask.macidx) { + *ftuple |= SFF(fs->val.macidx, tp->macmatch_shift); + fmask |= F_MACMATCH; + } + if (tp->matchtype_shift >= 0 && fs->mask.matchtype) { + *ftuple |= SFF(fs->val.matchtype, tp->matchtype_shift); + fmask |= F_MPSHITTYPE; + } + if (tp->frag_shift >= 0 && fs->mask.frag) { + *ftuple |= SFF(fs->val.frag, tp->frag_shift); + fmask |= F_FRAGMENTATION; + } } +#undef SFF /* A hashfilter must conform to the hardware filter mask. */ if (fmask != tp->filter_mask) return (EINVAL); return (0); } static bool is_4tuple_specified(struct t4_filter_specification *fs) { int i; const int n = fs->type ? 16 : 4; if (fs->mask.sport != 0xffff || fs->mask.dport != 0xffff) return (false); for (i = 0; i < n; i++) { if (fs->mask.sip[i] != 0xff) return (false); if (fs->mask.dip[i] != 0xff) return (false); } return (true); } int set_filter(struct adapter *sc, struct t4_filter *t) { struct tid_info *ti = &sc->tids; struct l2t_entry *l2te = NULL; struct smt_entry *smt = NULL; uint64_t ftuple; int rc; /* * Basic filter checks first. */ if (t->fs.hash) { if (!is_hashfilter(sc) || ti->ntids == 0) return (ENOTSUP); /* Hardware, not user, selects a tid for hashfilters. */ if (t->idx != (uint32_t)-1) return (EINVAL); /* T5 can't count hashfilter hits. */ if (is_t5(sc) && t->fs.hitcnts) return (EINVAL); if (!is_4tuple_specified(&t->fs)) return (EINVAL); rc = hashfilter_ntuple(sc, &t->fs, &ftuple); if (rc != 0) return (rc); } else { if (separate_hpfilter_region(sc) && t->fs.prio) { if (ti->nhpftids == 0) return (ENOTSUP); if (t->idx >= ti->nhpftids) return (EINVAL); } else { if (ti->nftids == 0) return (ENOTSUP); if (t->idx >= ti->nftids) return (EINVAL); } /* IPv6 filter idx must be 4 aligned */ if (t->fs.type == 1 && ((t->idx & 0x3) || t->idx + 4 >= ti->nftids)) return (EINVAL); } /* T4 doesn't support VLAN tag removal or rewrite, swapmac, and NAT. */ if (is_t4(sc) && t->fs.action == FILTER_SWITCH && (t->fs.newvlan == VLAN_REMOVE || t->fs.newvlan == VLAN_REWRITE || t->fs.swapmac || t->fs.nat_mode)) return (ENOTSUP); if (t->fs.action == FILTER_SWITCH && t->fs.eport >= sc->params.nports) return (EINVAL); if (t->fs.val.iport >= sc->params.nports) return (EINVAL); /* Can't specify an iqid/rss_info if not steering. 
*/ if (!t->fs.dirsteer && !t->fs.dirsteerhash && !t->fs.maskhash && t->fs.iq) return (EINVAL); /* Validate against the global filter mode and ingress config */ rc = check_fspec_against_fconf_iconf(sc, &t->fs); if (rc != 0) return (rc); /* * Basic checks passed. Make sure the queues and tid tables are setup. */ rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf"); if (rc) return (rc); if (!hw_all_ok(sc)) { rc = ENXIO; goto done; } if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_init(sc)) != 0)) goto done; if (t->fs.hash) { if (__predict_false(ti->hftid_hash_4t == NULL)) { rc = alloc_hftid_hash(&sc->tids, HASH_NOWAIT); if (rc != 0) goto done; } } else if (separate_hpfilter_region(sc) && t->fs.prio && __predict_false(ti->hpftid_tab == NULL)) { MPASS(ti->nhpftids != 0); KASSERT(ti->hpftids_in_use == 0, ("%s: no memory allocated but hpftids_in_use is %u", __func__, ti->hpftids_in_use)); ti->hpftid_tab = malloc(sizeof(struct filter_entry) * ti->nhpftids, M_CXGBE, M_NOWAIT | M_ZERO); if (ti->hpftid_tab == NULL) { rc = ENOMEM; goto done; } if (!mtx_initialized(&sc->tids.ftid_lock)) { mtx_init(&ti->ftid_lock, "T4 filters", 0, MTX_DEF); cv_init(&ti->ftid_cv, "t4fcv"); } } else if (__predict_false(ti->ftid_tab == NULL)) { MPASS(ti->nftids != 0); KASSERT(ti->ftids_in_use == 0, ("%s: no memory allocated but ftids_in_use is %u", __func__, ti->ftids_in_use)); ti->ftid_tab = malloc(sizeof(struct filter_entry) * ti->nftids, M_CXGBE, M_NOWAIT | M_ZERO); if (ti->ftid_tab == NULL) { rc = ENOMEM; goto done; } if (!mtx_initialized(&sc->tids.ftid_lock)) { mtx_init(&ti->ftid_lock, "T4 filters", 0, MTX_DEF); cv_init(&ti->ftid_cv, "t4fcv"); } } done: end_synchronized_op(sc, 0); if (rc != 0) return (rc); /* * Allocate L2T entry, SMT entry, etc. */ if (t->fs.newdmac || t->fs.newvlan) { /* This filter needs an L2T entry; allocate one. */ l2te = t4_l2t_alloc_switching(sc, t->fs.vlan, t->fs.eport, t->fs.dmac); if (__predict_false(l2te == NULL)) { rc = EAGAIN; goto error; } } if (t->fs.newsmac) { /* This filter needs an SMT entry; allocate one. */ smt = t4_smt_alloc_switching(sc->smt, t->fs.smac); if (__predict_false(smt == NULL)) { rc = EAGAIN; goto error; } rc = t4_smt_set_switching(sc, smt, 0x0, t->fs.smac); if (rc) goto error; } if (t->fs.hash) rc = set_hashfilter(sc, t, ftuple, l2te, smt); else rc = set_tcamfilter(sc, t, l2te, smt); if (rc != 0 && rc != EINPROGRESS) { error: if (l2te) t4_l2t_release(l2te); if (smt) t4_smt_release(smt); } return (rc); } static int del_tcamfilter(struct adapter *sc, struct t4_filter *t) { struct filter_entry *f; struct fw_filter_wr *fwr; struct wrq_cookie cookie; int rc, nfilters; #ifdef INVARIANTS u_int tid_base; #endif mtx_lock(&sc->tids.ftid_lock); if (separate_hpfilter_region(sc) && t->fs.prio) { nfilters = sc->tids.nhpftids; f = sc->tids.hpftid_tab; #ifdef INVARIANTS tid_base = sc->tids.hpftid_base; #endif } else { nfilters = sc->tids.nftids; f = sc->tids.ftid_tab; #ifdef INVARIANTS tid_base = sc->tids.ftid_base; #endif } MPASS(f != NULL); /* Caller checked this. 
*/ if (t->idx >= nfilters) { rc = EINVAL; goto done; } f += t->idx; if (f->locked) { rc = EPERM; goto done; } if (f->pending) { rc = EBUSY; goto done; } if (f->valid == 0) { rc = EINVAL; goto done; } MPASS(f->tid == tid_base + t->idx); fwr = start_wrq_wr(&sc->sge.ctrlq[0], howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) { rc = ENOMEM; goto done; } bzero(fwr, sizeof (*fwr)); t4_mk_filtdelwr(f->tid, fwr, sc->sge.fwq.abs_id); f->pending = 1; commit_wrq_wr(&sc->sge.ctrlq[0], fwr, &cookie); t->fs = f->fs; /* extra info for the caller */ for (;;) { if (f->pending == 0) { rc = f->valid ? EIO : 0; break; } if (cv_wait_sig(&sc->tids.ftid_cv, &sc->tids.ftid_lock) != 0) { rc = EINPROGRESS; break; } } done: mtx_unlock(&sc->tids.ftid_lock); return (rc); } int del_filter(struct adapter *sc, struct t4_filter *t) { /* No filters possible if not initialized yet. */ if (!(sc->flags & FULL_INIT_DONE)) return (EINVAL); /* * The checks for tid tables ensure that the locks that del_* will reach * for are initialized. */ if (t->fs.hash) { if (sc->tids.hftid_hash_4t != NULL) return (del_hashfilter(sc, t)); } else if (separate_hpfilter_region(sc) && t->fs.prio) { if (sc->tids.hpftid_tab != NULL) return (del_tcamfilter(sc, t)); } else { if (sc->tids.ftid_tab != NULL) return (del_tcamfilter(sc, t)); } return (EINVAL); } /* * Release secondary resources associated with the filter. */ static void free_filter_resources(struct filter_entry *f) { if (f->l2te) { t4_l2t_release(f->l2te); f->l2te = NULL; } if (f->smt) { t4_smt_release(f->smt); f->smt = NULL; } } static int set_tcb_field(struct adapter *sc, u_int tid, uint16_t word, uint64_t mask, uint64_t val, int no_reply) { struct wrq_cookie cookie; struct cpl_set_tcb_field *req; req = start_wrq_wr(&sc->sge.ctrlq[0], howmany(sizeof(*req), 16), &cookie); if (req == NULL) return (ENOMEM); bzero(req, sizeof(*req)); INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, tid); if (no_reply == 0) { req->reply_ctrl = htobe16(V_QUEUENO(sc->sge.fwq.abs_id) | V_NO_REPLY(0)); } else req->reply_ctrl = htobe16(V_NO_REPLY(1)); req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(CPL_COOKIE_HASHFILTER)); req->mask = htobe64(mask); req->val = htobe64(val); commit_wrq_wr(&sc->sge.ctrlq[0], req, &cookie); return (0); } /* Set one of the t_flags bits in the TCB. 
*/ static inline int set_tcb_tflag(struct adapter *sc, int tid, u_int bit_pos, u_int val, u_int no_reply) { return (set_tcb_field(sc, tid, W_TCB_T_FLAGS, 1ULL << bit_pos, (uint64_t)val << bit_pos, no_reply)); } int t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); u_int tid = GET_TID(rpl); u_int rc, idx; struct filter_entry *f; KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); if (is_hpftid(sc, tid)) { idx = tid - sc->tids.hpftid_base; f = &sc->tids.hpftid_tab[idx]; } else if (is_ftid(sc, tid)) { idx = tid - sc->tids.ftid_base; f = &sc->tids.ftid_tab[idx]; } else panic("%s: FW reply for invalid TID %d.", __func__, tid); MPASS(f->tid == tid); rc = G_COOKIE(rpl->cookie); mtx_lock(&sc->tids.ftid_lock); KASSERT(f->pending, ("%s: reply %d for filter[%u] that isn't pending.", __func__, rc, tid)); switch(rc) { case FW_FILTER_WR_FLT_ADDED: /* set-filter succeeded */ f->valid = 1; if (f->fs.newsmac) { MPASS(f->smt != NULL); set_tcb_tflag(sc, f->tid, S_TF_CCTRL_CWR, 1, 1); set_tcb_field(sc, f->tid, W_TCB_SMAC_SEL, V_TCB_SMAC_SEL(M_TCB_SMAC_SEL), V_TCB_SMAC_SEL(f->smt->idx), 1); /* XXX: wait for reply to TCB update before !pending */ } break; case FW_FILTER_WR_FLT_DELETED: /* del-filter succeeded */ MPASS(f->valid == 1); f->valid = 0; /* Fall through */ case FW_FILTER_WR_SMT_TBL_FULL: /* set-filter failed due to lack of SMT space. */ MPASS(f->valid == 0); free_filter_resources(f); if (separate_hpfilter_region(sc) && f->fs.prio) sc->tids.hpftids_in_use--; else sc->tids.ftids_in_use--; break; case FW_FILTER_WR_SUCCESS: case FW_FILTER_WR_EINVAL: default: panic("%s: unexpected reply %d for filter[%d].", __func__, rc, idx); } f->pending = 0; cv_broadcast(&sc->tids.ftid_cv); mtx_unlock(&sc->tids.ftid_lock); return (0); } /* * This is the reply to the Active Open that created the filter. Additional TCB * updates may be required to complete the filter configuration. */ int t4_hashfilter_ao_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); u_int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status))); u_int status = G_AOPEN_STATUS(be32toh(cpl->atid_status)); struct filter_entry *f = lookup_atid(sc, atid); KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); mtx_lock(&sc->tids.hftid_lock); KASSERT(f->pending, ("%s: hashfilter[%p] isn't pending.", __func__, f)); KASSERT(f->tid == -1, ("%s: hashfilter[%p] has tid %d already.", __func__, f, f->tid)); if (status == CPL_ERR_NONE) { f->tid = GET_TID(cpl); MPASS(lookup_hftid(sc, f->tid) == NULL); insert_hftid(sc, f); /* * Leave the filter pending until it is fully set up, which will * be indicated by the reply to the last TCB update. No need to * unblock the ioctl thread either. 
*/ if (configure_hashfilter_tcb(sc, f) == EINPROGRESS) goto done; f->valid = 1; f->pending = 0; } else { /* provide errno instead of tid to ioctl */ f->tid = act_open_rpl_status_to_errno(status); f->valid = 0; f->pending = 0; if (act_open_has_tid(status)) release_tid(sc, GET_TID(cpl), &sc->sge.ctrlq[0]); free_filter_resources(f); remove_hf(sc, f); if (f->locked == 0) free(f, M_CXGBE); } cv_broadcast(&sc->tids.hftid_cv); done: mtx_unlock(&sc->tids.hftid_lock); free_atid(sc, atid); return (0); } int t4_hashfilter_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); u_int tid = GET_TID(rpl); struct filter_entry *f; mtx_lock(&sc->tids.hftid_lock); f = lookup_hftid(sc, tid); KASSERT(f->tid == tid, ("%s: filter tid mismatch", __func__)); KASSERT(f->pending, ("%s: hashfilter %p [%u] isn't pending.", __func__, f, tid)); KASSERT(f->valid == 0, ("%s: hashfilter %p [%u] is valid already.", __func__, f, tid)); f->pending = 0; if (rpl->status == 0) { f->valid = 1; } else { f->tid = EIO; f->valid = 0; free_filter_resources(f); remove_hftid(sc, f); remove_hf(sc, f); release_tid(sc, tid, &sc->sge.ctrlq[0]); if (f->locked == 0) free(f, M_CXGBE); } cv_broadcast(&sc->tids.hftid_cv); mtx_unlock(&sc->tids.hftid_lock); return (0); } int t4_del_hashfilter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); unsigned int tid = GET_TID(cpl); struct filter_entry *f; mtx_lock(&sc->tids.hftid_lock); f = lookup_hftid(sc, tid); KASSERT(f->tid == tid, ("%s: filter tid mismatch", __func__)); KASSERT(f->pending, ("%s: hashfilter %p [%u] isn't pending.", __func__, f, tid)); KASSERT(f->valid, ("%s: hashfilter %p [%u] isn't valid.", __func__, f, tid)); f->pending = 0; if (cpl->status == 0) { f->valid = 0; free_filter_resources(f); remove_hftid(sc, f); remove_hf(sc, f); release_tid(sc, tid, &sc->sge.ctrlq[0]); if (f->locked == 0) free(f, M_CXGBE); } cv_broadcast(&sc->tids.hftid_cv); mtx_unlock(&sc->tids.hftid_lock); return (0); } static int get_tcamfilter(struct adapter *sc, struct t4_filter *t) { int i, nfilters; struct filter_entry *f; u_int in_use; #ifdef INVARIANTS u_int tid_base; #endif MPASS(!t->fs.hash); if (separate_hpfilter_region(sc) && t->fs.prio) { nfilters = sc->tids.nhpftids; f = sc->tids.hpftid_tab; in_use = sc->tids.hpftids_in_use; #ifdef INVARIANTS tid_base = sc->tids.hpftid_base; #endif } else { nfilters = sc->tids.nftids; f = sc->tids.ftid_tab; in_use = sc->tids.ftids_in_use; #ifdef INVARIANTS tid_base = sc->tids.ftid_base; #endif } if (in_use == 0 || f == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; return (0); } f += t->idx; mtx_lock(&sc->tids.ftid_lock); for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { MPASS(f->tid == tid_base + i); t->idx = i; t->l2tidx = f->l2te ? f->l2te->idx : 0; t->smtidx = f->smt ? 
f->smt->idx : 0; if (f->fs.hitcnts) t->hits = get_filter_hits(sc, f->tid); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } t->idx = 0xffffffff; done: mtx_unlock(&sc->tids.ftid_lock); return (0); } static int get_hashfilter(struct adapter *sc, struct t4_filter *t) { struct tid_info *ti = &sc->tids; int tid; struct filter_entry *f; const int inv_tid = ti->ntids + ti->tid_base; MPASS(t->fs.hash); if (ti->tids_in_use == 0 || ti->hftid_hash_tid == NULL || t->idx >= inv_tid) { t->idx = 0xffffffff; return (0); } if (t->idx < ti->tid_base) t->idx = ti->tid_base; mtx_lock(&ti->hftid_lock); for (tid = t->idx; tid < inv_tid; tid++) { f = lookup_hftid(sc, tid); if (f != NULL && f->valid) { t->idx = tid; t->l2tidx = f->l2te ? f->l2te->idx : 0; t->smtidx = f->smt ? f->smt->idx : 0; if (f->fs.hitcnts) t->hits = get_filter_hits(sc, tid); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } t->idx = 0xffffffff; done: mtx_unlock(&ti->hftid_lock); return (0); } static void mk_act_open_req6(struct adapter *sc, struct filter_entry *f, int atid, uint64_t ftuple, struct cpl_act_open_req6 *cpl) { struct cpl_t5_act_open_req6 *cpl5 = (void *)cpl; struct cpl_t6_act_open_req6 *cpl6 = (void *)cpl; /* Review changes to CPL after cpl_t6_act_open_req if this goes off. */ MPASS(chip_id(sc) >= CHELSIO_T5 && chip_id(sc) <= CHELSIO_T6); MPASS(atid >= 0); if (chip_id(sc) == CHELSIO_T5) { INIT_TP_WR(cpl5, 0); } else { INIT_TP_WR(cpl6, 0); cpl6->rsvd2 = 0; cpl6->opt3 = 0; } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) | V_TID_COOKIE(CPL_COOKIE_HASHFILTER))); cpl->local_port = htobe16(f->fs.val.dport); cpl->peer_port = htobe16(f->fs.val.sport); cpl->local_ip_hi = *(uint64_t *)(&f->fs.val.dip); cpl->local_ip_lo = *(((uint64_t *)&f->fs.val.dip) + 1); cpl->peer_ip_hi = *(uint64_t *)(&f->fs.val.sip); cpl->peer_ip_lo = *(((uint64_t *)&f->fs.val.sip) + 1); cpl->opt0 = htobe64(V_NAGLE(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_DELACK(f->fs.hitcnts) | V_L2T_IDX(f->l2te ? f->l2te->idx : 0) | V_TX_CHAN(f->fs.eport) | V_NO_CONG(f->fs.rpttid) | V_ULP_MODE(f->fs.nat_mode ? ULP_MODE_TCPDDP : ULP_MODE_NONE) | F_TCAM_BYPASS | F_NON_OFFLOAD); cpl6->params = htobe64(V_FILTER_TUPLE(ftuple)); cpl6->opt2 = htobe32(F_RSS_QUEUE_VALID | V_RSS_QUEUE(f->fs.iq) | V_TX_QUEUE(f->fs.nat_mode) | V_WND_SCALE_EN(f->fs.nat_flag_chk) | V_RX_FC_DISABLE(f->fs.nat_seq_chk ? 1 : 0) | F_T5_OPT_2_VALID | F_RX_CHANNEL | V_SACK_EN(f->fs.swapmac) | V_CONG_CNTRL((f->fs.action == FILTER_DROP) | (f->fs.dirsteer << 1)) | V_PACE(f->fs.maskhash | (f->fs.dirsteerhash << 1))); } static void mk_act_open_req(struct adapter *sc, struct filter_entry *f, int atid, uint64_t ftuple, struct cpl_act_open_req *cpl) { struct cpl_t5_act_open_req *cpl5 = (void *)cpl; struct cpl_t6_act_open_req *cpl6 = (void *)cpl; /* Review changes to CPL after cpl_t6_act_open_req if this goes off. 
*/ MPASS(chip_id(sc) >= CHELSIO_T5 && chip_id(sc) <= CHELSIO_T6); MPASS(atid >= 0); if (chip_id(sc) == CHELSIO_T5) { INIT_TP_WR(cpl5, 0); } else { INIT_TP_WR(cpl6, 0); cpl6->rsvd2 = 0; cpl6->opt3 = 0; } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, V_TID_QID(sc->sge.fwq.abs_id) | V_TID_TID(atid) | V_TID_COOKIE(CPL_COOKIE_HASHFILTER))); cpl->local_port = htobe16(f->fs.val.dport); cpl->peer_port = htobe16(f->fs.val.sport); cpl->local_ip = f->fs.val.dip[0] | f->fs.val.dip[1] << 8 | f->fs.val.dip[2] << 16 | f->fs.val.dip[3] << 24; cpl->peer_ip = f->fs.val.sip[0] | f->fs.val.sip[1] << 8 | f->fs.val.sip[2] << 16 | f->fs.val.sip[3] << 24; cpl->opt0 = htobe64(V_NAGLE(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_DELACK(f->fs.hitcnts) | V_L2T_IDX(f->l2te ? f->l2te->idx : 0) | V_TX_CHAN(f->fs.eport) | V_NO_CONG(f->fs.rpttid) | V_ULP_MODE(f->fs.nat_mode ? ULP_MODE_TCPDDP : ULP_MODE_NONE) | F_TCAM_BYPASS | F_NON_OFFLOAD); cpl6->params = htobe64(V_FILTER_TUPLE(ftuple)); cpl6->opt2 = htobe32(F_RSS_QUEUE_VALID | V_RSS_QUEUE(f->fs.iq) | V_TX_QUEUE(f->fs.nat_mode) | V_WND_SCALE_EN(f->fs.nat_flag_chk) | V_RX_FC_DISABLE(f->fs.nat_seq_chk ? 1 : 0) | F_T5_OPT_2_VALID | F_RX_CHANNEL | V_SACK_EN(f->fs.swapmac) | V_CONG_CNTRL((f->fs.action == FILTER_DROP) | (f->fs.dirsteer << 1)) | V_PACE(f->fs.maskhash | (f->fs.dirsteerhash << 1))); } static int act_open_cpl_len16(struct adapter *sc, int isipv6) { int idx; - static const int sz_table[3][2] = { + static const int sz_table[4][2] = { { howmany(sizeof (struct cpl_act_open_req), 16), howmany(sizeof (struct cpl_act_open_req6), 16) }, { howmany(sizeof (struct cpl_t5_act_open_req), 16), howmany(sizeof (struct cpl_t5_act_open_req6), 16) }, { howmany(sizeof (struct cpl_t6_act_open_req), 16), howmany(sizeof (struct cpl_t6_act_open_req6), 16) }, + { + howmany(sizeof (struct cpl_t7_act_open_req), 16), + howmany(sizeof (struct cpl_t7_act_open_req6), 16) + }, }; MPASS(chip_id(sc) >= CHELSIO_T4); - idx = min(chip_id(sc) - CHELSIO_T4, 2); + idx = min(chip_id(sc) - CHELSIO_T4, 3); return (sz_table[idx][!!isipv6]); } static int set_hashfilter(struct adapter *sc, struct t4_filter *t, uint64_t ftuple, struct l2t_entry *l2te, struct smt_entry *smt) { void *wr; struct wrq_cookie cookie; struct filter_entry *f; int rc, atid = -1; uint32_t hash; MPASS(t->fs.hash); /* Already validated against fconf, iconf */ MPASS((t->fs.val.pfvf_vld & t->fs.val.ovlan_vld) == 0); MPASS((t->fs.mask.pfvf_vld & t->fs.mask.ovlan_vld) == 0); hash = hf_hashfn_4t(&t->fs); mtx_lock(&sc->tids.hftid_lock); if (lookup_hf(sc, &t->fs, hash) != NULL) { rc = EEXIST; goto done; } f = malloc(sizeof(*f), M_CXGBE, M_ZERO | M_NOWAIT); if (__predict_false(f == NULL)) { rc = ENOMEM; goto done; } f->fs = t->fs; f->l2te = l2te; f->smt = smt; atid = alloc_atid(sc, f); if (__predict_false(atid) == -1) { free(f, M_CXGBE); rc = EAGAIN; goto done; } MPASS(atid >= 0); wr = start_wrq_wr(&sc->sge.ctrlq[0], act_open_cpl_len16(sc, f->fs.type), &cookie); if (wr == NULL) { free_atid(sc, atid); free(f, M_CXGBE); rc = ENOMEM; goto done; } if (f->fs.type) mk_act_open_req6(sc, f, atid, ftuple, wr); else mk_act_open_req(sc, f, atid, ftuple, wr); f->locked = 1; /* ithread mustn't free f if ioctl is still around. 
*/ f->pending = 1; f->tid = -1; insert_hf(sc, f, hash); commit_wrq_wr(&sc->sge.ctrlq[0], wr, &cookie); for (;;) { MPASS(f->locked); if (f->pending == 0) { if (f->valid) { rc = 0; f->locked = 0; t->idx = f->tid; } else { rc = f->tid; free(f, M_CXGBE); } break; } if (cv_wait_sig(&sc->tids.hftid_cv, &sc->tids.hftid_lock) != 0) { f->locked = 0; rc = EINPROGRESS; break; } } done: mtx_unlock(&sc->tids.hftid_lock); return (rc); } /* ABORT_REQ sent as a ULP command looks like this */ #define LEN__ABORT_REQ_ULP (sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + sizeof(struct cpl_abort_req_core)) static void * mk_abort_req_ulp(struct ulp_txpkt *ulpmc, uint32_t tid) { struct ulptx_idata *ulpsc; struct cpl_abort_req_core *req; ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); ulpmc->len = htobe32(howmany(LEN__ABORT_REQ_ULP, 16)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(sizeof(*req)); req = (struct cpl_abort_req_core *)(ulpsc + 1); OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); req->rsvd0 = htonl(0); req->rsvd1 = 0; req->cmd = CPL_ABORT_NO_RST; ulpsc = (struct ulptx_idata *)(req + 1); if (LEN__ABORT_REQ_ULP % 16) { ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); ulpsc->len = htobe32(0); return (ulpsc + 1); } return (ulpsc); } /* ABORT_RPL sent as a ULP command looks like this */ #define LEN__ABORT_RPL_ULP (sizeof(struct ulp_txpkt) + \ sizeof(struct ulptx_idata) + sizeof(struct cpl_abort_rpl_core)) static void * mk_abort_rpl_ulp(struct ulp_txpkt *ulpmc, uint32_t tid) { struct ulptx_idata *ulpsc; struct cpl_abort_rpl_core *rpl; ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0)); ulpmc->len = htobe32(howmany(LEN__ABORT_RPL_ULP, 16)); ulpsc = (struct ulptx_idata *)(ulpmc + 1); ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); ulpsc->len = htobe32(sizeof(*rpl)); rpl = (struct cpl_abort_rpl_core *)(ulpsc + 1); OPCODE_TID(rpl) = htobe32(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); rpl->rsvd0 = htonl(0); rpl->rsvd1 = 0; rpl->cmd = CPL_ABORT_NO_RST; ulpsc = (struct ulptx_idata *)(rpl + 1); if (LEN__ABORT_RPL_ULP % 16) { ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP)); ulpsc->len = htobe32(0); return (ulpsc + 1); } return (ulpsc); } static inline int del_hashfilter_wrlen(void) { return (sizeof(struct work_request_hdr) + roundup2(LEN__SET_TCB_FIELD_ULP, 16) + roundup2(LEN__ABORT_REQ_ULP, 16) + roundup2(LEN__ABORT_RPL_ULP, 16)); } static void mk_del_hashfilter_wr(struct adapter *sc, int tid, struct work_request_hdr *wrh, int wrlen, int qid) { struct ulp_txpkt *ulpmc; INIT_ULPTX_WRH(wrh, wrlen, 0, 0); ulpmc = (struct ulp_txpkt *)(wrh + 1); ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, tid, W_TCB_RSS_INFO, V_TCB_RSS_INFO(M_TCB_RSS_INFO), V_TCB_RSS_INFO(qid)); ulpmc = mk_abort_req_ulp(ulpmc, tid); ulpmc = mk_abort_rpl_ulp(ulpmc, tid); } static int del_hashfilter(struct adapter *sc, struct t4_filter *t) { struct tid_info *ti = &sc->tids; void *wr; struct filter_entry *f; struct wrq_cookie cookie; int rc; const int wrlen = del_hashfilter_wrlen(); const int inv_tid = ti->ntids + ti->tid_base; MPASS(sc->tids.hftid_hash_4t != NULL); MPASS(sc->tids.ntids > 0); if (t->idx < sc->tids.tid_base || t->idx >= inv_tid) return (EINVAL); mtx_lock(&ti->hftid_lock); f = lookup_hftid(sc, t->idx); if (f == NULL || f->valid == 0) { rc = EINVAL; goto done; } MPASS(f->tid == t->idx); if (f->locked) { rc = EPERM; goto done; } if (f->pending) { rc = EBUSY; goto done; } wr = 
start_wrq_wr(&sc->sge.ctrlq[0], howmany(wrlen, 16), &cookie); if (wr == NULL) { rc = ENOMEM; goto done; } mk_del_hashfilter_wr(sc, t->idx, wr, wrlen, sc->sge.fwq.abs_id); f->locked = 1; f->pending = 1; commit_wrq_wr(&sc->sge.ctrlq[0], wr, &cookie); t->fs = f->fs; /* extra info for the caller */ for (;;) { MPASS(f->locked); if (f->pending == 0) { if (f->valid) { f->locked = 0; rc = EIO; } else { rc = 0; free(f, M_CXGBE); } break; } if (cv_wait_sig(&ti->hftid_cv, &ti->hftid_lock) != 0) { f->locked = 0; rc = EINPROGRESS; break; } } done: mtx_unlock(&ti->hftid_lock); return (rc); } #define WORD_MASK 0xffffffff static void set_nat_params(struct adapter *sc, struct filter_entry *f, const bool dip, const bool sip, const bool dp, const bool sp) { if (dip) { if (f->fs.type) { set_tcb_field(sc, f->tid, W_TCB_SND_UNA_RAW, WORD_MASK, f->fs.nat_dip[15] | f->fs.nat_dip[14] << 8 | f->fs.nat_dip[13] << 16 | f->fs.nat_dip[12] << 24, 1); set_tcb_field(sc, f->tid, W_TCB_SND_UNA_RAW + 1, WORD_MASK, f->fs.nat_dip[11] | f->fs.nat_dip[10] << 8 | f->fs.nat_dip[9] << 16 | f->fs.nat_dip[8] << 24, 1); set_tcb_field(sc, f->tid, W_TCB_SND_UNA_RAW + 2, WORD_MASK, f->fs.nat_dip[7] | f->fs.nat_dip[6] << 8 | f->fs.nat_dip[5] << 16 | f->fs.nat_dip[4] << 24, 1); set_tcb_field(sc, f->tid, W_TCB_SND_UNA_RAW + 3, WORD_MASK, f->fs.nat_dip[3] | f->fs.nat_dip[2] << 8 | f->fs.nat_dip[1] << 16 | f->fs.nat_dip[0] << 24, 1); } else { set_tcb_field(sc, f->tid, W_TCB_RX_FRAG3_LEN_RAW, WORD_MASK, f->fs.nat_dip[3] | f->fs.nat_dip[2] << 8 | f->fs.nat_dip[1] << 16 | f->fs.nat_dip[0] << 24, 1); } } if (sip) { if (f->fs.type) { set_tcb_field(sc, f->tid, W_TCB_RX_FRAG2_PTR_RAW, WORD_MASK, f->fs.nat_sip[15] | f->fs.nat_sip[14] << 8 | f->fs.nat_sip[13] << 16 | f->fs.nat_sip[12] << 24, 1); set_tcb_field(sc, f->tid, W_TCB_RX_FRAG2_PTR_RAW + 1, WORD_MASK, f->fs.nat_sip[11] | f->fs.nat_sip[10] << 8 | f->fs.nat_sip[9] << 16 | f->fs.nat_sip[8] << 24, 1); set_tcb_field(sc, f->tid, W_TCB_RX_FRAG2_PTR_RAW + 2, WORD_MASK, f->fs.nat_sip[7] | f->fs.nat_sip[6] << 8 | f->fs.nat_sip[5] << 16 | f->fs.nat_sip[4] << 24, 1); set_tcb_field(sc, f->tid, W_TCB_RX_FRAG2_PTR_RAW + 3, WORD_MASK, f->fs.nat_sip[3] | f->fs.nat_sip[2] << 8 | f->fs.nat_sip[1] << 16 | f->fs.nat_sip[0] << 24, 1); } else { set_tcb_field(sc, f->tid, W_TCB_RX_FRAG3_START_IDX_OFFSET_RAW, WORD_MASK, f->fs.nat_sip[3] | f->fs.nat_sip[2] << 8 | f->fs.nat_sip[1] << 16 | f->fs.nat_sip[0] << 24, 1); } } set_tcb_field(sc, f->tid, W_TCB_PDU_HDR_LEN, WORD_MASK, (dp ? f->fs.nat_dport : 0) | (sp ? f->fs.nat_sport << 16 : 0), 1); } /* * Returns EINPROGRESS to indicate that at least one TCB update was sent and the * last of the series of updates requested a reply. The reply informs the * driver that the filter is fully setup. 
*/ static int configure_hashfilter_tcb(struct adapter *sc, struct filter_entry *f) { int updated = 0; MPASS(f->tid < sc->tids.ntids); MPASS(f->fs.hash); MPASS(f->pending); MPASS(f->valid == 0); if (f->fs.newdmac) { set_tcb_tflag(sc, f->tid, S_TF_CCTRL_ECE, 1, 1); updated++; } if (f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) { set_tcb_tflag(sc, f->tid, S_TF_CCTRL_RFR, 1, 1); updated++; } if (f->fs.newsmac) { MPASS(f->smt != NULL); set_tcb_tflag(sc, f->tid, S_TF_CCTRL_CWR, 1, 1); set_tcb_field(sc, f->tid, W_TCB_SMAC_SEL, V_TCB_SMAC_SEL(M_TCB_SMAC_SEL), V_TCB_SMAC_SEL(f->smt->idx), 1); updated++; } switch(f->fs.nat_mode) { case NAT_MODE_NONE: break; case NAT_MODE_DIP: set_nat_params(sc, f, true, false, false, false); updated++; break; case NAT_MODE_DIP_DP: set_nat_params(sc, f, true, false, true, false); updated++; break; case NAT_MODE_DIP_DP_SIP: set_nat_params(sc, f, true, true, true, false); updated++; break; case NAT_MODE_DIP_DP_SP: set_nat_params(sc, f, true, false, true, true); updated++; break; case NAT_MODE_SIP_SP: set_nat_params(sc, f, false, true, false, true); updated++; break; case NAT_MODE_DIP_SIP_SP: set_nat_params(sc, f, true, true, false, true); updated++; break; case NAT_MODE_ALL: set_nat_params(sc, f, true, true, true, true); updated++; break; default: MPASS(0); /* should have been validated earlier */ break; } if (f->fs.nat_seq_chk) { set_tcb_field(sc, f->tid, W_TCB_RCV_NXT, V_TCB_RCV_NXT(M_TCB_RCV_NXT), V_TCB_RCV_NXT(f->fs.nat_seq_chk), 1); updated++; } if (is_t5(sc) && f->fs.action == FILTER_DROP) { /* * Migrating = 1, Non-offload = 0 to get a T5 hashfilter to drop. */ set_tcb_field(sc, f->tid, W_TCB_T_FLAGS, V_TF_NON_OFFLOAD(1) | V_TF_MIGRATING(1), V_TF_MIGRATING(1), 1); updated++; } /* * Enable switching after all secondary resources (L2T entry, SMT entry, * etc.) are setup so that any switched packet will use correct * values. */ if (f->fs.action == FILTER_SWITCH) { set_tcb_tflag(sc, f->tid, S_TF_CCTRL_ECN, 1, 1); updated++; } if (f->fs.hitcnts || updated > 0) { set_tcb_field(sc, f->tid, W_TCB_TIMESTAMP, V_TCB_TIMESTAMP(M_TCB_TIMESTAMP) | V_TCB_T_RTT_TS_RECENT_AGE(M_TCB_T_RTT_TS_RECENT_AGE), V_TCB_TIMESTAMP(0ULL) | V_TCB_T_RTT_TS_RECENT_AGE(0ULL), 0); return (EINPROGRESS); } return (0); } diff --git a/sys/dev/cxgbe/t4_ioctl.h b/sys/dev/cxgbe/t4_ioctl.h index 4698986c0a0e..f7c8ee24d596 100644 --- a/sys/dev/cxgbe/t4_ioctl.h +++ b/sys/dev/cxgbe/t4_ioctl.h @@ -1,456 +1,464 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #ifndef __T4_IOCTL_H__ #define __T4_IOCTL_H__ #include #include #include /* * Ioctl commands specific to this driver. */ enum { T4_GETREG = 0x40, /* read register */ T4_SETREG, /* write register */ T4_REGDUMP, /* dump of all registers */ T4_GET_FILTER_MODE, /* get global filter mode */ T4_SET_FILTER_MODE, /* set global filter mode */ T4_GET_FILTER, /* get information about a filter */ T4_SET_FILTER, /* program a filter */ T4_DEL_FILTER, /* delete a filter */ T4_GET_SGE_CONTEXT, /* get SGE context for a queue */ T4_LOAD_FW, /* flash firmware */ T4_GET_MEM, /* read memory */ T4_GET_I2C, /* read from i2c addressible device */ T4_CLEAR_STATS, /* clear a port's MAC statistics */ T4_SET_OFLD_POLICY, /* Set offload policy */ T4_SET_SCHED_CLASS, /* set sched class */ T4_SET_SCHED_QUEUE, /* set queue class */ T4_GET_TRACER, /* get information about a tracer */ T4_SET_TRACER, /* program a tracer */ T4_LOAD_CFG, /* copy a config file to card's flash */ T4_LOAD_BOOT, /* flash boot rom */ T4_LOAD_BOOTCFG, /* flash bootcfg */ T4_CUDBG_DUMP, /* debug dump of chip state */ T4_SET_FILTER_MASK, /* set filter mask (hashfilter mode) */ T4_HOLD_CLIP_ADDR, /* add ref on an IP in the CLIP */ T4_RELEASE_CLIP_ADDR, /* remove ref from an IP in the CLIP */ T4_GET_SGE_CTXT, /* get SGE context for a queue */ }; struct t4_reg { uint32_t addr; uint32_t size; uint64_t val; }; #define T4_REGDUMP_SIZE (160 * 1024) #define T5_REGDUMP_SIZE (332 * 1024) struct t4_regdump { uint32_t version; uint32_t len; /* bytes */ uint32_t *data; }; struct t4_data { uint32_t len; uint8_t *data; }; struct t4_bootrom { uint32_t pf_offset; uint32_t pfidx_addr; uint32_t len; uint8_t *data; }; struct t4_i2c_data { uint8_t port_id; uint8_t dev_addr; uint8_t offset; uint8_t len; uint8_t data[8]; }; /* * A hardware filter is some valid combination of these. */ #define T4_FILTER_IPv4 0x1 /* IPv4 packet */ #define T4_FILTER_IPv6 0x2 /* IPv6 packet */ #define T4_FILTER_IP_SADDR 0x4 /* Source IP address or network */ #define T4_FILTER_IP_DADDR 0x8 /* Destination IP address or network */ #define T4_FILTER_IP_SPORT 0x10 /* Source IP port */ #define T4_FILTER_IP_DPORT 0x20 /* Destination IP port */ #define T4_FILTER_FCoE 0x40 /* Fibre Channel over Ethernet packet */ #define T4_FILTER_PORT 0x80 /* Physical ingress port */ #define T4_FILTER_VNIC 0x100 /* See the IC_* bits towards the end */ #define T4_FILTER_VLAN 0x200 /* VLAN ID */ #define T4_FILTER_IP_TOS 0x400 /* IPv4 TOS/IPv6 Traffic Class */ #define T4_FILTER_IP_PROTO 0x800 /* IP protocol */ #define T4_FILTER_ETH_TYPE 0x1000 /* Ethernet Type */ #define T4_FILTER_MAC_IDX 0x2000 /* MPS MAC address match index */ #define T4_FILTER_MPS_HIT_TYPE 0x4000 /* MPS match type */ #define T4_FILTER_IP_FRAGMENT 0x8000 /* IP fragment */ +#define T4_FILTER_IPSECIDX 0x10000 +#define T4_FILTER_ROCE 0x20000 +#define T4_FILTER_SYNONLY 0x40000 +#define T4_FILTER_TCPFLAGS 0x80000 /* * T4_FILTER_VNIC's real meaning depends on the ingress config. 
*/ #define T4_FILTER_IC_OVLAN 0 /* outer VLAN */ #define T4_FILTER_IC_VNIC 0x80000000 /* VNIC id (PF/VF) */ #define T4_FILTER_IC_ENCAP 0x40000000 /* Filter action */ enum { FILTER_PASS = 0, /* default */ FILTER_DROP, FILTER_SWITCH }; /* 802.1q manipulation on FILTER_SWITCH */ enum { VLAN_NOCHANGE = 0, /* default */ VLAN_REMOVE, VLAN_INSERT, VLAN_REWRITE }; /* MPS match type */ enum { UCAST_EXACT = 0, /* exact unicast match */ UCAST_HASH = 1, /* inexact (hashed) unicast match */ MCAST_EXACT = 2, /* exact multicast match */ MCAST_HASH = 3, /* inexact (hashed) multicast match */ PROMISC = 4, /* no match but port is promiscuous */ HYPPROMISC = 5, /* port is hypervisor-promisuous + not bcast */ BCAST = 6, /* broadcast packet */ }; /* Rx steering */ enum { DST_MODE_QUEUE, /* queue is directly specified by filter */ DST_MODE_RSS_QUEUE, /* filter specifies RSS entry containing queue */ DST_MODE_RSS, /* queue selected by default RSS hash lookup */ DST_MODE_FILT_RSS /* queue selected by hashing in filter-specified RSS subtable */ }; enum { NAT_MODE_NONE = 0, /* No NAT performed */ NAT_MODE_DIP, /* NAT on Dst IP */ NAT_MODE_DIP_DP, /* NAT on Dst IP, Dst Port */ NAT_MODE_DIP_DP_SIP, /* NAT on Dst IP, Dst Port and Src IP */ NAT_MODE_DIP_DP_SP, /* NAT on Dst IP, Dst Port and Src Port */ NAT_MODE_SIP_SP, /* NAT on Src IP and Src Port */ NAT_MODE_DIP_SIP_SP, /* NAT on Dst IP, Src IP and Src Port */ NAT_MODE_ALL /* NAT on entire 4-tuple */ }; struct t4_filter_tuple { /* * These are always available. */ uint8_t sip[16]; /* source IP address (IPv4 in [3:0]) */ uint8_t dip[16]; /* destination IP address (IPv4 in [3:0]) */ uint16_t sport; /* source port */ uint16_t dport; /* destination port */ /* * A combination of these (up to 36 bits) is available. TP_VLAN_PRI_MAP * is used to select the global mode and all filters are limited to the * set of fields allowed by the global mode. */ uint16_t vnic; /* VNIC id (PF/VF) or outer VLAN tag */ uint16_t vlan; /* VLAN tag */ uint16_t ethtype; /* Ethernet type */ uint8_t tos; /* TOS/Traffic Type */ uint8_t proto; /* protocol type */ uint32_t fcoe:1; /* FCoE packet */ uint32_t iport:3; /* ingress port */ uint32_t matchtype:3; /* MPS match type */ uint32_t frag:1; /* fragmentation extension header */ uint32_t macidx:9; /* exact match MAC index */ uint32_t vlan_vld:1; /* VLAN valid */ uint32_t ovlan_vld:1; /* outer VLAN tag valid, value in "vnic" */ uint32_t pfvf_vld:1; /* VNIC id (PF/VF) valid, value in "vnic" */ + uint32_t roce:1; + uint32_t synonly:1; + uint32_t tcpflags:6; + uint32_t ipsecidx:12; }; struct t4_filter_specification { uint32_t hitcnts:1; /* count filter hits in TCB */ uint32_t prio:1; /* filter has priority over active/server */ uint32_t type:1; /* 0 => IPv4, 1 => IPv6 */ uint32_t hash:1; /* 0 => LE TCAM, 1 => Hash */ uint32_t action:2; /* drop, pass, switch */ uint32_t rpttid:1; /* report TID in RSS hash field */ uint32_t dirsteer:1; /* 0 => RSS, 1 => steer to iq */ uint32_t iq:10; /* ingress queue */ uint32_t maskhash:1; /* dirsteer=0: steer to an RSS sub-region */ uint32_t dirsteerhash:1;/* dirsteer=1: 0 => TCB contains RSS hash */ /* 1 => TCB contains IQ ID */ /* * Switch proxy/rewrite fields. An ingress packet which matches a * filter with "switch" set will be looped back out as an egress * packet -- potentially with some Ethernet header rewriting. 
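 *
 * For illustration only, a switch filter that redirects TCP port 80
 * traffic out of port 1 with a rewritten DMAC could be described like
 * this (new_dmac and fd are hypothetical, and the tuple fields used must
 * be enabled in the global filter mode):
 *
 *	struct t4_filter t = { .idx = 0 };
 *	t.fs.action = FILTER_SWITCH;
 *	t.fs.eport = 1;
 *	t.fs.newdmac = 1;
 *	memcpy(t.fs.dmac, new_dmac, ETHER_ADDR_LEN);
 *	t.fs.val.proto = IPPROTO_TCP;	t.fs.mask.proto = 0xff;
 *	t.fs.val.dport = 80;		t.fs.mask.dport = 0xffff;
 *	// then program it with ioctl(fd, CHELSIO_T4_SET_FILTER, &t)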
*/ uint32_t eport:2; /* egress port to switch packet out */ uint32_t newdmac:1; /* rewrite destination MAC address */ uint32_t newsmac:1; /* rewrite source MAC address */ uint32_t swapmac:1; /* swap SMAC/DMAC for loopback packet */ uint32_t newvlan:2; /* rewrite VLAN Tag */ uint32_t nat_mode:3; /* NAT operation mode */ uint32_t nat_flag_chk:1;/* check TCP flags before NAT'ing */ uint32_t nat_seq_chk; /* sequence value to use for NAT check*/ uint8_t dmac[ETHER_ADDR_LEN]; /* new destination MAC address */ uint8_t smac[ETHER_ADDR_LEN]; /* new source MAC address */ uint16_t vlan; /* VLAN Tag to insert */ uint8_t nat_dip[16]; /* destination IP to use after NAT'ing */ uint8_t nat_sip[16]; /* source IP to use after NAT'ing */ uint16_t nat_dport; /* destination port to use after NAT'ing */ uint16_t nat_sport; /* source port to use after NAT'ing */ /* * Filter rule value/mask pairs. */ struct t4_filter_tuple val; struct t4_filter_tuple mask; }; struct t4_filter { uint32_t idx; uint16_t l2tidx; uint16_t smtidx; uint64_t hits; struct t4_filter_specification fs; }; /* Tx Scheduling Class parameters */ struct t4_sched_class_params { int8_t level; /* scheduler hierarchy level */ int8_t mode; /* per-class or per-flow */ int8_t rateunit; /* bit or packet rate */ int8_t ratemode; /* %port relative or kbps absolute */ int8_t channel; /* scheduler channel [0..N] */ int8_t cl; /* scheduler class [0..N] */ int32_t minrate; /* minimum rate */ int32_t maxrate; /* maximum rate */ int16_t weight; /* percent weight */ int16_t pktsize; /* average packet size */ }; /* * Support for "sched-class" command to allow a TX Scheduling Class to be * programmed with various parameters. */ struct t4_sched_params { int8_t subcmd; /* sub-command */ int8_t type; /* packet or flow */ union { struct { /* sub-command SCHED_CLASS_CONFIG */ int8_t minmax; /* minmax enable */ } config; struct t4_sched_class_params params; uint8_t reserved[6 + 8 * 8]; } u; }; enum { SCHED_CLASS_SUBCMD_CONFIG, /* config sub-command */ SCHED_CLASS_SUBCMD_PARAMS, /* params sub-command */ }; enum { SCHED_CLASS_TYPE_PACKET, }; enum { SCHED_CLASS_LEVEL_CL_RL, /* class rate limiter */ SCHED_CLASS_LEVEL_CL_WRR, /* class weighted round robin */ SCHED_CLASS_LEVEL_CH_RL, /* channel rate limiter */ }; enum { SCHED_CLASS_MODE_CLASS, /* per-class scheduling */ SCHED_CLASS_MODE_FLOW, /* per-flow scheduling */ }; enum { SCHED_CLASS_RATEUNIT_BITS, /* bit rate scheduling */ SCHED_CLASS_RATEUNIT_PKTS, /* packet rate scheduling */ }; enum { SCHED_CLASS_RATEMODE_REL, /* percent of port bandwidth */ SCHED_CLASS_RATEMODE_ABS, /* Kb/s */ }; /* * Support for "sched_queue" command to allow one or more NIC TX Queues to be * bound to a TX Scheduling Class. 
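 *
 * A minimal userland sketch (the nexus device node and the class number
 * are assumptions; cxgbetool is the usual consumer of this interface, and
 * error handling is omitted):
 *
 *	struct t4_sched_queue sq = { .port = 0, .queue = -1, .cl = 2 };
 *	int fd = open("/dev/t4nex0", O_RDWR);
 *	if (fd >= 0 && ioctl(fd, CHELSIO_T4_SCHED_QUEUE, &sq) == 0)
 *		;	// every TX queue on port 0 is now bound to class 2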
*/ struct t4_sched_queue { uint8_t port; int8_t queue; /* queue index; -1 => all queues */ int8_t cl; /* class index; -1 => unbind */ }; #define T4_SGE_CONTEXT_SIZE 24 #define T7_SGE_CONTEXT_SIZE 28 enum { SGE_CONTEXT_EGRESS, SGE_CONTEXT_INGRESS, SGE_CONTEXT_FLM, SGE_CONTEXT_CNM }; struct t4_sge_context { uint32_t mem_id; uint32_t cid; uint32_t data[T4_SGE_CONTEXT_SIZE / 4]; }; struct t4_sge_ctxt { uint32_t mem_id; uint32_t cid; uint32_t data[T7_SGE_CONTEXT_SIZE / 4]; }; struct t4_mem_range { uint32_t addr; uint32_t len; uint32_t *data; }; #define T4_TRACE_LEN 112 struct t4_trace_params { uint32_t data[T4_TRACE_LEN / 4]; uint32_t mask[T4_TRACE_LEN / 4]; uint16_t snap_len; uint16_t min_len; uint8_t skip_ofst; uint8_t skip_len; uint8_t invert; uint8_t port; }; struct t4_tracer { uint8_t idx; uint8_t enabled; uint8_t valid; struct t4_trace_params tp; }; struct t4_cudbg_dump { uint8_t wr_flash; uint8_t bitmap[16]; uint32_t len; uint8_t *data; }; enum { OPEN_TYPE_LISTEN = 'L', OPEN_TYPE_ACTIVE = 'A', OPEN_TYPE_PASSIVE = 'P', OPEN_TYPE_DONTCARE = 'D', }; enum { QUEUE_RANDOM = -1, QUEUE_ROUNDROBIN = -2, }; struct offload_settings { int8_t offload; int8_t rx_coalesce; int8_t cong_algo; int8_t sched_class; int8_t tstamp; int8_t sack; int8_t nagle; int8_t ecn; int8_t ddp; int8_t tls; int16_t txq; int16_t rxq; int16_t mss; }; struct offload_rule { char open_type; struct offload_settings settings; struct bpf_program bpf_prog; /* compiled program/filter */ }; /* * An offload policy consists of a set of rules matched in sequence. The * settings of the first rule that matches are applied to that connection. */ struct t4_offload_policy { uint32_t nrules; struct offload_rule *rule; }; /* Address/mask entry in the CLIP. FW_CLIP2_CMD is aware of the mask. */ struct t4_clip_addr { uint8_t addr[16]; uint8_t mask[16]; }; #define CHELSIO_T4_GETREG _IOWR('f', T4_GETREG, struct t4_reg) #define CHELSIO_T4_SETREG _IOW('f', T4_SETREG, struct t4_reg) #define CHELSIO_T4_REGDUMP _IOWR('f', T4_REGDUMP, struct t4_regdump) #define CHELSIO_T4_GET_FILTER_MODE _IOWR('f', T4_GET_FILTER_MODE, uint32_t) #define CHELSIO_T4_SET_FILTER_MODE _IOW('f', T4_SET_FILTER_MODE, uint32_t) #define CHELSIO_T4_GET_FILTER _IOWR('f', T4_GET_FILTER, struct t4_filter) #define CHELSIO_T4_SET_FILTER _IOWR('f', T4_SET_FILTER, struct t4_filter) #define CHELSIO_T4_DEL_FILTER _IOW('f', T4_DEL_FILTER, struct t4_filter) #define CHELSIO_T4_GET_SGE_CONTEXT _IOWR('f', T4_GET_SGE_CONTEXT, \ struct t4_sge_context) #define CHELSIO_T4_LOAD_FW _IOW('f', T4_LOAD_FW, struct t4_data) #define CHELSIO_T4_GET_MEM _IOW('f', T4_GET_MEM, struct t4_mem_range) #define CHELSIO_T4_GET_I2C _IOWR('f', T4_GET_I2C, struct t4_i2c_data) #define CHELSIO_T4_CLEAR_STATS _IOW('f', T4_CLEAR_STATS, uint32_t) #define CHELSIO_T4_SCHED_CLASS _IOW('f', T4_SET_SCHED_CLASS, \ struct t4_sched_params) #define CHELSIO_T4_SCHED_QUEUE _IOW('f', T4_SET_SCHED_QUEUE, \ struct t4_sched_queue) #define CHELSIO_T4_GET_TRACER _IOWR('f', T4_GET_TRACER, struct t4_tracer) #define CHELSIO_T4_SET_TRACER _IOW('f', T4_SET_TRACER, struct t4_tracer) #define CHELSIO_T4_LOAD_CFG _IOW('f', T4_LOAD_CFG, struct t4_data) #define CHELSIO_T4_LOAD_BOOT _IOW('f', T4_LOAD_BOOT, struct t4_bootrom) #define CHELSIO_T4_LOAD_BOOTCFG _IOW('f', T4_LOAD_BOOTCFG, struct t4_data) #define CHELSIO_T4_CUDBG_DUMP _IOWR('f', T4_CUDBG_DUMP, struct t4_cudbg_dump) #define CHELSIO_T4_SET_OFLD_POLICY _IOW('f', T4_SET_OFLD_POLICY, struct t4_offload_policy) #define CHELSIO_T4_SET_FILTER_MASK _IOW('f', T4_SET_FILTER_MASK, uint32_t) #define 
CHELSIO_T4_HOLD_CLIP_ADDR _IOW('f', T4_HOLD_CLIP_ADDR, struct t4_clip_addr) #define CHELSIO_T4_RELEASE_CLIP_ADDR _IOW('f', T4_RELEASE_CLIP_ADDR, struct t4_clip_addr) #define CHELSIO_T4_GET_SGE_CTXT _IOWR('f', T4_GET_SGE_CTXT, struct t4_sge_ctxt) #endif diff --git a/sys/dev/cxgbe/tom/t4_connect.c b/sys/dev/cxgbe/tom/t4_connect.c index fc092eab3223..c236ee060bc2 100644 --- a/sys/dev/cxgbe/tom/t4_connect.c +++ b/sys/dev/cxgbe/tom/t4_connect.c @@ -1,433 +1,450 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_inet.h" #include "opt_inet6.h" #ifdef TCP_OFFLOAD #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "t4_clip.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" /* * Active open succeeded. */ static int do_act_establish(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_establish *cpl = (const void *)(rss + 1); u_int tid = GET_TID(cpl); u_int atid = G_TID_TID(ntohl(cpl->tos_atid)); struct toepcb *toep = lookup_atid(sc, atid); struct inpcb *inp = toep->inp; KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__)); CTR3(KTR_CXGBE, "%s: atid %u, tid %u", __func__, atid, tid); free_atid(sc, atid); CURVNET_SET(toep->vnet); INP_WLOCK(inp); toep->tid = tid; insert_tid(sc, tid, toep, inp->inp_vflag & INP_IPV6 ? 
2 : 1); if (sc->params.tid_qid_sel_mask != 0) { update_tid_qid_sel(toep->vi, &toep->params, tid); toep->ofld_txq = &sc->sge.ofld_txq[toep->params.txq_idx]; toep->ctrlq = &sc->sge.ctrlq[toep->params.ctrlq_idx]; } if (inp->inp_flags & INP_DROPPED) { /* socket closed by the kernel before hw told us it connected */ send_flowc_wr(toep, NULL); send_reset(sc, toep, be32toh(cpl->snd_isn)); goto done; } make_established(toep, be32toh(cpl->snd_isn) - 1, be32toh(cpl->rcv_isn) - 1, cpl->tcp_opt); inp->inp_flowtype = M_HASHTYPE_OPAQUE; inp->inp_flowid = tid; done: INP_WUNLOCK(inp); CURVNET_RESTORE(); return (0); } void act_open_failure_cleanup(struct adapter *sc, struct toepcb *toep, u_int status) { struct inpcb *inp = toep->inp; struct toedev *tod = &toep->td->tod; struct epoch_tracker et; struct tom_data *td = sc->tom_softc; if (toep->tid >= 0) { free_atid(sc, toep->tid); toep->tid = -1; mtx_lock(&td->toep_list_lock); if (toep->flags & TPF_IN_TOEP_LIST) { toep->flags &= ~TPF_IN_TOEP_LIST; TAILQ_REMOVE(&td->toep_list, toep, link); } mtx_unlock(&td->toep_list_lock); } CURVNET_SET(toep->vnet); if (status != EAGAIN) NET_EPOCH_ENTER(et); INP_WLOCK(inp); toe_connect_failed(tod, inp, status); final_cpl_received(toep); /* unlocks inp */ if (status != EAGAIN) NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } /* * Active open failed. */ static int do_act_open_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_act_open_rpl *cpl = (const void *)(rss + 1); u_int atid = G_TID_TID(G_AOPEN_ATID(be32toh(cpl->atid_status))); u_int status = G_AOPEN_STATUS(be32toh(cpl->atid_status)); struct toepcb *toep = lookup_atid(sc, atid); int rc; KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == atid, ("%s: toep tid/atid mismatch", __func__)); CTR3(KTR_CXGBE, "%s: atid %u, status %u ", __func__, atid, status); /* Ignore negative advice */ if (negative_advice(status)) return (0); if (status && act_open_has_tid(status)) release_tid(sc, GET_TID(cpl), toep->ctrlq); rc = act_open_rpl_status_to_errno(status); act_open_failure_cleanup(sc, toep, rc); return (0); } void t4_init_connect_cpl_handlers(void) { t4_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish); t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl, CPL_COOKIE_TOM); } void t4_uninit_connect_cpl_handlers(void) { t4_register_cpl_handler(CPL_ACT_ESTABLISH, NULL); t4_register_shared_cpl_handler(CPL_ACT_OPEN_RPL, NULL, CPL_COOKIE_TOM); } #ifdef KTR #define DONT_OFFLOAD_ACTIVE_OPEN(x) do { \ reason = __LINE__; \ rc = (x); \ goto failed; \ } while (0) #else #define DONT_OFFLOAD_ACTIVE_OPEN(x) do { \ rc = (x); \ goto failed; \ } while (0) #endif static inline int act_open_cpl_size(struct adapter *sc, int isipv6) { int idx; - static const int sz_table[3][2] = { + static const int sz_table[4][2] = { { sizeof (struct cpl_act_open_req), sizeof (struct cpl_act_open_req6) }, { sizeof (struct cpl_t5_act_open_req), sizeof (struct cpl_t5_act_open_req6) }, { sizeof (struct cpl_t6_act_open_req), sizeof (struct cpl_t6_act_open_req6) }, + { + sizeof (struct cpl_t7_act_open_req), + sizeof (struct cpl_t7_act_open_req6) + }, }; MPASS(chip_id(sc) >= CHELSIO_T4); - idx = min(chip_id(sc) - CHELSIO_T4, 2); + idx = min(chip_id(sc) - CHELSIO_T4, 3); return (sz_table[idx][!!isipv6]); } /* * active open (soconnect). 
* * State of affairs on entry: * soisconnecting (so_state |= SS_ISCONNECTING) * tcbinfo not locked (This has changed - used to be WLOCKed) * inp WLOCKed * tp->t_state = TCPS_SYN_SENT * rtalloc1, RT_UNLOCK on rt. */ int t4_connect(struct toedev *tod, struct socket *so, struct nhop_object *nh, struct sockaddr *nam) { struct adapter *sc = tod->tod_softc; struct tom_data *td; struct toepcb *toep = NULL; struct wrqe *wr = NULL; if_t rt_ifp = nh->nh_ifp; struct vi_info *vi; int qid_atid, rc, isipv6; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); #ifdef KTR int reason; #endif struct offload_settings settings; struct epoch_tracker et; uint16_t vid = 0xfff, pcp = 0; + uint64_t ntuple; INP_WLOCK_ASSERT(inp); KASSERT(nam->sa_family == AF_INET || nam->sa_family == AF_INET6, ("%s: dest addr %p has family %u", __func__, nam, nam->sa_family)); if (if_gettype(rt_ifp) == IFT_ETHER) vi = if_getsoftc(rt_ifp); else if (if_gettype(rt_ifp) == IFT_L2VLAN) { if_t ifp = VLAN_TRUNKDEV(rt_ifp); vi = if_getsoftc(ifp); VLAN_TAG(rt_ifp, &vid); VLAN_PCP(rt_ifp, &pcp); } else if (if_gettype(rt_ifp) == IFT_IEEE8023ADLAG) DONT_OFFLOAD_ACTIVE_OPEN(ENOSYS); /* XXX: implement lagg+TOE */ else DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); if (sc->flags & KERN_TLS_ON) DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); rw_rlock(&sc->policy_lock); settings = *lookup_offload_policy(sc, OPEN_TYPE_ACTIVE, NULL, EVL_MAKETAG(vid, pcp, 0), inp); rw_runlock(&sc->policy_lock); if (!settings.offload) DONT_OFFLOAD_ACTIVE_OPEN(EPERM); toep = alloc_toepcb(vi, M_NOWAIT); if (toep == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->tid = alloc_atid(sc, toep); if (toep->tid < 0) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->l2te = t4_l2t_get(vi->pi, rt_ifp, nh->nh_flags & NHF_GATEWAY ? &nh->gw_sa : nam); if (toep->l2te == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); toep->vnet = so->so_vnet; init_conn_params(vi, &settings, &inp->inp_inc, so, NULL, toep->l2te->idx, &toep->params); init_toepcb(vi, toep); isipv6 = nam->sa_family == AF_INET6; wr = alloc_wrqe(act_open_cpl_size(sc, isipv6), toep->ctrlq); if (wr == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); qid_atid = V_TID_QID(toep->ofld_rxq->iq.abs_id) | V_TID_TID(toep->tid) | V_TID_COOKIE(CPL_COOKIE_TOM); + ntuple = select_ntuple(vi, toep->l2te); if (isipv6) { struct cpl_act_open_req6 *cpl = wrtod(wr); struct cpl_t5_act_open_req6 *cpl5 = (void *)cpl; struct cpl_t6_act_open_req6 *cpl6 = (void *)cpl; + struct cpl_t7_act_open_req6 *cpl7 = (void *)cpl; if ((inp->inp_vflag & INP_IPV6) == 0) DONT_OFFLOAD_ACTIVE_OPEN(ENOTSUP); toep->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true); if (toep->ce == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOENT); switch (chip_id(sc)) { case CHELSIO_T4: INIT_TP_WR(cpl, 0); - cpl->params = select_ntuple(vi, toep->l2te); + cpl->params = htobe32((uint32_t)ntuple); break; case CHELSIO_T5: INIT_TP_WR(cpl5, 0); cpl5->iss = htobe32(tp->iss); - cpl5->params = select_ntuple(vi, toep->l2te); + cpl5->params = htobe64(V_FILTER_TUPLE(ntuple)); break; case CHELSIO_T6: - default: INIT_TP_WR(cpl6, 0); cpl6->iss = htobe32(tp->iss); - cpl6->params = select_ntuple(vi, toep->l2te); + cpl6->params = htobe64(V_FILTER_TUPLE(ntuple)); + break; + case CHELSIO_T7: + default: + INIT_TP_WR(cpl7, 0); + cpl7->iss = htobe32(tp->iss); + cpl7->params = htobe64(V_T7_FILTER_TUPLE(ntuple)); break; } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6, qid_atid)); cpl->local_port = inp->inp_lport; cpl->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0]; cpl->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8]; cpl->peer_port 
= inp->inp_fport; cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0]; cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8]; cpl->opt0 = calc_options0(vi, &toep->params); cpl->opt2 = calc_options2(vi, &toep->params); CTR6(KTR_CXGBE, "%s: atid %u, toep %p, inp %p, opt0 %#016lx, opt2 %#08x", __func__, toep->tid, toep, inp, be64toh(cpl->opt0), be32toh(cpl->opt2)); } else { struct cpl_act_open_req *cpl = wrtod(wr); struct cpl_t5_act_open_req *cpl5 = (void *)cpl; struct cpl_t6_act_open_req *cpl6 = (void *)cpl; + struct cpl_t7_act_open_req *cpl7 = (void *)cpl; switch (chip_id(sc)) { case CHELSIO_T4: INIT_TP_WR(cpl, 0); - cpl->params = select_ntuple(vi, toep->l2te); + cpl->params = htobe32((uint32_t)ntuple); break; case CHELSIO_T5: INIT_TP_WR(cpl5, 0); cpl5->iss = htobe32(tp->iss); - cpl5->params = select_ntuple(vi, toep->l2te); + cpl5->params = htobe64(V_FILTER_TUPLE(ntuple)); break; case CHELSIO_T6: - default: INIT_TP_WR(cpl6, 0); cpl6->iss = htobe32(tp->iss); - cpl6->params = select_ntuple(vi, toep->l2te); + cpl6->params = htobe64(V_FILTER_TUPLE(ntuple)); break; + case CHELSIO_T7: + default: + INIT_TP_WR(cpl7, 0); + cpl7->iss = htobe32(tp->iss); + cpl7->params = htobe64(V_T7_FILTER_TUPLE(ntuple)); } OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, qid_atid)); inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port, &cpl->peer_ip, &cpl->peer_port); cpl->opt0 = calc_options0(vi, &toep->params); cpl->opt2 = calc_options2(vi, &toep->params); CTR6(KTR_CXGBE, "%s: atid %u, toep %p, inp %p, opt0 %#016lx, opt2 %#08x", __func__, toep->tid, toep, inp, be64toh(cpl->opt0), be32toh(cpl->opt2)); } offload_socket(so, toep); /* Add the TOE PCB to the active list */ td = toep->td; mtx_lock(&td->toep_list_lock); TAILQ_INSERT_TAIL(&td->toep_list, toep, link); toep->flags |= TPF_IN_TOEP_LIST; mtx_unlock(&td->toep_list_lock); NET_EPOCH_ENTER(et); rc = t4_l2t_send(sc, wr, toep->l2te); NET_EPOCH_EXIT(et); if (rc == 0) { toep->flags |= TPF_CPL_PENDING; return (0); } undo_offload_socket(so); #if defined(KTR) reason = __LINE__; #endif failed: CTR3(KTR_CXGBE, "%s: not offloading (%d), rc %d", __func__, reason, rc); if (wr) free_wrqe(wr); if (toep) { if (toep->tid >= 0) free_atid(sc, toep->tid); if (toep->l2te) t4_l2t_release(toep->l2te); if (toep->ce) t4_release_clip_entry(sc, toep->ce); free_toepcb(toep); } return (rc); } #endif diff --git a/sys/dev/cxgbe/tom/t4_tom.c b/sys/dev/cxgbe/tom/t4_tom.c index 024164ef4c51..82d8f724b382 100644 --- a/sys/dev/cxgbe/tom/t4_tom.c +++ b/sys/dev/cxgbe/tom/t4_tom.c @@ -1,2364 +1,2361 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2012 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" #include "opt_ratelimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define TCPSTATES #include #include #include #include #include #include #ifdef TCP_OFFLOAD #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "common/t4_tcb.h" #include "t4_clip.h" #include "tom/t4_tom_l2t.h" #include "tom/t4_tom.h" #include "tom/t4_tls.h" static struct protosw toe_protosw; static struct protosw toe6_protosw; /* Module ops */ static int t4_tom_mod_load(void); static int t4_tom_mod_unload(void); static int t4_tom_modevent(module_t, int, void *); /* ULD ops and helpers */ static int t4_tom_activate(struct adapter *); static int t4_tom_deactivate(struct adapter *); static int t4_tom_stop(struct adapter *); static int t4_tom_restart(struct adapter *); static struct uld_info tom_uld_info = { .uld_activate = t4_tom_activate, .uld_deactivate = t4_tom_deactivate, .uld_stop = t4_tom_stop, .uld_restart = t4_tom_restart, }; static void release_offload_resources(struct toepcb *); static void done_with_toepcb(struct toepcb *); static int alloc_tid_tabs(struct adapter *); static void free_tid_tabs(struct adapter *); static void free_tom_data(struct adapter *, struct tom_data *); static void reclaim_wr_resources(void *, int); static void cleanup_stranded_tids(void *, int); struct toepcb * alloc_toepcb(struct vi_info *vi, int flags) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct toepcb *toep; int tx_credits, txsd_total, len; /* * The firmware counts tx work request credits in units of 16 bytes * each. Reserve room for an ABORT_REQ so the driver never has to worry * about tx credits if it wants to abort a connection. */ tx_credits = sc->params.ofldq_wr_cred; tx_credits -= howmany(sizeof(struct cpl_abort_req), 16); /* * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte * immediate payload, and firmware counts tx work request credits in * units of 16 byte. Calculate the maximum work requests possible. */ txsd_total = tx_credits / howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16); len = offsetof(struct toepcb, txsd) + txsd_total * sizeof(struct ofld_tx_sdesc); toep = malloc(len, M_CXGBE, M_ZERO | flags); if (toep == NULL) return (NULL); refcount_init(&toep->refcount, 1); toep->td = sc->tom_softc; toep->incarnation = sc->incarnation; toep->vi = vi; toep->tid = -1; toep->tx_total = tx_credits; toep->tx_credits = tx_credits; mbufq_init(&toep->ulp_pduq, INT_MAX); mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX); toep->txsd_total = txsd_total; toep->txsd_avail = txsd_total; toep->txsd_pidx = 0; toep->txsd_cidx = 0; aiotx_init_toep(toep); return (toep); } /* * Initialize a toepcb after its params have been filled out. 
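 *
 * The expected sequence is alloc_toepcb(), then init_conn_params() to fill
 * in toep->params, then init_toepcb(); t4_connect() follows this order for
 * active opens.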
*/ int init_toepcb(struct vi_info *vi, struct toepcb *toep) { struct conn_params *cp = &toep->params; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct tx_cl_rl_params *tc; if (cp->tc_idx >= 0 && cp->tc_idx < sc->params.nsched_cls) { tc = &pi->sched_params->cl_rl[cp->tc_idx]; mtx_lock(&sc->tc_lock); if (tc->state != CS_HW_CONFIGURED) { CH_ERR(vi, "tid %d cannot be bound to traffic class %d " "because it is not configured (its state is %d)\n", toep->tid, cp->tc_idx, tc->state); cp->tc_idx = -1; } else { tc->refcount++; } mtx_unlock(&sc->tc_lock); } toep->ofld_txq = &sc->sge.ofld_txq[cp->txq_idx]; toep->ofld_rxq = &sc->sge.ofld_rxq[cp->rxq_idx]; toep->ctrlq = &sc->sge.ctrlq[cp->ctrlq_idx]; tls_init_toep(toep); MPASS(ulp_mode(toep) != ULP_MODE_TCPDDP); toep->flags |= TPF_INITIALIZED; return (0); } struct toepcb * hold_toepcb(struct toepcb *toep) { refcount_acquire(&toep->refcount); return (toep); } void free_toepcb(struct toepcb *toep) { if (refcount_release(&toep->refcount) == 0) return; KASSERT(!(toep->flags & TPF_ATTACHED), ("%s: attached to an inpcb", __func__)); KASSERT(!(toep->flags & TPF_CPL_PENDING), ("%s: CPL pending", __func__)); if (toep->flags & TPF_INITIALIZED) { if (ulp_mode(toep) == ULP_MODE_TCPDDP) ddp_uninit_toep(toep); tls_uninit_toep(toep); } free(toep, M_CXGBE); } /* * Set up the socket for TCP offload. */ void offload_socket(struct socket *so, struct toepcb *toep) { struct tom_data *td = toep->td; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct sockbuf *sb; INP_WLOCK_ASSERT(inp); /* Update socket */ sb = &so->so_snd; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags |= SB_NOCOALESCE; if (inp->inp_vflag & INP_IPV6) so->so_proto = &toe6_protosw; else so->so_proto = &toe_protosw; SOCKBUF_UNLOCK(sb); /* Update TCP PCB */ tp->tod = &td->tod; tp->t_toe = toep; tp->t_flags |= TF_TOE; /* Install an extra hold on inp */ toep->inp = inp; toep->flags |= TPF_ATTACHED; in_pcbref(inp); } void restore_so_proto(struct socket *so, bool v6) { if (v6) so->so_proto = &tcp6_protosw; else so->so_proto = &tcp_protosw; } /* This is _not_ the normal way to "unoffload" a socket. */ void undo_offload_socket(struct socket *so) { struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); struct toepcb *toep = tp->t_toe; struct sockbuf *sb; INP_WLOCK_ASSERT(inp); sb = &so->so_snd; SOCKBUF_LOCK(sb); sb->sb_flags &= ~SB_NOCOALESCE; SOCKBUF_UNLOCK(sb); sb = &so->so_rcv; SOCKBUF_LOCK(sb); sb->sb_flags &= ~SB_NOCOALESCE; restore_so_proto(so, inp->inp_vflag & INP_IPV6); SOCKBUF_UNLOCK(sb); tp->tod = NULL; tp->t_toe = NULL; tp->t_flags &= ~TF_TOE; toep->inp = NULL; toep->flags &= ~TPF_ATTACHED; if (in_pcbrele_wlocked(inp)) panic("%s: inp freed.", __func__); } static void release_offload_resources(struct toepcb *toep) { struct tom_data *td = toep->td; struct adapter *sc = td_adapter(td); int tid = toep->tid; KASSERT(!(toep->flags & TPF_CPL_PENDING), ("%s: %p has CPL pending.", __func__, toep)); CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)", __func__, toep, tid, toep->l2te, toep->ce); if (toep->l2te) { t4_l2t_release(toep->l2te); toep->l2te = NULL; } if (tid >= 0) { remove_tid(sc, tid, toep->ce ? 
2 : 1); release_tid(sc, tid, toep->ctrlq); toep->tid = -1; mtx_lock(&td->toep_list_lock); if (toep->flags & TPF_IN_TOEP_LIST) { toep->flags &= ~TPF_IN_TOEP_LIST; TAILQ_REMOVE(&td->toep_list, toep, link); } mtx_unlock(&td->toep_list_lock); } if (toep->ce) { t4_release_clip_entry(sc, toep->ce); toep->ce = NULL; } if (toep->params.tc_idx != -1) t4_release_cl_rl(sc, toep->vi->pi->port_id, toep->params.tc_idx); } /* * Both the driver and kernel are done with the toepcb. */ static void done_with_toepcb(struct toepcb *toep) { KASSERT(!(toep->flags & TPF_CPL_PENDING), ("%s: %p has CPL pending.", __func__, toep)); KASSERT(!(toep->flags & TPF_ATTACHED), ("%s: %p is still attached.", __func__, toep)); CTR(KTR_CXGBE, "%s: toep %p (0x%x)", __func__, toep, toep->flags); /* * These queues should have been emptied at approximately the same time * that a normal connection's socket's so_snd would have been purged or * drained. Do _not_ clean up here. */ MPASS(mbufq_empty(&toep->ulp_pduq)); MPASS(mbufq_empty(&toep->ulp_pdu_reclaimq)); #ifdef INVARIANTS if (ulp_mode(toep) == ULP_MODE_TCPDDP) ddp_assert_empty(toep); #endif MPASS(TAILQ_EMPTY(&toep->aiotx_jobq)); MPASS(toep->tid == -1); MPASS(toep->l2te == NULL); MPASS(toep->ce == NULL); MPASS((toep->flags & TPF_IN_TOEP_LIST) == 0); free_toepcb(toep); } /* * The kernel is done with the TCP PCB and this is our opportunity to unhook the * toepcb hanging off of it. If the TOE driver is also done with the toepcb (no * pending CPL) then it is time to release all resources tied to the toepcb. * * Also gets called when an offloaded active open fails and the TOM wants the * kernel to take the TCP PCB back. */ void t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp) { #if defined(KTR) || defined(INVARIANTS) struct inpcb *inp = tptoinpcb(tp); #endif struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(inp); KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); KASSERT(toep->flags & TPF_ATTACHED, ("%s: not attached", __func__)); #ifdef KTR if (tp->t_state == TCPS_SYN_SENT) { CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)", __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); } else { CTR6(KTR_CXGBE, "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)", toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp, inp->inp_flags); } #endif tp->tod = NULL; tp->t_toe = NULL; tp->t_flags &= ~TF_TOE; toep->flags &= ~TPF_ATTACHED; if (!(toep->flags & TPF_CPL_PENDING)) done_with_toepcb(toep); } /* * setsockopt handler. */ static void t4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name) { struct adapter *sc = tod->tod_softc; struct toepcb *toep = tp->t_toe; if (dir == SOPT_GET) return; CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name); switch (name) { case TCP_NODELAY: if (tp->t_state != TCPS_ESTABLISHED) break; toep->params.nagle = tp->t_flags & TF_NODELAY ? 
0 : 1; t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS, V_TF_NAGLE(1), V_TF_NAGLE(toep->params.nagle), 0, 0); break; default: break; } } static inline uint64_t get_tcb_tflags(const uint64_t *tcb) { return ((be64toh(tcb[14]) << 32) | (be64toh(tcb[15]) >> 32)); } static inline uint32_t get_tcb_field(const uint64_t *tcb, u_int word, uint32_t mask, u_int shift) { #define LAST_WORD ((TCB_SIZE / 4) - 1) uint64_t t1, t2; int flit_idx; MPASS(mask != 0); MPASS(word <= LAST_WORD); MPASS(shift < 32); flit_idx = (LAST_WORD - word) / 2; if (word & 0x1) shift += 32; t1 = be64toh(tcb[flit_idx]) >> shift; t2 = 0; if (fls(mask) > 64 - shift) { /* * Will spill over into the next logical flit, which is the flit * before this one. The flit_idx before this one must be valid. */ MPASS(flit_idx > 0); t2 = be64toh(tcb[flit_idx - 1]) << (64 - shift); } return ((t2 | t1) & mask); #undef LAST_WORD } #define GET_TCB_FIELD(tcb, F) \ get_tcb_field(tcb, W_TCB_##F, M_TCB_##F, S_TCB_##F) /* * Issues a CPL_GET_TCB to read the entire TCB for the tid. */ static int send_get_tcb(struct adapter *sc, u_int tid) { struct cpl_get_tcb *cpl; struct wrq_cookie cookie; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); cpl = start_wrq_wr(&sc->sge.ctrlq[0], howmany(sizeof(*cpl), 16), &cookie); if (__predict_false(cpl == NULL)) return (ENOMEM); bzero(cpl, sizeof(*cpl)); INIT_TP_WR(cpl, tid); OPCODE_TID(cpl) = htobe32(MK_OPCODE_TID(CPL_GET_TCB, tid)); cpl->reply_ctrl = htobe16(V_REPLY_CHAN(0) | V_QUEUENO(sc->sge.ofld_rxq[0].iq.cntxt_id)); cpl->cookie = 0xff; commit_wrq_wr(&sc->sge.ctrlq[0], cpl, &cookie); return (0); } static struct tcb_histent * alloc_tcb_histent(struct adapter *sc, u_int tid, int flags) { struct tcb_histent *te; MPASS(flags == M_NOWAIT || flags == M_WAITOK); te = malloc(sizeof(*te), M_CXGBE, M_ZERO | flags); if (te == NULL) return (NULL); mtx_init(&te->te_lock, "TCB entry", NULL, MTX_DEF); callout_init_mtx(&te->te_callout, &te->te_lock, 0); te->te_adapter = sc; te->te_tid = tid; return (te); } static void free_tcb_histent(struct tcb_histent *te) { mtx_destroy(&te->te_lock); free(te, M_CXGBE); } /* * Start tracking the tid in the TCB history. 
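 *
 * Once the entry is installed, do_get_tcb_rpl() below keeps re-arming
 * te_callout (until the connection reaches TCPS_CLOSED), so the TCB is
 * re-read roughly every hz/10 ticks and a fresh sample is appended to
 * te_sample[].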
*/ int add_tid_to_history(struct adapter *sc, u_int tid) { struct tcb_histent *te = NULL; struct tom_data *td = sc->tom_softc; int rc; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); if (td->tcb_history == NULL) return (ENXIO); rw_wlock(&td->tcb_history_lock); if (td->tcb_history[tid] != NULL) { rc = EEXIST; goto done; } te = alloc_tcb_histent(sc, tid, M_NOWAIT); if (te == NULL) { rc = ENOMEM; goto done; } mtx_lock(&te->te_lock); rc = send_get_tcb(sc, tid); if (rc == 0) { te->te_flags |= TE_RPL_PENDING; td->tcb_history[tid] = te; } else { free(te, M_CXGBE); } mtx_unlock(&te->te_lock); done: rw_wunlock(&td->tcb_history_lock); return (rc); } static void remove_tcb_histent(struct tcb_histent *te) { struct adapter *sc = te->te_adapter; struct tom_data *td = sc->tom_softc; rw_assert(&td->tcb_history_lock, RA_WLOCKED); mtx_assert(&te->te_lock, MA_OWNED); MPASS(td->tcb_history[te->te_tid] == te); td->tcb_history[te->te_tid] = NULL; free_tcb_histent(te); rw_wunlock(&td->tcb_history_lock); } static inline struct tcb_histent * lookup_tcb_histent(struct adapter *sc, u_int tid, bool addrem) { struct tcb_histent *te; struct tom_data *td = sc->tom_softc; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); if (td->tcb_history == NULL) return (NULL); if (addrem) rw_wlock(&td->tcb_history_lock); else rw_rlock(&td->tcb_history_lock); te = td->tcb_history[tid]; if (te != NULL) { mtx_lock(&te->te_lock); return (te); /* with both locks held */ } if (addrem) rw_wunlock(&td->tcb_history_lock); else rw_runlock(&td->tcb_history_lock); return (te); } static inline void release_tcb_histent(struct tcb_histent *te) { struct adapter *sc = te->te_adapter; struct tom_data *td = sc->tom_softc; mtx_assert(&te->te_lock, MA_OWNED); mtx_unlock(&te->te_lock); rw_assert(&td->tcb_history_lock, RA_RLOCKED); rw_runlock(&td->tcb_history_lock); } static void request_tcb(void *arg) { struct tcb_histent *te = arg; mtx_assert(&te->te_lock, MA_OWNED); /* Noone else is supposed to update the histent. */ MPASS(!(te->te_flags & TE_RPL_PENDING)); if (send_get_tcb(te->te_adapter, te->te_tid) == 0) te->te_flags |= TE_RPL_PENDING; else callout_schedule(&te->te_callout, hz / 100); } static void update_tcb_histent(struct tcb_histent *te, const uint64_t *tcb) { struct tom_data *td = te->te_adapter->tom_softc; uint64_t tflags = get_tcb_tflags(tcb); uint8_t sample = 0; if (GET_TCB_FIELD(tcb, SND_MAX_RAW) != GET_TCB_FIELD(tcb, SND_UNA_RAW)) { if (GET_TCB_FIELD(tcb, T_RXTSHIFT) != 0) sample |= TS_RTO; if (GET_TCB_FIELD(tcb, T_DUPACKS) != 0) sample |= TS_DUPACKS; if (GET_TCB_FIELD(tcb, T_DUPACKS) >= td->dupack_threshold) sample |= TS_FASTREXMT; } if (GET_TCB_FIELD(tcb, SND_MAX_RAW) != 0) { uint32_t snd_wnd; sample |= TS_SND_BACKLOGGED; /* for whatever reason. */ snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV); if (tflags & V_TF_RECV_SCALE(1)) snd_wnd <<= GET_TCB_FIELD(tcb, RCV_SCALE); if (GET_TCB_FIELD(tcb, SND_CWND) < snd_wnd) sample |= TS_CWND_LIMITED; /* maybe due to CWND */ } if (tflags & V_TF_CCTRL_ECN(1)) { /* * CE marker on incoming IP hdr, echoing ECE back in the TCP * hdr. Indicates congestion somewhere on the way from the peer * to this node. */ if (tflags & V_TF_CCTRL_ECE(1)) sample |= TS_ECN_ECE; /* * ECE seen and CWR sent (or about to be sent). Might indicate * congestion on the way to the peer. This node is reducing its * congestion window in response. 
*/ if (tflags & (V_TF_CCTRL_CWR(1) | V_TF_CCTRL_RFR(1))) sample |= TS_ECN_CWR; } te->te_sample[te->te_pidx] = sample; if (++te->te_pidx == nitems(te->te_sample)) te->te_pidx = 0; memcpy(te->te_tcb, tcb, TCB_SIZE); te->te_flags |= TE_ACTIVE; } static int do_get_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_get_tcb_rpl *cpl = mtod(m, const void *); const uint64_t *tcb = (const uint64_t *)(const void *)(cpl + 1); struct tcb_histent *te; const u_int tid = GET_TID(cpl); bool remove; remove = GET_TCB_FIELD(tcb, T_STATE) == TCPS_CLOSED; te = lookup_tcb_histent(sc, tid, remove); if (te == NULL) { /* Not in the history. Who issued the GET_TCB for this? */ device_printf(sc->dev, "tcb %u: flags 0x%016jx, state %u, " "srtt %u, sscale %u, rscale %u, cookie 0x%x\n", tid, (uintmax_t)get_tcb_tflags(tcb), GET_TCB_FIELD(tcb, T_STATE), GET_TCB_FIELD(tcb, T_SRTT), GET_TCB_FIELD(tcb, SND_SCALE), GET_TCB_FIELD(tcb, RCV_SCALE), cpl->cookie); goto done; } MPASS(te->te_flags & TE_RPL_PENDING); te->te_flags &= ~TE_RPL_PENDING; if (remove) { remove_tcb_histent(te); } else { update_tcb_histent(te, tcb); callout_reset(&te->te_callout, hz / 10, request_tcb, te); release_tcb_histent(te); } done: m_freem(m); return (0); } static void fill_tcp_info_from_tcb(struct adapter *sc, uint64_t *tcb, struct tcp_info *ti) { uint32_t v; ti->tcpi_state = GET_TCB_FIELD(tcb, T_STATE); v = GET_TCB_FIELD(tcb, T_SRTT); ti->tcpi_rtt = tcp_ticks_to_us(sc, v); v = GET_TCB_FIELD(tcb, T_RTTVAR); ti->tcpi_rttvar = tcp_ticks_to_us(sc, v); ti->tcpi_snd_ssthresh = GET_TCB_FIELD(tcb, SND_SSTHRESH); ti->tcpi_snd_cwnd = GET_TCB_FIELD(tcb, SND_CWND); ti->tcpi_rcv_nxt = GET_TCB_FIELD(tcb, RCV_NXT); ti->tcpi_rcv_adv = GET_TCB_FIELD(tcb, RCV_ADV); ti->tcpi_dupacks = GET_TCB_FIELD(tcb, T_DUPACKS); v = GET_TCB_FIELD(tcb, TX_MAX); ti->tcpi_snd_nxt = v - GET_TCB_FIELD(tcb, SND_NXT_RAW); ti->tcpi_snd_una = v - GET_TCB_FIELD(tcb, SND_UNA_RAW); ti->tcpi_snd_max = v - GET_TCB_FIELD(tcb, SND_MAX_RAW); /* Receive window being advertised by us. */ ti->tcpi_rcv_wscale = GET_TCB_FIELD(tcb, SND_SCALE); /* Yes, SND. */ ti->tcpi_rcv_space = GET_TCB_FIELD(tcb, RCV_WND); /* Send window */ ti->tcpi_snd_wscale = GET_TCB_FIELD(tcb, RCV_SCALE); /* Yes, RCV. */ ti->tcpi_snd_wnd = GET_TCB_FIELD(tcb, RCV_ADV); if (get_tcb_tflags(tcb) & V_TF_RECV_SCALE(1)) ti->tcpi_snd_wnd <<= ti->tcpi_snd_wscale; else ti->tcpi_snd_wscale = 0; } static void fill_tcp_info_from_history(struct adapter *sc, struct tcb_histent *te, struct tcp_info *ti) { fill_tcp_info_from_tcb(sc, te->te_tcb, ti); } /* * Reads the TCB for the given tid using a memory window and copies it to 'buf' * in the same format as CPL_GET_TCB_RPL. 
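 *
 * The memory window hands the TCB back in raw ascending-address order, so
 * the loop below reverses the order of its 16-byte chunks to match the
 * layout of a CPL_GET_TCB_RPL payload.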
*/ static void read_tcb_using_memwin(struct adapter *sc, u_int tid, uint64_t *buf) { int i, j, k, rc; uint32_t addr; u_char *tcb, tmp; MPASS(tid >= sc->tids.tid_base); MPASS(tid - sc->tids.tid_base < sc->tids.ntids); addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + tid * TCB_SIZE; rc = read_via_memwin(sc, 2, addr, (uint32_t *)buf, TCB_SIZE); if (rc != 0) return; tcb = (u_char *)buf; for (i = 0, j = TCB_SIZE - 16; i < j; i += 16, j -= 16) { for (k = 0; k < 16; k++) { tmp = tcb[i + k]; tcb[i + k] = tcb[j + k]; tcb[j + k] = tmp; } } } static void fill_tcp_info(struct adapter *sc, u_int tid, struct tcp_info *ti) { uint64_t tcb[TCB_SIZE / sizeof(uint64_t)]; struct tcb_histent *te; ti->tcpi_toe_tid = tid; te = lookup_tcb_histent(sc, tid, false); if (te != NULL) { fill_tcp_info_from_history(sc, te, ti); release_tcb_histent(te); } else { if (!(sc->debug_flags & DF_DISABLE_TCB_CACHE)) { /* XXX: tell firmware to flush TCB cache. */ } read_tcb_using_memwin(sc, tid, tcb); fill_tcp_info_from_tcb(sc, tcb, ti); } } /* * Called by the kernel to allow the TOE driver to "refine" values filled up in * the tcp_info for an offloaded connection. */ static void t4_tcp_info(struct toedev *tod, const struct tcpcb *tp, struct tcp_info *ti) { struct adapter *sc = tod->tod_softc; struct toepcb *toep = tp->t_toe; INP_LOCK_ASSERT(tptoinpcb(tp)); MPASS(ti != NULL); fill_tcp_info(sc, toep->tid, ti); } #ifdef KERN_TLS static int t4_alloc_tls_session(struct toedev *tod, struct tcpcb *tp, struct ktls_session *tls, int direction) { struct toepcb *toep = tp->t_toe; INP_WLOCK_ASSERT(tptoinpcb(tp)); MPASS(tls != NULL); return (tls_alloc_ktls(toep, tls, direction)); } #endif static void send_mss_flowc_wr(struct adapter *sc, struct toepcb *toep) { struct wrq_cookie cookie; struct fw_flowc_wr *flowc; struct ofld_tx_sdesc *txsd; const int flowclen = sizeof(*flowc) + sizeof(struct fw_flowc_mnemval); const int flowclen16 = howmany(flowclen, 16); if (toep->tx_credits < flowclen16 || toep->txsd_avail == 0) { CH_ERR(sc, "%s: tid %u out of tx credits (%d, %d).\n", __func__, toep->tid, toep->tx_credits, toep->txsd_avail); return; } flowc = start_wrq_wr(&toep->ofld_txq->wrq, flowclen16, &cookie); if (__predict_false(flowc == NULL)) { CH_ERR(sc, "ENOMEM in %s for tid %u.\n", __func__, toep->tid); return; } flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | V_FW_FLOWC_WR_NPARAMS(1)); flowc->flowid_len16 = htonl(V_FW_WR_LEN16(flowclen16) | V_FW_WR_FLOWID(toep->tid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[0].val = htobe32(toep->params.emss); txsd = &toep->txsd[toep->txsd_pidx]; _Static_assert(flowclen16 <= MAX_OFLD_TX_SDESC_CREDITS, "MAX_OFLD_TX_SDESC_CREDITS too small"); txsd->tx_credits = flowclen16; txsd->plen = 0; toep->tx_credits -= txsd->tx_credits; if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) toep->txsd_pidx = 0; toep->txsd_avail--; commit_wrq_wr(&toep->ofld_txq->wrq, flowc, &cookie); } static void t4_pmtu_update(struct toedev *tod, struct tcpcb *tp, tcp_seq seq, int mtu) { struct work_request_hdr *wrh; struct ulp_txpkt *ulpmc; int idx, len; struct wrq_cookie cookie; struct inpcb *inp = tptoinpcb(tp); struct toepcb *toep = tp->t_toe; struct adapter *sc = td_adapter(toep->td); unsigned short *mtus = &sc->params.mtus[0]; INP_WLOCK_ASSERT(inp); MPASS(mtu > 0); /* kernel is supposed to provide something usable. */ /* tp->snd_una and snd_max are in host byte order too. 
*/ seq = be32toh(seq); CTR6(KTR_CXGBE, "%s: tid %d, seq 0x%08x, mtu %u, mtu_idx %u (%d)", __func__, toep->tid, seq, mtu, toep->params.mtu_idx, mtus[toep->params.mtu_idx]); if (ulp_mode(toep) == ULP_MODE_NONE && /* XXX: Read TCB otherwise? */ (SEQ_LT(seq, tp->snd_una) || SEQ_GEQ(seq, tp->snd_max))) { CTR5(KTR_CXGBE, "%s: tid %d, seq 0x%08x not in range [0x%08x, 0x%08x).", __func__, toep->tid, seq, tp->snd_una, tp->snd_max); return; } /* Find the best mtu_idx for the suggested MTU. */ for (idx = 0; idx < NMTUS - 1 && mtus[idx + 1] <= mtu; idx++) continue; if (idx >= toep->params.mtu_idx) return; /* Never increase the PMTU (just like the kernel). */ /* * We'll send a compound work request with 2 SET_TCB_FIELDs -- the first * one updates the mtu_idx and the second one triggers a retransmit. */ len = sizeof(*wrh) + 2 * roundup2(LEN__SET_TCB_FIELD_ULP, 16); wrh = start_wrq_wr(toep->ctrlq, howmany(len, 16), &cookie); if (wrh == NULL) { CH_ERR(sc, "failed to change mtu_idx of tid %d (%u -> %u).\n", toep->tid, toep->params.mtu_idx, idx); return; } INIT_ULPTX_WRH(wrh, len, 1, 0); /* atomic */ ulpmc = (struct ulp_txpkt *)(wrh + 1); ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_T_MAXSEG, V_TCB_T_MAXSEG(M_TCB_T_MAXSEG), V_TCB_T_MAXSEG(idx)); ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_TIMESTAMP, V_TCB_TIMESTAMP(0x7FFFFULL << 11), 0); commit_wrq_wr(toep->ctrlq, wrh, &cookie); /* Update the software toepcb and tcpcb. */ toep->params.mtu_idx = idx; tp->t_maxseg = mtus[toep->params.mtu_idx]; if (inp->inp_inc.inc_flags & INC_ISIPV6) tp->t_maxseg -= sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else tp->t_maxseg -= sizeof(struct ip) + sizeof(struct tcphdr); toep->params.emss = tp->t_maxseg; if (tp->t_flags & TF_RCVD_TSTMP) toep->params.emss -= TCPOLEN_TSTAMP_APPA; /* Update the firmware flowc. */ send_mss_flowc_wr(sc, toep); /* Update the MTU in the kernel's hostcache. */ if (sc->tt.update_hc_on_pmtu_change != 0) { struct in_conninfo inc = {0}; inc.inc_fibnum = inp->inp_inc.inc_fibnum; if (inp->inp_inc.inc_flags & INC_ISIPV6) { inc.inc_flags |= INC_ISIPV6; inc.inc6_faddr = inp->inp_inc.inc6_faddr; } else { inc.inc_faddr = inp->inp_inc.inc_faddr; } tcp_hc_updatemtu(&inc, mtu); } CTR6(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u), t_maxseg %u, emss %u", __func__, toep->tid, toep->params.mtu_idx, mtus[toep->params.mtu_idx], tp->t_maxseg, toep->params.emss); } /* * The TOE driver will not receive any more CPLs for the tid associated with the * toepcb; release the hold on the inpcb. 
*/ void final_cpl_received(struct toepcb *toep) { struct inpcb *inp = toep->inp; bool need_wakeup; KASSERT(inp != NULL, ("%s: inp is NULL", __func__)); INP_WLOCK_ASSERT(inp); KASSERT(toep->flags & TPF_CPL_PENDING, ("%s: CPL not pending already?", __func__)); CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)", __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); if (ulp_mode(toep) == ULP_MODE_TCPDDP) release_ddp_resources(toep); toep->inp = NULL; need_wakeup = (toep->flags & TPF_WAITING_FOR_FINAL) != 0; toep->flags &= ~(TPF_CPL_PENDING | TPF_WAITING_FOR_FINAL); mbufq_drain(&toep->ulp_pduq); mbufq_drain(&toep->ulp_pdu_reclaimq); release_offload_resources(toep); if (!(toep->flags & TPF_ATTACHED)) done_with_toepcb(toep); if (!in_pcbrele_wlocked(inp)) INP_WUNLOCK(inp); if (need_wakeup) { struct mtx *lock = mtx_pool_find(mtxpool_sleep, toep); mtx_lock(lock); wakeup(toep); mtx_unlock(lock); } } void insert_tid(struct adapter *sc, int tid, void *ctx, int ntids) { struct tid_info *t = &sc->tids; MPASS(tid >= t->tid_base); MPASS(tid - t->tid_base < t->ntids); t->tid_tab[tid - t->tid_base] = ctx; atomic_add_int(&t->tids_in_use, ntids); } void * lookup_tid(struct adapter *sc, int tid) { struct tid_info *t = &sc->tids; return (t->tid_tab[tid - t->tid_base]); } void update_tid(struct adapter *sc, int tid, void *ctx) { struct tid_info *t = &sc->tids; t->tid_tab[tid - t->tid_base] = ctx; } void remove_tid(struct adapter *sc, int tid, int ntids) { struct tid_info *t = &sc->tids; t->tid_tab[tid - t->tid_base] = NULL; atomic_subtract_int(&t->tids_in_use, ntids); } /* * What mtu_idx to use, given a 4-tuple. Note that both s->mss and tcp_mssopt * have the MSS that we should advertise in our SYN. Advertised MSS doesn't * account for any TCP options so the effective MSS (only payload, no headers or * options) could be different. */ static int find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, struct offload_settings *s) { unsigned short *mtus = &sc->params.mtus[0]; int i, mss, mtu; MPASS(inc != NULL); mss = s->mss > 0 ? s->mss : tcp_mssopt(inc); if (inc->inc_flags & INC_ISIPV6) mtu = mss + sizeof(struct ip6_hdr) + sizeof(struct tcphdr); else mtu = mss + sizeof(struct ip) + sizeof(struct tcphdr); for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mtu; i++) continue; return (i); } /* * Determine the receive window size for a socket. 
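 *
 * The result is sbspace(&so->so_rcv) clamped to the
 * [MIN_RCV_WND, MAX_RCV_WND] range.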
*/ u_long select_rcv_wnd(struct socket *so) { unsigned long wnd; SOCKBUF_LOCK_ASSERT(&so->so_rcv); wnd = sbspace(&so->so_rcv); if (wnd < MIN_RCV_WND) wnd = MIN_RCV_WND; return min(wnd, MAX_RCV_WND); } int select_rcv_wscale(void) { int wscale = 0; unsigned long space = sb_max; if (space > MAX_RCV_WND) space = MAX_RCV_WND; while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space) wscale++; return (wscale); } __be64 calc_options0(struct vi_info *vi, struct conn_params *cp) { uint64_t opt0 = 0; opt0 |= F_TCAM_BYPASS; MPASS(cp->wscale >= 0 && cp->wscale <= M_WND_SCALE); opt0 |= V_WND_SCALE(cp->wscale); MPASS(cp->mtu_idx >= 0 && cp->mtu_idx < NMTUS); opt0 |= V_MSS_IDX(cp->mtu_idx); MPASS(cp->ulp_mode >= 0 && cp->ulp_mode <= M_ULP_MODE); opt0 |= V_ULP_MODE(cp->ulp_mode); MPASS(cp->opt0_bufsize >= 0 && cp->opt0_bufsize <= M_RCV_BUFSIZ); opt0 |= V_RCV_BUFSIZ(cp->opt0_bufsize); MPASS(cp->l2t_idx >= 0 && cp->l2t_idx < vi->adapter->vres.l2t.size); opt0 |= V_L2T_IDX(cp->l2t_idx); opt0 |= V_SMAC_SEL(vi->smt_idx); opt0 |= V_TX_CHAN(vi->pi->tx_chan); MPASS(cp->keepalive == 0 || cp->keepalive == 1); opt0 |= V_KEEP_ALIVE(cp->keepalive); MPASS(cp->nagle == 0 || cp->nagle == 1); opt0 |= V_NAGLE(cp->nagle); return (htobe64(opt0)); } __be32 calc_options2(struct vi_info *vi, struct conn_params *cp) { uint32_t opt2 = 0; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; /* * rx flow control, rx coalesce, congestion control, and tx pace are all * explicitly set by the driver. On T5+ the ISS is also set by the * driver to the value picked by the kernel. */ if (is_t4(sc)) { opt2 |= F_RX_FC_VALID | F_RX_COALESCE_VALID; opt2 |= F_CONG_CNTRL_VALID | F_PACE_VALID; } else { opt2 |= F_T5_OPT_2_VALID; /* all 4 valid */ opt2 |= F_T5_ISS; /* ISS provided in CPL */ } MPASS(cp->sack == 0 || cp->sack == 1); opt2 |= V_SACK_EN(cp->sack); MPASS(cp->tstamp == 0 || cp->tstamp == 1); opt2 |= V_TSTAMPS_EN(cp->tstamp); if (cp->wscale > 0) opt2 |= F_WND_SCALE_EN; MPASS(cp->ecn == 0 || cp->ecn == 1); opt2 |= V_CCTRL_ECN(cp->ecn); opt2 |= V_TX_QUEUE(TX_MODQ(pi->tx_chan)); opt2 |= V_PACE(0); opt2 |= F_RSS_QUEUE_VALID; opt2 |= V_RSS_QUEUE(sc->sge.ofld_rxq[cp->rxq_idx].iq.abs_id); if (chip_id(sc) <= CHELSIO_T6) { MPASS(pi->rx_chan == 0 || pi->rx_chan == 1); opt2 |= V_RX_CHANNEL(pi->rx_chan); } MPASS(cp->cong_algo >= 0 && cp->cong_algo <= M_CONG_CNTRL); opt2 |= V_CONG_CNTRL(cp->cong_algo); MPASS(cp->rx_coalesce == 0 || cp->rx_coalesce == 1); if (cp->rx_coalesce == 1) opt2 |= V_RX_COALESCE(M_RX_COALESCE); opt2 |= V_RX_FC_DDP(0) | V_RX_FC_DISABLE(0); MPASS(cp->ulp_mode != ULP_MODE_TCPDDP); return (htobe32(opt2)); } uint64_t select_ntuple(struct vi_info *vi, struct l2t_entry *e) { struct adapter *sc = vi->adapter; struct tp_params *tp = &sc->params.tp; uint64_t ntuple = 0; /* * Initialize each of the fields which we care about which are present * in the Compressed Filter Tuple. 
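 *
 * Note that the tuple is returned in host byte order; callers byte-swap it
 * and, on T5 and later, position it with V_FILTER_TUPLE() or
 * V_T7_FILTER_TUPLE() as appropriate for the chip (see t4_connect()).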
*/ if (tp->vlan_shift >= 0 && EVL_VLANOFTAG(e->vlan) != CPL_L2T_VLAN_NONE) ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift; if (tp->port_shift >= 0) ntuple |= (uint64_t)e->hw_port << tp->port_shift; if (tp->protocol_shift >= 0) ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift; if (tp->vnic_shift >= 0 && tp->vnic_mode == FW_VNIC_MODE_PF_VF) { ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vi->vin) | V_FT_VNID_ID_PF(sc->pf) | V_FT_VNID_ID_VLD(vi->vfvld)) << tp->vnic_shift; } - if (is_t4(sc)) - return (htobe32((uint32_t)ntuple)); - else - return (htobe64(V_FILTER_TUPLE(ntuple))); + return (ntuple); } /* * Initialize various connection parameters. */ void init_conn_params(struct vi_info *vi , struct offload_settings *s, struct in_conninfo *inc, struct socket *so, const struct tcp_options *tcpopt, int16_t l2t_idx, struct conn_params *cp) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct tom_tunables *tt = &sc->tt; struct inpcb *inp = sotoinpcb(so); struct tcpcb *tp = intotcpcb(inp); u_long wnd; u_int q_idx; MPASS(s->offload != 0); /* Congestion control algorithm */ if (s->cong_algo >= 0) cp->cong_algo = s->cong_algo & M_CONG_CNTRL; else if (sc->tt.cong_algorithm >= 0) cp->cong_algo = tt->cong_algorithm & M_CONG_CNTRL; else { struct cc_algo *cc = CC_ALGO(tp); if (strcasecmp(cc->name, "reno") == 0) cp->cong_algo = CONG_ALG_RENO; else if (strcasecmp(cc->name, "tahoe") == 0) cp->cong_algo = CONG_ALG_TAHOE; if (strcasecmp(cc->name, "newreno") == 0) cp->cong_algo = CONG_ALG_NEWRENO; if (strcasecmp(cc->name, "highspeed") == 0) cp->cong_algo = CONG_ALG_HIGHSPEED; else { /* * Use newreno in case the algorithm selected by the * host stack is not supported by the hardware. */ cp->cong_algo = CONG_ALG_NEWRENO; } } /* Tx traffic scheduling class. */ if (s->sched_class >= 0 && s->sched_class < sc->params.nsched_cls) cp->tc_idx = s->sched_class; else cp->tc_idx = -1; /* Nagle's algorithm. */ if (s->nagle >= 0) cp->nagle = s->nagle > 0 ? 1 : 0; else cp->nagle = tp->t_flags & TF_NODELAY ? 0 : 1; /* TCP Keepalive. */ if (V_tcp_always_keepalive || so_options_get(so) & SO_KEEPALIVE) cp->keepalive = 1; else cp->keepalive = 0; /* Optimization that's specific to T5 @ 40G. */ if (tt->tx_align >= 0) cp->tx_align = tt->tx_align > 0 ? 1 : 0; else if (chip_id(sc) == CHELSIO_T5 && (port_top_speed(pi) > 10 || sc->params.nports > 2)) cp->tx_align = 1; else cp->tx_align = 0; /* ULP mode. */ cp->ulp_mode = ULP_MODE_NONE; /* Rx coalescing. */ if (s->rx_coalesce >= 0) cp->rx_coalesce = s->rx_coalesce > 0 ? 1 : 0; else if (tt->rx_coalesce >= 0) cp->rx_coalesce = tt->rx_coalesce > 0 ? 1 : 0; else cp->rx_coalesce = 1; /* default */ /* * Index in the PMTU table. This controls the MSS that we announce in * our SYN initially, but after ESTABLISHED it controls the MSS that we * use to send data. */ cp->mtu_idx = find_best_mtu_idx(sc, inc, s); /* Control queue. */ cp->ctrlq_idx = vi->pi->port_id; /* Tx queue for this connection. */ if (s->txq == QUEUE_RANDOM) q_idx = arc4random(); else if (s->txq == QUEUE_ROUNDROBIN) q_idx = atomic_fetchadd_int(&vi->txq_rr, 1); else q_idx = s->txq; cp->txq_idx = vi->first_ofld_txq + q_idx % vi->nofldtxq; /* Rx queue for this connection. 
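 * Chosen the same way as the tx queue above: a random or round-robin index
 * (or the exact queue requested by the COP rule) folded into this VI's range
 * of offload rx queues.  E.g. with nofldrxq = 8 and a round-robin counter of
 * 21 the connection would land on first_ofld_rxq + 5 (21 % 8); illustrative
 * numbers only.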
*/ if (s->rxq == QUEUE_RANDOM) q_idx = arc4random(); else if (s->rxq == QUEUE_ROUNDROBIN) q_idx = atomic_fetchadd_int(&vi->rxq_rr, 1); else q_idx = s->rxq; cp->rxq_idx = vi->first_ofld_rxq + q_idx % vi->nofldrxq; if (SOLISTENING(so)) { /* Passive open */ MPASS(tcpopt != NULL); /* TCP timestamp option */ if (tcpopt->tstamp && (s->tstamp > 0 || (s->tstamp < 0 && V_tcp_do_rfc1323))) cp->tstamp = 1; else cp->tstamp = 0; /* SACK */ if (tcpopt->sack && (s->sack > 0 || (s->sack < 0 && V_tcp_do_sack))) cp->sack = 1; else cp->sack = 0; /* Receive window scaling. */ if (tcpopt->wsf > 0 && tcpopt->wsf < 15 && V_tcp_do_rfc1323) cp->wscale = select_rcv_wscale(); else cp->wscale = 0; /* ECN */ if (tcpopt->ecn && /* XXX: review. */ (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn))) cp->ecn = 1; else cp->ecn = 0; wnd = max(so->sol_sbrcv_hiwat, MIN_RCV_WND); cp->opt0_bufsize = min(wnd >> 10, M_RCV_BUFSIZ); if (tt->sndbuf > 0) cp->sndbuf = tt->sndbuf; else if (so->sol_sbsnd_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) cp->sndbuf = 256 * 1024; else cp->sndbuf = so->sol_sbsnd_hiwat; } else { /* Active open */ /* TCP timestamp option */ if (s->tstamp > 0 || (s->tstamp < 0 && (tp->t_flags & TF_REQ_TSTMP))) cp->tstamp = 1; else cp->tstamp = 0; /* SACK */ if (s->sack > 0 || (s->sack < 0 && (tp->t_flags & TF_SACK_PERMIT))) cp->sack = 1; else cp->sack = 0; /* Receive window scaling */ if (tp->t_flags & TF_REQ_SCALE) cp->wscale = select_rcv_wscale(); else cp->wscale = 0; /* ECN */ if (s->ecn > 0 || (s->ecn < 0 && V_tcp_do_ecn == 1)) cp->ecn = 1; else cp->ecn = 0; SOCKBUF_LOCK(&so->so_rcv); wnd = max(select_rcv_wnd(so), MIN_RCV_WND); SOCKBUF_UNLOCK(&so->so_rcv); cp->opt0_bufsize = min(wnd >> 10, M_RCV_BUFSIZ); if (tt->sndbuf > 0) cp->sndbuf = tt->sndbuf; else { SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) cp->sndbuf = 256 * 1024; else cp->sndbuf = so->so_snd.sb_hiwat; SOCKBUF_UNLOCK(&so->so_snd); } } cp->l2t_idx = l2t_idx; /* This will be initialized on ESTABLISHED. 
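 * The effective MSS depends on the final MTU and on the TCP options actually
 * negotiated, neither of which is known until then.  As a rough illustration
 * only: a 1500 byte IPv4 path allows a 1460 byte MSS, and another 12 bytes
 * go to the timestamp option when it is in use, leaving about 1448 bytes of
 * payload per segment.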
*/ cp->emss = 0; } void update_tid_qid_sel(struct vi_info *vi, struct conn_params *cp, int tid) { struct adapter *sc = vi->adapter; const int mask = sc->params.tid_qid_sel_mask; struct sge_ofld_txq *ofld_txq = &sc->sge.ofld_txq[cp->txq_idx]; uint32_t ngroup; int g, nqpg; cp->ctrlq_idx = ofld_txq_group(tid, mask); CTR(KTR_CXGBE, "tid %u is on core %u", tid, cp->ctrlq_idx); if ((ofld_txq->wrq.eq.cntxt_id & mask) == (tid & mask)) return; ngroup = 1 << bitcount32(mask); MPASS(vi->nofldtxq % ngroup == 0); g = ofld_txq_group(tid, mask); nqpg = vi->nofldtxq / ngroup; cp->txq_idx = vi->first_ofld_txq + g * nqpg + arc4random() % nqpg; #ifdef INVARIANTS MPASS(cp->txq_idx < vi->first_ofld_txq + vi->nofldtxq); ofld_txq = &sc->sge.ofld_txq[cp->txq_idx]; MPASS((ofld_txq->wrq.eq.cntxt_id & mask) == (tid & mask)); #endif } int negative_advice(int status) { return (status == CPL_ERR_RTX_NEG_ADVICE || status == CPL_ERR_PERSIST_NEG_ADVICE || status == CPL_ERR_KEEPALV_NEG_ADVICE); } static int alloc_tid_tab(struct adapter *sc) { struct tid_info *t = &sc->tids; MPASS(t->ntids > 0); MPASS(t->tid_tab == NULL); t->tid_tab = malloc(t->ntids * sizeof(*t->tid_tab), M_CXGBE, M_ZERO | M_NOWAIT); if (t->tid_tab == NULL) return (ENOMEM); atomic_store_rel_int(&t->tids_in_use, 0); return (0); } static void free_tid_tab(struct adapter *sc) { struct tid_info *t = &sc->tids; KASSERT(t->tids_in_use == 0, ("%s: %d tids still in use.", __func__, t->tids_in_use)); free(t->tid_tab, M_CXGBE); t->tid_tab = NULL; } static void free_tid_tabs(struct adapter *sc) { free_tid_tab(sc); free_stid_tab(sc); } static int alloc_tid_tabs(struct adapter *sc) { int rc; rc = alloc_tid_tab(sc); if (rc != 0) goto failed; rc = alloc_stid_tab(sc); if (rc != 0) goto failed; return (0); failed: free_tid_tabs(sc); return (rc); } static inline void alloc_tcb_history(struct adapter *sc, struct tom_data *td) { if (sc->tids.ntids == 0 || sc->tids.ntids > 1024) return; rw_init(&td->tcb_history_lock, "TCB history"); td->tcb_history = malloc(sc->tids.ntids * sizeof(*td->tcb_history), M_CXGBE, M_ZERO | M_NOWAIT); td->dupack_threshold = G_DUPACKTHRESH(t4_read_reg(sc, A_TP_PARA_REG0)); } static inline void free_tcb_history(struct adapter *sc, struct tom_data *td) { #ifdef INVARIANTS int i; if (td->tcb_history != NULL) { for (i = 0; i < sc->tids.ntids; i++) { MPASS(td->tcb_history[i] == NULL); } } #endif free(td->tcb_history, M_CXGBE); if (rw_initialized(&td->tcb_history_lock)) rw_destroy(&td->tcb_history_lock); } static void free_tom_data(struct adapter *sc, struct tom_data *td) { ASSERT_SYNCHRONIZED_OP(sc); KASSERT(TAILQ_EMPTY(&td->toep_list), ("%s: TOE PCB list is not empty.", __func__)); KASSERT(td->lctx_count == 0, ("%s: lctx hash table is not empty.", __func__)); t4_free_ppod_region(&td->pr); if (td->listen_mask != 0) hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask); if (mtx_initialized(&td->unsent_wr_lock)) mtx_destroy(&td->unsent_wr_lock); if (mtx_initialized(&td->lctx_hash_lock)) mtx_destroy(&td->lctx_hash_lock); if (mtx_initialized(&td->toep_list_lock)) mtx_destroy(&td->toep_list_lock); free_tcb_history(sc, td); free_tid_tabs(sc); free(td, M_CXGBE); } static char * prepare_pkt(int open_type, uint16_t vtag, struct inpcb *inp, int *pktlen, int *buflen) { char *pkt; struct tcphdr *th; int ipv6, len; const int maxlen = max(sizeof(struct ether_header), sizeof(struct ether_vlan_header)) + max(sizeof(struct ip), sizeof(struct ip6_hdr)) + sizeof(struct tcphdr); MPASS(open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN); pkt = malloc(maxlen, 
M_CXGBE, M_ZERO | M_NOWAIT); if (pkt == NULL) return (NULL); ipv6 = inp->inp_vflag & INP_IPV6; len = 0; if (EVL_VLANOFTAG(vtag) == 0xfff) { struct ether_header *eh = (void *)pkt; if (ipv6) eh->ether_type = htons(ETHERTYPE_IPV6); else eh->ether_type = htons(ETHERTYPE_IP); len += sizeof(*eh); } else { struct ether_vlan_header *evh = (void *)pkt; evh->evl_encap_proto = htons(ETHERTYPE_VLAN); evh->evl_tag = htons(vtag); if (ipv6) evh->evl_proto = htons(ETHERTYPE_IPV6); else evh->evl_proto = htons(ETHERTYPE_IP); len += sizeof(*evh); } if (ipv6) { struct ip6_hdr *ip6 = (void *)&pkt[len]; ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_nxt = IPPROTO_TCP; if (open_type == OPEN_TYPE_ACTIVE) { ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; } else if (open_type == OPEN_TYPE_LISTEN) { ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = ip6->ip6_src; } len += sizeof(*ip6); } else { struct ip *ip = (void *)&pkt[len]; ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = inp->inp_ip_tos; ip->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr)); ip->ip_ttl = inp->inp_ip_ttl; ip->ip_p = IPPROTO_TCP; if (open_type == OPEN_TYPE_ACTIVE) { ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; } else if (open_type == OPEN_TYPE_LISTEN) { ip->ip_src = inp->inp_laddr; ip->ip_dst = ip->ip_src; } len += sizeof(*ip); } th = (void *)&pkt[len]; if (open_type == OPEN_TYPE_ACTIVE) { th->th_sport = inp->inp_lport; /* network byte order already */ th->th_dport = inp->inp_fport; /* ditto */ } else if (open_type == OPEN_TYPE_LISTEN) { th->th_sport = inp->inp_lport; /* network byte order already */ th->th_dport = th->th_sport; } len += sizeof(*th); *pktlen = *buflen = len; return (pkt); } const struct offload_settings * lookup_offload_policy(struct adapter *sc, int open_type, struct mbuf *m, uint16_t vtag, struct inpcb *inp) { const struct t4_offload_policy *op; char *pkt; struct offload_rule *r; int i, matched, pktlen, buflen; static const struct offload_settings allow_offloading_settings = { .offload = 1, .rx_coalesce = -1, .cong_algo = -1, .sched_class = -1, .tstamp = -1, .sack = -1, .nagle = -1, .ecn = -1, .ddp = -1, .tls = -1, .txq = QUEUE_RANDOM, .rxq = QUEUE_RANDOM, .mss = -1, }; static const struct offload_settings disallow_offloading_settings = { .offload = 0, /* rest is irrelevant when offload is off. */ }; rw_assert(&sc->policy_lock, RA_LOCKED); /* * If there's no Connection Offloading Policy attached to the device * then we need to return a default static policy. If * "cop_managed_offloading" is true, then we need to disallow * offloading until a COP is attached to the device. Otherwise we * allow offloading ...
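 * When a COP is present the lookup below synthesizes a small Ethernet/IP/TCP
 * header image for active and listening opens (or uses the packet that
 * arrived with CPL_PASS_ACCEPT_REQ for passive opens), runs it through each
 * applicable rule's BPF program in order, and uses the settings of the first
 * rule that matches.  If nothing matches, offloading is disallowed.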
*/ op = sc->policy; if (op == NULL) { if (sc->tt.cop_managed_offloading) return (&disallow_offloading_settings); else return (&allow_offloading_settings); } switch (open_type) { case OPEN_TYPE_ACTIVE: case OPEN_TYPE_LISTEN: pkt = prepare_pkt(open_type, vtag, inp, &pktlen, &buflen); break; case OPEN_TYPE_PASSIVE: MPASS(m != NULL); pkt = mtod(m, char *); MPASS(*pkt == CPL_PASS_ACCEPT_REQ); pkt += sizeof(struct cpl_pass_accept_req); pktlen = m->m_pkthdr.len - sizeof(struct cpl_pass_accept_req); buflen = m->m_len - sizeof(struct cpl_pass_accept_req); break; default: MPASS(0); return (&disallow_offloading_settings); } if (pkt == NULL || pktlen == 0 || buflen == 0) return (&disallow_offloading_settings); matched = 0; r = &op->rule[0]; for (i = 0; i < op->nrules; i++, r++) { if (r->open_type != open_type && r->open_type != OPEN_TYPE_DONTCARE) { continue; } matched = bpf_filter(r->bpf_prog.bf_insns, pkt, pktlen, buflen); if (matched) break; } if (open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN) free(pkt, M_CXGBE); return (matched ? &r->settings : &disallow_offloading_settings); } static void reclaim_wr_resources(void *arg, int count) { struct tom_data *td = arg; STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list); struct cpl_act_open_req *cpl; u_int opcode, atid, tid; struct wrqe *wr; struct adapter *sc = td_adapter(td); mtx_lock(&td->unsent_wr_lock); STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe); mtx_unlock(&td->unsent_wr_lock); while ((wr = STAILQ_FIRST(&twr_list)) != NULL) { STAILQ_REMOVE_HEAD(&twr_list, link); cpl = wrtod(wr); opcode = GET_OPCODE(cpl); switch (opcode) { case CPL_ACT_OPEN_REQ: case CPL_ACT_OPEN_REQ6: atid = G_TID_TID(be32toh(OPCODE_TID(cpl))); CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid); act_open_failure_cleanup(sc, lookup_atid(sc, atid), EHOSTUNREACH); free(wr, M_CXGBE); break; case CPL_PASS_ACCEPT_RPL: tid = GET_TID(cpl); CTR2(KTR_CXGBE, "%s: tid %u ", __func__, tid); synack_failure_cleanup(sc, lookup_tid(sc, tid)); free(wr, M_CXGBE); break; default: log(LOG_ERR, "%s: leaked work request %p, wr_len %d, " "opcode %x\n", __func__, wr, wr->wr_len, opcode); /* WR not freed here; go look at it with a debugger. */ } } } /* * Based on do_abort_req. We treat an abrupt hardware stop as a connection * abort from the hardware. */ static void live_tid_failure_cleanup(struct adapter *sc, struct toepcb *toep, u_int status) { struct inpcb *inp; struct tcpcb *tp; struct epoch_tracker et; MPASS(!(toep->flags & TPF_SYNQE)); inp = toep->inp; CURVNET_SET(toep->vnet); NET_EPOCH_ENTER(et); /* for tcp_close */ INP_WLOCK(inp); tp = intotcpcb(inp); toep->flags |= TPF_ABORT_SHUTDOWN; if ((inp->inp_flags & INP_DROPPED) == 0) { struct socket *so = inp->inp_socket; if (so != NULL) so_error_set(so, status); tp = tcp_close(tp); if (tp == NULL) INP_WLOCK(inp); /* re-acquire */ } final_cpl_received(toep); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } static void cleanup_stranded_tids(void *arg, int count) { TAILQ_HEAD(, toepcb) tlist = TAILQ_HEAD_INITIALIZER(tlist); TAILQ_HEAD(, synq_entry) slist = TAILQ_HEAD_INITIALIZER(slist); struct tom_data *td = arg; struct adapter *sc = td_adapter(td); struct toepcb *toep; struct synq_entry *synqe; /* Clean up synq entries. 
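 * These are embryonic connections that were still in the syncache when the
 * adapter stopped; their tids were already pulled out of the lookup table,
 * so all that remains is to fail each synq entry.  The passes that follow do
 * the equivalent for in-flight active opens and for live connections.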
*/ mtx_lock(&td->toep_list_lock); TAILQ_SWAP(&td->stranded_synqe, &slist, synq_entry, link); mtx_unlock(&td->toep_list_lock); while ((synqe = TAILQ_FIRST(&slist)) != NULL) { TAILQ_REMOVE(&slist, synqe, link); MPASS(synqe->tid >= 0); /* stale, was kept around for debug */ synqe->tid = -1; synack_failure_cleanup(sc, synqe); } /* Clean up in-flight active opens. */ mtx_lock(&td->toep_list_lock); TAILQ_SWAP(&td->stranded_atids, &tlist, toepcb, link); mtx_unlock(&td->toep_list_lock); while ((toep = TAILQ_FIRST(&tlist)) != NULL) { TAILQ_REMOVE(&tlist, toep, link); MPASS(toep->tid >= 0); /* stale, was kept around for debug */ toep->tid = -1; act_open_failure_cleanup(sc, toep, EHOSTUNREACH); } /* Clean up live connections. */ mtx_lock(&td->toep_list_lock); TAILQ_SWAP(&td->stranded_tids, &tlist, toepcb, link); mtx_unlock(&td->toep_list_lock); while ((toep = TAILQ_FIRST(&tlist)) != NULL) { TAILQ_REMOVE(&tlist, toep, link); MPASS(toep->tid >= 0); /* stale, was kept around for debug */ toep->tid = -1; live_tid_failure_cleanup(sc, toep, ECONNABORTED); } } /* * Ground control to Major TOM * Commencing countdown, engines on */ static int t4_tom_activate(struct adapter *sc) { struct tom_data *td; struct toedev *tod; struct vi_info *vi; int i, rc, v; ASSERT_SYNCHRONIZED_OP(sc); /* per-adapter softc for TOM */ td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT); if (td == NULL) return (ENOMEM); /* List of TOE PCBs and associated lock */ mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF); TAILQ_INIT(&td->toep_list); TAILQ_INIT(&td->synqe_list); TAILQ_INIT(&td->stranded_atids); TAILQ_INIT(&td->stranded_tids); TASK_INIT(&td->cleanup_stranded_tids, 0, cleanup_stranded_tids, td); /* Listen context */ mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF); td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE, &td->listen_mask, HASH_NOWAIT); /* List of WRs for which L2 resolution failed */ mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF); STAILQ_INIT(&td->unsent_wr_list); TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td); /* TID tables */ rc = alloc_tid_tabs(sc); if (rc != 0) goto done; rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp, t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods"); if (rc != 0) goto done; t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK, V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask); alloc_tcb_history(sc, td); /* toedev ops */ tod = &td->tod; init_toedev(tod); tod->tod_softc = sc; tod->tod_connect = t4_connect; tod->tod_listen_start = t4_listen_start; tod->tod_listen_stop = t4_listen_stop; tod->tod_rcvd = t4_rcvd; tod->tod_output = t4_tod_output; tod->tod_send_rst = t4_send_rst; tod->tod_send_fin = t4_send_fin; tod->tod_pcb_detach = t4_pcb_detach; tod->tod_l2_update = t4_l2_update; tod->tod_syncache_added = t4_syncache_added; tod->tod_syncache_removed = t4_syncache_removed; tod->tod_syncache_respond = t4_syncache_respond; tod->tod_offload_socket = t4_offload_socket; tod->tod_ctloutput = t4_ctloutput; tod->tod_tcp_info = t4_tcp_info; #ifdef KERN_TLS tod->tod_alloc_tls_session = t4_alloc_tls_session; #endif tod->tod_pmtu_update = t4_pmtu_update; for_each_port(sc, i) { for_each_vi(sc->port[i], v, vi) { SETTOEDEV(vi->ifp, &td->tod); } } sc->tom_softc = td; register_toedev(sc->tom_softc); done: if (rc != 0) free_tom_data(sc, td); return (rc); } static int t4_tom_deactivate(struct adapter *sc) { int rc = 0, i, v; struct tom_data *td = sc->tom_softc; struct vi_info *vi; ASSERT_SYNCHRONIZED_OP(sc); if (td == NULL) return (0); /* XXX. 
KASSERT? */ if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI)) return (EBUSY); /* both iWARP and iSCSI rely on the TOE. */ if (sc->offload_map != 0) { for_each_port(sc, i) { for_each_vi(sc->port[i], v, vi) { toe_capability(vi, false); if_setcapenablebit(vi->ifp, 0, IFCAP_TOE); SETTOEDEV(vi->ifp, NULL); } } MPASS(sc->offload_map == 0); } mtx_lock(&td->toep_list_lock); if (!TAILQ_EMPTY(&td->toep_list)) rc = EBUSY; MPASS(TAILQ_EMPTY(&td->synqe_list)); MPASS(TAILQ_EMPTY(&td->stranded_tids)); mtx_unlock(&td->toep_list_lock); mtx_lock(&td->lctx_hash_lock); if (td->lctx_count > 0) rc = EBUSY; mtx_unlock(&td->lctx_hash_lock); taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources); taskqueue_drain(taskqueue_thread, &td->cleanup_stranded_tids); mtx_lock(&td->unsent_wr_lock); if (!STAILQ_EMPTY(&td->unsent_wr_list)) rc = EBUSY; mtx_unlock(&td->unsent_wr_lock); if (rc == 0) { unregister_toedev(sc->tom_softc); free_tom_data(sc, td); sc->tom_softc = NULL; } return (rc); } static void stop_atids(struct adapter *sc) { struct tom_data *td = sc->tom_softc; struct tid_info *t = &sc->tids; struct toepcb *toep; int atid; /* * Hashfilters and T6-KTLS are the only other users of atids but they're * both mutually exclusive with TOE. That means t4_tom owns all the * atids in the table. */ MPASS(!is_hashfilter(sc)); if (is_t6(sc)) MPASS(!(sc->flags & KERN_TLS_ON)); /* New atids are not being allocated. */ #ifdef INVARIANTS mtx_lock(&t->atid_lock); MPASS(t->atid_alloc_stopped == true); mtx_unlock(&t->atid_lock); #endif /* * In-use atids fall in one of these two categories: * a) Those waiting for L2 resolution before being submitted to * hardware. * b) Those that have been submitted to hardware and are awaiting * replies that will never arrive because the LLD is stopped. */ for (atid = 0; atid < t->natids; atid++) { toep = lookup_atid(sc, atid); if ((uintptr_t)toep >= (uintptr_t)&t->atid_tab[0] && (uintptr_t)toep < (uintptr_t)&t->atid_tab[t->natids]) continue; if (__predict_false(toep == NULL)) continue; MPASS(toep->tid == atid); MPASS(toep->incarnation == sc->incarnation); /* * Take the atid out of the lookup table. toep->tid is stale * after this but useful for debug. */ CTR(KTR_CXGBE, "%s: atid %d@%d STRANDED, removed from table", __func__, atid, toep->incarnation); free_atid(sc, toep->tid); #if 0 toep->tid = -1; #endif mtx_lock(&td->toep_list_lock); toep->flags &= ~TPF_IN_TOEP_LIST; TAILQ_REMOVE(&td->toep_list, toep, link); TAILQ_INSERT_TAIL(&td->stranded_atids, toep, link); mtx_unlock(&td->toep_list_lock); } MPASS(atomic_load_int(&t->atids_in_use) == 0); } static void stop_tids(struct adapter *sc) { struct tom_data *td = sc->tom_softc; struct toepcb *toep; #ifdef INVARIANTS struct tid_info *t = &sc->tids; #endif /* * The LLD's offload queues are stopped so do_act_establish and * do_pass_accept_req cannot run and insert tids in parallel with this * thread. stop_stid_tab has also run and removed the synq entries' * tids from the table. The only tids in the table are for connections * at or beyond ESTABLISHED that are still waiting for the final CPL. */ mtx_lock(&td->toep_list_lock); TAILQ_FOREACH(toep, &td->toep_list, link) { MPASS(sc->incarnation == toep->incarnation); MPASS(toep->tid >= 0); MPASS(toep == lookup_tid(sc, toep->tid)); /* Remove tid from the lookup table immediately. */ CTR(KTR_CXGBE, "%s: tid %d@%d STRANDED, removed from table", __func__, toep->tid, toep->incarnation); remove_tid(sc, toep->tid, toep->ce ? 2 : 1); #if 0 /* toep->tid is stale now but left alone for debug. 
*/ toep->tid = -1; #endif /* All toep in this list will get bulk moved to stranded_tids */ toep->flags &= ~TPF_IN_TOEP_LIST; } MPASS(TAILQ_EMPTY(&td->stranded_tids)); TAILQ_CONCAT(&td->stranded_tids, &td->toep_list, link); MPASS(TAILQ_EMPTY(&td->toep_list)); mtx_unlock(&td->toep_list_lock); MPASS(atomic_load_int(&t->tids_in_use) == 0); } /* * L2T is stable because * 1. stop_lld stopped all new allocations. * 2. stop_lld also stopped the tx wrq so nothing is enqueueing new WRs to the * queue or to l2t_entry->wr_list. * 3. t4_l2t_update is ignoring all L2 updates. */ static void stop_tom_l2t(struct adapter *sc) { struct l2t_data *d = sc->l2t; struct tom_data *td = sc->tom_softc; struct l2t_entry *e; struct wrqe *wr; int i; /* * This task cannot be enqueued because L2 state changes are not being * processed. But if it's already scheduled or running then we need to * wait for it to cleanup the atids in the unsent_wr_list. */ taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources); MPASS(STAILQ_EMPTY(&td->unsent_wr_list)); for (i = 0; i < d->l2t_size; i++) { e = &d->l2tab[i]; mtx_lock(&e->lock); if (e->state == L2T_STATE_VALID || e->state == L2T_STATE_STALE) e->state = L2T_STATE_RESOLVING; /* * stop_atids is going to clean up _all_ atids in use, including * these that were pending L2 resolution. Just discard the WRs. */ while ((wr = STAILQ_FIRST(&e->wr_list)) != NULL) { STAILQ_REMOVE_HEAD(&e->wr_list, link); free(wr, M_CXGBE); } mtx_unlock(&e->lock); } } static int t4_tom_stop(struct adapter *sc) { struct tid_info *t = &sc->tids; struct tom_data *td = sc->tom_softc; ASSERT_SYNCHRONIZED_OP(sc); stop_tom_l2t(sc); if (atomic_load_int(&t->atids_in_use) > 0) stop_atids(sc); if (atomic_load_int(&t->stids_in_use) > 0) stop_stid_tab(sc); if (atomic_load_int(&t->tids_in_use) > 0) stop_tids(sc); taskqueue_enqueue(taskqueue_thread, &td->cleanup_stranded_tids); /* * L2T and atid_tab are restarted before t4_tom_restart so this assert * is not valid in t4_tom_restart. This is the next best place for it. */ MPASS(STAILQ_EMPTY(&td->unsent_wr_list)); return (0); } static int t4_tom_restart(struct adapter *sc) { ASSERT_SYNCHRONIZED_OP(sc); restart_stid_tab(sc); return (0); } static int t4_ctloutput_tom(struct socket *so, struct sockopt *sopt) { struct tcpcb *tp = sototcpcb(so); struct toepcb *toep = tp->t_toe; int error, optval; if (sopt->sopt_level == IPPROTO_TCP && sopt->sopt_name == TCP_USE_DDP) { if (sopt->sopt_dir != SOPT_SET) return (EOPNOTSUPP); if (sopt->sopt_td != NULL) { /* Only settable by the kernel. */ return (EPERM); } error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error != 0) return (error); if (optval != 0) return (t4_enable_ddp_rcv(so, toep)); else return (EOPNOTSUPP); } return (tcp_ctloutput(so, sopt)); } static int t4_aio_queue_tom(struct socket *so, struct kaiocb *job) { struct tcpcb *tp = sototcpcb(so); struct toepcb *toep = tp->t_toe; int error; /* * No lock is needed as TOE sockets never change between * active and passive. 
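 * That is, a listening socket stays a listener and an offloaded connection
 * stays a connection for its lifetime, so the SOLISTENING() check and the
 * ulp_mode() read below are stable without additional locking.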
*/ if (SOLISTENING(so)) return (EINVAL); if (ulp_mode(toep) == ULP_MODE_TCPDDP || ulp_mode(toep) == ULP_MODE_NONE) { error = t4_aio_queue_ddp(so, job); if (error == 0) return (0); else if (error != EOPNOTSUPP) return (soaio_queue_generic(so, job)); } if (t4_aio_queue_aiotx(so, job) != 0) return (soaio_queue_generic(so, job)); else return (0); } static int t4_tom_mod_load(void) { /* CPL handlers */ t4_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl); t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl2, CPL_COOKIE_TOM); t4_init_connect_cpl_handlers(); t4_init_listen_cpl_handlers(); t4_init_cpl_io_handlers(); t4_ddp_mod_load(); t4_tls_mod_load(); bcopy(&tcp_protosw, &toe_protosw, sizeof(toe_protosw)); toe_protosw.pr_ctloutput = t4_ctloutput_tom; toe_protosw.pr_aio_queue = t4_aio_queue_tom; bcopy(&tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw)); toe6_protosw.pr_ctloutput = t4_ctloutput_tom; toe6_protosw.pr_aio_queue = t4_aio_queue_tom; return (t4_register_uld(&tom_uld_info, ULD_TOM)); } static void tom_uninit(struct adapter *sc, void *arg) { bool *ok_to_unload = arg; if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tomun")) return; /* Try to free resources (works only if no port has IFCAP_TOE) */ if (uld_active(sc, ULD_TOM) && t4_deactivate_uld(sc, ULD_TOM) != 0) *ok_to_unload = false; end_synchronized_op(sc, 0); } static int t4_tom_mod_unload(void) { bool ok_to_unload = true; t4_iterate(tom_uninit, &ok_to_unload); if (!ok_to_unload) return (EBUSY); if (t4_unregister_uld(&tom_uld_info, ULD_TOM) == EBUSY) return (EBUSY); t4_tls_mod_unload(); t4_ddp_mod_unload(); t4_uninit_connect_cpl_handlers(); t4_uninit_listen_cpl_handlers(); t4_uninit_cpl_io_handlers(); t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, NULL, CPL_COOKIE_TOM); t4_register_cpl_handler(CPL_GET_TCB_RPL, NULL); return (0); } #endif /* TCP_OFFLOAD */ static int t4_tom_modevent(module_t mod, int cmd, void *arg) { int rc = 0; #ifdef TCP_OFFLOAD switch (cmd) { case MOD_LOAD: rc = t4_tom_mod_load(); break; case MOD_UNLOAD: rc = t4_tom_mod_unload(); break; default: rc = EINVAL; } #else printf("t4_tom: compiled without TCP_OFFLOAD support.\n"); rc = EOPNOTSUPP; #endif return (rc); } static moduledata_t t4_tom_moddata= { "t4_tom", t4_tom_modevent, 0 }; MODULE_VERSION(t4_tom, 1); MODULE_DEPEND(t4_tom, toecore, 1, 1, 1); MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1); DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);