Index: sys/conf/NOTES =================================================================== --- sys/conf/NOTES +++ sys/conf/NOTES @@ -1966,6 +1966,7 @@ # (and SMC COM90c66 in '56 compatibility mode) adapters. # cxgb: Chelsio T3 based 1GbE/10GbE PCIe Ethernet adapters. # cxgbe:Chelsio T4 and T5 based 1GbE/10GbE/40GbE PCIe Ethernet adapters. +# cxgbev: Chelsio T4 and T5 based PCIe Virtual Functions. # dc: Support for PCI fast ethernet adapters based on the DEC/Intel 21143 # and various workalikes including: # the ADMtek AL981 Comet and AN985 Centaur, the ASIX Electronics @@ -2119,6 +2120,7 @@ device cxgb # Chelsio T3 10 Gigabit Ethernet device cxgb_t3fw # Chelsio T3 10 Gigabit Ethernet firmware device cxgbe # Chelsio T4 and T5 1GbE/10GbE/40GbE +device cxgbev # Chelsio T4 and T5 1GbE/10GbE/40GbE VF device dc # DEC/Intel 21143 and various workalikes device et # Agere ET1310 10/100/Gigabit Ethernet device fxp # Intel EtherExpress PRO/100B (82557, 82558) Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -1263,8 +1263,12 @@ compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/t4_tracer.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" +dev/cxgbe/t4_vf.c optional cxgbev pci \ + compile-with "${NORMAL_C} -I$S/dev/cxgbe" dev/cxgbe/common/t4_hw.c optional cxgbe pci \ compile-with "${NORMAL_C} -I$S/dev/cxgbe" +dev/cxgbe/common/t4vf_hw.c optional cxgbev pci \ + compile-with "${NORMAL_C} -I$S/dev/cxgbe" t4fw_cfg.c optional cxgbe \ compile-with "${AWK} -f $S/tools/fw_stub.awk t4fw_cfg.fw:t4fw_cfg t4fw_cfg_uwire.fw:t4fw_cfg_uwire t4fw.fw:t4fw -mt4fw_cfg -c${.TARGET}" \ no-implicit-rule before-depend local \ Index: sys/dev/cxgbe/adapter.h =================================================================== --- sys/dev/cxgbe/adapter.h +++ sys/dev/cxgbe/adapter.h @@ -944,6 +944,9 @@ /* One for errors, one for firmware events */ #define T4_EXTRA_INTR 2 +/* One for firmware events */ +#define 
T4VF_EXTRA_INTR 1 + static inline uint32_t t4_read_reg(struct adapter *sc, uint32_t reg) { @@ -1078,13 +1081,34 @@ } /* t4_main.c */ +extern int t4_ntxq10g; +extern int t4_nrxq10g; +extern int t4_ntxq1g; +extern int t4_nrxq1g; +extern int t4_intr_types; +extern int t4_tmr_idx_10g; +extern int t4_pktc_idx_10g; +extern int t4_tmr_idx_1g; +extern int t4_pktc_idx_1g; +extern unsigned int t4_qsize_rxq; +extern unsigned int t4_qsize_txq; +extern device_method_t cxgbe_methods[]; + int t4_os_find_pci_capability(struct adapter *, int); int t4_os_pci_save_state(struct adapter *); int t4_os_pci_restore_state(struct adapter *); void t4_os_portmod_changed(const struct adapter *, int); void t4_os_link_changed(struct adapter *, int, int, int); void t4_iterate(void (*)(struct adapter *, void *), void *); +void t4_add_adapter(struct adapter *); +int t4_detach_common(device_t); int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); +int t4_map_bars_0_and_4(struct adapter *); +int t4_map_bar_2(struct adapter *); +int t4_set_sched_class(struct adapter *, struct t4_sched_params *); +int t4_set_sched_queue(struct adapter *, struct t4_sched_queue *); +int t4_setup_intr_handlers(struct adapter *); +void t4_sysctls(struct adapter *); int begin_synchronized_op(struct adapter *, struct vi_info *, int, char *); void doom_vi(struct adapter *, struct vi_info *); void end_synchronized_op(struct adapter *, int); @@ -1125,7 +1149,7 @@ void t4_intr_evt(void *); void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *); void t4_update_fl_bufsize(struct ifnet *); -int parse_pkt(struct mbuf **); +int parse_pkt(struct adapter *, struct mbuf **); void *start_wrq_wr(struct sge_wrq *, int, struct wrq_cookie *); void commit_wrq_wr(struct sge_wrq *, void *, struct wrq_cookie *); int tnl_cong(struct port_info *, int); Index: sys/dev/cxgbe/common/common.h =================================================================== --- sys/dev/cxgbe/common/common.h +++ 
sys/dev/cxgbe/common/common.h @@ -551,6 +551,7 @@ int t4_get_tp_version(struct adapter *adapter, u32 *vers); int t4_get_exprom_version(struct adapter *adapter, u32 *vers); int t4_init_hw(struct adapter *adapter, u32 fw_params); +const struct chip_params *t4_get_chip_params(int chipid); int t4_prep_adapter(struct adapter *adapter, u8 *buf); int t4_shutdown_adapter(struct adapter *adapter); int t4_init_devlog_params(struct adapter *adapter, int fw_attach); @@ -758,4 +759,32 @@ int t4_get_devlog_level(struct adapter *adapter, unsigned int *level); int t4_set_devlog_level(struct adapter *adapter, unsigned int level); void t4_sge_decode_idma_state(struct adapter *adapter, int state); + +static inline int t4vf_query_params(struct adapter *adapter, + unsigned int nparams, const u32 *params, + u32 *vals) +{ + return t4_query_params(adapter, 0, 0, 0, nparams, params, vals); +} + +static inline int t4vf_set_params(struct adapter *adapter, + unsigned int nparams, const u32 *params, + const u32 *vals) +{ + return t4_set_params(adapter, 0, 0, 0, nparams, params, vals); +} + +static inline int t4vf_wr_mbox(struct adapter *adap, const void *cmd, + int size, void *rpl) +{ + return t4_wr_mbox(adap, adap->mbox, cmd, size, rpl); +} + +int t4vf_wait_dev_ready(struct adapter *adapter); +int t4vf_fw_reset(struct adapter *adapter); +int t4vf_get_sge_params(struct adapter *adapter); +int t4vf_get_rss_glb_config(struct adapter *adapter); +int t4vf_get_vfres(struct adapter *adapter); +int t4vf_prep_adapter(struct adapter *adapter); + #endif /* __CHELSIO_COMMON_H */ Index: sys/dev/cxgbe/common/t4_hw.c =================================================================== --- sys/dev/cxgbe/common/t4_hw.c +++ sys/dev/cxgbe/common/t4_hw.c @@ -7498,7 +7498,7 @@ } } -static const struct chip_params *get_chip_params(int chipid) +const struct chip_params *t4_get_chip_params(int chipid) { static const struct chip_params chip_params[] = { { @@ -7577,7 +7577,7 @@ } } - adapter->chip_params = 
get_chip_params(chip_id(adapter)); + adapter->chip_params = t4_get_chip_params(chip_id(adapter)); if (adapter->chip_params == NULL) return -EINVAL; Index: sys/dev/cxgbe/common/t4vf_hw.c =================================================================== --- /dev/null +++ sys/dev/cxgbe/common/t4vf_hw.c @@ -0,0 +1,376 @@ +/*- + * Copyright (c) 2016 Chelsio Communications, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "common.h" +#include "t4_regs.h" + +#undef msleep +#define msleep(x) do { \ + if (cold) \ + DELAY((x) * 1000); \ + else \ + pause("t4hw", (x) * hz / 1000); \ +} while (0) + +/* + * Wait for the device to become ready (signified by our "who am I" register + * returning a value other than all 1's). Return an error if it doesn't + * become ready ... + */ +int t4vf_wait_dev_ready(struct adapter *adapter) +{ + const u32 whoami = VF_PL_REG(A_PL_VF_WHOAMI); + const u32 notready1 = 0xffffffff; + const u32 notready2 = 0xeeeeeeee; + u32 val; + + val = t4_read_reg(adapter, whoami); + if (val != notready1 && val != notready2) + return 0; + msleep(500); + val = t4_read_reg(adapter, whoami); + if (val != notready1 && val != notready2) + return 0; + else + return -EIO; +} + + +/** + * t4vf_fw_reset - issue a reset to FW + * @adapter: the adapter + * + * Issues a reset command to FW. For a Physical Function this would + * result in the Firmware resetting all of its state. For a Virtual + * Function this just resets the state associated with the VF. + */ +int t4vf_fw_reset(struct adapter *adapter) +{ + struct fw_reset_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_write = cpu_to_be32(V_FW_CMD_OP(FW_RESET_CMD) | + F_FW_CMD_WRITE); + cmd.retval_len16 = cpu_to_be32(V_FW_CMD_LEN16(FW_LEN16(cmd))); + return t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), NULL); +} + +/** + * t4vf_get_sge_params - retrieve adapter Scatter gather Engine parameters + * @adapter: the adapter + * + * Retrieves various core SGE parameters in the form of hardware SGE + * register values. The caller is responsible for decoding these as + * needed. The SGE parameters are stored in @adapter->params.sge. 
+ */ +int t4vf_get_sge_params(struct adapter *adapter) +{ + struct sge_params *sp = &adapter->params.sge; + u32 params[7], vals[7]; + u32 whoami; + unsigned int pf, s_hps; + int i, v; + + params[0] = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + V_FW_PARAMS_PARAM_XYZ(A_SGE_CONTROL)); + params[1] = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + V_FW_PARAMS_PARAM_XYZ(A_SGE_HOST_PAGE_SIZE)); + params[2] = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + V_FW_PARAMS_PARAM_XYZ(A_SGE_TIMER_VALUE_0_AND_1)); + params[3] = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + V_FW_PARAMS_PARAM_XYZ(A_SGE_TIMER_VALUE_2_AND_3)); + params[4] = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + V_FW_PARAMS_PARAM_XYZ(A_SGE_TIMER_VALUE_4_AND_5)); + params[5] = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + V_FW_PARAMS_PARAM_XYZ(A_SGE_CONM_CTRL)); + params[6] = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + V_FW_PARAMS_PARAM_XYZ(A_SGE_INGRESS_RX_THRESHOLD)); + v = t4vf_query_params(adapter, 7, params, vals); + if (v != FW_SUCCESS) + return v; + + sp->sge_control = vals[0]; + sp->counter_val[0] = G_THRESHOLD_0(vals[6]); + sp->counter_val[1] = G_THRESHOLD_1(vals[6]); + sp->counter_val[2] = G_THRESHOLD_2(vals[6]); + sp->counter_val[3] = G_THRESHOLD_3(vals[6]); + sp->timer_val[0] = core_ticks_to_us(adapter, G_TIMERVALUE0(vals[2])); + sp->timer_val[1] = core_ticks_to_us(adapter, G_TIMERVALUE1(vals[2])); + sp->timer_val[2] = core_ticks_to_us(adapter, G_TIMERVALUE2(vals[3])); + sp->timer_val[3] = core_ticks_to_us(adapter, G_TIMERVALUE3(vals[3])); + sp->timer_val[4] = core_ticks_to_us(adapter, G_TIMERVALUE4(vals[4])); + sp->timer_val[5] = core_ticks_to_us(adapter, G_TIMERVALUE5(vals[4])); + + sp->fl_starve_threshold = G_EGRTHRESHOLD(vals[5]) * 2 + 1; + if (is_t4(adapter)) + sp->fl_starve_threshold2 = sp->fl_starve_threshold; + else + sp->fl_starve_threshold2 = G_EGRTHRESHOLDPACKING(vals[5]) * 2 + + 1; + + /* + * We need the Queues/Page and Host Page Size for our VF. + * This is based on the PF from which we're instantiated. 
+ */ + whoami = t4_read_reg(adapter, VF_PL_REG(A_PL_VF_WHOAMI)); + pf = G_SOURCEPF(whoami); + + s_hps = (S_HOSTPAGESIZEPF0 + + (S_HOSTPAGESIZEPF1 - S_HOSTPAGESIZEPF0) * pf); + sp->page_shift = ((vals[1] >> s_hps) & M_HOSTPAGESIZEPF0) + 10; + + for (i = 0; i < SGE_FLBUF_SIZES; i++) { + params[0] = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + V_FW_PARAMS_PARAM_XYZ(A_SGE_FL_BUFFER_SIZE0 + (4 * i))); + v = t4vf_query_params(adapter, 1, params, vals); + if (v != FW_SUCCESS) + return v; + + sp->sge_fl_buffer_size[i] = vals[0]; + } + + /* + * T4 uses a single control field to specify both the PCIe Padding and + * Packing Boundary. T5 introduced the ability to specify these + * separately with the Padding Boundary in SGE_CONTROL and the Packing + * Boundary in SGE_CONTROL2. So for T5 and later we need to grab + * SGE_CONTROL2 in order to determine how ingress packet data will be + * laid out in Packed Buffer Mode. Unfortunately, older versions of + * the firmware won't let us retrieve SGE_CONTROL2 so if we get a + * failure grabbing it we throw an error since we can't figure out the + * right value. + */ + sp->spg_len = sp->sge_control & F_EGRSTATUSPAGESIZE ? 128 : 64; + sp->fl_pktshift = G_PKTSHIFT(sp->sge_control); + sp->pad_boundary = 1 << (G_INGPADBOUNDARY(sp->sge_control) + 5); + if (is_t4(adapter)) + sp->pack_boundary = sp->pad_boundary; + else { + params[0] = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + V_FW_PARAMS_PARAM_XYZ(A_SGE_CONTROL2)); + v = t4vf_query_params(adapter, 1, params, vals); + if (v != FW_SUCCESS) { + CH_ERR(adapter, "Unable to get SGE Control2; " + "probably old firmware.\n"); + return v; + } + if (G_INGPACKBOUNDARY(vals[0]) == 0) + sp->pack_boundary = 16; + else + sp->pack_boundary = 1 << (G_INGPACKBOUNDARY(vals[0]) + + 5); + } + + /* + * For T5 and later we want to use the new BAR2 Doorbells. + * Unfortunately, older firmware didn't allow this register to be + * read. 
+ */ + if (!is_t4(adapter)) { + unsigned int s_qpp; + + params[0] = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + V_FW_PARAMS_PARAM_XYZ(A_SGE_EGRESS_QUEUES_PER_PAGE_VF)); + params[1] = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_REG) | + V_FW_PARAMS_PARAM_XYZ(A_SGE_INGRESS_QUEUES_PER_PAGE_VF)); + v = t4vf_query_params(adapter, 2, params, vals); + if (v != FW_SUCCESS) { + CH_WARN(adapter, "Unable to get VF SGE Queues/Page; " + "probably old firmware.\n"); + return v; + } + + s_qpp = (S_QUEUESPERPAGEPF0 + + (S_QUEUESPERPAGEPF1 - S_QUEUESPERPAGEPF0) * pf); + sp->eq_s_qpp = ((vals[0] >> s_qpp) & M_QUEUESPERPAGEPF0); + sp->iq_s_qpp = ((vals[1] >> s_qpp) & M_QUEUESPERPAGEPF0); + } + + return 0; +} + +/** + * t4vf_get_rss_glb_config - retrieve adapter RSS Global Configuration + * @adapter: the adapter + * + * Retrieves global RSS mode and parameters with which we have to live + * and stores them in the @adapter's RSS parameters. + */ +int t4vf_get_rss_glb_config(struct adapter *adapter) +{ + struct rss_params *rss = &adapter->params.rss; + struct fw_rss_glb_config_cmd cmd, rpl; + int v; + + /* + * Execute an RSS Global Configuration read command to retrieve + * our RSS configuration. + */ + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_write = cpu_to_be32(V_FW_CMD_OP(FW_RSS_GLB_CONFIG_CMD) | + F_FW_CMD_REQUEST | + F_FW_CMD_READ); + cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd)); + v = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl); + if (v != FW_SUCCESS) + return v; + + /* + * Translate the big-endian RSS Global Configuration into our + * cpu-endian format based on the RSS mode. We also do first level + * filtering at this point to weed out modes which don't support + * VF Drivers ... 
+ */ + rss->mode = G_FW_RSS_GLB_CONFIG_CMD_MODE( + be32_to_cpu(rpl.u.manual.mode_pkd)); + switch (rss->mode) { + case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL: { + u32 word = be32_to_cpu( + rpl.u.basicvirtual.synmapen_to_hashtoeplitz); + + rss->u.basicvirtual.synmapen = + ((word & F_FW_RSS_GLB_CONFIG_CMD_SYNMAPEN) != 0); + rss->u.basicvirtual.syn4tupenipv6 = + ((word & F_FW_RSS_GLB_CONFIG_CMD_SYN4TUPENIPV6) != 0); + rss->u.basicvirtual.syn2tupenipv6 = + ((word & F_FW_RSS_GLB_CONFIG_CMD_SYN2TUPENIPV6) != 0); + rss->u.basicvirtual.syn4tupenipv4 = + ((word & F_FW_RSS_GLB_CONFIG_CMD_SYN4TUPENIPV4) != 0); + rss->u.basicvirtual.syn2tupenipv4 = + ((word & F_FW_RSS_GLB_CONFIG_CMD_SYN2TUPENIPV4) != 0); + + rss->u.basicvirtual.ofdmapen = + ((word & F_FW_RSS_GLB_CONFIG_CMD_OFDMAPEN) != 0); + + rss->u.basicvirtual.tnlmapen = + ((word & F_FW_RSS_GLB_CONFIG_CMD_TNLMAPEN) != 0); + rss->u.basicvirtual.tnlalllookup = + ((word & F_FW_RSS_GLB_CONFIG_CMD_TNLALLLKP) != 0); + + rss->u.basicvirtual.hashtoeplitz = + ((word & F_FW_RSS_GLB_CONFIG_CMD_HASHTOEPLITZ) != 0); + + /* we need at least Tunnel Map Enable to be set */ + if (!rss->u.basicvirtual.tnlmapen) + return -EINVAL; + break; + } + + default: + /* all unknown/unsupported RSS modes result in an error */ + return -EINVAL; + } + + return 0; +} + +/** + * t4vf_get_vfres - retrieve VF resource limits + * @adapter: the adapter + * + * Retrieves configured resource limits and capabilities for a virtual + * function. The results are stored in @adapter->params.vfres. + */ +int t4vf_get_vfres(struct adapter *adapter) +{ + struct vf_resources *vfres = &adapter->params.vfres; + struct fw_pfvf_cmd cmd, rpl; + int v; + u32 word; + + /* + * Execute PFVF Read command to get VF resource limits; bail out early + * with error on command failure. 
+ */ + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_vfn = cpu_to_be32(V_FW_CMD_OP(FW_PFVF_CMD) | + F_FW_CMD_REQUEST | + F_FW_CMD_READ); + cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd)); + v = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &rpl); + if (v != FW_SUCCESS) + return v; + + /* + * Extract VF resource limits and return success. + */ + word = be32_to_cpu(rpl.niqflint_niq); + vfres->niqflint = G_FW_PFVF_CMD_NIQFLINT(word); + vfres->niq = G_FW_PFVF_CMD_NIQ(word); + + word = be32_to_cpu(rpl.type_to_neq); + vfres->neq = G_FW_PFVF_CMD_NEQ(word); + vfres->pmask = G_FW_PFVF_CMD_PMASK(word); + + word = be32_to_cpu(rpl.tc_to_nexactf); + vfres->tc = G_FW_PFVF_CMD_TC(word); + vfres->nvi = G_FW_PFVF_CMD_NVI(word); + vfres->nexactf = G_FW_PFVF_CMD_NEXACTF(word); + + word = be32_to_cpu(rpl.r_caps_to_nethctrl); + vfres->r_caps = G_FW_PFVF_CMD_R_CAPS(word); + vfres->wx_caps = G_FW_PFVF_CMD_WX_CAPS(word); + vfres->nethctrl = G_FW_PFVF_CMD_NETHCTRL(word); + + return 0; +} + +/** + */ +int t4vf_prep_adapter(struct adapter *adapter) +{ + int err; + + /* + * Wait for the device to become ready before proceeding ... + */ + err = t4vf_wait_dev_ready(adapter); + if (err) + return err; + + adapter->params.chipid = pci_get_device(adapter->dev) >> 12; + if (adapter->params.chipid >= 0xa) { + adapter->params.chipid -= (0xa - 0x4); + adapter->params.fpga = 1; + } + + /* + * Default port and clock for debugging in case we can't reach + * firmware. + */ + adapter->params.nports = 1; + adapter->params.vfres.pmask = 1; + adapter->params.vpd.cclk = 50000; + + adapter->chip_params = t4_get_chip_params(chip_id(adapter)); + if (adapter->chip_params == NULL) + return -EINVAL; + + return 0; +} Index: sys/dev/cxgbe/if_cxlv.c =================================================================== --- /dev/null +++ sys/dev/cxgbe/if_cxlv.c @@ -0,0 +1,44 @@ +/*- + * Copyright (c) 2015 Chelsio Communications, Inc. + * All rights reserved. 
+ * Written by: Navdeep Parhar + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +static int +mod_event(module_t mod, int cmd, void *arg) +{ + + return (0); +} +static moduledata_t if_cxlv_mod = {"if_cxlv", mod_event}; +DECLARE_MODULE(if_cxlv, if_cxlv_mod, SI_SUB_EXEC, SI_ORDER_ANY); +MODULE_VERSION(if_cxlv, 1); +MODULE_DEPEND(if_cxlv, cxlv, 1, 1, 1); Index: sys/dev/cxgbe/t4_main.c =================================================================== --- sys/dev/cxgbe/t4_main.c +++ sys/dev/cxgbe/t4_main.c @@ -105,7 +105,7 @@ static int cxgbe_probe(device_t); static int cxgbe_attach(device_t); static int cxgbe_detach(device_t); -static device_method_t cxgbe_methods[] = { +device_method_t cxgbe_methods[] = { DEVMETHOD(device_probe, cxgbe_probe), DEVMETHOD(device_attach, cxgbe_attach), DEVMETHOD(device_detach, cxgbe_detach), @@ -210,19 +210,19 @@ * Number of queues for tx and rx, 10G and 1G, NIC and offload. */ #define NTXQ_10G 16 -static int t4_ntxq10g = -1; +int t4_ntxq10g = -1; TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq10g); #define NRXQ_10G 8 -static int t4_nrxq10g = -1; +int t4_nrxq10g = -1; TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq10g); #define NTXQ_1G 4 -static int t4_ntxq1g = -1; +int t4_ntxq1g = -1; TUNABLE_INT("hw.cxgbe.ntxq1g", &t4_ntxq1g); #define NRXQ_1G 2 -static int t4_nrxq1g = -1; +int t4_nrxq1g = -1; TUNABLE_INT("hw.cxgbe.nrxq1g", &t4_nrxq1g); #define NTXQ_VI 1 @@ -276,34 +276,34 @@ * Holdoff parameters for 10G and 1G ports. 
*/ #define TMR_IDX_10G 1 -static int t4_tmr_idx_10g = TMR_IDX_10G; +int t4_tmr_idx_10g = TMR_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx_10g); #define PKTC_IDX_10G (-1) -static int t4_pktc_idx_10g = PKTC_IDX_10G; +int t4_pktc_idx_10g = PKTC_IDX_10G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx_10g); #define TMR_IDX_1G 1 -static int t4_tmr_idx_1g = TMR_IDX_1G; +int t4_tmr_idx_1g = TMR_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_1G", &t4_tmr_idx_1g); #define PKTC_IDX_1G (-1) -static int t4_pktc_idx_1g = PKTC_IDX_1G; +int t4_pktc_idx_1g = PKTC_IDX_1G; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_1G", &t4_pktc_idx_1g); /* * Size (# of entries) of each tx and rx queue. */ -static unsigned int t4_qsize_txq = TX_EQ_QSIZE; +unsigned int t4_qsize_txq = TX_EQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq); -static unsigned int t4_qsize_rxq = RX_IQ_QSIZE; +unsigned int t4_qsize_rxq = RX_IQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq); /* * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively). 
*/ -static int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; +int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types); /* @@ -414,8 +414,6 @@ struct t4_filter_specification fs; }; -static int map_bars_0_and_4(struct adapter *); -static int map_bar_2(struct adapter *); static void setup_memwin(struct adapter *); static void position_memwin(struct adapter *, int, uint32_t); static int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int); @@ -440,7 +438,6 @@ static void build_medialist(struct port_info *, struct ifmedia *); static int cxgbe_init_synchronized(struct vi_info *); static int cxgbe_uninit_synchronized(struct vi_info *); -static int setup_intr_handlers(struct adapter *); static void quiesce_txq(struct adapter *, struct sge_txq *); static void quiesce_wrq(struct adapter *, struct sge_wrq *); static void quiesce_iq(struct adapter *, struct sge_iq *); @@ -453,7 +450,6 @@ static void cxgbe_refresh_stats(struct adapter *, struct port_info *); static void cxgbe_tick(void *); static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); -static void t4_sysctls(struct adapter *); static void cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield(SYSCTL_HANDLER_ARGS); @@ -522,8 +518,6 @@ static int load_fw(struct adapter *, struct t4_data *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); -static int set_sched_class(struct adapter *, struct t4_sched_params *); -static int set_sched_queue(struct adapter *, struct t4_sched_queue *); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, int); #endif @@ -707,9 +701,7 @@ snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); - sx_xlock(&t4_list_lock); - SLIST_INSERT_HEAD(&t4_list, sc, link); - sx_xunlock(&t4_list_lock); + 
t4_add_adapter(sc); mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); TAILQ_INIT(&sc->sfl); @@ -717,7 +709,7 @@ mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF); - rc = map_bars_0_and_4(sc); + rc = t4_map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ @@ -787,7 +779,7 @@ if (rc != 0) goto done; /* error message displayed already */ - rc = map_bar_2(sc); + rc = t4_map_bar_2(sc); if (rc != 0) goto done; /* error message displayed already */ @@ -1041,7 +1033,7 @@ } } - rc = setup_intr_handlers(sc); + rc = t4_setup_intr_handlers(sc); if (rc != 0) { device_printf(dev, "failed to setup interrupt handlers: %d\n", rc); @@ -1075,7 +1067,7 @@ } if (rc != 0) - t4_detach(dev); + t4_detach_common(dev); else t4_sysctls(sc); @@ -1140,8 +1132,7 @@ t4_detach(device_t dev) { struct adapter *sc; - struct port_info *pi; - int i, rc; + int rc; sc = device_get_softc(dev); @@ -1152,19 +1143,35 @@ return (rc); } - if (sc->flags & FULL_INIT_DONE) - t4_intr_disable(sc); + return (t4_detach_common(dev)); +} + +int +t4_detach_common(device_t dev) +{ + struct adapter *sc; + struct port_info *pi; + int i, rc; + + sc = device_get_softc(dev); + + if (sc->flags & FULL_INIT_DONE) { + if (!(sc->flags & IS_VF)) + t4_intr_disable(sc); + } if (sc->cdev) { destroy_dev(sc->cdev); sc->cdev = NULL; } - rc = bus_generic_detach(dev); - if (rc) { - device_printf(dev, - "failed to detach child devices: %d\n", rc); - return (rc); + if (device_is_attached(dev)) { + rc = bus_generic_detach(dev); + if (rc) { + device_printf(dev, + "failed to detach child devices: %d\n", rc); + return (rc); + } } for (i = 0; i < sc->intr_count; i++) @@ -1187,7 +1194,7 @@ if (sc->flags & FULL_INIT_DONE) adapter_full_uninit(sc); - if (sc->flags & FW_OK) + if ((sc->flags & (IS_VF | FW_OK)) == FW_OK) t4_fw_bye(sc, sc->mbox); if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX) @@ -1677,7 +1684,7 @@ return (ENETDOWN); } - rc = parse_pkt(&m); + rc = parse_pkt(sc, &m); if 
(__predict_false(rc != 0)) { MPASS(m == NULL); /* was freed already */ atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */ @@ -1778,7 +1785,7 @@ struct adapter *sc = pi->adapter; struct port_stats *s = &pi->stats; - if (pi->nvi > 1) + if (pi->nvi > 1 || sc->flags & IS_VF) return (vi_get_counter(ifp, c)); cxgbe_refresh_stats(sc, pi); @@ -1966,8 +1973,16 @@ device_get_nameunit(sc->dev)); } -static int -map_bars_0_and_4(struct adapter *sc) +void +t4_add_adapter(struct adapter *sc) +{ + sx_xlock(&t4_list_lock); + SLIST_INSERT_HEAD(&t4_list, sc, link); + sx_xunlock(&t4_list_lock); +} + +int +t4_map_bars_0_and_4(struct adapter *sc) { sc->regs_rid = PCIR_BAR(0); sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, @@ -1992,8 +2007,8 @@ return (0); } -static int -map_bar_2(struct adapter *sc) +int +t4_map_bar_2(struct adapter *sc) { /* @@ -3775,7 +3790,7 @@ ifp->if_drv_flags |= IFF_DRV_RUNNING; pi->up_vis++; - if (pi->nvi > 1) + if (pi->nvi > 1 || sc->flags & IS_VF) callout_reset(&vi->tick, hz, vi_tick, vi); else callout_reset(&pi->tick, hz, cxgbe_tick, pi); @@ -3827,10 +3842,10 @@ } PORT_LOCK(pi); - if (pi->nvi == 1) - callout_stop(&pi->tick); - else + if (pi->nvi > 1 || sc->flags & IS_VF) callout_stop(&vi->tick); + else + callout_stop(&pi->tick); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(pi); return (0); @@ -3855,8 +3870,8 @@ * It is ok for this function to fail midway and return right away. t4_detach * will walk the entire sc->irq list and clean up whatever is valid. */ -static int -setup_intr_handlers(struct adapter *sc) +int +t4_setup_intr_handlers(struct adapter *sc) { int rc, rid, p, q, v; char s[8]; @@ -3884,17 +3899,23 @@ return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all")); /* Multiple interrupts. 
*/ - KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, - ("%s: too few intr.", __func__)); + if (sc->flags & IS_VF) + KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports, + ("%s: too few intr.", __func__)); + else + KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, + ("%s: too few intr.", __func__)); - /* The first one is always error intr */ - rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err"); - if (rc != 0) - return (rc); - irq++; - rid++; + /* The first one is always error intr on PFs */ + if (!(sc->flags & IS_VF)) { + rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err"); + if (rc != 0) + return (rc); + irq++; + rid++; + } - /* The second one is always the firmware event queue */ + /* The second one is always the firmware event queue (first on VFs) */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt"); if (rc != 0) return (rc); @@ -4001,7 +4022,8 @@ device_get_nameunit(sc->dev), i); } - t4_intr_enable(sc); + if (!(sc->flags & IS_VF)) + t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; done: if (rc != 0) @@ -4250,7 +4272,7 @@ /* Need to quiesce queues. */ /* XXX: Only for the first VI? 
*/ - if (IS_MAIN_VI(vi)) + if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF)) quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { @@ -4417,10 +4439,16 @@ u32 stats[2]; mtx_assert(&sc->reg_lock, MA_OWNED); - t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | - V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg))); - stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); - stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); + if (sc->flags & IS_VF) { + stats[0] = t4_read_reg(sc, VF_MPS_REG(reg)); + stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4)); + } else { + t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | + V_PL_VFID(G_FW_VIID_VIN(viid)) | + V_PL_ADDR(VF_MPS_REG(reg))); + stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); + stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); + } return (((uint64_t)stats[1]) << 32 | stats[0]); } @@ -4569,7 +4597,7 @@ "\004PO_INITIATOR\005PO_TARGET", }; -static void +void t4_sysctls(struct adapter *sc) { struct sysctl_ctx_list *ctx; @@ -4618,6 +4646,9 @@ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "debug_flags", CTLFLAG_RW, &sc->debug_flags, 0, "flags to enable runtime debugging"); + if (sc->flags & IS_VF) + return; + SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, NULL, chip_rev(sc), "chip hardware revision"); @@ -5030,6 +5061,9 @@ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL, port_top_speed(pi), "max speed (in Gbps)"); + if (sc->flags & IS_VF) + return; + /* * dev.(cxgbe|cxl).X.tc. 
*/ @@ -8524,8 +8558,8 @@ return (rc); } -static int -set_sched_class(struct adapter *sc, struct t4_sched_params *p) +int +t4_set_sched_class(struct adapter *sc, struct t4_sched_params *p) { if (p->type != SCHED_CLASS_TYPE_PACKET) @@ -8540,8 +8574,8 @@ return (EINVAL); } -static int -set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) +int +t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) { struct port_info *pi = NULL; struct vi_info *vi; @@ -8879,10 +8913,10 @@ break; } case CHELSIO_T4_SCHED_CLASS: - rc = set_sched_class(sc, (struct t4_sched_params *)data); + rc = t4_set_sched_class(sc, (struct t4_sched_params *)data); break; case CHELSIO_T4_SCHED_QUEUE: - rc = set_sched_queue(sc, (struct t4_sched_queue *)data); + rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data); break; case CHELSIO_T4_GET_TRACER: rc = t4_get_tracer(sc, (struct t4_tracer *)data); Index: sys/dev/cxgbe/t4_sge.c =================================================================== --- sys/dev/cxgbe/t4_sge.c +++ sys/dev/cxgbe/t4_sge.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -223,10 +224,13 @@ static inline void get_pkt_gl(struct mbuf *, struct sglist *); static inline u_int txpkt_len16(u_int, u_int); +static inline u_int txpkt_vm_len16(u_int, u_int); static inline u_int txpkts0_len16(u_int); static inline u_int txpkts1_len16(void); static u_int write_txpkt_wr(struct sge_txq *, struct fw_eth_tx_pkt_wr *, struct mbuf *, u_int); +static u_int write_txpkt_vm_wr(struct sge_txq *, struct fw_eth_tx_pkt_vm_wr *, + struct mbuf *, u_int); static int try_txpkts(struct mbuf *, struct mbuf *, struct txpkts *, u_int); static int add_to_txpkts(struct mbuf *, struct txpkts *, u_int); static u_int write_txpkts_wr(struct sge_txq *, struct fw_eth_tx_pkts_wr *, @@ -750,6 +754,9 @@ } } + if (sc->flags & IS_VF) + return (0); + v = V_HPZ0(0) | V_HPZ1(2) | V_HPZ2(4) | V_HPZ3(6); r = t4_read_reg(sc, A_ULP_RX_TDDP_PSZ); if (r != v) { @@ -860,7 +867,8 
@@ * Management queue. This is just a control queue that uses the fwq as * its associated iq. */ - rc = alloc_mgmtq(sc); + if (!(sc->flags & IS_VF)) + rc = alloc_mgmtq(sc); return (rc); } @@ -1166,7 +1174,7 @@ /* * Finally, the control queue. */ - if (!IS_MAIN_VI(vi)) + if (!IS_MAIN_VI(vi) || sc->flags & IS_VF) goto done; oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, "ctrlq", CTLFLAG_RD, NULL, "ctrl queue"); @@ -1227,7 +1235,7 @@ * (for egress updates, etc.). */ - if (IS_MAIN_VI(vi)) + if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF)) free_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { @@ -2073,7 +2081,7 @@ MPASS(len > 0); - while (len) { + for (;;) { if (offset + len < m->m_len) { offset += len; p = mtod(m, uintptr_t) + offset; @@ -2144,7 +2152,7 @@ * b) it may get defragged up if the gather list is too long for the hardware. */ int -parse_pkt(struct mbuf **mp) +parse_pkt(struct adapter *sc, struct mbuf **mp) { struct mbuf *m0 = *mp, *m; int rc, nsegs, defragged = 0, offset; @@ -2191,9 +2199,13 @@ goto restart; } set_mbuf_nsegs(m0, nsegs); - set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); + if (sc->flags & IS_VF) + set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0))); + else + set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); - if (!needs_tso(m0)) + if (!needs_tso(m0) && + !(sc->flags & IS_VF && (needs_l3_csum(m0) || needs_l4_csum(m0)))) return (0); m = m0; @@ -2216,7 +2228,7 @@ { struct ip6_hdr *ip6 = l3hdr; - MPASS(ip6->ip6_nxt == IPPROTO_TCP); + MPASS(!needs_tso(m0) || ip6->ip6_nxt == IPPROTO_TCP); m0->m_pkthdr.l3hlen = sizeof(*ip6); break; @@ -2238,8 +2250,10 @@ } #if defined(INET) || defined(INET6) - tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); - m0->m_pkthdr.l4hlen = tcp->th_off * 4; + if (needs_tso(m0)) { + tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); + m0->m_pkthdr.l4hlen = tcp->th_off * 4; + } #endif MPASS(m0 == *mp); return (0); @@ -2434,7 +2448,12 @@ next_cidx = 0; wr = (void *)&eq->desc[eq->pidx]; - if 
(remaining > 1 && + if (sc->flags & IS_VF) { + total++; + remaining--; + ETHER_BPF_MTAP(ifp, m0); + n = write_txpkt_vm_wr(txq, (void *)wr, m0, available); + } else if (remaining > 1 && try_txpkts(m0, r->items[next_cidx], &txp, available) == 0) { /* pkts at cidx, next_cidx should both be in txp. */ @@ -2765,7 +2784,7 @@ FL_UNLOCK(fl); } - if (is_t5(sc) && cong >= 0) { + if (is_t5(sc) && !(sc->flags & IS_VF) && cong >= 0) { uint32_t param, val; param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | @@ -2889,9 +2908,13 @@ init_iq(fwq, sc, 0, 0, FW_IQ_QSIZE); fwq->flags |= IQ_INTR; /* always */ - intr_idx = sc->intr_count > 1 ? 1 : 0; - fwq->set_tcb_rpl = t4_filter_rpl; - fwq->l2t_write_rpl = do_l2t_write_rpl; + if (sc->flags & IS_VF) + intr_idx = 0; + else { + intr_idx = sc->intr_count > 1 ? 1 : 0; + fwq->set_tcb_rpl = t4_filter_rpl; + fwq->l2t_write_rpl = do_l2t_write_rpl; + } rc = alloc_iq_fl(&sc->port[0]->vi[0], fwq, NULL, intr_idx, -1); if (rc != 0) { device_printf(sc->dev, @@ -3577,9 +3600,13 @@ TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq); txq->ifp = vi->ifp; txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK); - txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | - V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_VF_VLD(1) | - V_TXPKT_VF(vi->viid)); + if (sc->flags & IS_VF) + txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | + V_TXPKT_INTF(pi->tx_chan)); + else + txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | + V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_VF_VLD(1) | + V_TXPKT_VF(vi->viid)); txq->tc_idx = -1; txq->sdesc = malloc(eq->sidx * sizeof(struct tx_sdesc), M_CXGBE, M_ZERO | M_WAITOK); @@ -3932,6 +3959,27 @@ } /* + * len16 for a txpkt_vm WR with a GL. Includes the firmware work + * request header. 
+ */ +static inline u_int +txpkt_vm_len16(u_int nsegs, u_int tso) +{ + u_int n; + + MPASS(nsegs > 0); + + nsegs--; /* first segment is part of ulptx_sgl */ + n = sizeof(struct fw_eth_tx_pkt_vm_wr) + + sizeof(struct cpl_tx_pkt_core) + + sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); + if (tso) + n += sizeof(struct cpl_tx_pkt_lso_core); + + return (howmany(n, 16)); +} + +/* * len16 for a txpkts type 0 WR with a GL. Does not include the firmware work * request header. */ @@ -3976,6 +4024,179 @@ } /* + * Write a VM txpkt WR for this packet to the hardware descriptors, update the + * software descriptor, and advance the pidx. It is guaranteed that enough + * descriptors are available. + * + * The return value is the # of hardware descriptors used. + */ +static u_int +write_txpkt_vm_wr(struct sge_txq *txq, struct fw_eth_tx_pkt_vm_wr *wr, + struct mbuf *m0, u_int available) +{ + struct sge_eq *eq = &txq->eq; + struct tx_sdesc *txsd; + struct cpl_tx_pkt_core *cpl; + uint32_t ctrl; /* used in many unrelated places */ + uint64_t ctrl1; + int csum_type, len16, ndesc, pktlen, nsegs; + caddr_t dst; + + TXQ_LOCK_ASSERT_OWNED(txq); + M_ASSERTPKTHDR(m0); + MPASS(available > 0 && available < eq->sidx); + + len16 = mbuf_len16(m0); + nsegs = mbuf_nsegs(m0); + pktlen = m0->m_pkthdr.len; + ctrl = sizeof(struct cpl_tx_pkt_core); + if (needs_tso(m0)) + ctrl += sizeof(struct cpl_tx_pkt_lso_core); + ndesc = howmany(len16, EQ_ESIZE / 16); + MPASS(ndesc <= available); + + /* Firmware work request header */ + MPASS(wr == (void *)&eq->desc[eq->pidx]); + wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_VM_WR) | + V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl)); + + ctrl = V_FW_WR_LEN16(len16); + wr->equiq_to_len16 = htobe32(ctrl); + wr->r3[0] = 0; + wr->r3[1] = 0; + + /* + * Copy over ethmacdst, ethmacsrc, ethtype, and vlantci. + * vlantci is ignored unless the ethtype is 0x8100, so it's + * simpler to always copy it rather than making it + * conditional. 
Also, it seems that we do not have to set + * vlantci or fake the ethtype when doing VLAN tag insertion. + */ + m_copydata(m0, 0, sizeof(struct ether_header) + 2, wr->ethmacdst); + + csum_type = -1; + if (needs_tso(m0)) { + struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); + + KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && + m0->m_pkthdr.l4hlen > 0, + ("%s: mbuf %p needs TSO but missing header lengths", + __func__, m0)); + + ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | + F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) + | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); + if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) + ctrl |= V_LSO_ETHHDR_LEN(1); + if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) + ctrl |= F_LSO_IPV6; + + lso->lso_ctrl = htobe32(ctrl); + lso->ipid_ofst = htobe16(0); + lso->mss = htobe16(m0->m_pkthdr.tso_segsz); + lso->seqno_offset = htobe32(0); + lso->len = htobe32(pktlen); + + if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) + csum_type = TX_CSUM_TCPIP6; + else + csum_type = TX_CSUM_TCPIP; + + cpl = (void *)(lso + 1); + + txq->tso_wrs++; + } else { + if (m0->m_pkthdr.csum_flags & CSUM_IP_TCP) + csum_type = TX_CSUM_TCPIP; + else if (m0->m_pkthdr.csum_flags & CSUM_IP_UDP) + csum_type = TX_CSUM_UDPIP; + else if (m0->m_pkthdr.csum_flags & CSUM_IP6_TCP) + csum_type = TX_CSUM_TCPIP6; + else if (m0->m_pkthdr.csum_flags & CSUM_IP6_UDP) + csum_type = TX_CSUM_UDPIP6; + else if (m0->m_pkthdr.csum_flags & CSUM_IP) { + /* + * XXX: The firmware appears to stomp on the + * fragment/flags field of the IP header when + * using TX_CSUM_IP. Fall back to doing + * software checksums. 
+ */ + u_short *sump; + struct mbuf *m; + int offset; + + m = m0; + offset = 0; + sump = m_advance(&m, &offset, m0->m_pkthdr.l2hlen + + offsetof(struct ip, ip_sum)); + *sump = in_cksum_skip(m0, m0->m_pkthdr.l2hlen + + m0->m_pkthdr.l3hlen, m0->m_pkthdr.l2hlen); + m0->m_pkthdr.csum_flags &= ~CSUM_IP; + } + + cpl = (void *)(wr + 1); + } + + /* Checksum offload */ + ctrl1 = 0; + if (needs_l3_csum(m0) == 0) + ctrl1 |= F_TXPKT_IPCSUM_DIS; + if (csum_type >= 0) { + KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0, + ("%s: mbuf %p needs checksum offload but missing header lengths", + __func__, m0)); + + /* XXX: T6 */ + ctrl1 |= V_TXPKT_ETHHDR_LEN(m0->m_pkthdr.l2hlen - + ETHER_HDR_LEN); + ctrl1 |= V_TXPKT_IPHDR_LEN(m0->m_pkthdr.l3hlen); + ctrl1 |= V_TXPKT_CSUM_TYPE(csum_type); + } else + ctrl1 |= F_TXPKT_L4CSUM_DIS; + if (m0->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | + CSUM_UDP_IPV6 | CSUM_TCP_IPV6 | CSUM_TSO)) + txq->txcsum++; /* some hardware assistance provided */ + + /* VLAN tag insertion */ + if (needs_vlan_insertion(m0)) { + ctrl1 |= F_TXPKT_VLAN_VLD | + V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); + txq->vlan_insertion++; + } + + /* CPL header */ + cpl->ctrl0 = txq->cpl_ctrl0; + cpl->pack = 0; + cpl->len = htobe16(pktlen); + cpl->ctrl1 = htobe64(ctrl1); + + /* SGL */ + dst = (void *)(cpl + 1); + + /* + * A packet using TSO will use up an entire descriptor for the + * firmware work request header, LSO CPL, and TX_PKT_XT CPL. + * If this descriptor is the last descriptor in the ring, wrap + * around to the front of the ring explicitly for the start of + * the sgl. 
+ */ + if (dst == (void *)&eq->desc[eq->sidx]) { + dst = (void *)&eq->desc[0]; + write_gl_to_txd(txq, m0, &dst, 0); + } else + write_gl_to_txd(txq, m0, &dst, eq->sidx - ndesc < eq->pidx); + txq->sgl_wrs++; + + txq->txpkt_wrs++; + + txsd = &txq->sdesc[eq->pidx]; + txsd->m = m0; + txsd->desc_used = ndesc; + + return (ndesc); +} + +/* * Write a txpkt WR for this packet to the hardware descriptors, update the * software descriptor, and advance the pidx. It is guaranteed that enough * descriptors are available. Index: sys/dev/cxgbe/t4_vf.c =================================================================== --- /dev/null +++ sys/dev/cxgbe/t4_vf.c @@ -0,0 +1,937 @@ +/*- + * Copyright (c) 2016 Chelsio Communications, Inc. + * All rights reserved. + * Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#if defined(__i386__) || defined(__amd64__) +#include +#include +#endif + +#include "common/common.h" +#include "common/t4_regs.h" +#include "t4_ioctl.h" +#include "t4_mp_ring.h" + +/* + * Some notes: + * + * The Virtual Interfaces are connected to an internal switch on the chip + * which allows VIs attached to the same port to talk to each other even when + * the port link is down. As a result, we might want to always report a + * VF's link as being "up". + * + * XXX: Add a TUNABLE and possible per-device sysctl for this? 
+ */ + +struct intrs_and_queues { + uint16_t intr_type; /* MSI, or MSI-X */ + uint16_t nirq; /* Total # of vectors */ + uint16_t intr_flags_10g;/* Interrupt flags for each 10G port */ + uint16_t intr_flags_1g; /* Interrupt flags for each 1G port */ + uint16_t ntxq10g; /* # of NIC txq's for each 10G port */ + uint16_t nrxq10g; /* # of NIC rxq's for each 10G port */ + uint16_t ntxq1g; /* # of NIC txq's for each 1G port */ + uint16_t nrxq1g; /* # of NIC rxq's for each 1G port */ +}; + +struct { + uint16_t device; + char *desc; +} t4vf_pciids[] = { + {0x4800, "Chelsio T440-dbg VF"}, + {0x4801, "Chelsio T420-CR VF"}, + {0x4802, "Chelsio T422-CR VF"}, + {0x4803, "Chelsio T440-CR VF"}, + {0x4804, "Chelsio T420-BCH VF"}, + {0x4805, "Chelsio T440-BCH VF"}, + {0x4806, "Chelsio T440-CH VF"}, + {0x4807, "Chelsio T420-SO VF"}, + {0x4808, "Chelsio T420-CX VF"}, + {0x4809, "Chelsio T420-BT VF"}, + {0x480a, "Chelsio T404-BT VF"}, + {0x480e, "Chelsio T440-LP-CR VF"}, +}, t5vf_pciids[] = { + {0x5800, "Chelsio T580-dbg VF"}, + {0x5801, "Chelsio T520-CR VF"}, /* 2 x 10G */ + {0x5802, "Chelsio T522-CR VF"}, /* 2 x 10G, 2 X 1G */ + {0x5803, "Chelsio T540-CR VF"}, /* 4 x 10G */ + {0x5807, "Chelsio T520-SO VF"}, /* 2 x 10G, nomem */ + {0x5809, "Chelsio T520-BT VF"}, /* 2 x 10GBaseT */ + {0x580a, "Chelsio T504-BT VF"}, /* 4 x 1G */ + {0x580d, "Chelsio T580-CR VF"}, /* 2 x 40G */ + {0x580e, "Chelsio T540-LP-CR VF"}, /* 4 x 10G */ + {0x5810, "Chelsio T580-LP-CR VF"}, /* 2 x 40G */ + {0x5811, "Chelsio T520-LL-CR VF"}, /* 2 x 10G */ + {0x5812, "Chelsio T560-CR VF"}, /* 1 x 40G, 2 x 10G */ + {0x5814, "Chelsio T580-LP-SO-CR VF"}, /* 2 x 40G, nomem */ + {0x5815, "Chelsio T502-BT VF"}, /* 2 x 1G */ +#ifdef notyet + {0x5804, "Chelsio T520-BCH VF"}, + {0x5805, "Chelsio T540-BCH VF"}, + {0x5806, "Chelsio T540-CH VF"}, + {0x5808, "Chelsio T520-CX VF"}, + {0x580b, "Chelsio B520-SR VF"}, + {0x580c, "Chelsio B504-BT VF"}, + {0x580f, "Chelsio Amsterdam VF"}, + {0x5813, "Chelsio T580-CHR VF"}, +#endif +}; 
+ +static d_ioctl_t t4vf_ioctl; + +static struct cdevsw t4vf_cdevsw = { + .d_version = D_VERSION, + .d_ioctl = t4vf_ioctl, + .d_name = "t4vf", +}; + +static int +t4vf_probe(device_t dev) +{ + uint16_t d; + size_t i; + + d = pci_get_device(dev); + for (i = 0; i < nitems(t4vf_pciids); i++) { + if (d == t4vf_pciids[i].device) { + device_set_desc(dev, t4vf_pciids[i].desc); + return (BUS_PROBE_DEFAULT); + } + } + return (ENXIO); +} + +static int +t5vf_probe(device_t dev) +{ + uint16_t d; + size_t i; + + d = pci_get_device(dev); + for (i = 0; i < nitems(t5vf_pciids); i++) { + if (d == t5vf_pciids[i].device) { + device_set_desc(dev, t5vf_pciids[i].desc); + return (BUS_PROBE_DEFAULT); + } + } + return (ENXIO); +} + +#define FW_PARAM_DEV(param) \ + (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ + V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) +#define FW_PARAM_PFVF(param) \ + (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ + V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) + +static int +get_params__pre_init(struct adapter *sc) +{ + int rc; + uint32_t param[3], val[3]; + + param[0] = FW_PARAM_DEV(FWREV); + param[1] = FW_PARAM_DEV(TPREV); + param[2] = FW_PARAM_DEV(CCLK); + rc = -t4vf_query_params(sc, nitems(param), param, val); + if (rc != 0) { + device_printf(sc->dev, + "failed to query parameters (pre_init): %d.\n", rc); + return (rc); + } + + sc->params.fw_vers = val[0]; + sc->params.tp_vers = val[1]; + sc->params.vpd.cclk = val[2]; + return (0); +} + +static int +get_params__post_init(struct adapter *sc) +{ + int rc; + + rc = -t4vf_get_sge_params(sc); + if (rc != 0) { + device_printf(sc->dev, + "unable to retrieve adapter SGE parameters: %d\n", rc); + return (rc); + } + + rc = -t4vf_get_rss_glb_config(sc); + if (rc != 0) { + device_printf(sc->dev, + "unable to retrieve adapter RSS parameters: %d\n", rc); + return (rc); + } + if (sc->params.rss.mode != FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) { + device_printf(sc->dev, + "unable to operate with global RSS mode %d\n", + 
sc->params.rss.mode); + return (EINVAL); + } + + rc = t4_read_chip_settings(sc); + if (rc != 0) + return (rc); + + /* + * Grab our Virtual Interface resource allocation, extract the + * features that we're interested in and do a bit of sanity testing on + * what we discover. + */ + rc = -t4vf_get_vfres(sc); + if (rc != 0) { + device_printf(sc->dev, + "unable to get virtual interface resources: %d\n", rc); + return (rc); + } + + /* + * Check for various parameter sanity issues. + */ + if (sc->params.vfres.pmask == 0) { + device_printf(sc->dev, "no port access configured/usable!\n"); + return (EINVAL); + } + if (sc->params.vfres.nvi == 0) { + device_printf(sc->dev, + "no virtual interfaces configured/usable!\n"); + return (EINVAL); + } + sc->params.portvec = sc->params.vfres.pmask; + + return (0); +} + +static int +set_params__post_init(struct adapter *sc) +{ + uint32_t param, val; + + /* ask for encapsulated CPLs; best effort, the result is ignored */ + param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP); + val = 1; + (void)t4vf_set_params(sc, 1, &param, &val); + + return (0); +} + +#undef FW_PARAM_PFVF +#undef FW_PARAM_DEV + +static int +cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g, + struct intrs_and_queues *iaq) +{ + struct vf_resources *vfres; + int nrxq10g, nrxq1g, nrxq; + int ntxq10g, ntxq1g, ntxq; + int itype, iq_avail, navail, rc; + + /* + * Figure out the layout of queues across our VIs and ensure + * we can allocate enough interrupts for our layout. + */ + vfres = &sc->params.vfres; + bzero(iaq, sizeof(*iaq)); + + for (itype = INTR_MSIX; itype != 0; itype >>= 1) { + if (itype == INTR_INTX) + continue; + + if (itype == INTR_MSIX) + navail = pci_msix_count(sc->dev); + else + navail = pci_msi_count(sc->dev); + + if (navail == 0) + continue; + + iaq->intr_type = itype; + iaq->intr_flags_10g = 0; + iaq->intr_flags_1g = 0; + + /* + * XXX: The Linux driver reserves an Ingress Queue for + * forwarded interrupts when using MSI (but not MSI-X).
+ * It seems it just always asks for 2 interrupts and + * forwards all rxqs to the forwarded interrupt. + * + * We must reserve one IRQ for the firmware + * event queue. + * + * Every rxq requires an ingress queue with a free + * list and interrupts and an egress queue. Every txq + * requires an ETH egress queue. + */ + iaq->nirq = T4VF_EXTRA_INTR; + + /* + * First, determine how many queues we can allocate. + * Start by finding the upper bound on rxqs from the + * limit on ingress queues. + */ + iq_avail = vfres->niqflint - iaq->nirq; + if (iq_avail < n10g + n1g) { + device_printf(sc->dev, + "Not enough ingress queues (%d) for %d ports\n", + vfres->niqflint, n10g + n1g); + return (ENXIO); + } + + /* + * Try to honor the cap on interrupts. If there aren't + * enough interrupts for at least one interrupt per + * port, then don't bother, we will just forward all + * interrupts to one interrupt in that case. + */ + if (iaq->nirq + n10g + n1g <= navail) { + if (iq_avail > navail - iaq->nirq) + iq_avail = navail - iaq->nirq; + } + + nrxq10g = t4_nrxq10g; + nrxq1g = t4_nrxq1g; + nrxq = n10g * nrxq10g + n1g * nrxq1g; + if (nrxq > iq_avail && nrxq1g > 1) { + /* Too many ingress queues. Try just 1 for 1G. */ + nrxq1g = 1; + nrxq = n10g * nrxq10g + n1g * nrxq1g; + } + if (nrxq > iq_avail) { + /* + * Still too many ingress queues. Use what we + * can for each 10G port. + */ + nrxq10g = (iq_avail - n1g) / n10g; + nrxq = n10g * nrxq10g + n1g * nrxq1g; + } + KASSERT(nrxq <= iq_avail, ("too many ingress queues")); + + /* + * Next, determine the upper bound on txqs from the limit + * on ETH queues. + */ + if (vfres->nethctrl < n10g + n1g) { + device_printf(sc->dev, + "Not enough ETH queues (%d) for %d ports\n", + vfres->nethctrl, n10g + n1g); + return (ENXIO); + } + + ntxq10g = t4_ntxq10g; + ntxq1g = t4_ntxq1g; + ntxq = n10g * ntxq10g + n1g * ntxq1g; + if (ntxq > vfres->nethctrl) { + /* Too many ETH queues. Try just 1 for 1G.
*/ + ntxq1g = 1; + ntxq = n10g * ntxq10g + n1g * ntxq1g; + } + if (ntxq > vfres->nethctrl) { + /* + * Still too many ETH queues. Use what we + * can for each 10G port. + */ + ntxq10g = (vfres->nethctrl - n1g) / n10g; + ntxq = n10g * ntxq10g + n1g * ntxq1g; + } + KASSERT(ntxq <= vfres->nethctrl, ("too many ETH queues")); + + /* + * Finally, ensure we have enough egress queues. + */ + if (vfres->neq < (n10g + n1g) * 2) { + device_printf(sc->dev, + "Not enough egress queues (%d) for %d ports\n", + vfres->neq, n10g + n1g); + return (ENXIO); + } + if (nrxq + ntxq > vfres->neq) { + /* Just punt and use 1 for everything. */ + nrxq1g = ntxq1g = nrxq10g = ntxq10g = 1; + nrxq = n10g * nrxq10g + n1g * nrxq1g; + ntxq = n10g * ntxq10g + n1g * ntxq1g; + } + KASSERT(nrxq <= iq_avail, ("too many ingress queues")); + KASSERT(ntxq <= vfres->nethctrl, ("too many ETH queues")); + KASSERT(nrxq + ntxq <= vfres->neq, ("too many egress queues")); + + /* + * Do we have enough interrupts? For MSI the interrupts + * have to be a power of 2 as well. + */ + iaq->nirq += nrxq; + iaq->ntxq10g = ntxq10g; + iaq->ntxq1g = ntxq1g; + iaq->nrxq10g = nrxq10g; + iaq->nrxq1g = nrxq1g; + if (iaq->nirq <= navail && + (itype != INTR_MSI || powerof2(iaq->nirq))) { + navail = iaq->nirq; + if (itype == INTR_MSIX) + rc = pci_alloc_msix(sc->dev, &navail); + else + rc = pci_alloc_msi(sc->dev, &navail); + if (rc != 0) { + device_printf(sc->dev, + "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", + rc, itype, iaq->nirq, navail); + return (rc); + } + if (navail == iaq->nirq) { + iaq->intr_flags_10g = INTR_RXQ; + iaq->intr_flags_1g = INTR_RXQ; + return (0); + } + pci_release_msi(sc->dev); + } + + /* Fall back to a single interrupt.
*/ + iaq->nirq = 1; + navail = iaq->nirq; + if (itype == INTR_MSIX) + rc = pci_alloc_msix(sc->dev, &navail); + else + rc = pci_alloc_msi(sc->dev, &navail); + if (rc != 0) + device_printf(sc->dev, + "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", + rc, itype, iaq->nirq, navail); + iaq->intr_flags_10g = 0; + iaq->intr_flags_1g = 0; + return (rc); + } + + device_printf(sc->dev, + "failed to find a usable interrupt type. " + "allowed=%d, msi-x=%d, msi=%d, intx=1\n", t4_intr_types, + pci_msix_count(sc->dev), pci_msi_count(sc->dev)); + + return (ENXIO); +} + +static int +t4vf_attach(device_t dev) +{ + struct adapter *sc; + int rc = 0, i, j, n10g, n1g, rqidx, tqidx; + struct make_dev_args mda; + struct intrs_and_queues iaq; + struct sge *s; + + sc = device_get_softc(dev); + sc->dev = dev; + pci_enable_busmaster(dev); + pci_set_max_read_req(dev, 4096); + sc->params.pci.mps = pci_get_max_payload(dev); + + sc->flags |= IS_VF; + + sc->sge_gts_reg = VF_SGE_REG(A_SGE_VF_GTS); + sc->sge_kdoorbell_reg = VF_SGE_REG(A_SGE_VF_KDOORBELL); + snprintf(sc->lockname, sizeof(sc->lockname), "%s", + device_get_nameunit(dev)); + mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); + t4_add_adapter(sc); + + mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); + TAILQ_INIT(&sc->sfl); + callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0); + + mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF); + + rc = t4_map_bars_0_and_4(sc); + if (rc != 0) + goto done; /* error message displayed already */ + + rc = -t4vf_prep_adapter(sc); + if (rc != 0) + goto done; + + /* + * Leave the 'pf' and 'mbox' values as zero. This ensures + * that various firmware messages do not set the fields which + * is the correct thing to do for a VF.
+ */ + + memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); + + make_dev_args_init(&mda); + mda.mda_devsw = &t4vf_cdevsw; + mda.mda_uid = UID_ROOT; + mda.mda_gid = GID_WHEEL; + mda.mda_mode = 0600; + mda.mda_si_drv1 = sc; + rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev)); + if (rc != 0) + device_printf(dev, "failed to create nexus char device: %d.\n", + rc); + +#if defined(__i386__) + if ((cpu_feature & CPUID_CX8) == 0) { + device_printf(dev, "64 bit atomics not available.\n"); + rc = ENOTSUP; + goto done; + } +#endif + + /* + * Some environments do not properly handle PCIE FLRs -- e.g. in Linux + * 2.6.31 and later we can't call pci_reset_function() in order to + * issue an FLR because of a self- deadlock on the device semaphore. + * Meanwhile, the OS infrastructure doesn't issue FLRs in all the + * cases where they're needed -- for instance, some versions of KVM + * fail to reset "Assigned Devices" when the VM reboots. Therefore we + * use the firmware based reset in order to reset any per function + * state. + */ + rc = -t4vf_fw_reset(sc); + if (rc != 0) { + device_printf(dev, "FW reset failed: %d\n", rc); + goto done; + } + sc->flags |= FW_OK; + + /* + * Grab basic operational parameters. These will predominantly have + * been set up by the Physical Function Driver or will be hard coded + * into the adapter. We just have to live with them ... Note that + * we _must_ get our VPD parameters before our SGE parameters because + * we need to know the adapter's core clock from the VPD in order to + * properly decode the SGE Timer Values. 
+ */ + rc = get_params__pre_init(sc); + if (rc != 0) + goto done; /* error message displayed already */ + rc = get_params__post_init(sc); + if (rc != 0) + goto done; /* error message displayed already */ + + rc = set_params__post_init(sc); + if (rc != 0) + goto done; /* error message displayed already */ + + rc = t4_map_bar_2(sc); + if (rc != 0) + goto done; /* error message displayed already */ + + rc = t4_create_dma_tag(sc); + if (rc != 0) + goto done; /* error message displayed already */ + + /* + * The number of "ports" which we support is equal to the number of + * Virtual Interfaces with which we've been provisioned. + */ + sc->params.nports = imin(sc->params.vfres.nvi, MAX_NPORTS); + + /* + * We may have been provisioned with more VIs than the number of + * ports we're allowed to access (our Port Access Rights Mask). + * Just use a single VI for each port. + */ + sc->params.nports = imin(sc->params.nports, + bitcount32(sc->params.vfres.pmask)); + +#ifdef notyet + /* + * XXX: The Linux VF driver will lower nports if it thinks there + * are too few resources in vfres (niqflint, nethctrl, neq). + */ +#endif + + /* + * First pass over all the ports - allocate VIs and initialize some + * basic parameters like mac address, port type, etc. We also figure + * out whether a port is 10G or 1G and use that information when + * calculating how many interrupts to attempt to allocate. + */ + n10g = n1g = 0; + for_each_port(sc, i) { + struct port_info *pi; + + pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK); + sc->port[i] = pi; + + /* These must be set before t4_port_init */ + pi->adapter = sc; + pi->port_id = i; + pi->nvi = 1; + pi->vi = malloc(sizeof(struct vi_info) * pi->nvi, M_CXGBE, + M_ZERO | M_WAITOK); + + /* + * Allocate the "main" VI and initialize parameters + * like mac addr. 
+ */ + rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i); + if (rc != 0) { + device_printf(dev, "unable to initialize port %d: %d\n", + i, rc); + free(pi->vi, M_CXGBE); + free(pi, M_CXGBE); + sc->port[i] = NULL; + goto done; + } + + /* No t4_link_start. */ + + snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", + device_get_nameunit(dev), i); + mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); + sc->chan_map[pi->tx_chan] = i; + + pi->tc = malloc(sizeof(struct tx_sched_class) * + sc->chip_params->nsched_cls, M_CXGBE, M_ZERO | M_WAITOK); + + if (is_10G_port(pi) || is_40G_port(pi)) { + n10g++; + } else { + n1g++; + } + + pi->linkdnrc = -1; + + pi->dev = device_add_child(dev, is_t4(sc) ? "cxgbev" : "cxlv", + -1); + if (pi->dev == NULL) { + device_printf(dev, + "failed to add device for port %d.\n", i); + rc = ENXIO; + goto done; + } + pi->vi[0].dev = pi->dev; + device_set_softc(pi->dev, pi); + } + + /* + * Interrupt type, # of interrupts, # of rx/tx queues, etc. + */ + rc = cfg_itype_and_nqueues(sc, n10g, n1g, &iaq); + if (rc != 0) + goto done; /* error message displayed already */ + + sc->intr_type = iaq.intr_type; + sc->intr_count = iaq.nirq; + + s = &sc->sge; + s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g; + s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g; + s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ + s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ + s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ + + s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, + M_ZERO | M_WAITOK); + s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE, + M_ZERO | M_WAITOK); + s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE, + M_ZERO | M_WAITOK); + s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE, + M_ZERO | M_WAITOK); + + sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, + M_ZERO | M_WAITOK); + + /* + * Second pass over the ports. 
This time we know the number of rx and + * tx queues that each port should get. + */ + rqidx = tqidx = 0; + for_each_port(sc, i) { + struct port_info *pi = sc->port[i]; + struct vi_info *vi; + + if (pi == NULL) + continue; + + for_each_vi(pi, j, vi) { + vi->pi = pi; + vi->qsize_rxq = t4_qsize_rxq; + vi->qsize_txq = t4_qsize_txq; + + vi->first_rxq = rqidx; + vi->first_txq = tqidx; + if (is_10G_port(pi) || is_40G_port(pi)) { + vi->tmr_idx = t4_tmr_idx_10g; + vi->pktc_idx = t4_pktc_idx_10g; + vi->flags |= iaq.intr_flags_10g & INTR_RXQ; + vi->nrxq = j == 0 ? iaq.nrxq10g : 1; + vi->ntxq = j == 0 ? iaq.ntxq10g : 1; + } else { + vi->tmr_idx = t4_tmr_idx_1g; + vi->pktc_idx = t4_pktc_idx_1g; + vi->flags |= iaq.intr_flags_1g & INTR_RXQ; + vi->nrxq = j == 0 ? iaq.nrxq1g : 1; + vi->ntxq = j == 0 ? iaq.ntxq1g : 1; + } + rqidx += vi->nrxq; + tqidx += vi->ntxq; + + vi->rsrv_noflowq = 0; + } + } + + rc = t4_setup_intr_handlers(sc); + if (rc != 0) { + device_printf(dev, + "failed to setup interrupt handlers: %d\n", rc); + goto done; + } + + rc = bus_generic_attach(dev); + if (rc != 0) { + device_printf(dev, + "failed to attach all child ports: %d\n", rc); + goto done; + } + + device_printf(dev, + "%d ports, %d %s interrupt%s, %d eq, %d iq\n", + sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ? + "MSI-X" : "MSI", sc->intr_count > 1 ? "s" : "", sc->sge.neq, + sc->sge.niq); + +done: + if (rc != 0) + t4_detach_common(dev); + else + t4_sysctls(sc); + + return (rc); +} + +static void +get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf) +{ + + /* 0x3f is used as the revision for VFs. 
*/ + regs->version = chip_id(sc) | (0x3f << 10); + t4_get_regs(sc, buf, regs->len); +} + +/* + * Write zero to each register offset, in steps of 4, from + * A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L through + * A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H (inclusive), mapped via VF_MPS_REG(). + */ +static void +t4_clr_vi_stats(struct adapter *sc) +{ + int reg; + + for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L; + reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4) + t4_write_reg(sc, VF_MPS_REG(reg), 0); +} + +/* + * cdev ioctl handler; dev->si_drv1 is the struct adapter. The calling + * thread must pass priv_check(PRIV_DRIVER). Handles register get/set, + * register dump, statistics clearing and scheduler class/queue requests; + * any other command fails with ENOTTY. Returns 0 or an errno value. + */ +static int +t4vf_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, + struct thread *td) +{ + int rc; + struct adapter *sc = dev->si_drv1; + + rc = priv_check(td, PRIV_DRIVER); + if (rc != 0) + return (rc); + + switch (cmd) { + case CHELSIO_T4_GETREG: { + struct t4_reg *edata = (struct t4_reg *)data; + + if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) + return (EFAULT); + + if (edata->size == 4) + edata->val = t4_read_reg(sc, edata->addr); + else if (edata->size == 8) + edata->val = t4_read_reg64(sc, edata->addr); + else + return (EINVAL); + + break; + } + case CHELSIO_T4_SETREG: { + struct t4_reg *edata = (struct t4_reg *)data; + + if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) + return (EFAULT); + + if (edata->size == 4) { + if (edata->val & 0xffffffff00000000) + return (EINVAL); + t4_write_reg(sc, edata->addr, (uint32_t) edata->val); + } else if (edata->size == 8) + t4_write_reg64(sc, edata->addr, edata->val); + else + return (EINVAL); + break; + } + case CHELSIO_T4_REGDUMP: { + struct t4_regdump *regs = (struct t4_regdump *)data; + int reglen = t4_get_regs_len(sc); + uint8_t *buf; + + if (regs->len < reglen) { + regs->len = reglen; /* hint to the caller */ + return (ENOBUFS); + } + + regs->len = reglen; + buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO); + get_regs(sc, regs, buf); + rc = copyout(buf, regs->data, reglen); + free(buf, M_CXGBE); + break; + } + case CHELSIO_T4_CLEAR_STATS: { + int i, v; + u_int port_id = *(uint32_t *)data; + struct port_info *pi; + struct vi_info *vi; + + if (port_id >= sc->params.nports) + return (EINVAL); + pi = sc->port[port_id]; + if (pi == NULL) /* slot may be unpopulated */ + return (EIO); + + /* MAC stats */ + pi->tx_parse_error = 0; +
t4_clr_vi_stats(sc); + + /* + * Since this command accepts a port, clear stats for + * all VIs on this port. + */ + for_each_vi(pi, v, vi) { + if (vi->flags & VI_INIT_DONE) { + struct sge_rxq *rxq; + struct sge_txq *txq; + + for_each_rxq(vi, i, rxq) { +#if defined(INET) || defined(INET6) + rxq->lro.lro_queued = 0; + rxq->lro.lro_flushed = 0; +#endif + rxq->rxcsum = 0; + rxq->vlan_extraction = 0; + } + + for_each_txq(vi, i, txq) { + txq->txcsum = 0; + txq->tso_wrs = 0; + txq->vlan_insertion = 0; + txq->imm_wrs = 0; + txq->sgl_wrs = 0; + txq->txpkt_wrs = 0; + txq->txpkts0_wrs = 0; + txq->txpkts1_wrs = 0; + txq->txpkts0_pkts = 0; + txq->txpkts1_pkts = 0; + mp_ring_reset_stats(txq->r); + } + } + } + break; + } + case CHELSIO_T4_SCHED_CLASS: + rc = t4_set_sched_class(sc, (struct t4_sched_params *)data); + break; + case CHELSIO_T4_SCHED_QUEUE: + rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data); + break; + default: + rc = ENOTTY; + } + + return (rc); +} + +static device_method_t t4vf_methods[] = { + DEVMETHOD(device_probe, t4vf_probe), + DEVMETHOD(device_attach, t4vf_attach), + DEVMETHOD(device_detach, t4_detach_common), + + DEVMETHOD_END +}; + +static driver_t t4vf_driver = { + "t4vf", + t4vf_methods, + sizeof(struct adapter) +}; + +static device_method_t t5vf_methods[] = { + DEVMETHOD(device_probe, t5vf_probe), + DEVMETHOD(device_attach, t4vf_attach), + DEVMETHOD(device_detach, t4_detach_common), + + DEVMETHOD_END +}; + +static driver_t t5vf_driver = { + "t5vf", + t5vf_methods, + sizeof(struct adapter) +}; + +static driver_t cxgbev_driver = { + "cxgbev", + cxgbe_methods, + sizeof(struct port_info) +}; + +static driver_t cxlv_driver = { + "cxlv", + cxgbe_methods, + sizeof(struct port_info) +}; + +static devclass_t t4vf_devclass, t5vf_devclass; +static devclass_t cxgbev_devclass, cxlv_devclass; + +DRIVER_MODULE(t4vf, pci, t4vf_driver, t4vf_devclass, 0, 0); +MODULE_VERSION(t4vf, 1); +MODULE_DEPEND(t4vf, t4nex, 1, 1, 1); + +DRIVER_MODULE(t5vf, pci, t5vf_driver,
t5vf_devclass, 0, 0); +MODULE_VERSION(t5vf, 1); +MODULE_DEPEND(t5vf, t5nex, 1, 1, 1); + +DRIVER_MODULE(cxgbev, t4vf, cxgbev_driver, cxgbev_devclass, 0, 0); +MODULE_VERSION(cxgbev, 1); + +DRIVER_MODULE(cxlv, t5vf, cxlv_driver, cxlv_devclass, 0, 0); +MODULE_VERSION(cxlv, 1); Index: sys/modules/cxgbe/Makefile =================================================================== --- sys/modules/cxgbe/Makefile +++ sys/modules/cxgbe/Makefile @@ -7,6 +7,8 @@ SUBDIR= if_cxgbe SUBDIR+= if_cxl +SUBDIR+= if_cxgbev +SUBDIR+= if_cxlv SUBDIR+= t4_firmware SUBDIR+= t5_firmware SUBDIR+= ${_tom} Index: sys/modules/cxgbe/if_cxgbev/Makefile =================================================================== --- /dev/null +++ sys/modules/cxgbe/if_cxgbev/Makefile @@ -0,0 +1,21 @@ +# +# $FreeBSD$ +# + +CXGBE= ${.CURDIR}/../../../dev/cxgbe +.PATH: ${CXGBE} ${CXGBE}/common + +KMOD= if_cxgbev +SRCS= bus_if.h +SRCS+= device_if.h +SRCS+= opt_inet.h +SRCS+= opt_inet6.h +SRCS+= opt_ofed.h +SRCS+= opt_rss.h +SRCS+= pci_if.h pci_iov_if.h +SRCS+= t4_vf.c +SRCS+= t4vf_hw.c + +CFLAGS+= -I${CXGBE} + +.include <bsd.kmod.mk> Index: sys/modules/cxgbe/if_cxlv/Makefile =================================================================== --- /dev/null +++ sys/modules/cxgbe/if_cxlv/Makefile @@ -0,0 +1,11 @@ +# +# $FreeBSD$ +# + +CXGBE= ${.CURDIR}/../../../dev/cxgbe +.PATH: ${CXGBE} + +KMOD= if_cxlv +SRCS= if_cxlv.c + +.include <bsd.kmod.mk>