Index: head/sys/conf/files =================================================================== --- head/sys/conf/files +++ head/sys/conf/files @@ -4137,6 +4137,7 @@ net/if_vxlan.c optional vxlan inet | vxlan inet6 net/ifdi_if.m optional ether pci net/iflib.c optional ether pci +net/iflib_clone.c optional ether pci net/mp_ring.c optional ether net/mppcc.c optional netgraph_mppc_compression net/mppcd.c optional netgraph_mppc_compression Index: head/sys/net/if.h =================================================================== --- head/sys/net/if.h +++ head/sys/net/if.h @@ -162,6 +162,9 @@ #define IFF_STATICARP 0x80000 /* (n) static ARP */ #define IFF_DYING 0x200000 /* (n) interface is winding down */ #define IFF_RENAMING 0x400000 /* (n) interface is being renamed */ +#define IFF_NOGROUP 0x800000 /* (n) interface is not part of any groups */ + + /* * Old names for driver flags so that user space tools can continue to use * the old (portable) names. Index: head/sys/net/if_clone.h =================================================================== --- head/sys/net/if_clone.h +++ head/sys/net/if_clone.h @@ -37,6 +37,8 @@ #ifdef _KERNEL +#define IFC_NOGROUP 0x1 + struct if_clone; /* Methods. */ @@ -59,6 +61,9 @@ int ifc_name2unit(const char *name, int *unit); int ifc_alloc_unit(struct if_clone *, int *); void ifc_free_unit(struct if_clone *, int); +const char *ifc_name(struct if_clone *); +void ifc_flags_set(struct if_clone *, int flags); +int ifc_flags_get(struct if_clone *); #ifdef _SYS_EVENTHANDLER_H_ /* Interface clone event. */ Index: head/sys/net/if_clone.c =================================================================== --- head/sys/net/if_clone.c +++ head/sys/net/if_clone.c @@ -67,6 +67,7 @@ char ifc_name[IFCLOSIZ]; /* (c) Name of device, e.g. `gif' */ struct unrhdr *ifc_unrhdr; /* (c) alloc_unr(9) header */ int ifc_maxunit; /* (c) maximum unit number */ + int ifc_flags; long ifc_refcnt; /* (i) Reference count. 
*/ LIST_HEAD(, ifnet) ifc_iflist; /* (i) List of cloned interfaces */ struct mtx ifc_mtx; /* Mutex to protect members. */ @@ -232,7 +233,8 @@ if (ifp == NULL) panic("%s: lookup failed for %s", __func__, name); - if_addgroup(ifp, ifc->ifc_name); + if ((ifc->ifc_flags & IFC_NOGROUP) == 0) + if_addgroup(ifp, ifc->ifc_name); IF_CLONE_LOCK(ifc); IFC_IFLIST_INSERT(ifc, ifp); @@ -319,16 +321,17 @@ CURVNET_RESTORE(); return (ENXIO); /* ifp is not on the list. */ } + if ((ifc->ifc_flags & IFC_NOGROUP) == 0) + if_delgroup(ifp, ifc->ifc_name); - if_delgroup(ifp, ifc->ifc_name); - if (ifc->ifc_type == SIMPLE) err = ifc_simple_destroy(ifc, ifp); else err = (*ifc->ifc_destroy)(ifc, ifp); if (err != 0) { - if_addgroup(ifp, ifc->ifc_name); + if ((ifc->ifc_flags & IFC_NOGROUP) == 0) + if_addgroup(ifp, ifc->ifc_name); IF_CLONE_LOCK(ifc); IFC_IFLIST_INSERT(ifc, ifp); @@ -553,9 +556,10 @@ void if_clone_addgroup(struct ifnet *ifp, struct if_clone *ifc) { - - if_addgroup(ifp, ifc->ifc_name); - IF_CLONE_REMREF(ifc); + if ((ifc->ifc_flags & IFC_NOGROUP) == 0) + if_addgroup(ifp, ifc->ifc_name); + /* Drop the reference unconditionally, as before IFC_NOGROUP existed. */ + IF_CLONE_REMREF(ifc); } /* @@ -731,4 +735,22 @@ ifc_free_unit(ifc, unit); return (0); +} + +const char * +ifc_name(struct if_clone *ifc) +{ + return (ifc->ifc_name); +} + +void +ifc_flags_set(struct if_clone *ifc, int flags) +{ + ifc->ifc_flags = flags; +} + +int +ifc_flags_get(struct if_clone *ifc) +{ + return (ifc->ifc_flags); } Index: head/sys/net/ifdi_if.m =================================================================== --- head/sys/net/ifdi_if.m +++ head/sys/net/ifdi_if.m @@ -1,5 +1,5 @@ # -# Copyright (c) 2014, Matthew Macy (mmacy@mattmacy.io) +# Copyright (c) 2014-2018, Matthew Macy (mmacy@mattmacy.io) # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -39,6 +39,9 @@ #include #include #include +#include +#include +#include INTERFACE ifdi; @@ -49,6 +52,18 @@ { } + static int + null_knlist_add(if_ctx_t _ctx __unused, struct knote *_kn) + { + return (0); + } + + static int + null_knote_event(if_ctx_t _ctx __unused, struct knote *_kn, int _hint) + { + return (0); + } + static void null_timer_op(if_ctx_t _ctx __unused, uint16_t _qsidx __unused) { @@ -61,6 +76,12 @@ } static int + null_int_int_op(if_ctx_t _ctx __unused, int arg0 __unused) + { + return (ENOTSUP); + } + + static int null_queue_intr_enable(if_ctx_t _ctx __unused, uint16_t _qid __unused) { return (ENOTSUP); @@ -111,20 +132,100 @@ { return (ENOTSUP); } + + static void + null_media_status(if_ctx_t ctx __unused, struct ifmediareq *ifmr) + { + ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE; + ifmr->ifm_active = IFM_ETHER | IFM_25G_ACC | IFM_FDX; + } + + static int + null_cloneattach(if_ctx_t ctx __unused, struct if_clone *ifc __unused, + const char *name __unused, caddr_t params __unused) + { + return (0); + } + + static void + null_rx_clset(if_ctx_t _ctx __unused, uint16_t _flid __unused, + uint16_t _qid __unused, caddr_t *_sdcl __unused) + { + } + /* Default for object_info_get, which is declared to return int; fills nothing in. */ + static int + null_object_info_get(if_ctx_t ctx __unused, void *data __unused, int size __unused) + { + return (ENOTSUP); + } + static int + default_mac_set(if_ctx_t ctx, const uint8_t *mac) + { + struct ifnet *ifp = iflib_get_ifp(ctx); + struct sockaddr_dl *sdl; + + if (ifp && ifp->if_addr) { + sdl = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; + MPASS(sdl->sdl_type == IFT_ETHER); + memcpy(LLADDR(sdl), mac, ETHER_ADDR_LEN); + } + return (0); + } }; # +# kevent interfaces +# + +METHOD int knlist_add { + if_ctx_t _ctx; + struct knote *_kn; +} DEFAULT null_knlist_add; + +METHOD int knote_event { + if_ctx_t _ctx; + struct knote *_kn; + int hint; +} DEFAULT null_knote_event; + + +# +# query +# + +METHOD int object_info_get { + if_ctx_t _ctx; + void *data; + int size; +} 
DEFAULT null_object_info_get; + +# # bus interfaces # METHOD int attach_pre { if_ctx_t _ctx; -}; +} DEFAULT null_int_op; METHOD int attach_post { if_ctx_t _ctx; -}; +} DEFAULT null_int_op; +METHOD int reinit_pre { + if_ctx_t _ctx; +} DEFAULT null_int_op; + +METHOD int reinit_post { + if_ctx_t _ctx; +} DEFAULT null_int_op; + +METHOD int cloneattach { + if_ctx_t _ctx; + struct if_clone *_ifc; + const char *_name; + caddr_t params; +} DEFAULT null_cloneattach; + METHOD int detach { if_ctx_t _ctx; }; @@ -164,8 +265,15 @@ METHOD void queues_free { if_ctx_t _ctx; -}; +} DEFAULT null_void_op; +METHOD void rx_clset { + if_ctx_t _ctx; + uint16_t _fl; + uint16_t _qsetid; + caddr_t *_sdcl; +} DEFAULT null_rx_clset; + # # interface reset / stop # @@ -185,7 +293,7 @@ METHOD int msix_intr_assign { if_ctx_t _sctx; int msix; -}; +} DEFAULT null_int_int_op; METHOD void intr_enable { if_ctx_t _ctx; @@ -221,6 +329,10 @@ if_ctx_t _ctx; uint32_t _mtu; }; +METHOD int mac_set { + if_ctx_t _ctx; + const uint8_t *_mac; +} DEFAULT default_mac_set; METHOD void media_set{ if_ctx_t _ctx; @@ -273,11 +385,11 @@ METHOD void media_status { if_ctx_t _ctx; struct ifmediareq *_ifm; -}; +} DEFAULT null_media_status; METHOD int media_change { if_ctx_t _ctx; -}; +} DEFAULT null_int_op; METHOD uint64_t get_counter { if_ctx_t _ctx; @@ -317,6 +429,11 @@ METHOD void watchdog_reset { if_ctx_t _ctx; } DEFAULT null_void_op; + +METHOD void watchdog_reset_queue { + if_ctx_t _ctx; + uint16_t _q; +} DEFAULT null_timer_op; METHOD void led_func { if_ctx_t _ctx; Index: head/sys/net/iflib.h =================================================================== --- head/sys/net/iflib.h +++ head/sys/net/iflib.h @@ -36,6 +36,8 @@ #include #include +struct if_clone; + /* * The value type for indexing, limits max descriptors * to 65535 can be conditionally redefined to uint32_t @@ -57,6 +59,8 @@ typedef struct if_shared_ctx *if_shared_ctx_t; struct if_int_delay_info; typedef struct if_int_delay_info *if_int_delay_info_t; 
+struct if_pseudo; +typedef struct if_pseudo *if_pseudo_t; /* * File organization: @@ -194,6 +198,9 @@ int isc_vectors; int isc_nrxqsets; int isc_ntxqsets; + uint8_t isc_min_tx_latency; /* disable doorbell update batching */ + uint8_t isc_rx_mvec_enable; /* generate mvecs on rx */ + uint32_t isc_txrx_budget_bytes_max; int isc_msix_bar; /* can be model specific - initialize in attach_pre */ int isc_tx_nsegments; /* can be model specific - initialize in attach_pre */ int isc_ntxd[8]; @@ -214,6 +221,7 @@ int isc_rss_table_mask; int isc_nrxqsets_max; int isc_ntxqsets_max; + uint32_t isc_tx_qdepth; iflib_intr_mode_t isc_intr; uint16_t isc_max_frame_size; /* set at init time by driver */ @@ -259,6 +267,7 @@ int isc_rx_process_limit; int isc_tx_reclaim_thresh; int isc_flags; + const char *isc_name; }; typedef struct iflib_dma_info { @@ -320,6 +329,35 @@ * Driver needs frames padded to some minimum length */ #define IFLIB_NEED_ETHER_PAD 0x100 +/* + * Packets can be freed immediately after encap + */ +#define IFLIB_TXD_ENCAP_PIO 0x00200 +/* + * Use RX completion handler + */ +#define IFLIB_RX_COMPLETION 0x00400 +/* + * Skip refilling cluster free lists + */ +#define IFLIB_SKIP_CLREFILL 0x00800 +/* + * Don't reset on hang + */ +#define IFLIB_NO_HANG_RESET 0x01000 +/* + * Don't need/want most of the niceties of + * queue management + */ +#define IFLIB_PSEUDO 0x02000 +/* + * No DMA support needed / wanted + */ +#define IFLIB_VIRTUAL 0x04000 +/* + * autogenerate a MAC address + */ +#define IFLIB_GEN_MAC 0x08000 @@ -404,4 +442,9 @@ void iflib_add_int_delay_sysctl(if_ctx_t, const char *, const char *, if_int_delay_info_t, int, int); +/* + * Pseudo device support + */ +if_pseudo_t iflib_clone_register(if_shared_ctx_t); +void iflib_clone_deregister(if_pseudo_t); #endif /* __IFLIB_H_ */ Index: head/sys/net/iflib.c =================================================================== --- head/sys/net/iflib.c +++ head/sys/net/iflib.c @@ -37,16 +37,19 @@ #include #include #include 
-#include +#include #include #include +#include #include #include #include #include +#include #include #include #include +#include #include #include #include @@ -85,6 +88,7 @@ #include #include +#include #include "ifdi_if.h" @@ -130,7 +134,7 @@ * * */ -static MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); +MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; @@ -241,7 +245,19 @@ return (&ctx->ifc_media); } +uint32_t +iflib_get_flags(if_ctx_t ctx) +{ + return (ctx->ifc_flags); +} + void +iflib_set_detach(if_ctx_t ctx) +{ + ctx->ifc_in_detach = 1; +} + +void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { @@ -310,18 +326,7 @@ #define IFLIB_RESTART_BUDGET 8 -#define IFC_LEGACY 0x001 -#define IFC_QFLUSH 0x002 -#define IFC_MULTISEG 0x004 -#define IFC_DMAR 0x008 -#define IFC_SC_ALLOCATED 0x010 -#define IFC_INIT_DONE 0x020 -#define IFC_PREFETCH 0x040 -#define IFC_DO_RESET 0x080 -#define IFC_DO_WATCHDOG 0x100 -#define IFC_CHECK_HUNG 0x200 - #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) @@ -511,6 +516,16 @@ #endif } +static device_method_t iflib_pseudo_methods[] = { + DEVMETHOD(device_attach, noop_attach), + DEVMETHOD(device_detach, iflib_pseudo_detach), + DEVMETHOD_END +}; + +driver_t iflib_pseudodriver = { + "iflib_pseudo", iflib_pseudo_methods, sizeof(struct iflib_ctx), +}; + static inline void rxd_info_zero(if_rxd_info_t ri) { @@ -709,8 +724,6 @@ static void iflib_debug_reset(void) {} #endif - - #define IFLIB_DEBUG 0 static void iflib_tx_structures_free(if_ctx_t ctx); @@ -729,7 +742,6 @@ static void iflib_add_device_sysctl_post(if_ctx_t ctx); static void iflib_ifmp_purge(iflib_txq_t txq); static void _iflib_pre_assert(if_softc_ctx_t scctx); -static void iflib_stop(if_ctx_t ctx); static void iflib_if_init_locked(if_ctx_t ctx); #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m); @@ 
-1242,6 +1254,40 @@ #endif static void +iflib_gen_mac(if_ctx_t ctx) +{ + struct thread *td; + struct ifnet *ifp; + MD5_CTX mdctx; + char uuid[HOSTUUIDLEN+1]; + char buf[HOSTUUIDLEN+16]; + uint8_t *mac; + unsigned char digest[16]; + + td = curthread; + ifp = ctx->ifc_ifp; + mac = ctx->ifc_mac; + uuid[HOSTUUIDLEN] = 0; + bcopy(td->td_ucred->cr_prison->pr_hostuuid, uuid, HOSTUUIDLEN); + snprintf(buf, HOSTUUIDLEN+16, "%s-%s", uuid, device_get_nameunit(ctx->ifc_dev)); + /* + * Generate a pseudo-random, deterministic MAC + * address based on the UUID and unit number. + * The FreeBSD Foundation OUI of 58-9C-FC is used. + */ + MD5Init(&mdctx); + MD5Update(&mdctx, buf, strlen(buf)); + MD5Final(digest, &mdctx); + + mac[0] = 0x58; + mac[1] = 0x9C; + mac[2] = 0xFC; + mac[3] = digest[0]; + mac[4] = digest[1]; + mac[5] = digest[2]; +} + +static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) { iflib_fl_t fl; @@ -2251,7 +2297,7 @@ CTX_UNLOCK(ctx); } -static void +void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; @@ -4202,40 +4248,19 @@ return (ENXIO); } -int -iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) +static void +iflib_reset_qvalues(if_ctx_t ctx) { - int err, rid, msix; - if_ctx_t ctx; - if_t ifp; - if_softc_ctx_t scctx; - int i; - uint16_t main_txq; - uint16_t main_rxq; + if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; + if_shared_ctx_t sctx = ctx->ifc_sctx; + device_t dev = ctx->ifc_dev; + int i, main_txq, main_rxq; + main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; + main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 
1 : 0; - ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); - - if (sc == NULL) { - sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); - device_set_softc(dev, ctx); - ctx->ifc_flags |= IFC_SC_ALLOCATED; - } - - ctx->ifc_sctx = sctx; - ctx->ifc_dev = dev; - ctx->ifc_softc = sc; - - if ((err = iflib_register(ctx)) != 0) { - device_printf(dev, "iflib_register failed %d\n", err); - return (err); - } - iflib_add_device_sysctl_pre(ctx); - - scctx = &ctx->ifc_softc_ctx; - ifp = ctx->ifc_ifp; - ctx->ifc_nhwtxqs = sctx->isc_ntxqs; - + scctx->isc_txrx_budget_bytes_max = IFLIB_MAX_TX_BYTES; + scctx->isc_tx_qdepth = IFLIB_DEFAULT_TX_QDEPTH; /* * XXX sanity check that ntxd & nrxd are a power of 2 */ @@ -4283,7 +4308,45 @@ scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } +} +int +iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) +{ + int err, rid, msix; + if_ctx_t ctx; + if_t ifp; + if_softc_ctx_t scctx; + int i; + uint16_t main_txq; + uint16_t main_rxq; + + + ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); + + if (sc == NULL) { + sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); + device_set_softc(dev, ctx); + ctx->ifc_flags |= IFC_SC_ALLOCATED; + } + + ctx->ifc_sctx = sctx; + ctx->ifc_dev = dev; + ctx->ifc_softc = sc; + + if ((err = iflib_register(ctx)) != 0) { + if (ctx->ifc_flags & IFC_SC_ALLOCATED) + free(sc, M_IFLIB); + free(ctx, M_IFLIB); + device_printf(dev, "iflib_register failed %d\n", err); + return (err); + } + iflib_add_device_sysctl_pre(ctx); + + scctx = &ctx->ifc_softc_ctx; + ifp = ctx->ifc_ifp; + + iflib_reset_qvalues(ctx); CTX_LOCK(ctx); if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { CTX_UNLOCK(ctx); @@ -4457,6 +4520,232 @@ IFDI_DETACH(ctx); CTX_UNLOCK(ctx); return (err); +} + +int +iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, + struct iflib_cloneattach_ctx *clctx) +{ + int err; + if_ctx_t ctx; + if_t ifp; + if_softc_ctx_t scctx; + int i; + void *sc; + uint16_t main_txq; 
+ uint16_t main_rxq; + + ctx = malloc(sizeof(*ctx), M_IFLIB, M_WAITOK|M_ZERO); + sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); + ctx->ifc_flags |= IFC_SC_ALLOCATED; + if (sctx->isc_flags & (IFLIB_PSEUDO|IFLIB_VIRTUAL)) + ctx->ifc_flags |= IFC_PSEUDO; + + ctx->ifc_sctx = sctx; + ctx->ifc_softc = sc; + ctx->ifc_dev = dev; + + if ((err = iflib_register(ctx)) != 0) { + device_printf(dev, "%s: iflib_register failed %d\n", __func__, err); + free(sc, M_IFLIB); + free(ctx, M_IFLIB); + return (err); + } + iflib_add_device_sysctl_pre(ctx); + + scctx = &ctx->ifc_softc_ctx; + ifp = ctx->ifc_ifp; + + /* + * XXX sanity check that ntxd & nrxd are a power of 2 + */ + iflib_reset_qvalues(ctx); + + if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { + device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); + return (err); + } + if (sctx->isc_flags & IFLIB_GEN_MAC) + iflib_gen_mac(ctx); + if ((err = IFDI_CLONEATTACH(ctx, clctx->cc_ifc, clctx->cc_name, + clctx->cc_params)) != 0) { + device_printf(dev, "IFDI_CLONEATTACH failed %d\n", err); + return (err); + } + ifmedia_add(&ctx->ifc_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); + ifmedia_add(&ctx->ifc_media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&ctx->ifc_media, IFM_ETHER | IFM_AUTO); + +#ifdef INVARIANTS + MPASS(scctx->isc_capenable); + if (scctx->isc_capenable & IFCAP_TXCSUM) + MPASS(scctx->isc_tx_csum_flags); +#endif + + if_setcapabilities(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_LINKSTATE); + if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_LINKSTATE); + + ifp->if_flags |= IFF_NOGROUP; + if (sctx->isc_flags & IFLIB_PSEUDO) { + ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); + + if ((err = IFDI_ATTACH_POST(ctx)) != 0) { + device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); + goto fail_detach; + } + *ctxp = ctx; + + if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); + iflib_add_device_sysctl_post(ctx); + ctx->ifc_flags |= IFC_INIT_DONE; + return (0); + } + _iflib_pre_assert(scctx); + 
ctx->ifc_txrx = *scctx->isc_txrx; + + if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) + scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; + if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) + scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; + + main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; + main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; + + /* XXX change for per-queue sizes */ + device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", + scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); + for (i = 0; i < sctx->isc_nrxqs; i++) { + if (!powerof2(scctx->isc_nrxd[i])) { + /* round down instead? */ + device_printf(dev, "# rx descriptors must be a power of 2\n"); + err = EINVAL; + goto fail; + } + } + for (i = 0; i < sctx->isc_ntxqs; i++) { + if (!powerof2(scctx->isc_ntxd[i])) { + device_printf(dev, + "# tx descriptors must be a power of 2"); + err = EINVAL; + goto fail; + } + } + + if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / + MAX_SINGLE_PACKET_FRACTION) + scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / + MAX_SINGLE_PACKET_FRACTION); + if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / + MAX_SINGLE_PACKET_FRACTION) + scctx->isc_tx_tso_segments_max = max(1, + scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); + + /* + * Protect the stack against modern hardware + */ + if (scctx->isc_tx_tso_size_max > FREEBSD_TSO_SIZE_MAX) + scctx->isc_tx_tso_size_max = FREEBSD_TSO_SIZE_MAX; + + /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ + ifp->if_hw_tsomaxsegcount = scctx->isc_tx_tso_segments_max; + ifp->if_hw_tsomax = scctx->isc_tx_tso_size_max; + ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max; + if (scctx->isc_rss_table_size == 0) + scctx->isc_rss_table_size = 64; + scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; + + 
GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); + /* XXX format name */ + taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); + + /* XXX --- can support > 1 -- but keep it simple for now */ + scctx->isc_intr = IFLIB_INTR_LEGACY; + + /* Get memory for the station queues */ + if ((err = iflib_queues_alloc(ctx))) { + device_printf(dev, "Unable to allocate queue memory\n"); + goto fail; + } + + if ((err = iflib_qset_structures_setup(ctx))) { + device_printf(dev, "qset structure setup failed %d\n", err); + goto fail_queues; + } + /* + * XXX What if anything do we want to do about interrupts? + */ + ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); + if ((err = IFDI_ATTACH_POST(ctx)) != 0) { + device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); + goto fail_detach; + } + /* XXX handle more than one queue */ + for (i = 0; i < scctx->isc_nrxqsets; i++) + IFDI_RX_CLSET(ctx, 0, i, ctx->ifc_rxqs[i].ifr_fl[0].ifl_sds.ifsd_cl); + + *ctxp = ctx; + + if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); + iflib_add_device_sysctl_post(ctx); + ctx->ifc_flags |= IFC_INIT_DONE; + return (0); +fail_detach: + ether_ifdetach(ctx->ifc_ifp); +fail_queues: + iflib_tx_structures_free(ctx); + iflib_rx_structures_free(ctx); +fail: + IFDI_DETACH(ctx); + return (err); +} + +int +iflib_pseudo_deregister(if_ctx_t ctx) +{ + if_t ifp = ctx->ifc_ifp; + iflib_txq_t txq; + iflib_rxq_t rxq; + int i, j; + struct taskqgroup *tqg; + iflib_fl_t fl; + + /* Unregister VLAN events */ + if (ctx->ifc_vlan_attach_event != NULL) + EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); + if (ctx->ifc_vlan_detach_event != NULL) + EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); + + ether_ifdetach(ifp); + /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ + CTX_LOCK_DESTROY(ctx); + /* XXX drain any dependent tasks */ + tqg = qgroup_if_io_tqg; + for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { + 
callout_drain(&txq->ift_timer); + if (txq->ift_task.gt_uniq != NULL) + taskqgroup_detach(tqg, &txq->ift_task); + } + for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { + if (rxq->ifr_task.gt_uniq != NULL) + taskqgroup_detach(tqg, &rxq->ifr_task); + + for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) + free(fl->ifl_rx_bitmap, M_IFLIB); + } + tqg = qgroup_if_config_tqg; + if (ctx->ifc_admin_task.gt_uniq != NULL) + taskqgroup_detach(tqg, &ctx->ifc_admin_task); + if (ctx->ifc_vflr_task.gt_uniq != NULL) + taskqgroup_detach(tqg, &ctx->ifc_vflr_task); + + if_free(ifp); + + iflib_tx_structures_free(ctx); + iflib_rx_structures_free(ctx); + if (ctx->ifc_flags & IFC_SC_ALLOCATED) + free(ctx->ifc_softc, M_IFLIB); + free(ctx, M_IFLIB); + return (0); } int Index: head/sys/net/iflib_clone.c =================================================================== --- head/sys/net/iflib_clone.c +++ head/sys/net/iflib_clone.c @@ -0,0 +1,303 @@ +/*- + * Copyright (c) 2014-2018, Matthew Macy + * Copyright (C) 2017-2018 Joyent Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Neither the name of Matthew Macy nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_acpi.h" +#include "opt_sched.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "ifdi_if.h" + +int +noop_attach(device_t dev) +{ + return (0); +} + +int +iflib_pseudo_detach(device_t dev) +{ + if_ctx_t ctx; + uint32_t ifc_flags; + + ctx = device_get_softc(dev); + ifc_flags = iflib_get_flags(ctx); + if ((ifc_flags & IFC_INIT_DONE) == 0) + return (0); + return (IFDI_DETACH(ctx)); +} + +static device_t iflib_pseudodev; + +static struct mtx pseudoif_mtx; +MTX_SYSINIT(pseudoif_mtx, &pseudoif_mtx, "pseudoif_mtx", MTX_DEF); + +#define PSEUDO_LOCK() mtx_lock(&pseudoif_mtx); +#define PSEUDO_UNLOCK() mtx_unlock(&pseudoif_mtx); + +struct if_pseudo { + eventhandler_tag ip_detach_tag; + eventhandler_tag ip_lladdr_tag; + struct if_clone *ip_ifc; + if_shared_ctx_t ip_sctx; + devclass_t ip_dc; + LIST_ENTRY(if_pseudo) ip_list; + int ip_on_list; +}; + +static LIST_HEAD(, if_pseudo) iflib_pseudos = LIST_HEAD_INITIALIZER(iflib_pseudos); + +/* + * XXX this assumes that the rest of the + * code won't hang on to it after it's + * removed / unloaded + */ 
+static if_pseudo_t +iflib_ip_lookup(const char *name) +{ + if_pseudo_t ip = NULL; + + PSEUDO_LOCK(); + LIST_FOREACH(ip, &iflib_pseudos, ip_list) { + if (!strcmp(ip->ip_sctx->isc_name, name)) + break; + } + PSEUDO_UNLOCK(); + return (ip); +} + +static void +iflib_ip_delete(if_pseudo_t ip) +{ + PSEUDO_LOCK(); + if (ip->ip_on_list) { + LIST_REMOVE(ip, ip_list); + ip->ip_on_list = 0; + } + PSEUDO_UNLOCK(); +} + +static void +iflib_ip_insert(if_pseudo_t ip) +{ + PSEUDO_LOCK(); + if (!ip->ip_on_list) { + LIST_INSERT_HEAD(&iflib_pseudos, ip, ip_list); + ip->ip_on_list = 1; + } + PSEUDO_UNLOCK(); +} + +static void +iflib_ifdetach(void *arg __unused, struct ifnet *ifp) +{ + /* If the ifnet is just being renamed, don't do anything. */ + if (ifp->if_flags & IFF_RENAMING) + return; +} + +static void +iflib_iflladdr(void *arg __unused, struct ifnet *ifp) +{ +} + +static int +iflib_clone_create(struct if_clone *ifc, int unit, caddr_t params) +{ + const char *name = ifc_name(ifc); + struct iflib_cloneattach_ctx clctx; + if_ctx_t ctx; + if_pseudo_t ip; + device_t dev; + int rc; + + clctx.cc_ifc = ifc; + clctx.cc_len = 0; + clctx.cc_params = params; + clctx.cc_name = name; + + if (__predict_false(iflib_pseudodev == NULL)) { + /* SYSINIT initialization would panic !?! 
*/ + mtx_lock(&Giant); + iflib_pseudodev = device_add_child(root_bus, "ifpseudo", 0); + mtx_unlock(&Giant); + MPASS(iflib_pseudodev != NULL); + } + ip = iflib_ip_lookup(name); + if (ip == NULL) { + printf("no ip found for %s\n", name); + return (ENOENT); + } + if ((dev = devclass_get_device(ip->ip_dc, unit)) != NULL) { + printf("unit %d allocated\n", unit); + bus_generic_print_child(iflib_pseudodev, dev); + return (EBUSY); + } + PSEUDO_LOCK(); + dev = device_add_child(iflib_pseudodev, name, unit); + device_set_driver(dev, &iflib_pseudodriver); + PSEUDO_UNLOCK(); + device_quiet(dev); + rc = device_attach(dev); + MPASS(rc == 0); + MPASS(dev != NULL); + MPASS(devclass_get_device(ip->ip_dc, unit) == dev); + rc = iflib_pseudo_register(dev, ip->ip_sctx, &ctx, &clctx); + if (rc) { + mtx_lock(&Giant); + device_delete_child(iflib_pseudodev, dev); + mtx_unlock(&Giant); + } else + device_set_softc(dev, ctx); + + return (rc); +} + +static void +iflib_clone_destroy(struct ifnet *ifp) +{ + if_ctx_t ctx; + device_t dev; + struct sx *ctx_lock; + int rc; + /* + * Detach device / free / free unit + * + */ + ctx = if_getsoftc(ifp); + dev = iflib_get_dev(ctx); + ctx_lock = iflib_ctx_lock_get(ctx); + sx_xlock(ctx_lock); + iflib_set_detach(ctx); + iflib_stop(ctx); + sx_xunlock(ctx_lock); + + mtx_lock(&Giant); + rc = device_delete_child(iflib_pseudodev, dev); + mtx_unlock(&Giant); + if (rc == 0) + iflib_pseudo_deregister(ctx); +} + +if_pseudo_t +iflib_clone_register(if_shared_ctx_t sctx) +{ + if_pseudo_t ip; + + if (sctx->isc_name == NULL) { + printf("iflib_clone_register failed - shared_ctx needs to have a device name\n"); + return (NULL); + } + if (iflib_ip_lookup(sctx->isc_name) != NULL) { + printf("iflib_clone_register failed - shared_ctx %s alread registered\n", + sctx->isc_name); + return (NULL); + } + ip = malloc(sizeof(*ip), M_IFLIB, M_WAITOK|M_ZERO); + ip->ip_sctx = sctx; + ip->ip_dc = devclass_create(sctx->isc_name); + if (ip->ip_dc == NULL) + goto fail_clone; + /* XXX --- we 
can handle clone_advanced later */ + ip->ip_ifc = if_clone_simple(sctx->isc_name, iflib_clone_create, iflib_clone_destroy, 0); + if (ip->ip_ifc == NULL) { + printf("clone_simple failed -- cloned %s devices will not be available\n", sctx->isc_name); + goto fail_clone; + } + ifc_flags_set(ip->ip_ifc, IFC_NOGROUP); + ip->ip_lladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, + iflib_iflladdr, NULL, EVENTHANDLER_PRI_ANY); + if (ip->ip_lladdr_tag == NULL) + goto fail_addr; + ip->ip_detach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, + iflib_ifdetach, NULL, EVENTHANDLER_PRI_ANY); + + if (ip->ip_detach_tag == NULL) + goto fail_depart; + + iflib_ip_insert(ip); + return (ip); + fail_depart: + EVENTHANDLER_DEREGISTER(iflladdr_event, ip->ip_lladdr_tag); + fail_addr: + if_clone_detach(ip->ip_ifc); + fail_clone: + free(ip, M_IFLIB); + return (NULL); +} + +void +iflib_clone_deregister(if_pseudo_t ip) +{ + /* XXX check that is not still in use */ + iflib_ip_delete(ip); + EVENTHANDLER_DEREGISTER(ifnet_departure_event, ip->ip_detach_tag); + EVENTHANDLER_DEREGISTER(iflladdr_event, ip->ip_lladdr_tag); + if_clone_detach(ip->ip_ifc); + /* XXX free devclass */ + free(ip, M_IFLIB); +} Index: head/sys/net/iflib_private.h =================================================================== --- head/sys/net/iflib_private.h +++ head/sys/net/iflib_private.h @@ -0,0 +1,73 @@ +/*- + * Copyright (c) 2018, Matthew Macy (mmacy@freebsd.org) + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Neither the name of Matthew Macy nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __NET_IFLIB_PRIVATE_H_ +#define __NET_IFLIB_PRIVATE_H_ + + +#define IFC_LEGACY 0x001 +#define IFC_QFLUSH 0x002 +#define IFC_MULTISEG 0x004 +#define IFC_DMAR 0x008 +#define IFC_SC_ALLOCATED 0x010 +#define IFC_INIT_DONE 0x020 +#define IFC_PREFETCH 0x040 +#define IFC_DO_RESET 0x080 +#define IFC_DO_WATCHDOG 0x100 +#define IFC_CHECK_HUNG 0x200 +#define IFC_PSEUDO 0x400 + +MALLOC_DECLARE(M_IFLIB); + +#define IFLIB_MAX_TX_BYTES (2*1024*1024) +#define IFLIB_MIN_TX_BYTES (8*1024) +#define IFLIB_DEFAULT_TX_QDEPTH 2048 + + +struct iflib_cloneattach_ctx { + struct if_clone *cc_ifc; + caddr_t cc_params; + const char *cc_name; + int cc_len; +}; + +extern driver_t iflib_pseudodriver; +int noop_attach(device_t dev); +int iflib_pseudo_detach(device_t dev); + +int iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, + struct iflib_cloneattach_ctx *clctx); + +int iflib_pseudo_deregister(if_ctx_t ctx); + +uint32_t iflib_get_flags(if_ctx_t ctx); +void iflib_set_detach(if_ctx_t ctx); +void iflib_stop(if_ctx_t ctx); + +#endif