diff --git a/sys/net/if.c b/sys/net/if.c --- a/sys/net/if.c +++ b/sys/net/if.c @@ -476,12 +476,23 @@ } #ifdef VIMAGE +static void +vnet_if_return_one(struct ifnet *ifp) +{ + int found __diagused; + + if (ifc_vmove(ifp, ifp->if_home_vnet, NULL) == ENOTSUP) { + found = if_unlink_ifnet(ifp, true); + MPASS(found); + if_vmove(ifp, ifp->if_home_vnet); + } +} + static void vnet_if_return(const void *unused __unused) { struct ifnet *ifp, *nifp; struct ifnet **pending; - int found __diagused; int i; i = 0; @@ -505,17 +516,16 @@ /* Return all inherited interfaces to their parent vnets. */ CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { if (ifp->if_home_vnet != ifp->if_vnet) { - found = if_unlink_ifnet(ifp, true); - MPASS(found); - - pending[i++] = ifp; + if (if_try_ref(ifp)) + pending[i++] = ifp; } } IFNET_WUNLOCK(); for (int j = 0; j < i; j++) { sx_xlock(&ifnet_detach_sxlock); - if_vmove(pending[j], pending[j]->if_home_vnet); + vnet_if_return_one(pending[j]); + if_rele(pending[j]); sx_xunlock(&ifnet_detach_sxlock); } @@ -653,7 +663,7 @@ for (int i = 0; i < IFCOUNTERS; i++) counter_u64_free(ifp->if_counters[i]); - free(ifp->if_description, M_IFDESCR); + if_freedescr(ifp->if_description); free(ifp->if_hw_addr, M_IFADDR); free(ifp, M_IFNET); } @@ -708,7 +718,6 @@ bool if_try_ref(struct ifnet *ifp) { - NET_EPOCH_ASSERT(); return (refcount_acquire_if_not_zero(&ifp->if_refcount)); } @@ -1350,8 +1359,8 @@ } sx_xlock(&ifnet_detach_sxlock); - /* Make sure the VNET is stable. */ - shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet); + /* Make sure the VNET and interface are stable. 
*/ + shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet) || (ifp->if_flags & IFF_DYING); if (shutdown) { sx_xunlock(&ifnet_detach_sxlock); CURVNET_RESTORE(); @@ -1360,6 +1369,14 @@ } CURVNET_RESTORE(); + error = ifc_vmove(ifp, pr->pr_vnet, NULL); + if (error != ENOTSUP) { + sx_xunlock(&ifnet_detach_sxlock); + CURVNET_RESTORE(); + prison_free(pr); + return (error); + } + found = if_unlink_ifnet(ifp, true); if (! found) { sx_xunlock(&ifnet_detach_sxlock); @@ -1424,6 +1441,15 @@ return (EBUSY); } + sx_xlock(&ifnet_detach_sxlock); + error = ifc_vmove(ifp, vnet_dst, NULL); + sx_xunlock(&ifnet_detach_sxlock); + if (error != ENOTSUP) { + CURVNET_RESTORE(); + prison_free(pr); + return (error); + } + /* Get interface back from child jail/vnet. */ found = if_unlink_ifnet(ifp, true); MPASS(found); @@ -2478,7 +2504,7 @@ int new_flags, temp_flags; size_t namelen, onamelen; size_t descrlen, nvbuflen; - char *descrbuf, *odescrbuf; + char *descrbuf; char new_name[IFNAMSIZ]; char old_name[IFNAMSIZ], strbuf[IFNAMSIZ + 8]; struct ifaddr *ifa; @@ -2615,18 +2641,13 @@ error = copyin(ifr_buffer_get_buffer(ifr), descrbuf, ifr_buffer_get_length(ifr) - 1); if (error) { - free(descrbuf, M_IFDESCR); + if_freedescr(descrbuf); break; } } - sx_xlock(&ifdescr_sx); - odescrbuf = ifp->if_description; - ifp->if_description = descrbuf; - sx_xunlock(&ifdescr_sx); - + if_setdescr(ifp, descrbuf); getmicrotime(&ifp->if_lastchange); - free(odescrbuf, M_IFDESCR); break; case SIOCGIFFIB: @@ -4267,6 +4288,39 @@ return ((struct ifnet *)ifp)->if_capenable; } +char * +if_copydescr(if_t ifp) +{ + char *descrbuf = NULL; + + sx_xlock(&ifdescr_sx); + int len = ifp->if_description != NULL ? 
strlen(ifp->if_description) : 0; + if (len > 0) { + descrbuf = malloc(len + 1, M_IFDESCR, M_WAITOK | M_ZERO); + memcpy(descrbuf, ifp->if_description, len); + } + sx_xunlock(&ifdescr_sx); + + return (descrbuf); +} + +void +if_setdescr(if_t ifp, char *descrbuf) +{ + sx_xlock(&ifdescr_sx); + char *odescrbuf = ifp->if_description; + ifp->if_description = descrbuf; + sx_xunlock(&ifdescr_sx); + + if_freedescr(odescrbuf); +} + +void +if_freedescr(char *descrbuf) +{ + free(descrbuf, M_IFDESCR); +} + /* * This is largely undesirable because it ties ifnet to a device, but does * provide flexiblity for an embedded product vendor. Should be used with diff --git a/sys/net/if_clone.h b/sys/net/if_clone.h --- a/sys/net/if_clone.h +++ b/sys/net/if_clone.h @@ -55,6 +55,8 @@ typedef int ifc_create_f(struct if_clone *ifc, char *name, size_t maxlen, struct ifc_data *ifd, struct ifnet **ifpp); typedef int ifc_destroy_f(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags); +typedef int ifc_vmove_f(struct if_clone *ifc, struct ifnet *ifp_src, + struct ifc_data *ifd, struct ifnet **ifpp); struct if_clone_addreq { uint16_t version; /* Always 0 for now */ @@ -64,12 +66,16 @@ ifc_match_f *match_f; ifc_create_f *create_f; ifc_destroy_f *destroy_f; + ifc_vmove_f *vmove_f; }; -#define IFC_F_NOGROUP 0x01 /* Creation flag: don't add unit group */ -#define IFC_F_AUTOUNIT 0x02 /* Creation flag: automatically select unit */ -#define IFC_F_SYSSPACE 0x04 /* Cloner callback: params pointer is in kernel memory */ -#define IFC_F_FORCE 0x08 /* Deletion flag: force interface deletion */ +#define IFC_F_NOGROUP 0x01 /* Creation flag: don't add unit group */ +#define IFC_F_AUTOUNIT 0x02 /* Creation flag: automatically select unit */ +#define IFC_F_SYSSPACE 0x04 /* Cloner callback: params pointer is in kernel memory */ +#define IFC_F_FORCE 0x08 /* Deletion flag: force interface deletion */ +#define IFC_F_NOMOVE 0x10 /* Creation flag: moving between vnets unsupported */ +#define IFC_F_SECONDUNIT 0x20 /* 
Creation flag: alloc second unit index */ +#define IFC_F_MOVED 0x40 /* Deletion flag: indicate interface was moved */ #define IFC_NOGROUP IFC_F_NOGROUP @@ -78,7 +84,22 @@ int ifc_create_ifp(const char *name, struct ifc_data *ifd, struct ifnet **ifpp); +void ifc_link_ifp(struct if_clone *ifc, struct ifnet *ifp); +bool ifc_unlink_ifp(struct if_clone *ifc, struct ifnet *ifp); + int ifc_copyin(const struct ifc_data *ifd, void *target, size_t len); +int ifc_vmove(struct ifnet *ifp, struct vnet *vnet, struct ifnet **ifpp); + +struct ifc_vparam_data { + char ifname[IFNAMSIZ]; + int unit; + struct vnet *home_vnet; + char *ifdescr; + int mtu; +}; +void ifc_save_vparams(struct ifnet *ifp, struct ifc_vparam_data *vparams); +void ifc_apply_vparams(struct ifnet *ifp, struct ifc_vparam_data *vparams); + #ifdef CLONE_COMPAT_13 /* Methods. */ @@ -101,7 +122,9 @@ /* Unit (de)allocating functions. */ int ifc_name2unit(const char *name, int *unit); int ifc_alloc_unit(struct if_clone *, int *); +int ifc_alloc_unit_secondary(struct if_clone *, int *); void ifc_free_unit(struct if_clone *, int); +void ifc_free_unit_secondary(struct if_clone *, int); const char *ifc_name(struct if_clone *); void ifc_flags_set(struct if_clone *, int flags); int ifc_flags_get(struct if_clone *); diff --git a/sys/net/if_clone.c b/sys/net/if_clone.c --- a/sys/net/if_clone.c +++ b/sys/net/if_clone.c @@ -51,6 +51,12 @@ #include #include +/* + * Interface is always linked to the ifc in the home vnet. + * Similarly, interface units are referenced in the home vnet + * + */ + /* Current IF_MAXUNIT expands maximum to 5 characters. */ #define IFCLOSIZ (IFNAMSIZ - 5) @@ -66,6 +72,7 @@ struct if_clone { char ifc_name[IFCLOSIZ]; /* (c) Name of device, e.g. `gif' */ struct unrhdr *ifc_unrhdr; /* (c) alloc_unr(9) header */ + struct unrhdr *ifc_unrhdr2; /* (c) alloc_unr(9) secondary header */ int ifc_maxunit; /* (c) maximum unit number */ int ifc_flags; long ifc_refcnt; /* (i) Reference count. 
*/ @@ -75,6 +82,9 @@ ifc_match_f *ifc_match; /* (c) Matcher function */ ifc_create_f *ifc_create; /* (c) Creates new interface */ ifc_destroy_f *ifc_destroy; /* (c) Destroys cloned interface */ +#ifdef VIMAGE + ifc_vmove_f *ifc_vmove; /* (c) Moves between vnets */ +#endif #ifdef CLONE_COMPAT_13 /* (c) Driver specific cloning functions. Called with no locks held. */ @@ -105,6 +115,8 @@ static void if_clone_free(struct if_clone *ifc); static int if_clone_createif(struct if_clone *ifc, char *name, size_t len, struct ifc_data *ifd, struct ifnet **ifpp); +static int if_clone_destroyif_flags(struct if_clone *ifc, struct ifnet *ifp, + uint32_t flags); static int ifc_simple_match(struct if_clone *ifc, const char *name); static int ifc_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit); @@ -227,7 +239,7 @@ } void -if_clone_addif(struct if_clone *ifc, struct ifnet *ifp) +ifc_link_ifp(struct if_clone *ifc, struct ifnet *ifp) { if ((ifc->ifc_flags & IFC_NOGROUP) == 0) @@ -238,6 +250,156 @@ IF_CLONE_UNLOCK(ifc); } +void +if_clone_addif(struct if_clone *ifc, struct ifnet *ifp) +{ + ifc_link_ifp(ifc, ifp); +} + +bool +ifc_unlink_ifp(struct if_clone *ifc, struct ifnet *ifp) +{ + struct ifnet *ifcifp; + + IF_CLONE_LOCK(ifc); + LIST_FOREACH(ifcifp, &ifc->ifc_iflist, if_clones) { + if (ifcifp == ifp) { + IFC_IFLIST_REMOVE(ifc, ifp); + break; + } + } + IF_CLONE_UNLOCK(ifc); + + if (ifcifp != NULL && (ifc->ifc_flags & IFC_F_NOGROUP) == 0) + if_delgroup(ifp, ifc->ifc_name); + + return (ifcifp != NULL); +} + +#ifdef VIMAGE +/* + * "Default" vmove handler deleting and re-creating interface + * without any creation params + */ +static int +if_vmove_simple(struct if_clone *ifc, struct ifnet *ifp_src, struct ifc_data *ifd, + struct ifnet **ifpp) +{ + struct ifc_vparam_data vp = {}; + int error; + + ifc_save_vparams(ifp_src, &vp); + + printf("SIMPLE vmove %s\n", if_name(ifp_src)); + + CURVNET_SET_QUIET(ifd->vnet); + error = (*ifc->ifc_create)(ifc, vp.ifname, 
sizeof(vp.ifname), ifd, ifpp); + ifc_apply_vparams(*ifpp, &vp); + CURVNET_RESTORE(); + printf("errno %d\n", error); + + if (error == 0) + error = if_clone_destroyif_flags(ifc, ifp_src, IFC_F_MOVED); + + return (error); +} + +static struct if_clone * +ifc_find_cloner(const char *name, struct vnet *vnet) +{ + struct if_clone *ifc; + + CURVNET_SET_QUIET(vnet); + IF_CLONERS_LOCK(); + LIST_FOREACH(ifc, &V_if_cloners, ifc_list) { + if (strcmp(ifc->ifc_name, name) == 0) { + break; + } + } + IF_CLONERS_UNLOCK(); + CURVNET_RESTORE(); + + return (ifc); +} + +void +ifc_save_vparams(struct ifnet *ifp, struct ifc_vparam_data *ivd) +{ + bzero(ivd, sizeof(*ivd)); + + strlcpy(ivd->ifname, if_name(ifp), sizeof(ivd->ifname)); + ivd->unit = ifp->if_dunit; + ivd->home_vnet = ifp->if_home_vnet; + ivd->ifdescr = if_copydescr(ifp); + ivd->mtu = if_getmtu(ifp); +} + +void +ifc_apply_vparams(struct ifnet *ifp, struct ifc_vparam_data *vp) +{ + if (ifp == NULL) { + if (vp->ifdescr != NULL) + if_freedescr(vp->ifdescr); + return; + } + + if (vp->home_vnet != NULL) + ifp->if_home_vnet = vp->home_vnet; + if (vp->ifdescr != NULL) + if_setdescr(ifp, vp->ifdescr); + if (vp->mtu > 0) + if_setmtu(ifp, vp->mtu); +} + +int +ifc_vmove(struct ifnet *ifp, struct vnet *vnet, + struct ifnet **ifpp) +{ + struct ifnet *ifp_dst = NULL; + int error = 0; + + sx_assert(&ifnet_detach_sxlock, SA_XLOCKED); + + if (!if_try_ref(ifp)) + return (EINVAL); + + struct if_clone *ifc = ifc_find_cloner(ifp->if_dname, ifp->if_home_vnet); + + /* Check if the interface with this name exists in target vnet first */ + CURVNET_SET_QUIET(vnet); + bool found = ifunit(if_name(ifp)) != NULL; + printf("ifc=%p ifname=%s %d\n", ifc, if_name(ifp), (int)found); + + if (found) + error = EEXIST; + if (ifc == NULL || (ifc->ifc_flags & IFC_F_NOMOVE)) + error = ENOTSUP; + + if (error == 0) { + struct ifc_data ifd = { + .vnet = vnet, + .flags = IFC_F_MOVED, + .unit = ifp->if_dunit, + }; + CURVNET_SET_QUIET(ifp->if_vnet); + error = 
ifc->ifc_vmove(ifc, ifp, &ifd, &ifp_dst); + CURVNET_RESTORE(); + } + if_rele(ifp); + + if (error == 0) + ifc_link_ifp(ifc, ifp_dst); + CURVNET_RESTORE(); + + printf("ifc_vmove: %p -> %p\n", ifp, ifp_dst); + + if (ifpp != NULL) + *ifpp = ifp_dst; + + return (error); +} +#endif + /* * Create a clone network interface. */ @@ -246,11 +408,13 @@ struct ifc_data *ifd, struct ifnet **ifpp) { int err, unit = 0; + bool need_unit; if (ifunit(name) != NULL) return (EEXIST); - if (ifc->ifc_flags & IFC_F_AUTOUNIT) { + need_unit = ((ifc->ifc_flags & IFC_F_AUTOUNIT) && (!(ifd->flags & IFC_F_MOVED))); + if (need_unit) { if ((err = ifc_handle_unit(ifc, name, len, &unit)) != 0) return (err); ifd->unit = unit; @@ -261,7 +425,7 @@ if (err == 0) { MPASS(*ifpp != NULL); if_clone_addif(ifc, *ifpp); - } else if (ifc->ifc_flags & IFC_F_AUTOUNIT) + } else if (need_unit) ifc_free_unit(ifc, unit); return (err); @@ -281,16 +445,7 @@ if (ifp == NULL) return (ENXIO); - /* Find the cloner for this interface */ - CURVNET_SET_QUIET(ifp->if_home_vnet); - IF_CLONERS_LOCK(); - LIST_FOREACH(ifc, &V_if_cloners, ifc_list) { - if (strcmp(ifc->ifc_name, ifp->if_dname) == 0) { - break; - } - } - IF_CLONERS_UNLOCK(); - CURVNET_RESTORE(); + ifc = ifc_find_cloner(ifp->if_dname, ifp->if_home_vnet); if (ifc == NULL) { if_rele(ifp); return (EINVAL); @@ -308,7 +463,8 @@ if_clone_destroyif_flags(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) { int err; - struct ifnet *ifcifp; + + sx_assert(&ifnet_detach_sxlock, SA_XLOCKED); /* * Given that the cloned ifnet might be attached to a different @@ -317,32 +473,17 @@ */ CURVNET_SET_QUIET(ifp->if_vnet); - IF_CLONE_LOCK(ifc); - LIST_FOREACH(ifcifp, &ifc->ifc_iflist, if_clones) { - if (ifcifp == ifp) { - IFC_IFLIST_REMOVE(ifc, ifp); - break; - } - } - IF_CLONE_UNLOCK(ifc); - if (ifcifp == NULL) { + if (!ifc_unlink_ifp(ifc, ifp)) { CURVNET_RESTORE(); return (ENXIO); /* ifp is not on the list. 
*/ } - if ((ifc->ifc_flags & IFC_F_NOGROUP) == 0) - if_delgroup(ifp, ifc->ifc_name); int unit = ifp->if_dunit; err = (*ifc->ifc_destroy)(ifc, ifp, flags); - if (err != 0) { - if ((ifc->ifc_flags & IFC_F_NOGROUP) == 0) - if_addgroup(ifp, ifc->ifc_name); - - IF_CLONE_LOCK(ifc); - IFC_IFLIST_INSERT(ifc, ifp); - IF_CLONE_UNLOCK(ifc); - } else if (ifc->ifc_flags & IFC_F_AUTOUNIT) + if (err != 0) + ifc_link_ifp(ifc, ifp); + else if ((ifc->ifc_flags & IFC_F_AUTOUNIT) && (!(flags & IFC_F_MOVED))) ifc_free_unit(ifc, unit); CURVNET_RESTORE(); return (err); @@ -400,10 +541,13 @@ return (NULL); struct if_clone *ifc = if_clone_alloc(name, req->maxunit); + ifc->ifc_unrhdr2 = new_unrhdr(0, ifc->ifc_maxunit, &ifc->ifc_mtx); + ifc->ifc_match = req->match_f != NULL ? req->match_f : ifc_simple_match; ifc->ifc_create = req->create_f; ifc->ifc_destroy = req->destroy_f; - ifc->ifc_flags = (req->flags & (IFC_F_AUTOUNIT | IFC_F_NOGROUP)); + ifc->ifc_vmove = (req->vmove_f != NULL) ? req->vmove_f : if_vmove_simple; + ifc->ifc_flags = (req->flags & (IFC_F_AUTOUNIT | IFC_F_NOGROUP | IFC_F_NOMOVE)); if (if_clone_attach(ifc) != 0) return (NULL); @@ -453,6 +597,7 @@ ifc->ifc_destroy = ifc_advanced_destroy_wrapper; ifc->ifca_destroy = destroy; ifc->ifca_create = create; + ifc->ifc_flags = IFC_F_NOMOVE; if (if_clone_attach(ifc) != 0) return (NULL); @@ -499,7 +644,7 @@ ifc->ifcs_create = create; ifc->ifcs_destroy = destroy; ifc->ifcs_minifs = minifs; - ifc->ifc_flags = IFC_F_AUTOUNIT; + ifc->ifc_flags = IFC_F_AUTOUNIT | IFC_F_NOMOVE; if (if_clone_attach(ifc) != 0) return (NULL); @@ -551,6 +696,8 @@ IF_CLONE_LOCK_DESTROY(ifc); delete_unrhdr(ifc->ifc_unrhdr); + if (ifc->ifc_unrhdr2 != NULL) + delete_unrhdr(ifc->ifc_unrhdr2); free(ifc, M_CLONE); } @@ -687,19 +834,19 @@ } static int -ifc_alloc_unit_specific(struct if_clone *ifc, int *unit) +ifc_alloc_unit_specific(struct if_clone *ifc, struct unrhdr *unr, int *unit) { char name[IFNAMSIZ]; if (*unit > ifc->ifc_maxunit) return (ENOSPC); - if 
(alloc_unr_specific(ifc->ifc_unrhdr, *unit) == -1) + if (alloc_unr_specific(unr, *unit) == -1) return (EEXIST); snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, *unit); if (ifunit(name) != NULL) { - free_unr(ifc->ifc_unrhdr, *unit); + free_unr(unr, *unit); return (EEXIST); } @@ -709,17 +856,17 @@ } static int -ifc_alloc_unit_next(struct if_clone *ifc, int *unit) +ifc_alloc_unit_next(struct if_clone *ifc, struct unrhdr *unr, int *unit) { int error; - *unit = alloc_unr(ifc->ifc_unrhdr); + *unit = alloc_unr(unr); if (*unit == -1) return (ENOSPC); - free_unr(ifc->ifc_unrhdr, *unit); + free_unr(unr, *unit); for (;;) { - error = ifc_alloc_unit_specific(ifc, unit); + error = ifc_alloc_unit_specific(ifc, unr, unit); if (error != EEXIST) break; @@ -733,9 +880,9 @@ ifc_alloc_unit(struct if_clone *ifc, int *unit) { if (*unit < 0) - return (ifc_alloc_unit_next(ifc, unit)); + return (ifc_alloc_unit_next(ifc, ifc->ifc_unrhdr, unit)); else - return (ifc_alloc_unit_specific(ifc, unit)); + return (ifc_alloc_unit_specific(ifc, ifc->ifc_unrhdr, unit)); } void @@ -746,6 +893,23 @@ IF_CLONE_REMREF(ifc); } +int +ifc_alloc_unit_secondary(struct if_clone *ifc, int *unit) +{ + if (*unit < 0) + return (ifc_alloc_unit_next(ifc, ifc->ifc_unrhdr2, unit)); + else + return (ifc_alloc_unit_specific(ifc, ifc->ifc_unrhdr2, unit)); +} + +void +ifc_free_unit_secondary(struct if_clone *ifc, int unit) +{ + + free_unr(ifc->ifc_unrhdr2, unit); + IF_CLONE_REMREF(ifc); +} + static int ifc_simple_match(struct if_clone *ifc, const char *name) { diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c --- a/sys/net/if_epair.c +++ b/sys/net/if_epair.c @@ -118,7 +118,10 @@ struct ifnet *ifp; /* This ifp. */ struct ifnet *oifp; /* other ifp of pair. */ int num_queues; + bool is_a; /* true if epairXa */ + bool is_moved; /* true if moved */ struct epair_queue *queues; + struct if_clone *ifc; /* cloner used to create this instance */ struct ifmedia media; /* Media config (fake). 
*/ STAILQ_ENTRY(epair_softc) entry; }; @@ -184,39 +187,13 @@ if_rele(sc->ifp); } -static int -epair_menq(struct mbuf *m, struct epair_softc *osc) +static struct epair_queue * +epair_select_queue(struct epair_softc *sc, const struct mbuf *m) { - struct ifnet *ifp, *oifp; - int len, ret; - int ridx; - short mflags; - struct epair_queue *q = NULL; uint32_t bucket; #ifdef RSS struct ether_header *eh; -#endif - - /* - * I know this looks weird. We pass the "other sc" as we need that one - * and can get both ifps from it as well. - */ - oifp = osc->ifp; - ifp = osc->oifp; - - M_ASSERTPKTHDR(m); - epair_clear_mbuf(m); - if_setrcvif(m, oifp); - M_SETFIB(m, oifp->if_fib); - - /* Save values as once the mbuf is queued, it's not ours anymore. */ - len = m->m_pkthdr.len; - mflags = m->m_flags; - MPASS(m->m_nextpkt == NULL); - MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); - -#ifdef RSS ret = rss_m2bucket(m, &bucket); if (ret) { /* Actually hash the packet. */ @@ -238,46 +215,72 @@ break; } } - bucket %= osc->num_queues; + bucket %= sc->num_queues; #else bucket = 0; #endif - q = &osc->queues[bucket]; + return (&sc->queues[bucket]); +} + +static void +epair_prepare_mbuf(struct mbuf *m, struct ifnet *src_ifp) +{ + M_ASSERTPKTHDR(m); + epair_clear_mbuf(m); + if_setrcvif(m, src_ifp); + M_SETFIB(m, src_ifp->if_fib); + + MPASS(m->m_nextpkt == NULL); + MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); +} + +static void +epair_menq(struct epair_queue *q, struct mbuf *m, struct ifnet *input_ifp, + struct ifnet *output_ifp) +{ + int len, ret; + int ridx; + short mflags; + + epair_prepare_mbuf(m, input_ifp); + + /* Save values as once the mbuf is queued, it's not ours anymore. */ + len = m->m_pkthdr.len; + mflags = m->m_flags; + atomic_set_long(&q->state, (1 << BIT_MBUF_QUEUED)); ridx = atomic_load_int(&q->ridx); ret = buf_ring_enqueue(q->rxring[ridx], m); if (ret != 0) { /* Ring is full. 
*/ - if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); + if_inc_counter(output_ifp, IFCOUNTER_OQDROPS, 1); m_freem(m); - return (0); + return; } - if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(output_ifp, IFCOUNTER_OPACKETS, 1); /* * IFQ_HANDOFF_ADJ/ip_handoff() update statistics, * but as we bypass all this we have to duplicate * the logic another time. */ - if_inc_counter(ifp, IFCOUNTER_OBYTES, len); + if_inc_counter(output_ifp, IFCOUNTER_OBYTES, len); if (mflags & (M_BCAST|M_MCAST)) - if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); + if_inc_counter(output_ifp, IFCOUNTER_OMCASTS, 1); /* Someone else received the packet. */ - if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(input_ifp, IFCOUNTER_IPACKETS, 1); if (!atomic_testandset_long(&q->state, BIT_QUEUE_TASK)) - taskqueue_enqueue(epair_tasks.tq[bucket], &q->tx_task); - - return (0); + taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task); } static void epair_start(struct ifnet *ifp) { struct mbuf *m; - struct epair_softc *sc; struct ifnet *oifp; + struct epair_softc *sc; /* * We get packets here from ether_output via if_handoff() @@ -286,8 +289,7 @@ * other interface (oifp) of our pair. */ sc = ifp->if_softc; - oifp = sc->oifp; - sc = oifp->if_softc; + oifp = atomic_load_ptr(&sc->oifp); for (;;) { IFQ_DEQUEUE(&ifp->if_snd, m); if (m == NULL) @@ -298,13 +300,14 @@ /* In case either interface is not usable drop the packet. */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ifp->if_flags & IFF_UP) == 0 || - (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + oifp == NULL || (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (oifp->if_flags & IFF_UP) == 0) { m_freem(m); continue; } - (void) epair_menq(m, sc); + struct epair_queue *q = epair_select_queue(oifp->if_softc, m); + epair_menq(q, m, oifp, ifp); } } @@ -313,7 +316,6 @@ { struct epair_softc *sc; struct ifnet *oifp; - int error; #ifdef ALTQ int len; short mflags; @@ -347,8 +349,8 @@ * drop the packet. 
*/ sc = ifp->if_softc; - oifp = sc->oifp; - if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + oifp = atomic_load_ptr(&sc->oifp); + if (oifp == NULL || (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (oifp->if_flags & IFF_UP) == 0) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); @@ -358,6 +360,7 @@ #ifdef ALTQ len = m->m_pkthdr.len; mflags = m->m_flags; + int error = 0; /* Support ALTQ via the classic if_start() path. */ IF_LOCK(&ifp->if_snd); @@ -377,8 +380,9 @@ IF_UNLOCK(&ifp->if_snd); #endif - error = epair_menq(m, oifp->if_softc); - return (error); + struct epair_queue *q = epair_select_queue(oifp->if_softc, m); + epair_menq(q, m, oifp, ifp); + return (0); } static void @@ -473,18 +477,16 @@ } static void -epair_clone_add(struct if_clone *ifc, struct epair_softc *scb) +epair_clone_add(struct if_clone *ifc, const struct ifnet *src_ifp, struct ifnet *dst_ifp) { - struct ifnet *ifp; uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ - ifp = scb->ifp; /* Copy epairNa etheraddr and change the last byte. 
*/ - memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN); + memcpy(eaddr, src_ifp->if_hw_addr, ETHER_ADDR_LEN); eaddr[5] = 0x0b; - ether_ifattach(ifp, eaddr); + ether_ifattach(dst_ifp, eaddr); - if_clone_addif(ifc, ifp); + if_clone_addif(ifc, dst_ifp); } static struct epair_softc * @@ -497,6 +499,7 @@ return (NULL); sc = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO); + sc->ifc = ifc; sc->ifp = ifp; sc->num_queues = epair_tasks.tasks; sc->queues = mallocarray(sc->num_queues, sizeof(struct epair_queue), @@ -607,16 +610,24 @@ free(sc, M_EPAIR); } +static void +epair_set_state(struct ifnet *ifp, bool running) +{ + if (running) { + ifp->if_drv_flags |= IFF_DRV_RUNNING; + if_link_state_change(ifp, LINK_STATE_UP); + } else { + if_link_state_change(ifp, LINK_STATE_DOWN); + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + } +} + static int -epair_clone_create(struct if_clone *ifc, char *name, size_t len, - struct ifc_data *ifd, struct ifnet **ifpp) +epair_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit) { - struct epair_softc *sca, *scb; - struct ifnet *ifp; + int error = 0, unit, wildcard; char *dp; - int error, unit, wildcard; - uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ - + /* Try to see if a special unit was requested. */ error = ifc_name2unit(name, &unit); if (error != 0) @@ -627,35 +638,68 @@ if (error != 0) return (error); + error = ifc_alloc_unit_secondary(ifc, &unit); + if (error != 0) { + ifc_free_unit(ifc, unit); + return (error); + } + /* * If no unit had been given, we need to adjust the ifName. * Also make sure there is space for our extra [ab] suffix. */ for (dp = name; *dp != '\0'; dp++); if (wildcard) { - error = snprintf(dp, len - (dp - name), "%d", unit); - if (error > len - (dp - name) - 1) { + int slen = snprintf(dp, len - (dp - name), "%d", unit); + if (slen > len - (dp - name) - 1) { /* ifName too long. 
*/ - ifc_free_unit(ifc, unit); - return (ENOSPC); + error = ENOSPC; + goto done; } - dp += error; + dp += slen; } if (len - (dp - name) - 1 < 1) { /* No space left for our [ab] suffix. */ - ifc_free_unit(ifc, unit); - return (ENOSPC); + error = ENOSPC; + goto done; } *dp = 'b'; /* Must not change dp so we can replace 'a' by 'b' later. */ *(dp+1) = '\0'; /* Check if 'a' and 'b' interfaces already exist. */ - if (ifunit(name) != NULL) - return (EEXIST); + if (ifunit(name) != NULL) { + error = EEXIST; + goto done; + } + *dp = 'a'; - if (ifunit(name) != NULL) - return (EEXIST); + if (ifunit(name) != NULL) { + error = EEXIST; + goto done; + } + *punit = unit; +done: + if (error != 0) { + ifc_free_unit(ifc, unit); + ifc_free_unit_secondary(ifc, unit); + } + + return (error); +} + +static int +epair_clone_create(struct if_clone *ifc, char *name, size_t len, + struct ifc_data *ifd, struct ifnet **ifpp) +{ + struct epair_softc *sca, *scb; + char *dp; + int error, unit; + uint8_t eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ + + error = epair_handle_unit(ifc, name, len, &unit); + if (error != 0) + return (error); /* Allocate memory for both [ab] interfaces */ sca = epair_alloc_sc(ifc); @@ -664,8 +708,10 @@ epair_free_sc(sca); epair_free_sc(scb); ifc_free_unit(ifc, unit); + ifc_free_unit_secondary(ifc, unit); return (ENOSPC); } + sca->is_a = true; /* * Cross-reference the interfaces so we will be able to free both. @@ -674,24 +720,23 @@ scb->oifp = sca->ifp; /* Finish initialization of interface a. */ - ifp = sca->ifp; epair_setup_ifp(sca, name, unit); epair_generate_mac(sca, eaddr); - ether_ifattach(ifp, eaddr); + ether_ifattach(sca->ifp, eaddr); /* Swap the name and finish initialization of interface b. */ + dp = name + strlen(name) - 1; *dp = 'b'; epair_setup_ifp(scb, name, unit); - ifp = scb->ifp; /* We need to play some tricks here for the second interface. */ strlcpy(name, epairname, len); /* Correctly set the name for the cloner list. 
*/ strlcpy(name, scb->ifp->if_xname, len); - epair_clone_add(ifc, scb); + epair_clone_add(ifc, sca->ifp, scb->ifp); /* * Restore name to a as the ifp for this will go into the @@ -700,10 +745,8 @@ strlcpy(name, sca->ifp->if_xname, len); /* Tell the world, that we are ready to rock. */ - sca->ifp->if_drv_flags |= IFF_DRV_RUNNING; - if_link_state_change(sca->ifp, LINK_STATE_UP); - scb->ifp->if_drv_flags |= IFF_DRV_RUNNING; - if_link_state_change(scb->ifp, LINK_STATE_UP); + epair_set_state(sca->ifp, true); + epair_set_state(scb->ifp, true); *ifpp = sca->ifp; @@ -735,49 +778,123 @@ struct ifnet *oifp; struct epair_softc *sca, *scb; int unit, error; + bool is_a, is_moved; - /* - * In case we called into if_clone_destroyif() ourselves - * again to remove the second interface, the softc will be - * NULL. In that case so not do anything but return success. - */ - if (ifp->if_softc == NULL) - return (0); + sx_assert(&ifnet_detach_sxlock, SA_XLOCKED); unit = ifp->if_dunit; sca = ifp->if_softc; - oifp = sca->oifp; - scb = oifp->if_softc; + is_a = sca->is_a; + is_moved = sca->is_moved; + oifp = atomic_load_ptr(&sca->oifp); /* Frist get the interfaces down and detached. */ - if_link_state_change(ifp, LINK_STATE_DOWN); - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - if_link_state_change(oifp, LINK_STATE_DOWN); - oifp->if_drv_flags &= ~IFF_DRV_RUNNING; - + epair_set_state(ifp, false); + if (oifp != NULL) + epair_set_state(oifp, false); ether_ifdetach(ifp); - ether_ifdetach(oifp); - - /* Third free any queued packets and all the resources. 
*/ - CURVNET_SET_QUIET(oifp->if_vnet); - epair_drain_rings(scb); - oifp->if_softc = NULL; - error = if_clone_destroyif(ifc, oifp); - if (error) - panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", - __func__, error); - epair_free_sc(scb); - CURVNET_RESTORE(); + + if (oifp != NULL) { + CURVNET_SET_QUIET(oifp->if_vnet); + + /* Rely on ifnet_detach_sxlock lock held */ + scb = oifp->if_softc; + scb->oifp = NULL; + sca->oifp = NULL; + + bool result __diagused = ifc_unlink_ifp(scb->ifc, oifp); + KASSERT(result == true, ("%s: unable to unlink interface %s", + __func__, if_name(oifp))); + error = epair_clone_destroy(scb->ifc, oifp, flags); + if (error) + panic("%s: if_clone_destroyif() for our 2nd iface failed: %d", + __func__, error); + CURVNET_RESTORE(); + } epair_drain_rings(sca); epair_free_sc(sca); /* Last free the cloner unit. */ - ifc_free_unit(ifc, unit); + if (!is_moved) { + if (is_a) + ifc_free_unit(ifc, unit); + else + ifc_free_unit_secondary(ifc, unit); + } return (0); } +static int +epair_clone_vmove(struct if_clone *ifc, struct ifnet *ifp_src, + struct ifc_data *ifd, struct ifnet **ifpp) +{ + struct ifc_vparam_data vdata = {}; + struct ifnet *oifp; + struct epair_softc *sca_src, *sca_dst, *scb; + uint8_t eaddr[ETHER_ADDR_LEN]; + int error = 0; + + sx_assert(&ifnet_detach_sxlock, SA_XLOCKED); + + sca_src = ifp_src->if_softc; + if (sca_src == NULL) + return (EINVAL); + + /* Copy necessary data to re-create the interface */ + ifc_save_vparams(ifp_src, &vdata); + + /* Create dst interface first to perform atomic swap */ + CURVNET_SET_QUIET(ifd->vnet); + sca_dst = epair_alloc_sc(ifc); + if (sca_dst == NULL) { + ifc_apply_vparams(NULL, &vdata); + CURVNET_RESTORE(); + return (ENOSPC); + } + + if_ref(sca_dst->ifp); + + sca_dst->is_a = sca_src->is_a; + /* Re-create the interface with the saved name and unit. */ + epair_setup_ifp(sca_dst, vdata.ifname, vdata.unit); + memcpy(eaddr, ifp_src->if_hw_addr, sizeof(eaddr)); + 
ifc_apply_vparams(sca_dst->ifp, &vdata); + ether_ifattach(sca_dst->ifp, eaddr); + + /* Perform swap */ + oifp = sca_src->oifp; + if (oifp != NULL) { + scb = oifp->if_softc; + scb->oifp = sca_dst->ifp; + sca_dst->oifp = scb->ifp; + /* Unlink old interface from the pair */ + sca_src->oifp = NULL; + } else + error = EINVAL; + + if (error == 0) { + /* + * Migration successful, destroy old interface. + * if_clone_destroyif() manages vnet automatically. + */ + sca_src->is_moved = true; + if_clone_destroyif(ifc, ifp_src); + epair_set_state(sca_dst->ifp, true); + *ifpp = sca_dst->ifp; + } else { + /* Migration failed, remove state. NOTE(review): sca_dst->ifp is not linked to ifc in this function and sca_dst is dereferenced again below; confirm if_clone_destroyif() can find and destroy it safely. */ + sca_dst->is_moved = true; + if_clone_destroyif(ifc, sca_dst->ifp); + } + if_rele(sca_dst->ifp); + + CURVNET_RESTORE(); + + return (error); +} + static void vnet_epair_init(const void *unused __unused) { @@ -785,6 +902,8 @@ .match_f = epair_clone_match, .create_f = epair_clone_create, .destroy_f = epair_clone_destroy, + .vmove_f = epair_clone_vmove, + .flags = IFC_F_SECONDUNIT, }; V_epair_cloner = ifc_attach_cloner(epairname, &req); } diff --git a/sys/net/if_var.h b/sys/net/if_var.h --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -739,6 +739,9 @@ int if_setcapenablebit(if_t ifp, int setcap, int clearcap); int if_getcapenable(if_t ifp); const char *if_getdname(if_t ifp); +char *if_copydescr(if_t ifp); +void if_setdescr(if_t ifp, char *descrbuf); +void if_freedescr(char *descrbuf); int if_setdev(if_t ifp, void *dev); int if_setdrvflagbits(if_t ifp, int if_setflags, int clear_flags); int if_getdrvflags(if_t ifp); diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c --- a/sys/net/if_vlan.c +++ b/sys/net/if_vlan.c @@ -326,6 +326,8 @@ static int vlan_clone_create(struct if_clone *, char *, size_t, struct ifc_data *, struct ifnet **); static int vlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); +static int vlan_clone_vmove(struct if_clone *, struct ifnet *, + struct ifc_data *, struct ifnet **); static void vlan_ifdetach(void *arg, 
struct ifnet *ifp); static void vlan_iflladdr(void *arg, struct ifnet *ifp); @@ -907,6 +909,7 @@ .match_f = vlan_clone_match, .create_f = vlan_clone_create, .destroy_f = vlan_clone_destroy, + .vmove_f = vlan_clone_vmove, }; static int @@ -1061,6 +1064,17 @@ return (1); } +struct vlanparams { + struct ifnet *parent; + int vid; + uint16_t proto; + uint8_t pcp; +}; + +static int +vlan_create_specific(struct if_clone *ifc, char *name, int unit, + struct vlanparams *vp, struct ifnet **ifpp); + static int vlan_clone_create(struct if_clone *ifc, char *name, size_t len, struct ifc_data *ifd, struct ifnet **ifpp) @@ -1072,13 +1086,8 @@ int error; int vid = 0; uint16_t proto = ETHERTYPE_VLAN; - struct ifvlan *ifv; - struct ifnet *ifp; struct ifnet *p = NULL; - struct ifaddr *ifa; - struct sockaddr_dl *sdl; struct vlanreq vlr; - static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ /* @@ -1159,14 +1168,38 @@ } } + struct vlanparams vp = { + .parent = p, + .vid = vid, + .proto = proto, + }; + + error = vlan_create_specific(ifc, name, unit, &vp, ifpp); + if (error != 0) { + if (!subinterface) + ifc_free_unit(ifc, unit); + } + if (p != NULL) + if_rele(p); + + return (error); +} + +static int +vlan_create_specific(struct if_clone *ifc, char *name, int unit, + struct vlanparams *vp, struct ifnet **ifpp) +{ + struct ifvlan *ifv; + struct ifnet *ifp; + struct ifaddr *ifa; + struct sockaddr_dl *sdl; + static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ + int error; + ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO); ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { - if (!subinterface) - ifc_free_unit(ifc, unit); free(ifv, M_VLAN); - if (p != NULL) - if_rele(p); return (ENOSPC); } CK_SLIST_INIT(&ifv->vlan_mc_listhead); @@ -1205,9 +1238,8 @@ sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_L2VLAN; - if (p != NULL) { - error = vlan_config(ifv, p, vid, proto); - if_rele(p); + if (vp->parent != NULL) { + error = 
vlan_config(ifv, vp->parent, vp->vid, vp->proto); if (error != 0) { /* * Since we've partially failed, we need to back @@ -1217,8 +1249,6 @@ ether_ifdetach(ifp); vlan_unconfig(ifp); if_free(ifp); - if (!subinterface) - ifc_free_unit(ifc, unit); free(ifv, M_VLAN); return (error); @@ -1258,6 +1288,48 @@ return (0); } +/* Move a vlan to ifd->vnet by destroying it and re-creating it there. */ +static int +vlan_clone_vmove(struct if_clone *ifc, struct ifnet *ifp_src, + struct ifc_data *ifd, struct ifnet **ifpp) +{ + struct ifc_vparam_data vdata = {}; + struct ifvlan *ifv; + int error; + + VLAN_XLOCK(); + /* Copy necessary data to re-create the interface */ + ifc_save_vparams(ifp_src, &vdata); + ifv = ifp_src->if_softc; + + struct vlanparams vp = { + .parent = (ifv->ifv_trunk != NULL) ? ifv->ifv_trunk->parent : NULL, + .vid = ifv->ifv_vid, + .proto = ifv->ifv_proto, + }; + error = if_clone_destroyif(ifc, ifp_src); + VLAN_XUNLOCK(); + ifp_src = NULL; + + if (error != 0) { + /* Nothing will be re-created: free the saved description. */ + ifc_apply_vparams(NULL, &vdata); + return (error); + } + + CURVNET_SET_QUIET(ifd->vnet); + + VLAN_XLOCK(); + error = vlan_create_specific(ifc, vdata.ifname, + vdata.unit, &vp, ifpp); + VLAN_XUNLOCK(); + ifc_apply_vparams(*ifpp, &vdata); + + CURVNET_RESTORE(); + + return (error); +} + /* * The ifp->if_init entry point for vlan(4) is a no-op. 
*/ @@ -1519,7 +1591,7 @@ } static int -vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, +vlan_config_locked(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, uint16_t proto) { struct epoch_tracker et; @@ -1549,8 +1621,6 @@ if (trunk->parent != p) return (EBUSY); - VLAN_XLOCK(); - ifv->ifv_proto = proto; if (ifv->ifv_vid != vid) { @@ -1559,11 +1629,9 @@ ifv->ifv_vid = vid; error = vlan_inshash(trunk, ifv); } - /* Will unlock */ - goto done; + return (error); } - VLAN_XLOCK(); if (p->if_vlantrunk == NULL) { trunk = malloc(sizeof(struct ifvlantrunk), M_VLAN, M_WAITOK | M_ZERO); @@ -1581,8 +1649,8 @@ ifv->ifv_vid = vid; /* must set this before vlan_inshash() */ ifv->ifv_pcp = 0; /* Default: best effort delivery. */ error = vlan_inshash(trunk, ifv); - if (error) - goto done; + if (error != 0) + return (error); ifv->ifv_proto = proto; ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; ifv->ifv_mintu = ETHERMIN; @@ -1677,7 +1745,15 @@ */ (void)vlan_setmulti(ifp); -done: + return (error); +} + +static int +vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, + uint16_t proto) +{ + VLAN_XLOCK(); + int error = vlan_config_locked(ifv, p, vid, proto); if (error == 0) EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid); VLAN_XUNLOCK();