Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -223,6 +223,10 @@
 static struct sx ifdescr_sx;
 SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr");
 
+/* global SX-lock serializing ifindex allocation, release and table growth */
+static struct sx ifgrow_sx;
+SX_SYSINIT(ifgrow_sx, &ifgrow_sx, "ifnet grow");
+
 void	(*ng_ether_link_state_p)(struct ifnet *ifp, int state);
 void	(*lagg_linkstate_p)(struct ifnet *ifp, int state);
 /* These are external hooks for CARP. */
@@ -256,7 +260,8 @@
 static void	if_attachdomain(void *);
 static void	if_attachdomain1(struct ifnet *);
 static int	ifconf(u_long, caddr_t);
-static void	*if_grow(void);
+static int	if_grow(void);
+static int	if_grow_locked(void);
 static void	if_input_default(struct ifnet *, struct mbuf *);
 static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
 static void	if_route(struct ifnet *, int flag, int fam);
@@ -331,12 +336,16 @@
 struct ifnet *
 ifnet_byindex_locked(u_short idx)
 {
+	struct ifnet *ifp;
 
-	if (idx > V_if_index)
+	/* XXX should assert in epoch(9) section */
+
+	if (idx >= V_if_indexlim)
 		return (NULL);
-	if (V_ifindex_table[idx] == IFNET_HOLD)
-		return (NULL);
-	return (V_ifindex_table[idx]);
+	ifp = V_ifindex_table[idx];
+	if (ifp == IFNET_HOLD)
+		ifp = NULL;
+	return (ifp);
 }
 
 struct ifnet *
@@ -365,39 +374,54 @@
 }
 
 /*
- * Allocate an ifindex array entry; return 0 on success or an error on
- * failure.
+ * Allocate an ifindex array entry with ifgrow_sx held exclusively.
+ * Returns 0 on success or an error on failure.
  */
-static u_short
-ifindex_alloc(void **old)
+static int
+ifindex_alloc_locked(u_short *pindex)
 {
-	u_short idx;
+	int index;
+	int error;
 
-	IFNET_WLOCK_ASSERT();
 	/*
-	 * Try to find an empty slot below V_if_index.  If we fail, take the
-	 * next slot.
+	 * Try to find an empty slot below V_if_indexlim.
+	 * Else grow the table via if_grow_locked(), as ifgrow_sx is held.
 	 */
-	for (idx = 1; idx <= V_if_index; idx++) {
-		if (V_ifindex_table[idx] == NULL)
-			break;
+	while (1) {
+		for (index = 1; index < V_if_indexlim; index++) {
+			if (V_ifindex_table[index] == NULL)
+				goto found;
+		}
+		error = if_grow_locked();
+		if (error)
+			return (error);
 	}
+found:
+	V_ifindex_table[index] = IFNET_HOLD;
+	if (index > V_if_index)
+		V_if_index = index;
+	*pindex = index;
 
-	/* Catch if_index overflow. */
-	if (idx >= V_if_indexlim) {
-		*old = if_grow();
-		return (USHRT_MAX);
-	}
-	if (idx > V_if_index)
-		V_if_index = idx;
-	return (idx);
+	return (0);	/* success */
 }
 
+static int
+ifindex_alloc(u_short *pindex)
+{
+	int retval;
+
+	sx_xlock(&ifgrow_sx);
+	retval = ifindex_alloc_locked(pindex);
+	sx_xunlock(&ifgrow_sx);
+
+	return (retval);
+}
+
 static void
 ifindex_free_locked(u_short idx)
 {
 
-	IFNET_WLOCK_ASSERT();
+	sx_assert(&ifgrow_sx, SA_XLOCKED);
 
 	V_ifindex_table[idx] = NULL;
 	while (V_if_index > 0 &&
@@ -409,9 +433,9 @@
 ifindex_free(u_short idx)
 {
 
-	IFNET_WLOCK();
+	sx_xlock(&ifgrow_sx);
 	ifindex_free_locked(idx);
-	IFNET_WUNLOCK();
+	sx_xunlock(&ifgrow_sx);
 }
 
 static void
@@ -445,15 +469,11 @@
 static void
 vnet_if_init(const void *unused __unused)
 {
-	void *old;
 
 	CK_STAILQ_INIT(&V_ifnet);
 	CK_STAILQ_INIT(&V_ifg_head);
-	IFNET_WLOCK();
-	old = if_grow();				/* create initial table */
-	IFNET_WUNLOCK();
-	epoch_wait_preempt(net_epoch_preempt);
-	free(old, M_IFNET);
+	if (if_grow() != 0)
+		panic("Cannot create initial table\n");
 	vnet_if_clone_init();
 }
 VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init,
@@ -489,35 +509,60 @@
     vnet_if_return, NULL);
 #endif
 
-
-static void *
-if_grow(void)
+static int
+if_grow_locked(void)
 {
+	struct ifnet **old;
+	struct ifnet **next;
+	size_t size;
 	int oldlim;
-	u_int n;
-	struct ifnet **e;
-	void *old;
 
-	old = NULL;
-	IFNET_WLOCK_ASSERT();
+	sx_assert(&ifgrow_sx, SA_XLOCKED);
+
 	oldlim = V_if_indexlim;
-	IFNET_WUNLOCK();
-	n = (oldlim << 1) * sizeof(*e);
-	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
-	IFNET_WLOCK();
-	if (V_if_indexlim != oldlim) {
-		free(e, M_IFNET);
-		return (NULL);
-	}
-	if (V_ifindex_table != NULL) {
-		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
-		old = V_ifindex_table;
-	}
+	if (oldlim > USHRT_MAX)
+		return (ENOMEM); /* limit reached */
+
+	size = (oldlim << 1) * sizeof(*next);
+	next = malloc(size, M_IFNET, M_WAITOK | M_ZERO);
+	old = V_ifindex_table;
+
+	if (old != NULL)
+		memcpy((caddr_t)next, (caddr_t)old, size / 2);
+
+	V_ifindex_table = next;
+
+	/*
+	 * Make sure all readers see the new table pointer before
+	 * freeing the old one, if any:
+	 */
+	epoch_wait_preempt(net_epoch_preempt);
+
+	/*
+	 * It is now safe to increment the index limit:
+	 */
 	V_if_indexlim <<= 1;
-	V_ifindex_table = e;
-	return (old);
+
+	/*
+	 * Free old ifnet pointer array, if any:
+	 */
+	free(old, M_IFNET);
+
+	return (0);	/* success */
 }
 
+static int
+if_grow(void)
+{
+	int retval;
+
+	sx_xlock(&ifgrow_sx);
+	retval = if_grow_locked();
+	sx_xunlock(&ifgrow_sx);
+
+	return (retval);
+}
+
 /*
  * Allocate a struct ifnet and an index for an interface.  A layer 2
  * common structure will also be allocated if an allocation routine is
@@ -527,22 +572,12 @@
 if_alloc(u_char type)
 {
 	struct ifnet *ifp;
-	u_short idx;
-	void *old;
 
 	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
- restart:
-	IFNET_WLOCK();
-	idx = ifindex_alloc(&old);
-	if (__predict_false(idx == USHRT_MAX)) {
-		IFNET_WUNLOCK();
-		epoch_wait_preempt(net_epoch_preempt);
-		free(old, M_IFNET);
-		goto restart;
-	}
-	ifnet_setbyindex(idx, IFNET_HOLD);
-	IFNET_WUNLOCK();
-	ifp->if_index = idx;
+
+	if (ifindex_alloc(&ifp->if_index) != 0)
+		goto err_free;
+
 	ifp->if_type = type;
 	ifp->if_alloctype = type;
 #ifdef VIMAGE
@@ -550,11 +585,8 @@
 #endif
 	if (if_com_alloc[type] != NULL) {
 		ifp->if_l2com = if_com_alloc[type](type, ifp);
-		if (ifp->if_l2com == NULL) {
-			free(ifp, M_IFNET);
-			ifindex_free(idx);
-			return (NULL);
-		}
+		if (ifp->if_l2com == NULL)
+			goto err_free_index;
 	}
 
 	IF_ADDR_LOCK_INIT(ifp);
@@ -576,6 +608,12 @@
 	ifp->if_pcp = IFNET_PCP_NONE;
 	ifnet_setbyindex(ifp->if_index, ifp);
 	return (ifp);
+
+err_free_index:
+	ifindex_free(ifp->if_index);
+err_free:
+	free(ifp, M_IFNET);
+	return (NULL);
 }
 
 /*
@@ -628,12 +666,11 @@
 	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */
 
 	CURVNET_SET_QUIET(ifp->if_vnet);
-	IFNET_WLOCK();
-	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
+
+	KASSERT(ifp == ifnet_byindex(ifp->if_index),
 	    ("%s: freeing unallocated ifnet", ifp->if_xname));
 
-	ifindex_free_locked(ifp->if_index);
-	IFNET_WUNLOCK();
+	ifindex_free(ifp->if_index);
 
 	if (refcount_release(&ifp->if_refcount))
 		epoch_call(net_epoch_preempt, &ifp->if_epoch_ctx, if_destroy);
@@ -1235,7 +1272,6 @@
 {
 	struct if_clone *ifc;
 	u_int bif_dlt, bif_hdrlen;
-	void *old;
 	int rc;
 
 	/*
@@ -1257,13 +1293,8 @@
 	/*
 	 * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
 	 * the if_index for that vnet if possible.
-	 *
-	 * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
-	 * or we'd lock on one vnet and unlock on another.
 	 */
-	IFNET_WLOCK();
-	ifindex_free_locked(ifp->if_index);
-	IFNET_WUNLOCK();
+	ifindex_free(ifp->if_index);
 
 	/*
 	 * Perform interface-specific reassignment tasks, if provided by
@@ -1276,17 +1307,11 @@
 	 * Switch to the context of the target vnet.
 	 */
 	CURVNET_SET_QUIET(new_vnet);
- restart:
-	IFNET_WLOCK();
-	ifp->if_index = ifindex_alloc(&old);
-	if (__predict_false(ifp->if_index == USHRT_MAX)) {
-		IFNET_WUNLOCK();
-		epoch_wait_preempt(net_epoch_preempt);
-		free(old, M_IFNET);
-		goto restart;
-	}
+
+	if (ifindex_alloc(&ifp->if_index) != 0)
+		panic("Out of interface numbers moving to new VNET\n");
+
 	ifnet_setbyindex(ifp->if_index, ifp);
-	IFNET_WUNLOCK();
 
 	if_attach_internal(ifp, 1, ifc);
 