diff --git a/sys/net/if.c b/sys/net/if.c --- a/sys/net/if.c +++ b/sys/net/if.c @@ -311,19 +311,30 @@ VNET_DEFINE(struct ifgrouphead, ifg_head); /* Table of ifnet by index. */ -VNET_DEFINE_STATIC(int, if_index); -#define V_if_index VNET(if_index) -VNET_DEFINE_STATIC(int, if_indexlim) = 8; -#define V_if_indexlim VNET(if_indexlim) -VNET_DEFINE_STATIC(struct ifnet **, ifindex_table); -#define V_ifindex_table VNET(ifindex_table) +static int if_index; +static int if_indexlim = 8; +static struct ifnet **ifindex_table; SYSCTL_NODE(_net_link_generic, IFMIB_SYSTEM, system, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Variables global to all interfaces"); -SYSCTL_INT(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, - CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(if_index), 0, - "Number of configured interfaces"); +static int +sysctl_ifcount(SYSCTL_HANDLER_ARGS) +{ + int rv = 0; + + IFNET_RLOCK(); + for (int i = 1; i <= if_index; i++) + if (ifindex_table[i] != NULL && + ifindex_table[i]->if_vnet == curvnet) + rv = i; + IFNET_RUNLOCK(); + + return (sysctl_handle_int(oidp, &rv, 0, req)); +} +SYSCTL_PROC(_net_link_generic_system, IFMIB_IFCOUNT, ifcount, + CTLTYPE_INT | CTLFLAG_VNET | CTLFLAG_RD, NULL, 0, sysctl_ifcount, "I", + "Maximum known interface index"); /* * The global network interface list (V_ifnet) and related state (such as @@ -352,13 +363,19 @@ struct ifnet * ifnet_byindex(u_int idx) { + struct ifnet *ifp; NET_EPOCH_ASSERT(); - if (__predict_false(idx > V_if_index)) + if (__predict_false(idx > if_index)) return (NULL); - return (ck_pr_load_ptr(&V_ifindex_table[idx])); + ifp = ck_pr_load_ptr(&ifindex_table[idx]); + + if (curvnet != NULL && ifp != NULL && ifp->if_vnet != curvnet) + ifp = NULL; + + return (ifp); } struct ifnet * @@ -374,58 +391,6 @@ return (ifp); } -/* - * Allocate an ifindex array entry. - */ -static void -ifindex_alloc(struct ifnet *ifp) -{ - u_short idx; - - IFNET_WLOCK(); - /* - * Try to find an empty slot below V_if_index. If we fail, take the - * next slot. - */ - for (idx = 1; idx <= V_if_index; idx++) { - if (V_ifindex_table[idx] == NULL) - break; - } - - /* Catch if_index overflow. */ - if (idx >= V_if_indexlim) { - struct ifnet **new, **old; - int newlim; - - newlim = V_if_indexlim * 2; - new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO); - memcpy(new, V_ifindex_table, V_if_indexlim * sizeof(*new)); - old = V_ifindex_table; - ck_pr_store_ptr(&V_ifindex_table, new); - V_if_indexlim = newlim; - epoch_wait_preempt(net_epoch_preempt); - free(old, M_IFNET); - } - if (idx > V_if_index) - V_if_index = idx; - - ifp->if_index = idx; - ck_pr_store_ptr(&V_ifindex_table[idx], ifp); - IFNET_WUNLOCK(); -} - -static void -ifindex_free(u_short idx) -{ - - IFNET_WLOCK_ASSERT(); - - ck_pr_store_ptr(&V_ifindex_table[idx], NULL); - while (V_if_index > 0 && - V_ifindex_table[V_if_index] == NULL) - V_if_index--; -} - struct ifaddr * ifaddr_byindex(u_short idx) { @@ -448,33 +413,24 @@ */ static void -vnet_if_init(const void *unused __unused) +if_init(void *arg __unused) { - CK_STAILQ_INIT(&V_ifnet); - CK_STAILQ_INIT(&V_ifg_head); - V_ifindex_table = malloc(V_if_indexlim * sizeof(*V_ifindex_table), + ifindex_table = malloc(if_indexlim * sizeof(*ifindex_table), M_IFNET, M_WAITOK | M_ZERO); - vnet_if_clone_init(); } -VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, - NULL); +SYSINIT(if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, if_init, NULL); -#ifdef VIMAGE static void -vnet_if_uninit(const void *unused __unused) +vnet_if_init(const void *unused __unused) { - VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p " - "not empty", __func__, __LINE__, &V_ifnet)); - VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p " - "not empty", __func__, __LINE__, &V_ifg_head)); - - free((caddr_t)V_ifindex_table, M_IFNET); + CK_STAILQ_INIT(&V_ifnet); + CK_STAILQ_INIT(&V_ifg_head); + vnet_if_clone_init(); } -VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, - vnet_if_uninit, NULL); -#endif +VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, + NULL); static void if_link_ifnet(struct ifnet *ifp) @@ -568,6 +524,7 @@ if_alloc_domain(u_char type, int numa_domain) { struct ifnet *ifp; + u_short idx; KASSERT(numa_domain <= IF_NODOM, ("numa_domain too large")); if (numa_domain == IF_NODOM) @@ -607,7 +564,37 @@ ifp->if_get_counter = if_get_counter_default; ifp->if_pcp = IFNET_PCP_NONE; - ifindex_alloc(ifp); + /* Allocate an ifindex array entry. */ + IFNET_WLOCK(); + /* + * Try to find an empty slot below if_index. If we fail, take the + * next slot. + */ + for (idx = 1; idx <= if_index; idx++) { + if (ifindex_table[idx] == NULL) + break; + } + + /* Catch if_index overflow. */ + if (idx >= if_indexlim) { + struct ifnet **new, **old; + int newlim; + + newlim = if_indexlim * 2; + new = malloc(newlim * sizeof(*new), M_IFNET, M_WAITOK | M_ZERO); + memcpy(new, ifindex_table, if_indexlim * sizeof(*new)); + old = ifindex_table; + ck_pr_store_ptr(&ifindex_table, new); + if_indexlim = newlim; + epoch_wait_preempt(net_epoch_preempt); + free(old, M_IFNET); + } + if (idx > if_index) + if_index = idx; + + ifp->if_index = idx; + ck_pr_store_ptr(&ifindex_table[idx], ifp); + IFNET_WUNLOCK(); return (ifp); } @@ -677,23 +664,18 @@ * epoch and then dereferencing ifp while we peform if_free(), * and after if_free() finished, too. * - * The reason is the VIMAGE. For some reason it was designed - * to require all sockets drained before destroying, but not all - * ifnets. A vnet destruction calls if_vmove() on ifnet, which - * causes ID change. But ID change and a possible misidentification - * of an ifnet later is a lesser problem, as it doesn't crash kernel. - * A worse problem is that removed interface may outlive the vnet it - * belongs too! The if_free_deferred() would see ifp->if_vnet freed. + * This early index freeing was important back when ifindex was + * virtualized and interface would outlive the vnet. */ - CURVNET_SET_QUIET(ifp->if_vnet); IFNET_WLOCK(); - MPASS(V_ifindex_table[ifp->if_index] == ifp); - ifindex_free(ifp->if_index); + MPASS(ifindex_table[ifp->if_index] == ifp); + ck_pr_store_ptr(&ifindex_table[ifp->if_index], NULL); + while (if_index > 0 && ifindex_table[if_index] == NULL) + if_index--; IFNET_WUNLOCK(); if (refcount_release(&ifp->if_refcount)) NET_EPOCH_CALL(if_free_deferred, &ifp->if_epoch_ctx); - CURVNET_RESTORE(); } /* @@ -837,7 +819,7 @@ struct sockaddr_dl *sdl; struct ifaddr *ifa; - MPASS(V_ifindex_table[ifp->if_index] == ifp); + MPASS(ifindex_table[ifp->if_index] == ifp); #ifdef VIMAGE ifp->if_vnet = curvnet; @@ -1287,17 +1269,6 @@ if (rc != 0) return (rc); - /* - * Unlink the ifnet from ifindex_table[] in current vnet, and shrink - * the if_index for that vnet if possible. - * - * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized, - * or we'd lock on one vnet and unlock on another. - */ - IFNET_WLOCK(); - ifindex_free(ifp->if_index); - IFNET_WUNLOCK(); - /* * Perform interface-specific reassignment tasks, if provided by * the driver. @@ -1309,7 +1280,6 @@ * Switch to the context of the target vnet. */ CURVNET_SET_QUIET(new_vnet); - ifindex_alloc(ifp); if_attach_internal(ifp, true); #ifdef DEV_BPF @@ -1945,7 +1915,6 @@ struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; - const struct sockaddr_dl *sdl; NET_EPOCH_ASSERT(); /* @@ -1953,14 +1922,9 @@ * so do that if we can. */ if (af == AF_LINK) { - sdl = (const struct sockaddr_dl *)addr; - if (sdl->sdl_index && sdl->sdl_index <= V_if_index) { - ifp = ifnet_byindex(sdl->sdl_index); - if (ifp == NULL) - return (NULL); - - return (ifp->if_addr); - } + ifp = ifnet_byindex( + ((const struct sockaddr_dl *)addr)->sdl_index); + return (ifp ? ifp->if_addr : NULL); } /* @@ -4596,24 +4560,16 @@ DB_SHOW_ALL_COMMAND(ifnets, db_show_all_ifnets) { - VNET_ITERATOR_DECL(vnet_iter); struct ifnet *ifp; u_short idx; - VNET_FOREACH(vnet_iter) { - CURVNET_SET_QUIET(vnet_iter); -#ifdef VIMAGE - db_printf("vnet=%p\n", curvnet); -#endif - for (idx = 1; idx <= V_if_index; idx++) { - ifp = V_ifindex_table[idx]; - if (ifp == NULL) - continue; - db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp); - if (db_pager_quit) - break; - } - CURVNET_RESTORE(); + for (idx = 1; idx <= if_index; idx++) { + ifp = ifindex_table[idx]; + if (ifp == NULL) + continue; + db_printf( "%20s ifp=%p\n", ifp->if_xname, ifp); + if (db_pager_quit) + break; } } #endif /* DDB */