if_epair: replace the shared swi(9) handlers with per-queue taskqueues.

Summary of what this patch does:
  * Drops the global swi_cookie[]/swi_sc[] arrays and epair_intr().
  * Adds struct epair_queue (two rx rings, ring index, taskqueue, task);
    each softc gets num_queues of them: rss_getnumbuckets() when built
    with RSS, otherwise 1.
  * Transmit picks the peer queue from rss_m2bucket(), falling back to a
    software hash by ethertype, modulo num_queues, and counts ring-full
    drops as IFCOUNTER_OQDROPS.
  * Teardown stops each queue's taskqueue before freeing its rings, so
    the task handler cannot run against freed ring memory.

NOTE(review): the header names on the bare "#include" lines below are
missing in this copy of the patch; restore them before applying.

Index: sys/net/if_epair.c
===================================================================
--- sys/net/if_epair.c
+++ sys/net/if_epair.c
@@ -40,6 +40,8 @@
 #include
 __FBSDID("$FreeBSD$");
 
+#include "opt_rss.h"
+
 #include
 #include
 #include
@@ -53,7 +55,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -68,6 +70,11 @@
 #include
 #include
 #include
+#ifdef RSS
+#include
+#include
+#include
+#endif
 #include
 
 static int epair_clone_match(struct if_clone *, const char *);
@@ -90,18 +97,24 @@
 #define EPAIR_LOCK()			mtx_lock(&epair_n_index_mtx)
 #define EPAIR_UNLOCK()			mtx_unlock(&epair_n_index_mtx)
 
-static void *swi_cookie[MAXCPU];	/* swi(9). */
-static STAILQ_HEAD(, epair_softc) swi_sc[MAXCPU];
+struct epair_softc;
+struct epair_queue {
+	struct buf_ring		*rxring[2];
+	volatile int		 ridx;		/* 0 || 1 */
+	struct taskqueue	*tq;
+	char			 tq_name[32];
+	struct task		 tx_task;
+	struct epair_softc	*sc;
+};
 
 static struct mtx epair_n_index_mtx;
 struct epair_softc {
-	struct ifnet	*ifp;		/* This ifp. */
-	struct ifnet	*oifp;		/* other ifp of pair. */
-	void		*swi_cookie;	/* swi(9). */
-	struct buf_ring	*rxring[2];
-	volatile int	ridx;		/* 0 || 1 */
-	struct ifmedia	media;		/* Media config (fake). */
-	uint32_t	cpuidx;
+	struct ifnet		*ifp;		/* This ifp. */
+	struct ifnet		*oifp;		/* other ifp of pair. */
+	int			 num_queues;
+	struct epair_queue	*queues;
+	volatile int		 ridx;		/* 0 || 1 */
+	struct ifmedia		 media;		/* Media config (fake). */
 	STAILQ_ENTRY(epair_softc) entry;
 };
 
@@ -119,59 +132,43 @@
 }
 
 static void
-epair_if_input(struct epair_softc *sc, int ridx)
+epair_if_input(struct epair_softc *sc, struct epair_queue *q, int ridx)
 {
-	struct epoch_tracker et;
 	struct ifnet *ifp;
 	struct mbuf *m;
 
 	ifp = sc->ifp;
-	NET_EPOCH_ENTER(et);
-	do {
-		m = buf_ring_dequeue_sc(sc->rxring[ridx]);
+	CURVNET_SET(ifp->if_vnet);
+	while (! buf_ring_empty(q->rxring[ridx])) {
+		m = buf_ring_dequeue_mc(q->rxring[ridx]);
 		if (m == NULL)
-			break;
+			continue;
 
 		MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
 		(*ifp->if_input)(ifp, m);
 
-	} while (1);
-	NET_EPOCH_EXIT(et);
+	}
+	CURVNET_RESTORE();
 }
 
 static void
-epair_sintr(struct epair_softc *sc)
+epair_tx_start_deferred(void *arg, int pending)
 {
+	struct epair_queue *q = (struct epair_queue *)arg;
+	struct epair_softc *sc = q->sc;
 	int ridx, nidx;
 
 	if_ref(sc->ifp);
 	do {
-		ridx = sc->ridx;
+		ridx = atomic_load_int(&q->ridx);
 		nidx = (ridx == 0) ? 1 : 0;
-	} while (!atomic_cmpset_int(&sc->ridx, ridx, nidx));
-	epair_if_input(sc, ridx);
-
-	if_rele(sc->ifp);
-}
-
-static void
-epair_intr(void *arg)
-{
-	struct epair_softc *sc;
-	uint32_t cpuidx;
+	} while (!atomic_cmpset_int(&q->ridx, ridx, nidx));
+	epair_if_input(sc, q, ridx);
 
-	cpuidx = (uintptr_t)arg;
-	/* If this is a problem, this is a read-mostly situation. */
-	EPAIR_LOCK();
-	STAILQ_FOREACH(sc, &swi_sc[cpuidx], entry) {
-		/* Do this lockless. */
-		if (buf_ring_empty(sc->rxring[sc->ridx]))
-			continue;
-		epair_sintr(sc);
-	}
-	EPAIR_UNLOCK();
+	if (! buf_ring_empty(q->rxring[nidx]))
+		taskqueue_enqueue(q->tq, &q->tx_task);
 
-	return;
+	if_rele(sc->ifp);
 }
 
 static int
@@ -181,7 +178,12 @@
 	int len, ret;
 	int ridx;
 	short mflags;
+	struct epair_queue *q = NULL;
+	uint32_t bucket;
 	bool was_empty;
+#ifdef RSS
+	struct ether_header *eh;
+#endif
 
 	/*
 	 * I know this looks weird. We pass the "other sc" as we need that one
@@ -202,13 +204,38 @@
 	MPASS(m->m_nextpkt == NULL);
 	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
 
-	ridx = atomic_load_int(&osc->ridx);
-	was_empty = buf_ring_empty(osc->rxring[ridx]);
-	ret = buf_ring_enqueue(osc->rxring[ridx], m);
+#ifdef RSS
+	ret = rss_m2bucket(m, &bucket);
+	if (ret) {
+		/* Actually hash the packet. */
+		eh = mtod(m, struct ether_header *);
+
+		switch (ntohs(eh->ether_type)) {
+		case ETHERTYPE_IP:
+			rss_soft_m2cpuid_v4(m, 0, &bucket);
+			break;
+		case ETHERTYPE_IPV6:
+			rss_soft_m2cpuid_v6(m, 0, &bucket);
+			break;
+		default:
+			bucket = 0;
+			break;
+		}
+	}
+	bucket %= osc->num_queues;
+#else
+	bucket = 0;
+#endif
+	q = &osc->queues[bucket];
+
+	ridx = atomic_load_int(&q->ridx);
+	was_empty = buf_ring_empty(q->rxring[ridx]);
+	ret = buf_ring_enqueue(q->rxring[ridx], m);
 	if (ret != 0) {
 		/* Ring is full. */
+		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
 		m_freem(m);
-		return (0);
+		goto done;
 	}
 
 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
@@ -223,9 +250,9 @@
 	/* Someone else received the packet. */
 	if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);
 
-	/* Kick the interrupt handler for the first packet. */
-	if (was_empty && osc->swi_cookie != NULL)
-		swi_sched(osc->swi_cookie, 0);
+done:
+	if (was_empty)
+		taskqueue_enqueue(q->tq, &q->tx_task);
 
 	return (0);
 }
@@ -495,16 +522,40 @@
 	/* Allocate memory for both [ab] interfaces */
 	sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
 	sca->ifp = if_alloc(IFT_ETHER);
+#ifdef RSS
+	sca->num_queues = rss_getnumbuckets();
+#else
+	sca->num_queues = 1;
+#endif
 	if (sca->ifp == NULL) {
 		free(sca, M_EPAIR);
 		ifc_free_unit(ifc, unit);
 		return (ENOSPC);
 	}
-	sca->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK,NULL);
-	sca->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+	sca->queues = mallocarray(sca->num_queues, sizeof(struct epair_queue),
+	    M_EPAIR, M_WAITOK);
+	for (int i = 0; i < sca->num_queues; i++) {
+		struct epair_queue *q = &sca->queues[i];
+		q->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+		q->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+		q->ridx = 0;
+		q->sc = sca;
+		snprintf(q->tq_name, sizeof(q->tq_name), "epair%da_q%d_tq",
+		    unit, i);
+		q->tq = taskqueue_create(q->tq_name, M_WAITOK,
+		    taskqueue_thread_enqueue,
+		    &q->tq);
+		NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q);
+		taskqueue_start_threads(&q->tq, 1, PI_NET, "%s", q->tq_name);
+	}
 
 	scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
 	scb->ifp = if_alloc(IFT_ETHER);
+#ifdef RSS
+	scb->num_queues = rss_getnumbuckets();
+#else
+	scb->num_queues = 1;
+#endif
 	if (scb->ifp == NULL) {
 		free(scb, M_EPAIR);
 		if_free(sca->ifp);
@@ -512,8 +563,22 @@
 		ifc_free_unit(ifc, unit);
 		return (ENOSPC);
 	}
-	scb->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
-	scb->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+	scb->queues = mallocarray(scb->num_queues, sizeof(struct epair_queue),
+	    M_EPAIR, M_WAITOK);
+	for (int i = 0; i < scb->num_queues; i++) {
+		struct epair_queue *q = &scb->queues[i];
+		q->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+		q->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+		q->ridx = 0;
+		q->sc = scb;
+		snprintf(q->tq_name, sizeof(q->tq_name), "epair%db_q%d_tq",
+		    unit, i);
+		q->tq = taskqueue_create(q->tq_name, M_WAITOK,
+		    taskqueue_thread_enqueue,
+		    &q->tq);
+		NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q);
+		taskqueue_start_threads(&q->tq, 1, PI_NET, "%s", q->tq_name);
+	}
 
 	/*
 	 * Cross-reference the interfaces so we will be able to free both.
@@ -528,41 +593,6 @@
 #else
 	hash = 0;
 #endif
-	if (swi_cookie[hash] == NULL) {
-		void *cookie;
-
-		EPAIR_UNLOCK();
-		error = swi_add(NULL, epairname,
-		    epair_intr, (void *)(uintptr_t)hash,
-		    SWI_NET, INTR_MPSAFE, &cookie);
-		if (error) {
-			buf_ring_free(scb->rxring[0], M_EPAIR);
-			buf_ring_free(scb->rxring[1], M_EPAIR);
-			if_free(scb->ifp);
-			free(scb, M_EPAIR);
-			buf_ring_free(sca->rxring[0], M_EPAIR);
-			buf_ring_free(sca->rxring[1], M_EPAIR);
-			if_free(sca->ifp);
-			free(sca, M_EPAIR);
-			ifc_free_unit(ifc, unit);
-			return (ENOSPC);
-		}
-		EPAIR_LOCK();
-		/* Recheck under lock even though a race is very unlikely. */
-		if (swi_cookie[hash] == NULL) {
-			swi_cookie[hash] = cookie;
-		} else {
-			EPAIR_UNLOCK();
-			(void) swi_remove(cookie);
-			EPAIR_LOCK();
-		}
-	}
-	sca->cpuidx = hash;
-	STAILQ_INSERT_TAIL(&swi_sc[hash], sca, entry);
-	sca->swi_cookie = swi_cookie[hash];
-	scb->cpuidx = hash;
-	STAILQ_INSERT_TAIL(&swi_sc[hash], scb, entry);
-	scb->swi_cookie = swi_cookie[hash];
 	EPAIR_UNLOCK();
 
 	/* Initialise pseudo media types. */
@@ -669,12 +699,15 @@
 	struct mbuf *m;
 
 	for (ridx = 0; ridx < 2; ridx++) {
-		do {
-			m = buf_ring_dequeue_sc(sc->rxring[ridx]);
-			if (m == NULL)
-				break;
-			m_freem(m);
-		} while (1);
+		for (int i = 0; i < sc->num_queues; i++) {
+			struct epair_queue *q = &sc->queues[i];
+			do {
+				m = buf_ring_dequeue_sc(q->rxring[ridx]);
+				if (m == NULL)
+					break;
+				m_freem(m);
+			} while (1);
+		}
 	}
 }
 
@@ -707,14 +740,6 @@
 	ether_ifdetach(ifp);
 	ether_ifdetach(oifp);
 
-	/* Second stop interrupt handler. */
-	EPAIR_LOCK();
-	STAILQ_REMOVE(&swi_sc[sca->cpuidx], sca, epair_softc, entry);
-	STAILQ_REMOVE(&swi_sc[scb->cpuidx], scb, epair_softc, entry);
-	EPAIR_UNLOCK();
-	sca->swi_cookie = NULL;
-	scb->swi_cookie = NULL;
-
 	/* Third free any queued packets and all the resources. */
 	CURVNET_SET_QUIET(oifp->if_vnet);
 	epair_drain_rings(scb);
@@ -725,16 +750,30 @@
 		    __func__, error);
 	if_free(oifp);
 	ifmedia_removeall(&scb->media);
-	buf_ring_free(scb->rxring[0], M_EPAIR);
-	buf_ring_free(scb->rxring[1], M_EPAIR);
+	for (int i = 0; i < scb->num_queues; i++) {
+		struct epair_queue *q = &scb->queues[i];
+		/* Stop the task first; its handler reads the rings. */
+		taskqueue_drain_all(q->tq);
+		taskqueue_free(q->tq);
+		buf_ring_free(q->rxring[0], M_EPAIR);
+		buf_ring_free(q->rxring[1], M_EPAIR);
+	}
+	free(scb->queues, M_EPAIR);
 	free(scb, M_EPAIR);
 	CURVNET_RESTORE();
 
 	epair_drain_rings(sca);
 	if_free(ifp);
 	ifmedia_removeall(&sca->media);
-	buf_ring_free(sca->rxring[0], M_EPAIR);
-	buf_ring_free(sca->rxring[1], M_EPAIR);
+	for (int i = 0; i < sca->num_queues; i++) {
+		struct epair_queue *q = &sca->queues[i];
+		/* Stop the task first; its handler reads the rings. */
+		taskqueue_drain_all(q->tq);
+		taskqueue_free(q->tq);
+		buf_ring_free(q->rxring[0], M_EPAIR);
+		buf_ring_free(q->rxring[1], M_EPAIR);
+	}
+	free(sca->queues, M_EPAIR);
 	free(sca, M_EPAIR);
 
 	/* Last free the cloner unit. */
@@ -765,31 +804,13 @@
 static int
 epair_modevent(module_t mod, int type, void *data)
 {
-	int i;
-
 	switch (type) {
 	case MOD_LOAD:
-		for (i = 0; i < MAXCPU; i++) {
-			swi_cookie[i] = NULL;
-			STAILQ_INIT(&swi_sc[i]);
-		}
 		EPAIR_LOCK_INIT();
 		if (bootverbose)
 			printf("%s: %s initialized.\n", __func__, epairname);
 		break;
 	case MOD_UNLOAD:
-		EPAIR_LOCK();
-		for (i = 0; i < MAXCPU; i++) {
-			if (!STAILQ_EMPTY(&swi_sc[i])) {
-				printf("%s: swi_sc[%d] active\n", __func__, i);
-				EPAIR_UNLOCK();
-				return (EBUSY);
-			}
-		}
-		EPAIR_UNLOCK();
-		for (i = 0; i < MAXCPU; i++)
-			if (swi_cookie[i] != NULL)
-				(void) swi_remove(swi_cookie[i]);
 		EPAIR_LOCK_DESTROY();
 		if (bootverbose)
 			printf("%s: %s unloaded.\n", __func__, epairname);