Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F161384000
D33731.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
D33731.diff
View Options
diff --git a/sys/net/if_epair.c b/sys/net/if_epair.c
--- a/sys/net/if_epair.c
+++ b/sys/net/if_epair.c
@@ -40,6 +40,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_rss.h"
+
#include <sys/param.h>
#include <sys/hash.h>
#include <sys/jail.h>
@@ -50,10 +52,11 @@
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
-#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
#include <sys/types.h>
#include <sys/buf_ring.h>
#include <sys/bus.h>
@@ -68,6 +71,11 @@
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/netisr.h>
+#ifdef RSS
+#include <net/rss_config.h>
+#include <netinet/in_rss.h>
+#include <netinet6/in6_rss.h>
+#endif
#include <net/vnet.h>
static int epair_clone_match(struct if_clone *, const char *);
@@ -90,21 +98,32 @@
#define EPAIR_LOCK() mtx_lock(&epair_n_index_mtx)
#define EPAIR_UNLOCK() mtx_unlock(&epair_n_index_mtx)
-static void *swi_cookie[MAXCPU]; /* swi(9). */
-static STAILQ_HEAD(, epair_softc) swi_sc[MAXCPU];
+struct epair_softc;
+struct epair_queue {
+ int id;
+ struct buf_ring *rxring[2];
+ volatile int ridx; /* 0 || 1 */
+ struct task tx_task;
+ struct epair_softc *sc;
+};
static struct mtx epair_n_index_mtx;
struct epair_softc {
- struct ifnet *ifp; /* This ifp. */
- struct ifnet *oifp; /* other ifp of pair. */
- void *swi_cookie; /* swi(9). */
- struct buf_ring *rxring[2];
- volatile int ridx; /* 0 || 1 */
- struct ifmedia media; /* Media config (fake). */
- uint32_t cpuidx;
+ struct ifnet *ifp; /* This ifp. */
+ struct ifnet *oifp; /* other ifp of pair. */
+ int num_queues;
+ struct epair_queue *queues;
+ struct ifmedia media; /* Media config (fake). */
STAILQ_ENTRY(epair_softc) entry;
};
+struct epair_tasks_t {
+ int tasks;
+ struct taskqueue *tq[MAXCPU];
+};
+
+static struct epair_tasks_t epair_tasks;
+
static void
epair_clear_mbuf(struct mbuf *m)
{
@@ -119,59 +138,43 @@
}
static void
-epair_if_input(struct epair_softc *sc, int ridx)
+epair_if_input(struct epair_softc *sc, struct epair_queue *q, int ridx)
{
- struct epoch_tracker et;
struct ifnet *ifp;
struct mbuf *m;
ifp = sc->ifp;
- NET_EPOCH_ENTER(et);
- do {
- m = buf_ring_dequeue_sc(sc->rxring[ridx]);
+ CURVNET_SET(ifp->if_vnet);
+ while (! buf_ring_empty(q->rxring[ridx])) {
+ m = buf_ring_dequeue_mc(q->rxring[ridx]);
if (m == NULL)
- break;
+ continue;
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
(*ifp->if_input)(ifp, m);
- } while (1);
- NET_EPOCH_EXIT(et);
+ }
+ CURVNET_RESTORE();
}
static void
-epair_sintr(struct epair_softc *sc)
+epair_tx_start_deferred(void *arg, int pending)
{
+ struct epair_queue *q = (struct epair_queue *)arg;
+ struct epair_softc *sc = q->sc;
int ridx, nidx;
if_ref(sc->ifp);
+ ridx = atomic_load_int(&q->ridx);
do {
- ridx = sc->ridx;
nidx = (ridx == 0) ? 1 : 0;
- } while (!atomic_cmpset_int(&sc->ridx, ridx, nidx));
- epair_if_input(sc, ridx);
+ } while (!atomic_fcmpset_int(&q->ridx, &ridx, nidx));
+ epair_if_input(sc, q, ridx);
- if_rele(sc->ifp);
-}
+ if (! buf_ring_empty(q->rxring[nidx]))
+ taskqueue_enqueue(epair_tasks.tq[q->id], &q->tx_task);
-static void
-epair_intr(void *arg)
-{
- struct epair_softc *sc;
- uint32_t cpuidx;
-
- cpuidx = (uintptr_t)arg;
- /* If this is a problem, this is a read-mostly situation. */
- EPAIR_LOCK();
- STAILQ_FOREACH(sc, &swi_sc[cpuidx], entry) {
- /* Do this lockless. */
- if (buf_ring_empty(sc->rxring[sc->ridx]))
- continue;
- epair_sintr(sc);
- }
- EPAIR_UNLOCK();
-
- return;
+ if_rele(sc->ifp);
}
static int
@@ -181,7 +184,12 @@
int len, ret;
int ridx;
short mflags;
+ struct epair_queue *q = NULL;
+ uint32_t bucket;
bool was_empty;
+#ifdef RSS
+ struct ether_header *eh;
+#endif
/*
* I know this looks weird. We pass the "other sc" as we need that one
@@ -202,13 +210,38 @@
MPASS(m->m_nextpkt == NULL);
MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
- ridx = atomic_load_int(&osc->ridx);
- was_empty = buf_ring_empty(osc->rxring[ridx]);
- ret = buf_ring_enqueue(osc->rxring[ridx], m);
+#ifdef RSS
+ ret = rss_m2bucket(m, &bucket);
+ if (ret) {
+ /* Actually hash the packet. */
+ eh = mtod(m, struct ether_header *);
+
+ switch (ntohs(eh->ether_type)) {
+ case ETHERTYPE_IP:
+ rss_soft_m2cpuid_v4(m, 0, &bucket);
+ break;
+ case ETHERTYPE_IPV6:
+ rss_soft_m2cpuid_v6(m, 0, &bucket);
+ break;
+ default:
+ bucket = 0;
+ break;
+ }
+ }
+ bucket %= osc->num_queues;
+#else
+ bucket = 0;
+#endif
+ q = &osc->queues[bucket];
+
+ ridx = atomic_load_int(&q->ridx);
+ was_empty = buf_ring_empty(q->rxring[ridx]);
+ ret = buf_ring_enqueue(q->rxring[ridx], m);
if (ret != 0) {
/* Ring is full. */
+ if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
m_freem(m);
- return (0);
+ goto done;
}
if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
@@ -223,9 +256,9 @@
/* Someone else received the packet. */
if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);
- /* Kick the interrupt handler for the first packet. */
- if (was_empty && osc->swi_cookie != NULL)
- swi_sched(osc->swi_cookie, 0);
+done:
+ if (was_empty)
+ taskqueue_enqueue(epair_tasks.tq[bucket], &q->tx_task);
return (0);
}
@@ -495,16 +528,27 @@
/* Allocate memory for both [ab] interfaces */
sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
sca->ifp = if_alloc(IFT_ETHER);
+ sca->num_queues = epair_tasks.tasks;
if (sca->ifp == NULL) {
free(sca, M_EPAIR);
ifc_free_unit(ifc, unit);
return (ENOSPC);
}
- sca->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK,NULL);
- sca->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+ sca->queues = mallocarray(sca->num_queues, sizeof(struct epair_queue),
+ M_EPAIR, M_WAITOK);
+ for (int i = 0; i < sca->num_queues; i++) {
+ struct epair_queue *q = &sca->queues[i];
+ q->id = i;
+ q->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+ q->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+ q->ridx = 0;
+ q->sc = sca;
+ NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q);
+ }
scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
scb->ifp = if_alloc(IFT_ETHER);
+ scb->num_queues = epair_tasks.tasks;
if (scb->ifp == NULL) {
free(scb, M_EPAIR);
if_free(sca->ifp);
@@ -512,8 +556,17 @@
ifc_free_unit(ifc, unit);
return (ENOSPC);
}
- scb->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
- scb->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+ scb->queues = mallocarray(scb->num_queues, sizeof(struct epair_queue),
+ M_EPAIR, M_WAITOK);
+ for (int i = 0; i < scb->num_queues; i++) {
+ struct epair_queue *q = &scb->queues[i];
+ q->id = i;
+ q->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+ q->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
+ q->ridx = 0;
+ q->sc = scb;
+ NET_TASK_INIT(&q->tx_task, 0, epair_tx_start_deferred, q);
+ }
/*
* Cross-reference the interfaces so we will be able to free both.
@@ -528,41 +581,6 @@
#else
hash = 0;
#endif
- if (swi_cookie[hash] == NULL) {
- void *cookie;
-
- EPAIR_UNLOCK();
- error = swi_add(NULL, epairname,
- epair_intr, (void *)(uintptr_t)hash,
- SWI_NET, INTR_MPSAFE, &cookie);
- if (error) {
- buf_ring_free(scb->rxring[0], M_EPAIR);
- buf_ring_free(scb->rxring[1], M_EPAIR);
- if_free(scb->ifp);
- free(scb, M_EPAIR);
- buf_ring_free(sca->rxring[0], M_EPAIR);
- buf_ring_free(sca->rxring[1], M_EPAIR);
- if_free(sca->ifp);
- free(sca, M_EPAIR);
- ifc_free_unit(ifc, unit);
- return (ENOSPC);
- }
- EPAIR_LOCK();
- /* Recheck under lock even though a race is very unlikely. */
- if (swi_cookie[hash] == NULL) {
- swi_cookie[hash] = cookie;
- } else {
- EPAIR_UNLOCK();
- (void) swi_remove(cookie);
- EPAIR_LOCK();
- }
- }
- sca->cpuidx = hash;
- STAILQ_INSERT_TAIL(&swi_sc[hash], sca, entry);
- sca->swi_cookie = swi_cookie[hash];
- scb->cpuidx = hash;
- STAILQ_INSERT_TAIL(&swi_sc[hash], scb, entry);
- scb->swi_cookie = swi_cookie[hash];
EPAIR_UNLOCK();
/* Initialise pseudo media types. */
@@ -669,12 +687,15 @@
struct mbuf *m;
for (ridx = 0; ridx < 2; ridx++) {
- do {
- m = buf_ring_dequeue_sc(sc->rxring[ridx]);
- if (m == NULL)
- break;
- m_freem(m);
- } while (1);
+ for (int i = 0; i < sc->num_queues; i++) {
+ struct epair_queue *q = &sc->queues[i];
+ do {
+ m = buf_ring_dequeue_sc(q->rxring[ridx]);
+ if (m == NULL)
+ break;
+ m_freem(m);
+ } while (1);
+ }
}
}
@@ -707,14 +728,6 @@
ether_ifdetach(ifp);
ether_ifdetach(oifp);
- /* Second stop interrupt handler. */
- EPAIR_LOCK();
- STAILQ_REMOVE(&swi_sc[sca->cpuidx], sca, epair_softc, entry);
- STAILQ_REMOVE(&swi_sc[scb->cpuidx], scb, epair_softc, entry);
- EPAIR_UNLOCK();
- sca->swi_cookie = NULL;
- scb->swi_cookie = NULL;
-
/* Third free any queued packets and all the resources. */
CURVNET_SET_QUIET(oifp->if_vnet);
epair_drain_rings(scb);
@@ -725,16 +738,24 @@
__func__, error);
if_free(oifp);
ifmedia_removeall(&scb->media);
- buf_ring_free(scb->rxring[0], M_EPAIR);
- buf_ring_free(scb->rxring[1], M_EPAIR);
+ for (int i = 0; i < scb->num_queues; i++) {
+ struct epair_queue *q = &scb->queues[i];
+ buf_ring_free(q->rxring[0], M_EPAIR);
+ buf_ring_free(q->rxring[1], M_EPAIR);
+ }
+ free(scb->queues, M_EPAIR);
free(scb, M_EPAIR);
CURVNET_RESTORE();
epair_drain_rings(sca);
if_free(ifp);
ifmedia_removeall(&sca->media);
- buf_ring_free(sca->rxring[0], M_EPAIR);
- buf_ring_free(sca->rxring[1], M_EPAIR);
+ for (int i = 0; i < sca->num_queues; i++) {
+ struct epair_queue *q = &sca->queues[i];
+ buf_ring_free(q->rxring[0], M_EPAIR);
+ buf_ring_free(q->rxring[1], M_EPAIR);
+ }
+ free(sca->queues, M_EPAIR);
free(sca, M_EPAIR);
/* Last free the cloner unit. */
@@ -762,34 +783,76 @@
VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
vnet_epair_uninit, NULL);
+static int
+epair_mod_init()
+{
+ char name[32];
+ epair_tasks.tasks = 0;
+
+#ifdef RSS
+ struct pcpu *pcpu;
+ int cpu;
+
+ CPU_FOREACH(cpu) {
+ cpuset_t cpu_mask;
+
+ /* Pin to this CPU so we get appropriate NUMA allocations. */
+ pcpu = pcpu_find(cpu);
+ thread_lock(curthread);
+ sched_bind(curthread, cpu);
+ thread_unlock(curthread);
+
+ snprintf(name, sizeof(name), "epair_task_%d", cpu);
+
+ epair_tasks.tq[cpu] = taskqueue_create(name, M_WAITOK,
+ taskqueue_thread_enqueue,
+ &epair_tasks.tq[cpu]);
+ CPU_SETOF(cpu, &cpu_mask);
+ taskqueue_start_threads_cpuset(&epair_tasks.tq[cpu], 1, PI_NET,
+ &cpu_mask, "%s", name);
+
+ epair_tasks.tasks++;
+ }
+#else
+ snprintf(name, sizeof(name), "epair_task");
+
+ epair_tasks.tq[0] = taskqueue_create(name, M_WAITOK,
+ taskqueue_thread_enqueue,
+ &epair_tasks.tq[0]);
+ taskqueue_start_threads(&epair_tasks.tq[0], 1, PI_NET, "%s", name);
+
+ epair_tasks.tasks = 1;
+#endif
+
+ return (0);
+}
+
+static void
+epair_mod_cleanup()
+{
+
+ for (int i = 0; i < epair_tasks.tasks; i++) {
+ taskqueue_drain_all(epair_tasks.tq[i]);
+ taskqueue_free(epair_tasks.tq[i]);
+ }
+}
+
static int
epair_modevent(module_t mod, int type, void *data)
{
- int i;
+ int ret;
switch (type) {
case MOD_LOAD:
- for (i = 0; i < MAXCPU; i++) {
- swi_cookie[i] = NULL;
- STAILQ_INIT(&swi_sc[i]);
- }
EPAIR_LOCK_INIT();
+ ret = epair_mod_init();
+ if (ret != 0)
+ return (ret);
if (bootverbose)
printf("%s: %s initialized.\n", __func__, epairname);
break;
case MOD_UNLOAD:
- EPAIR_LOCK();
- for (i = 0; i < MAXCPU; i++) {
- if (!STAILQ_EMPTY(&swi_sc[i])) {
- printf("%s: swi_sc[%d] active\n", __func__, i);
- EPAIR_UNLOCK();
- return (EBUSY);
- }
- }
- EPAIR_UNLOCK();
- for (i = 0; i < MAXCPU; i++)
- if (swi_cookie[i] != NULL)
- (void) swi_remove(swi_cookie[i]);
+ epair_mod_cleanup();
EPAIR_LOCK_DESTROY();
if (bootverbose)
printf("%s: %s unloaded.\n", __func__, epairname);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Jul 4, 7:37 AM (1 h, 37 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
34659145
Default Alt Text
D33731.diff (11 KB)
Attached To
Mode
D33731: if_epair: implement fanout
Attached
Detach File
Event Timeline
Log In to Comment