diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -405,15 +405,20 @@ * addr and len identify the netmap buffer, m is the (preallocated) * mbuf to use for transmissions. * - * We should add a reference to the mbuf so the m_freem() at the end - * of the transmission does not consume resources. + * Zero-copy transmission is possible if netmap is attached directly to a + * hardware interface: when cleaning we simply wait for the mbuf cluster + * refcount to decrement to 1, indicating that the driver has completed + * transmission and is done with the buffer. However, this approach can + * lead to queue deadlocks when attaching to software interfaces (e.g., + * if_bridge) since we cannot rely on member ports to promptly reclaim + * transmitted mbufs. Since there is no easy way to distinguish these + * cases, we currently always copy the buffer. * - * On FreeBSD, and on multiqueue cards, we can force the queue using + * On multiqueue cards, we can force the queue using * if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) * i = m->m_pkthdr.flowid % adapter->num_queues; * else * i = curcpu % adapter->num_queues; - * */ int nm_os_generic_xmit_frame(struct nm_os_gen_arg *a) @@ -423,16 +428,18 @@ struct ifnet *ifp = a->ifp; struct mbuf *m = a->m; - /* Link the external storage to - * the netmap buffer, so that no copy is necessary. */ - m->m_ext.ext_buf = m->m_data = a->addr; - m->m_ext.ext_size = len; + if (MBUF_REFCNT(m) != 1) { + nm_prerr("invalid refcnt %d for %p", MBUF_REFCNT(m), m); + panic("in generic_xmit_frame"); + } + if (unlikely(m->m_ext.ext_size < len)) { + nm_prlim(2, "size %d < len %d", m->m_ext.ext_size, len); + len = m->m_ext.ext_size; + } + memcpy(m->m_data, a->addr, len); m->m_flags |= M_PKTHDR; m->m_len = m->m_pkthdr.len = len; - - /* mbuf refcnt is not contended, no need to use atomic - * (a memory barrier is enough). */ SET_MBUF_REFCNT(m, 2); M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); m->m_pkthdr.flowid = a->ring_nr; @@ -443,7 +450,6 @@ return ret ? -1 : 0; } - struct netmap_adapter * netmap_getna(if_t ifp) { diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c --- a/sys/dev/netmap/netmap_generic.c +++ b/sys/dev/netmap/netmap_generic.c @@ -537,10 +537,12 @@ } /* The event has been consumed, we can go * ahead. */ - } else if (MBUF_REFCNT(m) != 1) { - /* This mbuf is still busy: its refcnt is 2. */ - break; + /* This mbuf has been submitted but is still busy + * (refcount is 2). + * Leave it to the driver and replenish. */ + m_freem(m); + tx_pool[nm_i] = NULL; } } @@ -577,7 +579,7 @@ return e; } -static void +static void __unused generic_set_tx_event(struct netmap_kring *kring, u_int hwcur) { u_int lim = kring->nkr_num_slots - 1; @@ -631,10 +633,8 @@ /* Decrement the refcount. This will free it if we lose the race * with the driver. */ m_freem(m); - smp_mb(); } - /* * generic_netmap_txsync() transforms netmap buffers into mbufs * and passes them to the standard device driver @@ -698,7 +698,7 @@ /* Tale a mbuf from the tx pool (replenishing the pool * entry if necessary) and copy in the user packet. */ m = kring->tx_pool[nm_i]; - if (unlikely(m == NULL)) { + if (m == NULL) { kring->tx_pool[nm_i] = m = nm_os_get_mbuf(ifp, NETMAP_BUF_SIZE(na)); if (m == NULL) { @@ -728,21 +728,6 @@ tx_ret = nm_os_generic_xmit_frame(&a); if (unlikely(tx_ret)) { if (!gna->txqdisc) { - /* - * No room for this mbuf in the device driver. - * Request a notification FOR A PREVIOUS MBUF, - * then call generic_netmap_tx_clean(kring) to do the - * double check and see if we can free more buffers. - * If there is space continue, else break; - * NOTE: the double check is necessary if the problem - * occurs in the txsync call after selrecord(). - * Also, we need some way to tell the caller that not - * all buffers were queued onto the device (this was - * not a problem with native netmap driver where space - * is preallocated). The bridge has a similar problem - * and we solve it there by dropping the excess packets. - */ - generic_set_tx_event(kring, nm_i); if (generic_netmap_tx_clean(kring, gna->txqdisc)) { /* space now available */ continue; @@ -786,15 +771,6 @@ /* * Second, reclaim completed buffers */ - if (!gna->txqdisc && (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring))) { - /* No more available slots? Set a notification event - * on a netmap slot that will be cleaned in the future. - * No doublecheck is performed, since txsync() will be - * called twice by netmap_poll(). - */ - generic_set_tx_event(kring, nm_i); - } - generic_netmap_tx_clean(kring, gna->txqdisc); return 0; diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -2394,7 +2394,7 @@ * has a KASSERT(), checking that the mbuf dtor function is not NULL. */ -static void void_mbuf_dtor(struct mbuf *m) { } +static inline void void_mbuf_dtor(struct mbuf *m) { } #define SET_MBUF_DESTRUCTOR(m, fn) do { \ (m)->m_ext.ext_free = (fn != NULL) ? \ @@ -2402,22 +2402,9 @@ } while (0) static inline struct mbuf * -nm_os_get_mbuf(struct ifnet *ifp, int len) +nm_os_get_mbuf(struct ifnet *ifp __unused, int len) { - struct mbuf *m; - - (void)ifp; - (void)len; - - m = m_gethdr(M_NOWAIT, MT_DATA); - if (m == NULL) { - return m; - } - - m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor, - NULL, NULL, 0, EXT_NET_DRV); - - return m; + return (m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, len)); } #endif /* __FreeBSD__ */