sys/dev/netmap/if_vtnet_netmap.h
 /*
- * Copyright (C) 2014 Vincenzo Maffione, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2014-2018 Vincenzo Maffione, Luigi Rizzo.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
Context not available. | |||||
 #include <vm/pmap.h>	/* vtophys ? */
 #include <dev/netmap/netmap_kern.h>
-#define SOFTC_T	vtnet_softc
-
-/* Free all the unused buffer in all the RX virtqueues.
- * This function is called when entering and exiting netmap mode.
- * - buffers queued by the virtio driver return skbuf/mbuf pointer
- *   and need to be freed;
- * - buffers queued by netmap return the txq/rxq, and do not need work
- */
-static void
-vtnet_netmap_free_bufs(struct SOFTC_T* sc)
-{
-	int i, nmb = 0, n = 0, last;
-
-	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
-		struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
-		struct virtqueue *vq;
-		struct mbuf *m;
-		struct vtnet_txq *txq = &sc->vtnet_txqs[i];
-		struct vtnet_tx_header *txhdr;
-
-		last = 0;
-		vq = rxq->vtnrx_vq;
-		while ((m = virtqueue_drain(vq, &last)) != NULL) {
-			n++;
-			if (m != (void *)rxq)
-				m_freem(m);
-			else
-				nmb++;
-		}
-
-		last = 0;
-		vq = txq->vtntx_vq;
-		while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
-			n++;
-			if (txhdr != (void *)txq) {
-				m_freem(txhdr->vth_mbuf);
-				uma_zfree(vtnet_tx_header_zone, txhdr);
-			} else
-				nmb++;
-		}
-	}
-	D("freed %d mbufs, %d netmap bufs on %d queues",
-		n - nmb, nmb, i);
-}
+/*
+ * Return 1 if the queue identified by 't' and 'idx' is in netmap mode.
+ */
+static int
+vtnet_netmap_queue_on(struct vtnet_softc *sc, enum txrx t, int idx)
+{
+	struct netmap_adapter *na = NA(sc->vtnet_ifp);
+
+	if (!nm_native_on(na))
+		return 0;
+
+	if (t == NR_RX)
+		return !!(idx < na->num_rx_rings &&
+			na->rx_rings[idx]->nr_mode == NKR_NETMAP_ON);
+
+	return !!(idx < na->num_tx_rings &&
+		na->tx_rings[idx]->nr_mode == NKR_NETMAP_ON);
+}
+
+static void
+vtnet_free_used(struct virtqueue *vq, int netmap_bufs, enum txrx t, int idx)
+{
+	void *cookie;
+	int deq = 0;
+
+	while ((cookie = virtqueue_dequeue(vq, NULL)) != NULL) {
+		if (netmap_bufs) {
+			/* These are netmap buffers: there is nothing to do. */
+		} else {
+			/* These are mbufs that we need to free. */
+			struct mbuf *m;
+
+			if (t == NR_TX) {
+				struct vtnet_tx_header *txhdr = cookie;
+
+				m = txhdr->vth_mbuf;
+				m_freem(m);
+				uma_zfree(vtnet_tx_header_zone, txhdr);
+			} else {
+				m = cookie;
+				m_freem(m);
+			}
+		}
+		deq++;
+	}
+
+	if (deq)
+		nm_prinf("%d sgs dequeued from %s-%d (netmap=%d)\n",
+			 deq, nm_txrx2str(t), idx, netmap_bufs);
+}
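
For context, vtnet_netmap_queue_on() above is the hook the rest of the driver can use to decide, per queue, whether completed work should be handed to netmap rather than to the regular mbuf path. A minimal sketch of such a call site follows; the function name vtnet_rxq_eof_netmap and the surrounding logic are hypothetical, while vtnet_netmap_queue_on() comes from this patch and netmap_rx_irq() from netmap_kern.h:

/* Hypothetical RX completion path: if this queue is in netmap mode,
 * netmap_rx_irq() wakes the netmap application sleeping on the kring
 * and the in-kernel stack never sees the packets. */
static int
vtnet_rxq_eof_netmap(struct vtnet_rxq *rxq)
{
	struct vtnet_softc *sc = rxq->vtnrx_sc;
	u_int work = 0;

	if (!vtnet_netmap_queue_on(sc, NR_RX, rxq->vtnrx_id))
		return 0;	/* fall through to the normal path */
	netmap_rx_irq(sc->vtnet_ifp, rxq->vtnrx_id, &work);
	return 1;		/* consumed by netmap */
}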
 /* Register and unregister. */
Context not available. | |||||
 vtnet_netmap_reg(struct netmap_adapter *na, int onoff)

gnn: I'd rename "onoff" to "state" as that's a more common name for such a boolean in our kernel.
 {
 	struct ifnet *ifp = na->ifp;
-	struct SOFTC_T *sc = ifp->if_softc;
+	struct vtnet_softc *sc = ifp->if_softc;
+	int success;
+	enum txrx t;
+	int i;
+
+	/* Drain the taskqueues to make sure that there are no worker threads
+	 * accessing the virtqueues. */
+	vtnet_drain_taskqueues(sc);
 
 	VTNET_CORE_LOCK(sc);
+
+	/* We need nm_netmap_on() to return true when called by
+	 * vtnet_init_locked() below. */
+	if (onoff)
+		nm_set_native_flags(na);
+
+	/* We need to trigger a device reset in order to unexpose guest buffers
+	 * published to the host. */
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
-	/* enable or disable flags and callbacks in na and ifp */
-	if (onoff) {
-		nm_set_native_flags(na);
-	} else {
-		nm_clear_native_flags(na);
-	}
-	/* drain queues so netmap and native drivers
-	 * do not interfere with each other
-	 */
-	vtnet_netmap_free_bufs(sc);
-	vtnet_init_locked(sc);	/* also enable intr */
-	VTNET_CORE_UNLOCK(sc);
-	return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
+	/* Get pending used buffers. The way they are freed depends on whether
+	 * they are netmap buffer or they are mbufs. We can tell apart the two
+	 * cases by looking at kring->nr_mode, before this is possibly updated
+	 * in the loop below. */
+	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+		struct vtnet_txq *txq = &sc->vtnet_txqs[i];
+		struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
+		struct netmap_kring *kring;
+
+		VTNET_TXQ_LOCK(txq);
+		kring = NMR(na, NR_TX)[i];
+		vtnet_free_used(txq->vtntx_vq,
+				kring->nr_mode == NKR_NETMAP_ON, NR_TX, i);
+		VTNET_TXQ_UNLOCK(txq);
+
+		VTNET_RXQ_LOCK(rxq);
+		kring = NMR(na, NR_RX)[i];
+		vtnet_free_used(rxq->vtnrx_vq,
+				kring->nr_mode == NKR_NETMAP_ON, NR_RX, i);
+		VTNET_RXQ_UNLOCK(rxq);
+	}
+	vtnet_init_locked(sc);
+	success = (ifp->if_drv_flags & IFF_DRV_RUNNING) ? 0 : ENXIO;
+
+	if (onoff) {
+		for_rx_tx(t) {
+			/* Hardware rings. */
+			for (i = 0; i < nma_get_nrings(na, t); i++) {
+				struct netmap_kring *kring = NMR(na, t)[i];
+
+				if (nm_kring_pending_on(kring))
+					kring->nr_mode = NKR_NETMAP_ON;
+			}
+
+			/* Host rings. */
+			for (i = 0; i < nma_get_host_nrings(na, t); i++) {
+				struct netmap_kring *kring =
+					NMR(na, t)[nma_get_nrings(na, t) + i];
+
+				if (nm_kring_pending_on(kring))
+					kring->nr_mode = NKR_NETMAP_ON;
+			}
+		}
+	} else {
+		nm_clear_native_flags(na);
+		for_rx_tx(t) {
+			/* Hardware rings. */
+			for (i = 0; i < nma_get_nrings(na, t); i++) {
+				struct netmap_kring *kring = NMR(na, t)[i];
+
+				if (nm_kring_pending_off(kring))
+					kring->nr_mode = NKR_NETMAP_OFF;
+			}
+
+			/* Host rings. */
+			for (i = 0; i < nma_get_host_nrings(na, t); i++) {
+				struct netmap_kring *kring =
+					NMR(na, t)[nma_get_nrings(na, t) + i];
+
+				if (nm_kring_pending_off(kring))
+					kring->nr_mode = NKR_NETMAP_OFF;
+			}
+		}
+	}
+
+	VTNET_CORE_UNLOCK(sc);
+
+	return success;
 }
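
As a usage note, vtnet_netmap_reg() is not called directly: netmap invokes it through the nm_register hook when an application opens or closes the port in native mode. A minimal user-space counterpart, sketched with the stock netmap_user.h wrappers (illustrative only, not part of this change):

#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>

int
main(void)
{
	/* nm_open() puts vtnet0 into netmap mode, which eventually runs
	 * vtnet_netmap_reg(na, 1); nm_close() runs the onoff=0 path. */
	struct nm_desc *d = nm_open("netmap:vtnet0", NULL, 0, NULL);

	if (d == NULL)
		return 1;
	/* ... poll(NETMAP_FD(d), ...) and process the rings ... */
	nm_close(d);
	return 0;
}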
Context not available. | |||||
 	struct netmap_ring *ring = kring->ring;
 	u_int ring_nr = kring->ring_id;
 	u_int nm_i;	/* index into the netmap ring */
-	u_int nic_i;	/* index into the NIC ring */
-	u_int n;
 	u_int const lim = kring->nkr_num_slots - 1;
 	u_int const head = kring->rhead;
 
 	/* device-specific */
-	struct SOFTC_T *sc = ifp->if_softc;
+	struct vtnet_softc *sc = ifp->if_softc;
 	struct vtnet_txq *txq = &sc->vtnet_txqs[ring_nr];
 	struct virtqueue *vq = txq->vtntx_vq;
 	int interrupts = !(kring->nr_kflags & NKR_NOINTR);
+	u_int n;
 
 	/*
 	 * First part: process new packets to send.
Context not available. | |||||
 	if (nm_i != head) {	/* we have new packets to send */
 		struct sglist *sg = txq->vtntx_sg;
 
-		nic_i = netmap_idx_k2n(kring, nm_i);
-		for (n = 0; nm_i != head; n++) {
-			static struct virtio_net_hdr_mrg_rxbuf hdr;
+		for (; nm_i != head; nm_i = nm_next(nm_i, lim)) {
 			/* we use an empty header here */
 			struct netmap_slot *slot = &ring->slot[nm_i];
 			u_int len = slot->len;
 			uint64_t paddr;

Context not available.

 			 * and kick the hypervisor (if necessary).
 			 */
 			sglist_reset(sg); // cheap
-			// if vtnet_hdr_size > 0 ...
+			err = sglist_append(sg, &txq->vtntx_shrhdr, sc->vtnet_hdr_size);
bryanv: The use of a shared header here (and more so on the receive side) worries me as rather brittle. Is there some existing vtnet netmap code that un-negotiates most of the offload features?

vmaffione: No, renegotiation happens when the netmap application runs something like

	# ifconfig vtnet0…

which is something that netmap applications normally do. The point here is that netmap does not support offloads. It simply ignores and strips the vtnet header (on receive), and prepends a zeroed vtnet header on transmission (for compatibility with the hypervisor). So having a shared header is important to avoid dynamic allocation or wasting space with a parallel array.

bryanv: Is there any code that disables the mergeable receive buffers feature? With mergeable buffers, vtnet_netmap_kring_refill() does not quite look correct, because the header does not get a separate descriptor.

vmaffione: No, currently we do not disable mergeable receive buffers, but we could do that on the reinit cycle triggered by vtnet_netmap_reg(). I do not understand your observation about the separate descriptor in case of mergeable buffers, though.

bryanv: With the mergeable buffer feature, the host device can spread a received packet across multiple descriptors, storing the number of descriptors in the header's num_buffers field. I see this being broken when sharing the header. You might not hit this because either your host device does not support mergeable buffers, or, since LRO is disabled, your receive buffers are always large enough that num_buffers is always one.

vmaffione: Absolutely. This was not under discussion. As I said, we can disable mergeable buffers in vtnet_netmap_reg(), and I will do that. By the way, my host supports mergeable buffers, but large buffers never come because in the guest I disable txcsum/rxcsum, which implies no TSO, and therefore no large frames.

bryanv: The support for mergeable buffers is determined at attach time, and various pieces of the driver are configured/sized accordingly, not intended to be changed on the fly at reinit time. I think that by the point of your netmap register call it is too late (and it would impact non-netmap users). Ultimately I feel you're going to have to implement full receive feature support in netmap, or at least fail the netmap register when unsupported features were negotiated. Otherwise, you are risking corruption or other hard-to-find bugs. Also, I have support for VirtIO v1 [1] that I hope to commit soon, which further complicates the Rx/Tx contract when negotiated. I'm fine if you want to commit this, since it does seem to be an improvement, but I think additional work is eventually needed. After VirtIO v1 support is committed I would really like to see if we can refactor all of this so we don't have to support two Rx/Tx path implementations. [1] https://github.com/bryanv/freebsd/blob/virtio/sys/dev/virtio/network/if_vtnet.c

vmaffione: Yes, I've noticed the pieces sized accordingly. So for now I would commit this, which is already a big improvement and does not affect non-netmap users. The netmap register is the equivalent of the…
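
For reference alongside this thread: the header being shared is the mergeable-rxbuf variant defined by the VirtIO specification, reproduced below for illustration (the patch itself only references it through sc->vtnet_hdr_size). Its num_buffers field is what makes sharing risky: on receive the host writes a per-packet descriptor count into it, which a single header shared across all slots cannot reliably carry.

/* Standard VirtIO definitions, shown for illustration. */
struct virtio_net_hdr {
	uint8_t  flags;
	uint8_t  gso_type;
	uint16_t hdr_len;
	uint16_t gso_size;
	uint16_t csum_start;
	uint16_t csum_offset;
};

struct virtio_net_hdr_mrg_rxbuf {
	struct virtio_net_hdr hdr;
	uint16_t num_buffers;	/* host-written on RX when
				 * VIRTIO_NET_F_MRG_RXBUF is negotiated */
};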
-			err = sglist_append(sg, &hdr, sc->vtnet_hdr_size);
-			// XXX later, support multi segment
 			err = sglist_append_phys(sg, paddr, len);
-			/* use na as the cookie */
-			err = virtqueue_enqueue(vq, txq, sg, sg->sg_nseg, 0);
-			if (unlikely(err < 0)) {
-				D("virtqueue_enqueue failed");
-				break;
-			}
-			nm_i = nm_next(nm_i, lim);
-			nic_i = nm_next(nic_i, lim);
-		}
-
-		/* Update hwcur depending on where we stopped. */
-		kring->nr_hwcur = nm_i; /* note we migth break early */
-
-		/* No more free TX slots? Ask the hypervisor for notifications,
-		 * possibly only when a considerable amount of work has been
-		 * done.
-		 */
-		ND(3,"sent %d packets, hwcur %d", n, nm_i);
-		virtqueue_disable_intr(vq);
-		virtqueue_notify(vq);
-	} else {
-		if (ring->head != ring->tail)
-			ND(5, "pure notify ? head %d tail %d nused %d %d",
-				ring->head, ring->tail, virtqueue_nused(vq),
-				(virtqueue_dump(vq), 1));
-		virtqueue_notify(vq);
-		if (interrupts) {
-			virtqueue_enable_intr(vq); // like postpone with 0
-		}
-	}
+			err = virtqueue_enqueue(vq, /*cookie=*/txq, sg,
+						/*readable=*/sg->sg_nseg,
+						/*writeable=*/0);
+			if (unlikely(err)) {
+				if (err != ENOSPC)
+					nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
+						 kring->name, err);
+				break;
+			}
+		}
+
+		virtqueue_notify(vq);
+
+		/* Update hwcur depending on where we stopped. */
+		kring->nr_hwcur = nm_i; /* note we might break early */
+	}
 	/* Free used slots. We only consider our own used buffers, recognized
-	 * by the token we passed to virtqueue_add_outbuf.
+	 * by the token we passed to virtqueue_enqueue.
 	 */
 	n = 0;
 	for (;;) {
-		struct vtnet_tx_header *txhdr = virtqueue_dequeue(vq, NULL);
-		if (txhdr == NULL)
+		void *token = virtqueue_dequeue(vq, NULL);
+
+		if (token == NULL)
 			break;
-		if (likely(txhdr == (void *)txq)) {
-			n++;
-			if (virtqueue_nused(vq) < 32) { // XXX slow release
-				break;
-			}
-		} else { /* leftover from previous transmission */
-			m_freem(txhdr->vth_mbuf);
-			uma_zfree(vtnet_tx_header_zone, txhdr);
-		}
+		if (unlikely(token != (void *)txq))
+			nm_prerr("BUG: TX token mismatch\n");
+		else
+			n++;
 	}
-	if (n) {
+
+	if (n > 0) {
 		kring->nr_hwtail += n;
 		if (kring->nr_hwtail > lim)
 			kring->nr_hwtail -= lim + 1;
 	}
-	if (nm_i != kring->nr_hwtail /* && vtnet_txq_below_threshold(txq) == 0*/) {
-		ND(3, "disable intr, hwcur %d", nm_i);
-		virtqueue_disable_intr(vq);
-	} else if (interrupts) {
-		ND(3, "enable intr, hwcur %d", nm_i);
-		virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT);
-	}
+
+	if (interrupts && virtqueue_nfree(vq) < 32)
+		virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG);
 
 	return 0;
 }
 static int
-vtnet_refill_rxq(struct netmap_kring *kring, u_int nm_i, u_int head)
+vtnet_netmap_kring_refill(struct netmap_kring *kring, u_int nm_i, u_int head)
 {
 	struct netmap_adapter *na = kring->na;
 	struct ifnet *ifp = na->ifp;

Context not available.

 	struct netmap_ring *ring = kring->ring;
 	u_int ring_nr = kring->ring_id;
 	u_int const lim = kring->nkr_num_slots - 1;
-	u_int n;
 
 	/* device-specific */
-	struct SOFTC_T *sc = ifp->if_softc;
+	struct vtnet_softc *sc = ifp->if_softc;
 	struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
 	struct virtqueue *vq = rxq->vtnrx_vq;

Context not available.

 	struct sglist_seg ss[2];
 	struct sglist sg = { ss, 0, 0, 2 };
 
-	for (n = 0; nm_i != head; n++) {
-		static struct virtio_net_hdr_mrg_rxbuf hdr;
+	for (; nm_i != head; nm_i = nm_next(nm_i, lim)) {
 		struct netmap_slot *slot = &ring->slot[nm_i];
 		uint64_t paddr;
 		void *addr = PNMB(na, slot, &paddr);

Context not available.

 		}
 		slot->flags &= ~NS_BUF_CHANGED;
 
-		sglist_reset(&sg); // cheap
-		err = sglist_append(&sg, &hdr, sc->vtnet_hdr_size);
+		sglist_reset(&sg);
+		err = sglist_append(&sg, &rxq->vtnrx_shrhdr, sc->vtnet_hdr_size);
bryanv: Either check or assert the value of err?
 		err = sglist_append_phys(&sg, paddr, NETMAP_BUF_SIZE(na));
 		/* writable for the host */
-		err = virtqueue_enqueue(vq, rxq, &sg, 0, sg.sg_nseg);
-		if (err < 0) {
-			D("virtqueue_enqueue failed");
+		err = virtqueue_enqueue(vq, /*cookie=*/rxq, &sg,
+					/*readable=*/0, /*writeable=*/sg.sg_nseg);
+		if (unlikely(err)) {
+			if (err != ENOSPC)
+				nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
+					 kring->name, err);
 			break;
 		}
-		nm_i = nm_next(nm_i, lim);
 	}
 
 	return nm_i;
 }
+/*
+ * Publish netmap buffers on a RX virtqueue.
+ * Returns -1 if this virtqueue is not being opened in netmap mode.
+ * If the virtqueue is being opened in netmap mode, return 0 on success and
+ * a positive error code on failure.
+ */
+static int
+vtnet_netmap_rxq_populate(struct vtnet_rxq *rxq)
+{
+	struct netmap_adapter *na = NA(rxq->vtnrx_sc->vtnet_ifp);
+	struct netmap_kring *kring;
+	int error;
+
+	if (!nm_native_on(na) || rxq->vtnrx_id >= na->num_rx_rings)
+		return -1;
+
+	kring = na->rx_rings[rxq->vtnrx_id];
+	if (!(nm_kring_pending_on(kring) ||
+			kring->nr_pending_mode == NKR_NETMAP_ON))
+		return -1;
+
+	/* Expose all the RX netmap buffers. Note that the number of
+	 * netmap slots in the RX ring matches the maximum number of
+	 * 2-elements sglist that the RX virtqueue can accommodate. */
+	error = vtnet_netmap_kring_refill(kring, 0, na->num_rx_desc);
+	virtqueue_notify(rxq->vtnrx_vq);
+
+	return error < 0 ? ENXIO : 0;
+}
 /* Reconcile kernel and user view of the receive ring. */
 static int
 vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
Context not available. | |||||
 	struct netmap_ring *ring = kring->ring;
 	u_int ring_nr = kring->ring_id;
 	u_int nm_i;	/* index into the netmap ring */
-	// u_int nic_i;	/* index into the NIC ring */
-	u_int n;
 	u_int const lim = kring->nkr_num_slots - 1;
 	u_int const head = kring->rhead;
-	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
+	int force_update = (flags & NAF_FORCE_READ) ||
+			(kring->nr_kflags & NKR_PENDINTR);
 	int interrupts = !(kring->nr_kflags & NKR_NOINTR);
 
 	/* device-specific */
-	struct SOFTC_T *sc = ifp->if_softc;
+	struct vtnet_softc *sc = ifp->if_softc;
 	struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
 	struct virtqueue *vq = rxq->vtnrx_vq;
 
-	/* XXX netif_carrier_ok ? */
-	if (head > lim)
-		return netmap_ring_reinit(kring);
-
 	rmb();
+
 	/*
 	 * First part: import newly received packets.
-	 * Only accept our
-	 * own buffers (matching the token). We should only get
-	 * matching buffers, because of vtnet_netmap_free_rx_unused_bufs()
-	 * and vtnet_netmap_init_buffers().
+	 * Only accept our own buffers (matching the token). We should only get
+	 * matching buffers. We may need to stop early to avoid hwtail to overrun
+	 * hwcur.
 	 */
 	if (netmap_no_pendintr || force_update) {
-		struct netmap_adapter *token;
+		uint32_t hwtail_lim = nm_prev(kring->nr_hwcur, lim);
+		void *token;
+
+		vtnet_rxq_disable_intr(rxq);
 
 		nm_i = kring->nr_hwtail;
-		n = 0;
-		for (;;) {
+		while (nm_i != hwtail_lim) {
 			int len;
 			token = virtqueue_dequeue(vq, &len);
-			if (token == NULL)
+			if (token == NULL) {
+				if (interrupts && vtnet_rxq_enable_intr(rxq)) {
+					vtnet_rxq_disable_intr(rxq);
+					continue;
+				}
 				break;
-			if (likely(token == (void *)rxq)) {
-				ring->slot[nm_i].len = len;
-				ring->slot[nm_i].flags = 0;
-				nm_i = nm_next(nm_i, lim);
-				n++;
-			} else {
-				D("This should not happen");
+			}
+			if (unlikely(token != (void *)rxq)) {
+				nm_prerr("BUG: RX token mismatch\n");
+			} else {
+				/* Skip the virtio-net header. */
+				len -= sc->vtnet_hdr_size;
+				if (unlikely(len < 0)) {
+					RD(1, "Truncated virtio-net-header, "
+						"missing %d bytes", -len);
+					len = 0;
+				}
+				ring->slot[nm_i].len = len;
+				ring->slot[nm_i].flags = 0;
+				nm_i = nm_next(nm_i, lim);
 			}
 		}
 		kring->nr_hwtail = nm_i;

Context not available.

 	 */
 	nm_i = kring->nr_hwcur; /* netmap ring index */
 	if (nm_i != head) {
-		int err = vtnet_refill_rxq(kring, nm_i, head);
-		if (err < 0)
-			return 1;
-		kring->nr_hwcur = err;
+		int nm_j = vtnet_netmap_kring_refill(kring, nm_i, head);
+
+		if (nm_j < 0)
+			return nm_j;
+		kring->nr_hwcur = nm_j;
 		virtqueue_notify(vq);
-		/* After draining the queue may need an intr from the hypervisor */
-		if (interrupts) {
-			vtnet_rxq_enable_intr(rxq);
-		}
 	}
 
 	ND("[C] h %d c %d t %d hwcur %d hwtail %d",
Context not available. | |||||
 static void
 vtnet_netmap_intr(struct netmap_adapter *na, int onoff)
 {
-	struct SOFTC_T *sc = na->ifp->if_softc;
+	struct vtnet_softc *sc = na->ifp->if_softc;
 	int i;
 
 	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {

Context not available.

 	}
 }
-/* Make RX virtqueues buffers pointing to netmap buffers. */
-static int
-vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc)
-{
-	struct ifnet *ifp = sc->vtnet_ifp;
-	struct netmap_adapter* na = NA(ifp);
-	unsigned int r;
-
-	if (!nm_native_on(na))
-		return 0;
-	for (r = 0; r < na->num_rx_rings; r++) {
-		struct netmap_kring *kring = na->rx_rings[r];
-		struct vtnet_rxq *rxq = &sc->vtnet_rxqs[r];
-		struct virtqueue *vq = rxq->vtnrx_vq;
-		struct netmap_slot* slot;
-		int err = 0;
-
-		slot = netmap_reset(na, NR_RX, r, 0);
-		if (!slot) {
-			D("strange, null netmap ring %d", r);
-			return 0;
-		}
-		/* Add up to na->num_rx_desc-1 buffers to this RX virtqueue.
-		 * It's important to leave one virtqueue slot free, otherwise
-		 * we can run into ring->cur/ring->tail wraparounds.
-		 */
-		err = vtnet_refill_rxq(kring, 0, na->num_rx_desc-1);
-		if (err < 0)
-			return 0;
-		virtqueue_notify(vq);
-	}
-
-	return 1;
-}
+static int
+vtnet_netmap_tx_slots(struct vtnet_softc *sc)
+{
+	int div;
+
+	/* We need to prepend a virtio-net header to each netmap buffer to be
+	 * transmitted, therefore calling virtqueue_enqueue() passing sglist
+	 * with 2 elements.
+	 * TX virtqueues use indirect descriptors if the feature was negotiated
+	 * with the host, and if sc->vtnet_tx_nsegs > 1. With indirect
+	 * descriptors, a single virtio descriptor is sufficient to reference
+	 * each TX sglist. Without them, we need two separate virtio descriptors
+	 * for each TX sglist. We therefore compute the number of netmap TX
+	 * slots according to these assumptions.
+	 */
+	if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) && sc->vtnet_tx_nsegs > 1)
+		div = 1;
+	else
+		div = 2;
+
+	return virtqueue_size(sc->vtnet_txqs[0].vtntx_vq) / div;
+}
+
+static int
+vtnet_netmap_rx_slots(struct vtnet_softc *sc)
+{
+	int div;
+
+	/* We need to prepend a virtio-net header to each netmap buffer to be
+	 * received, therefore calling virtqueue_enqueue() passing sglist
+	 * with 2 elements.
+	 * RX virtqueues use indirect descriptors if the feature was negotiated
+	 * with the host, and if sc->vtnet_rx_nsegs > 1. With indirect
+	 * descriptors, a single virtio descriptor is sufficient to reference
+	 * each RX sglist. Without them, we need two separate virtio descriptors
+	 * for each RX sglist. We therefore compute the number of netmap RX
+	 * slots according to these assumptions.
+	 */
+	if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) && sc->vtnet_rx_nsegs > 1)
+		div = 1;
+	else
+		div = 2;
+
+	return virtqueue_size(sc->vtnet_rxqs[0].vtnrx_vq) / div;
+}
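
A quick numeric check of the computation above, with a hypothetical virtqueue size (not taken from the patch): a 256-descriptor virtqueue yields 256 / 2 = 128 netmap slots without indirect descriptors, since each packet consumes one descriptor for the virtio-net header and one for the buffer, and 256 slots with them. As a standalone sketch:

#include <assert.h>

/* Hypothetical standalone version of the divisor logic in
 * vtnet_netmap_tx_slots()/vtnet_netmap_rx_slots(). */
static int
nm_slots(int vq_size, int indirect)
{
	/* Each packet is a 2-element sglist (virtio-net header plus netmap
	 * buffer): one virtio descriptor if indirect descriptors are in
	 * use, two otherwise. */
	return vq_size / (indirect ? 1 : 2);
}

int
main(void)
{
	assert(nm_slots(256, 0) == 128);
	assert(nm_slots(256, 1) == 256);
	return (0);
}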
+static int
+vtnet_netmap_config(struct netmap_adapter *na, struct nm_config_info *info)
+{
+	struct vtnet_softc *sc = na->ifp->if_softc;
+
+	info->num_tx_rings = sc->vtnet_act_vq_pairs;
+	info->num_rx_rings = sc->vtnet_act_vq_pairs;
+	info->num_tx_descs = vtnet_netmap_tx_slots(sc);
+	info->num_rx_descs = vtnet_netmap_rx_slots(sc);
+	info->rx_buf_maxsize = NETMAP_BUF_SIZE(na);
+
+	return 0;
+}
 static void
-vtnet_netmap_attach(struct SOFTC_T *sc)
+vtnet_netmap_attach(struct vtnet_softc *sc)
 {
 	struct netmap_adapter na;

Context not available.

 	bzero(&na, sizeof(na));
 
 	na.ifp = sc->vtnet_ifp;
-	na.num_tx_desc = 1024;	// sc->vtnet_rx_nmbufs;
-	na.num_rx_desc = 1024;	// sc->vtnet_rx_nmbufs;
+	na.na_flags = 0;
+	na.num_tx_desc = vtnet_netmap_tx_slots(sc);
+	na.num_rx_desc = vtnet_netmap_rx_slots(sc);
+	na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
+	na.rx_buf_maxsize = 0;
 	na.nm_register = vtnet_netmap_reg;
 	na.nm_txsync = vtnet_netmap_txsync;
 	na.nm_rxsync = vtnet_netmap_rxsync;
 	na.nm_intr = vtnet_netmap_intr;
-	na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
-	D("max rings %d", sc->vtnet_max_vq_pairs);
+	na.nm_config = vtnet_netmap_config;
 
 	netmap_attach(&na);
 
-	D("virtio attached txq=%d, txd=%d rxq=%d, rxd=%d",
+	nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d\n",
 		na.num_tx_rings, na.num_tx_desc,
 		na.num_tx_rings, na.num_rx_desc);
 }
Context not available. |