Changeset View
Standalone View
usr.sbin/bhyve/pci_virtio_net.c
Show All 26 Lines | |||||
* | * | ||||
* $FreeBSD$ | * $FreeBSD$ | ||||
*/ | */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#ifndef WITHOUT_CAPSICUM | |||||
#include <sys/capsicum.h> | |||||
#endif | |||||
#include <sys/linker_set.h> | #include <sys/linker_set.h> | ||||
#include <sys/select.h> | #include <sys/select.h> | ||||
#include <sys/uio.h> | #include <sys/uio.h> | ||||
#include <sys/ioctl.h> | #include <sys/ioctl.h> | ||||
#include <net/ethernet.h> | #include <net/ethernet.h> | ||||
#ifndef NETMAP_WITH_LIBS | #include <net/if.h> /* IFNAMSIZ */ | ||||
#define NETMAP_WITH_LIBS | |||||
#endif | |||||
#include <net/netmap_user.h> | |||||
#ifndef WITHOUT_CAPSICUM | |||||
#include <capsicum_helpers.h> | |||||
#endif | |||||
#include <err.h> | #include <err.h> | ||||
#include <errno.h> | #include <errno.h> | ||||
#include <fcntl.h> | #include <fcntl.h> | ||||
#include <stdio.h> | #include <stdio.h> | ||||
#include <stdlib.h> | #include <stdlib.h> | ||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include <strings.h> | #include <strings.h> | ||||
#include <unistd.h> | #include <unistd.h> | ||||
#include <assert.h> | #include <assert.h> | ||||
#include <md5.h> | |||||
#include <pthread.h> | #include <pthread.h> | ||||
#include <pthread_np.h> | #include <pthread_np.h> | ||||
#include <sysexits.h> | |||||
#include "bhyverun.h" | #include "bhyverun.h" | ||||
#include "pci_emul.h" | #include "pci_emul.h" | ||||
#include "mevent.h" | #include "mevent.h" | ||||
#include "virtio.h" | #include "virtio.h" | ||||
#include "net_utils.h" | #include "net_utils.h" | ||||
#include "net_backends.h" | |||||
#define VTNET_RINGSZ 1024 | #define VTNET_RINGSZ 1024 | ||||
#define VTNET_MAXSEGS 256 | #define VTNET_MAXSEGS 256 | ||||
/* | |||||
* Host capabilities. Note that we only offer a few of these. | |||||
*/ | |||||
#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */ | |||||
#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */ | |||||
#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */ | |||||
#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */ | |||||
#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */ | |||||
#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */ | |||||
#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */ | |||||
#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */ | |||||
#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */ | |||||
#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */ | |||||
#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */ | |||||
#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */ | |||||
#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */ | |||||
#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */ | |||||
#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */ | |||||
#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */ | |||||
#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */ | |||||
#define VIRTIO_NET_F_GUEST_ANNOUNCE \ | |||||
(1 << 21) /* guest can send gratuitous pkts */ | |||||
#define VTNET_S_HOSTCAPS \ | #define VTNET_S_HOSTCAPS \ | ||||
( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \ | ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \ | ||||
VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) | VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) | ||||
/* | /* | ||||
* PCI config-space "registers" | * PCI config-space "registers" | ||||
*/ | */ | ||||
struct virtio_net_config { | struct virtio_net_config { | ||||
uint8_t mac[6]; | uint8_t mac[6]; | ||||
uint16_t status; | uint16_t status; | ||||
} __packed; | } __packed; | ||||
/* | /* | ||||
* Queue definitions. | * Queue definitions. | ||||
*/ | */ | ||||
#define VTNET_RXQ 0 | #define VTNET_RXQ 0 | ||||
#define VTNET_TXQ 1 | #define VTNET_TXQ 1 | ||||
#define VTNET_CTLQ 2 /* NB: not yet supported */ | #define VTNET_CTLQ 2 /* NB: not yet supported */ | ||||
#define VTNET_MAXQ 3 | #define VTNET_MAXQ 3 | ||||
/* | /* | ||||
* Fixed network header size | |||||
*/ | |||||
struct virtio_net_rxhdr { | |||||
uint8_t vrh_flags; | |||||
uint8_t vrh_gso_type; | |||||
uint16_t vrh_hdr_len; | |||||
uint16_t vrh_gso_size; | |||||
uint16_t vrh_csum_start; | |||||
uint16_t vrh_csum_offset; | |||||
uint16_t vrh_bufs; | |||||
} __packed; | |||||
/* | |||||
* Debug printf | * Debug printf | ||||
*/ | */ | ||||
static int pci_vtnet_debug; | static int pci_vtnet_debug; | ||||
#define DPRINTF(params) if (pci_vtnet_debug) printf params | #define DPRINTF(params) if (pci_vtnet_debug) printf params | ||||
#define WPRINTF(params) printf params | #define WPRINTF(params) printf params | ||||
/* | /* | ||||
* Per-device softc | * Per-device softc | ||||
*/ | */ | ||||
struct pci_vtnet_softc { | struct pci_vtnet_softc { | ||||
struct virtio_softc vsc_vs; | struct virtio_softc vsc_vs; | ||||
struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; | struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; | ||||
pthread_mutex_t vsc_mtx; | pthread_mutex_t vsc_mtx; | ||||
struct mevent *vsc_mevp; | |||||
int vsc_tapfd; | net_backend_t *vsc_be; | ||||
struct nm_desc *vsc_nmd; | |||||
int vsc_rx_ready; | int vsc_rx_ready; | ||||
int resetting; /* protected by tx_mtx */ | int resetting; /* protected by tx_mtx */ | ||||
uint64_t vsc_features; /* negotiated features */ | uint64_t vsc_features; /* negotiated features */ | ||||
struct virtio_net_config vsc_config; | |||||
pthread_mutex_t rx_mtx; | pthread_mutex_t rx_mtx; | ||||
int rx_vhdrlen; | unsigned int rx_vhdrlen; | ||||
int rx_merge; /* merged rx bufs in use */ | int rx_merge; /* merged rx bufs in use */ | ||||
pthread_t tx_tid; | pthread_t tx_tid; | ||||
pthread_mutex_t tx_mtx; | pthread_mutex_t tx_mtx; | ||||
pthread_cond_t tx_cond; | pthread_cond_t tx_cond; | ||||
int tx_in_progress; | int tx_in_progress; | ||||
void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc); | struct virtio_net_config vsc_config; | ||||
void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov, | struct virtio_consts vsc_consts; | ||||
int iovcnt, int len); | |||||
}; | }; | ||||
static void pci_vtnet_reset(void *); | static void pci_vtnet_reset(void *); | ||||
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */ | /* static void pci_vtnet_notify(void *, struct vqueue_info *); */ | ||||
static int pci_vtnet_cfgread(void *, int, int, uint32_t *); | static int pci_vtnet_cfgread(void *, int, int, uint32_t *); | ||||
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); | static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); | ||||
static void pci_vtnet_neg_features(void *, uint64_t); | static void pci_vtnet_neg_features(void *, uint64_t); | ||||
Show All 39 Lines | pci_vtnet_reset(void *vsc) | ||||
*/ | */ | ||||
vi_reset_dev(&sc->vsc_vs); | vi_reset_dev(&sc->vsc_vs); | ||||
sc->resetting = 0; | sc->resetting = 0; | ||||
pthread_mutex_unlock(&sc->tx_mtx); | pthread_mutex_unlock(&sc->tx_mtx); | ||||
pthread_mutex_unlock(&sc->rx_mtx); | pthread_mutex_unlock(&sc->rx_mtx); | ||||
} | } | ||||
/* | |||||
* Called to send a buffer chain out to the tap device | |||||
*/ | |||||
static void | static void | ||||
pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, | pci_vtnet_rx(struct pci_vtnet_softc *sc) | ||||
int len) | |||||
{ | { | ||||
static char pad[60]; /* all zero bytes */ | struct iovec iov[VTNET_MAXSEGS + 1]; | ||||
if (sc->vsc_tapfd == -1) | |||||
return; | |||||
/* | |||||
* If the length is < 60, pad out to that and add the | |||||
* extra zero'd segment to the iov. It is guaranteed that | |||||
* there is always an extra iov available by the caller. | |||||
*/ | |||||
if (len < 60) { | |||||
iov[iovcnt].iov_base = pad; | |||||
iov[iovcnt].iov_len = 60 - len; | |||||
iovcnt++; | |||||
} | |||||
(void) writev(sc->vsc_tapfd, iov, iovcnt); | |||||
} | |||||
/* | |||||
* Called when there is read activity on the tap file descriptor. | |||||
* Each buffer posted by the guest is assumed to be able to contain | |||||
* an entire ethernet frame + rx header. | |||||
* MP note: the dummybuf is only used for discarding frames, so there | |||||
* is no need for it to be per-vtnet or locked. | |||||
*/ | |||||
static uint8_t dummybuf[2048]; | |||||
static __inline struct iovec * | |||||
rx_iov_trim(struct iovec *iov, int *niov, int tlen) | |||||
{ | |||||
struct iovec *riov; | |||||
/* XXX short-cut: assume first segment is >= tlen */ | |||||
assert(iov[0].iov_len >= tlen); | |||||
iov[0].iov_len -= tlen; | |||||
if (iov[0].iov_len == 0) { | |||||
assert(*niov > 1); | |||||
*niov -= 1; | |||||
riov = &iov[1]; | |||||
} else { | |||||
iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen); | |||||
riov = &iov[0]; | |||||
} | |||||
return (riov); | |||||
} | |||||
static void | |||||
pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) | |||||
{ | |||||
struct iovec iov[VTNET_MAXSEGS], *riov; | |||||
struct vqueue_info *vq; | struct vqueue_info *vq; | ||||
void *vrx; | |||||
int len, n; | int len, n; | ||||
uint16_t idx; | uint16_t idx; | ||||
/* | |||||
* Should never be called without a valid tap fd | |||||
*/ | |||||
assert(sc->vsc_tapfd != -1); | |||||
/* | |||||
* But, will be called when the rx ring hasn't yet | |||||
* been set up. | |||||
*/ | |||||
if (!sc->vsc_rx_ready) { | if (!sc->vsc_rx_ready) { | ||||
/* | /* | ||||
* The rx ring has not yet been set up. | |||||
* Drop the packet and try later. | * Drop the packet and try later. | ||||
*/ | */ | ||||
(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); | netbe_rx_discard(sc->vsc_be); | ||||
return; | return; | ||||
} | } | ||||
/* | /* | ||||
* Check for available rx buffers | * Check for available rx buffers | ||||
*/ | */ | ||||
vq = &sc->vsc_queues[VTNET_RXQ]; | vq = &sc->vsc_queues[VTNET_RXQ]; | ||||
if (!vq_has_descs(vq)) { | if (!vq_has_descs(vq)) { | ||||
/* | /* | ||||
* Drop the packet and try later. Interrupt on | * No available rx buffers. Drop the packet and try later. | ||||
* empty, if that's negotiated. | * Interrupt on empty, if that's negotiated. | ||||
*/ | */ | ||||
(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); | netbe_rx_discard(sc->vsc_be); | ||||
vq_endchains(vq, 1); | vq_endchains(vq, /*used_all_avail=*/1); | ||||
return; | return; | ||||
} | } | ||||
do { | do { | ||||
/* | /* | ||||
* Get descriptor chain. | * Get descriptor chain. | ||||
*/ | */ | ||||
n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); | n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); | ||||
assert(n >= 1 && n <= VTNET_MAXSEGS); | assert(n >= 1 && n <= VTNET_MAXSEGS); | ||||
/* | len = netbe_recv(sc->vsc_be, iov, n); | ||||
* Get a pointer to the rx header, and use the | |||||
* data immediately following it for the packet buffer. | |||||
*/ | |||||
vrx = iov[0].iov_base; | |||||
riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen); | |||||
len = readv(sc->vsc_tapfd, riov, n); | if (len <= 0) { | ||||
if (len < 0 && errno == EWOULDBLOCK) { | |||||
/* | /* | ||||
* No more packets, but still some avail ring | * No more packets (len == 0), or backend errored | ||||
* entries. Interrupt if needed/appropriate. | * (len < 0). Return unused available buffers. | ||||
*/ | */ | ||||
vq_retchain(vq); | vq_retchain(vq); | ||||
vq_endchains(vq, 0); | if (len == 0) { | ||||
/* Interrupt if needed/appropriate and stop. */ | |||||
vq_endchains(vq, /*used_all_avail=*/0); | |||||
return; | return; | ||||
} | } | ||||
/* | |||||
* The only valid field in the rx packet header is the | |||||
* number of buffers if merged rx bufs were negotiated. | |||||
*/ | |||||
memset(vrx, 0, sc->rx_vhdrlen); | |||||
if (sc->rx_merge) { | |||||
struct virtio_net_rxhdr *vrxh; | |||||
vrxh = vrx; | |||||
vrxh->vrh_bufs = 1; | |||||
} | } | ||||
/* | /* Publish the info to the guest */ | ||||
afedorov: I think len < 0 must be handled:
```
if (len < 0) {
vq_endchains(vq, 0);
return… | |||||
Done Inline ActionsYou are right. I changed the code to handle len<=0 vmaffione: You are right. I changed the code to handle `len<=0`. | |||||
* Release this chain and handle more chains. | vq_relchain(vq, idx, (uint32_t)len); | ||||
*/ | |||||
vq_relchain(vq, idx, len + sc->rx_vhdrlen); | |||||
} while (vq_has_descs(vq)); | } while (vq_has_descs(vq)); | ||||
/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ | /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ | ||||
vq_endchains(vq, 1); | vq_endchains(vq, /*used_all_avail=*/1); | ||||
} | } | ||||
static __inline int | |||||
pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt) | |||||
{ | |||||
int r, i; | |||||
int len = 0; | |||||
for (r = nmd->cur_tx_ring; ; ) { | |||||
struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r); | |||||
uint32_t cur, idx; | |||||
char *buf; | |||||
if (nm_ring_empty(ring)) { | |||||
r++; | |||||
if (r > nmd->last_tx_ring) | |||||
r = nmd->first_tx_ring; | |||||
if (r == nmd->cur_tx_ring) | |||||
break; | |||||
continue; | |||||
} | |||||
cur = ring->cur; | |||||
idx = ring->slot[cur].buf_idx; | |||||
buf = NETMAP_BUF(ring, idx); | |||||
for (i = 0; i < iovcnt; i++) { | |||||
if (len + iov[i].iov_len > 2048) | |||||
break; | |||||
memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len); | |||||
len += iov[i].iov_len; | |||||
} | |||||
ring->slot[cur].len = len; | |||||
ring->head = ring->cur = nm_ring_next(ring, cur); | |||||
nmd->cur_tx_ring = r; | |||||
ioctl(nmd->fd, NIOCTXSYNC, NULL); | |||||
break; | |||||
} | |||||
return (len); | |||||
} | |||||
static __inline int | |||||
pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt) | |||||
{ | |||||
int len = 0; | |||||
int i = 0; | |||||
int r; | |||||
for (r = nmd->cur_rx_ring; ; ) { | |||||
struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r); | |||||
uint32_t cur, idx; | |||||
char *buf; | |||||
size_t left; | |||||
if (nm_ring_empty(ring)) { | |||||
r++; | |||||
if (r > nmd->last_rx_ring) | |||||
r = nmd->first_rx_ring; | |||||
if (r == nmd->cur_rx_ring) | |||||
break; | |||||
continue; | |||||
} | |||||
cur = ring->cur; | |||||
idx = ring->slot[cur].buf_idx; | |||||
buf = NETMAP_BUF(ring, idx); | |||||
left = ring->slot[cur].len; | |||||
for (i = 0; i < iovcnt && left > 0; i++) { | |||||
if (iov[i].iov_len > left) | |||||
iov[i].iov_len = left; | |||||
memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len); | |||||
len += iov[i].iov_len; | |||||
left -= iov[i].iov_len; | |||||
} | |||||
ring->head = ring->cur = nm_ring_next(ring, cur); | |||||
nmd->cur_rx_ring = r; | |||||
ioctl(nmd->fd, NIOCRXSYNC, NULL); | |||||
break; | |||||
} | |||||
for (; i < iovcnt; i++) | |||||
iov[i].iov_len = 0; | |||||
return (len); | |||||
} | |||||
/* | /* | ||||
* Called to send a buffer chain out to the vale port | * Called when there is read activity on the backend file descriptor. | ||||
* Each buffer posted by the guest is assumed to be able to contain | |||||
* an entire ethernet frame + rx header. | |||||
*/ | */ | ||||
static void | static void | ||||
pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, | |||||
int len) | |||||
{ | |||||
static char pad[60]; /* all zero bytes */ | |||||
if (sc->vsc_nmd == NULL) | |||||
return; | |||||
/* | |||||
* If the length is < 60, pad out to that and add the | |||||
* extra zero'd segment to the iov. It is guaranteed that | |||||
* there is always an extra iov available by the caller. | |||||
*/ | |||||
if (len < 60) { | |||||
iov[iovcnt].iov_base = pad; | |||||
iov[iovcnt].iov_len = 60 - len; | |||||
iovcnt++; | |||||
} | |||||
(void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt); | |||||
} | |||||
static void | |||||
pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc) | |||||
{ | |||||
struct iovec iov[VTNET_MAXSEGS], *riov; | |||||
struct vqueue_info *vq; | |||||
void *vrx; | |||||
int len, n; | |||||
uint16_t idx; | |||||
/* | |||||
* Should never be called without a valid netmap descriptor | |||||
*/ | |||||
assert(sc->vsc_nmd != NULL); | |||||
/* | |||||
* But, will be called when the rx ring hasn't yet | |||||
* been set up. | |||||
*/ | |||||
if (!sc->vsc_rx_ready) { | |||||
/* | |||||
* Drop the packet and try later. | |||||
*/ | |||||
(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); | |||||
return; | |||||
} | |||||
/* | |||||
* Check for available rx buffers | |||||
*/ | |||||
vq = &sc->vsc_queues[VTNET_RXQ]; | |||||
if (!vq_has_descs(vq)) { | |||||
/* | |||||
* Drop the packet and try later. Interrupt on | |||||
* empty, if that's negotiated. | |||||
*/ | |||||
(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf); | |||||
vq_endchains(vq, 1); | |||||
return; | |||||
} | |||||
do { | |||||
/* | |||||
* Get descriptor chain. | |||||
*/ | |||||
n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); | |||||
assert(n >= 1 && n <= VTNET_MAXSEGS); | |||||
/* | |||||
* Get a pointer to the rx header, and use the | |||||
* data immediately following it for the packet buffer. | |||||
*/ | |||||
vrx = iov[0].iov_base; | |||||
riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen); | |||||
len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n); | |||||
if (len == 0) { | |||||
/* | |||||
* No more packets, but still some avail ring | |||||
* entries. Interrupt if needed/appropriate. | |||||
*/ | |||||
vq_retchain(vq); | |||||
vq_endchains(vq, 0); | |||||
return; | |||||
} | |||||
/* | |||||
* The only valid field in the rx packet header is the | |||||
* number of buffers if merged rx bufs were negotiated. | |||||
*/ | |||||
memset(vrx, 0, sc->rx_vhdrlen); | |||||
if (sc->rx_merge) { | |||||
struct virtio_net_rxhdr *vrxh; | |||||
vrxh = vrx; | |||||
vrxh->vrh_bufs = 1; | |||||
} | |||||
/* | |||||
* Release this chain and handle more chains. | |||||
*/ | |||||
vq_relchain(vq, idx, len + sc->rx_vhdrlen); | |||||
} while (vq_has_descs(vq)); | |||||
/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ | |||||
vq_endchains(vq, 1); | |||||
} | |||||
static void | |||||
pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) | pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) | ||||
{ | { | ||||
struct pci_vtnet_softc *sc = param; | struct pci_vtnet_softc *sc = param; | ||||
pthread_mutex_lock(&sc->rx_mtx); | pthread_mutex_lock(&sc->rx_mtx); | ||||
sc->pci_vtnet_rx(sc); | pci_vtnet_rx(sc); | ||||
pthread_mutex_unlock(&sc->rx_mtx); | pthread_mutex_unlock(&sc->rx_mtx); | ||||
} | } | ||||
/* Called on RX kick. */ | |||||
static void | static void | ||||
pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) | pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) | ||||
{ | { | ||||
struct pci_vtnet_softc *sc = vsc; | struct pci_vtnet_softc *sc = vsc; | ||||
/* | /* | ||||
* A qnotify means that the rx process can now begin | * A qnotify means that the rx process can now begin | ||||
*/ | */ | ||||
if (sc->vsc_rx_ready == 0) { | if (sc->vsc_rx_ready == 0) { | ||||
sc->vsc_rx_ready = 1; | sc->vsc_rx_ready = 1; | ||||
vq_kick_disable(vq); | vq_kick_disable(vq); | ||||
} | } | ||||
} | } | ||||
/* TX virtqueue processing, called by the TX thread. */ | |||||
static void | static void | ||||
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) | pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) | ||||
{ | { | ||||
struct iovec iov[VTNET_MAXSEGS + 1]; | struct iovec iov[VTNET_MAXSEGS + 1]; | ||||
int i, n; | |||||
int plen, tlen; | |||||
uint16_t idx; | uint16_t idx; | ||||
ssize_t len; | |||||
int n; | |||||
/* | /* | ||||
* Obtain chain of descriptors. The first one is | * Obtain chain of descriptors. The first descriptor also | ||||
* really the header descriptor, so we need to sum | * contains the virtio-net header. | ||||
* up two lengths: packet length and transfer length. | |||||
*/ | */ | ||||
n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); | n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); | ||||
assert(n >= 1 && n <= VTNET_MAXSEGS); | assert(n >= 1 && n <= VTNET_MAXSEGS); | ||||
plen = 0; | |||||
tlen = iov[0].iov_len; | |||||
for (i = 1; i < n; i++) { | |||||
plen += iov[i].iov_len; | |||||
tlen += iov[i].iov_len; | |||||
} | |||||
DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n)); | len = netbe_send(sc->vsc_be, iov, n); | ||||
sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen); | |||||
/* chain is processed, release it and set tlen */ | /* chain is processed, release it and set len */ | ||||
vq_relchain(vq, idx, tlen); | vq_relchain(vq, idx, len > 0 ? len : 0); | ||||
} | } | ||||
/* Called on TX kick. */ | |||||
static void | static void | ||||
pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) | pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) | ||||
{ | { | ||||
struct pci_vtnet_softc *sc = vsc; | struct pci_vtnet_softc *sc = vsc; | ||||
/* | /* | ||||
* Any ring entries to process? | * Any ring entries to process? | ||||
*/ | */ | ||||
Show All 13 Lines | |||||
*/ | */ | ||||
static void * | static void * | ||||
pci_vtnet_tx_thread(void *param) | pci_vtnet_tx_thread(void *param) | ||||
{ | { | ||||
struct pci_vtnet_softc *sc = param; | struct pci_vtnet_softc *sc = param; | ||||
struct vqueue_info *vq; | struct vqueue_info *vq; | ||||
int error; | int error; | ||||
{ | |||||
jhbUnsubmitted Done Inline ActionsThis seems unrelated (it belongs in a separate change) and is also odd style. Can you instead move this code to where the thread is created? I think that is more common in the existing bhyve source. jhb: This seems unrelated (it belongs in a separate change) and is also odd style. Can you instead move this code… | |||||
vmaffioneAuthorUnsubmitted Done Inline ActionsIndeed, this is a leftover from a previous iteration. Thanks. vmaffione: Indeed, this is a leftover from a previous iteration. Thanks. | |||||
struct pci_devinst *pi = sc->vsc_vs.vs_pi; | |||||
char tname[MAXCOMLEN + 1]; | |||||
snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, | |||||
pi->pi_func); | |||||
pthread_set_name_np(pthread_self(), tname); | |||||
} | |||||
vq = &sc->vsc_queues[VTNET_TXQ]; | vq = &sc->vsc_queues[VTNET_TXQ]; | ||||
/* | /* | ||||
* Let us wait till the tx queue pointers get initialised & | * Let us wait till the tx queue pointers get initialised & | ||||
* first tx signaled | * first tx signaled | ||||
*/ | */ | ||||
pthread_mutex_lock(&sc->tx_mtx); | pthread_mutex_lock(&sc->tx_mtx); | ||||
error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); | error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); | ||||
Show All 21 Lines | do { | ||||
* is found | * is found | ||||
*/ | */ | ||||
pci_vtnet_proctx(sc, vq); | pci_vtnet_proctx(sc, vq); | ||||
} while (vq_has_descs(vq)); | } while (vq_has_descs(vq)); | ||||
/* | /* | ||||
* Generate an interrupt if needed. | * Generate an interrupt if needed. | ||||
*/ | */ | ||||
vq_endchains(vq, 1); | vq_endchains(vq, /*used_all_avail=*/1); | ||||
pthread_mutex_lock(&sc->tx_mtx); | pthread_mutex_lock(&sc->tx_mtx); | ||||
} | } | ||||
} | } | ||||
#ifdef notyet | #ifdef notyet | ||||
static void | static void | ||||
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) | pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) | ||||
{ | { | ||||
DPRINTF(("vtnet: control qnotify!\n\r")); | DPRINTF(("vtnet: control qnotify!\n\r")); | ||||
} | } | ||||
#endif | #endif | ||||
static void | |||||
pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname) | |||||
{ | |||||
char tbuf[80]; | |||||
#ifndef WITHOUT_CAPSICUM | |||||
cap_rights_t rights; | |||||
#endif | |||||
strcpy(tbuf, "/dev/"); | |||||
strlcat(tbuf, devname, sizeof(tbuf)); | |||||
sc->pci_vtnet_rx = pci_vtnet_tap_rx; | |||||
sc->pci_vtnet_tx = pci_vtnet_tap_tx; | |||||
sc->vsc_tapfd = open(tbuf, O_RDWR); | |||||
if (sc->vsc_tapfd == -1) { | |||||
WPRINTF(("open of tap device %s failed\n", tbuf)); | |||||
return; | |||||
} | |||||
/* | |||||
* Set non-blocking and register for read | |||||
* notifications with the event loop | |||||
*/ | |||||
int opt = 1; | |||||
if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { | |||||
WPRINTF(("tap device O_NONBLOCK failed\n")); | |||||
close(sc->vsc_tapfd); | |||||
sc->vsc_tapfd = -1; | |||||
} | |||||
#ifndef WITHOUT_CAPSICUM | |||||
cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE); | |||||
if (caph_rights_limit(sc->vsc_tapfd, &rights) == -1) | |||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | |||||
#endif | |||||
sc->vsc_mevp = mevent_add(sc->vsc_tapfd, | |||||
EVF_READ, | |||||
pci_vtnet_rx_callback, | |||||
sc); | |||||
if (sc->vsc_mevp == NULL) { | |||||
WPRINTF(("Could not register event\n")); | |||||
close(sc->vsc_tapfd); | |||||
sc->vsc_tapfd = -1; | |||||
} | |||||
} | |||||
static void | |||||
pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname) | |||||
{ | |||||
sc->pci_vtnet_rx = pci_vtnet_netmap_rx; | |||||
sc->pci_vtnet_tx = pci_vtnet_netmap_tx; | |||||
sc->vsc_nmd = nm_open(ifname, NULL, 0, 0); | |||||
if (sc->vsc_nmd == NULL) { | |||||
WPRINTF(("open of netmap device %s failed\n", ifname)); | |||||
return; | |||||
} | |||||
sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd, | |||||
EVF_READ, | |||||
pci_vtnet_rx_callback, | |||||
sc); | |||||
if (sc->vsc_mevp == NULL) { | |||||
WPRINTF(("Could not register event\n")); | |||||
nm_close(sc->vsc_nmd); | |||||
sc->vsc_nmd = NULL; | |||||
} | |||||
} | |||||
static int | static int | ||||
pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) | pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) | ||||
{ | { | ||||
char tname[MAXCOMLEN + 1]; | |||||
struct pci_vtnet_softc *sc; | struct pci_vtnet_softc *sc; | ||||
char *devname; | char tname[MAXCOMLEN + 1]; | ||||
char *vtopts; | |||||
int mac_provided; | int mac_provided; | ||||
/* | |||||
* Allocate data structures for further virtio initializations. | |||||
* sc also contains a copy of vtnet_vi_consts, since capabilities | |||||
* change depending on the backend. | |||||
*/ | |||||
sc = calloc(1, sizeof(struct pci_vtnet_softc)); | sc = calloc(1, sizeof(struct pci_vtnet_softc)); | ||||
sc->vsc_consts = vtnet_vi_consts; | |||||
pthread_mutex_init(&sc->vsc_mtx, NULL); | pthread_mutex_init(&sc->vsc_mtx, NULL); | ||||
vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues); | |||||
sc->vsc_vs.vs_mtx = &sc->vsc_mtx; | |||||
sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; | sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; | ||||
sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; | sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; | ||||
sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; | sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; | ||||
sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; | sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; | ||||
#ifdef notyet | #ifdef notyet | ||||
sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; | sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; | ||||
sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; | sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; | ||||
#endif | #endif | ||||
/* | /* | ||||
* Attempt to open the tap device and read the MAC address | * Attempt to open the backend device and read the MAC address | ||||
* if specified | * if specified. | ||||
*/ | */ | ||||
mac_provided = 0; | mac_provided = 0; | ||||
sc->vsc_tapfd = -1; | |||||
sc->vsc_nmd = NULL; | |||||
if (opts != NULL) { | if (opts != NULL) { | ||||
char *devname; | |||||
char *vtopts; | |||||
int err; | int err; | ||||
devname = vtopts = strdup(opts); | devname = vtopts = strdup(opts); | ||||
(void) strsep(&vtopts, ","); | (void) strsep(&vtopts, ","); | ||||
if (vtopts != NULL) { | if (vtopts != NULL) { | ||||
err = net_parsemac(vtopts, sc->vsc_config.mac); | err = net_parsemac(vtopts, sc->vsc_config.mac); | ||||
if (err != 0) { | if (err != 0) { | ||||
free(devname); | free(devname); | ||||
return (err); | return (err); | ||||
} | } | ||||
mac_provided = 1; | mac_provided = 1; | ||||
} | } | ||||
if (strncmp(devname, "vale", 4) == 0) | err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback, | ||||
pci_vtnet_netmap_setup(sc, devname); | sc); | ||||
if (strncmp(devname, "tap", 3) == 0 || | |||||
strncmp(devname, "vmnet", 5) == 0) | |||||
pci_vtnet_tap_setup(sc, devname); | |||||
free(devname); | free(devname); | ||||
if (err) | |||||
return (err); | |||||
sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be); | |||||
Done Inline ActionsNetmap adds TSO flags to the VTNET_S_HOSTCAPS flags, but the latter contains the VIRTIO_NET_F_MRG_RXBUF flag, which is not yet supported. I was able to run this patch only by removing VIRTIO_NET_F_MRG_RXBUF from VTNET_S_HOSTCAPS. afedorov: Netmap adds TSO flags to the VTNET_S_HOSTCAPS flags, but the latter contains the… | |||||
Done Inline ActionsIndeed, my original patch had VIRTIO_NET_F_MRG_RXBUF removed. Thanks for spotting. vmaffione: Indeed, my original patch had `VIRTIO_NET_F_MRG_RXBUF` removed. Thanks for spotting.
With the… | |||||
} | } | ||||
if (!mac_provided) { | if (!mac_provided) { | ||||
net_genmac(pi, sc->vsc_config.mac); | net_genmac(pi, sc->vsc_config.mac); | ||||
} | } | ||||
/* initialize config space */ | /* initialize config space */ | ||||
pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); | pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); | ||||
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); | pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); | ||||
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); | pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); | ||||
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); | pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); | ||||
pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); | pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); | ||||
/* Link is up if we managed to open tap device or vale port. */ | /* Link is up if we managed to open backend device. */ | ||||
sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0 || | sc->vsc_config.status = (opts == NULL || sc->vsc_be); | ||||
sc->vsc_nmd != NULL); | |||||
vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues); | |||||
sc->vsc_vs.vs_mtx = &sc->vsc_mtx; | |||||
/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ | /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ | ||||
if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) | if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) | ||||
return (1); | return (1); | ||||
/* use BAR 0 to map config regs in IO space */ | /* use BAR 0 to map config regs in IO space */ | ||||
vi_set_io_bar(&sc->vsc_vs, 0); | vi_set_io_bar(&sc->vsc_vs, 0); | ||||
sc->resetting = 0; | sc->resetting = 0; | ||||
sc->rx_merge = 1; | sc->rx_merge = 1; | ||||
sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); | sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); | ||||
pthread_mutex_init(&sc->rx_mtx, NULL); | pthread_mutex_init(&sc->rx_mtx, NULL); | ||||
/* | /* | ||||
* Initialize tx semaphore & spawn TX processing thread. | * Initialize tx semaphore & spawn TX processing thread. | ||||
* As of now, only one thread for TX desc processing is | * As of now, only one thread for TX desc processing is | ||||
* spawned. | * spawned. | ||||
*/ | */ | ||||
sc->tx_in_progress = 0; | sc->tx_in_progress = 0; | ||||
pthread_mutex_init(&sc->tx_mtx, NULL); | pthread_mutex_init(&sc->tx_mtx, NULL); | ||||
pthread_cond_init(&sc->tx_cond, NULL); | pthread_cond_init(&sc->tx_cond, NULL); | ||||
pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); | pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); | ||||
snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, | snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, | ||||
pi->pi_func); | pi->pi_func); | ||||
pthread_set_name_np(sc->tx_tid, tname); | pthread_set_name_np(sc->tx_tid, tname); | ||||
jhbUnsubmitted Done Inline ActionsHmm, the name is already set here? jhb: Hmm, the name is already set here? | |||||
return (0); | return (0); | ||||
} | } | ||||
static int | static int | ||||
pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) | pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) | ||||
{ | { | ||||
struct pci_vtnet_softc *sc = vsc; | struct pci_vtnet_softc *sc = vsc; | ||||
void *ptr; | void *ptr; | ||||
if (offset < 6) { | if (offset < (int)sizeof(sc->vsc_config.mac)) { | ||||
assert(offset + size <= 6); | assert(offset + size <= (int)sizeof(sc->vsc_config.mac)); | ||||
/* | /* | ||||
* The driver is allowed to change the MAC address | * The driver is allowed to change the MAC address | ||||
*/ | */ | ||||
ptr = &sc->vsc_config.mac[offset]; | ptr = &sc->vsc_config.mac[offset]; | ||||
memcpy(ptr, &value, size); | memcpy(ptr, &value, size); | ||||
} else { | } else { | ||||
/* silently ignore other writes */ | /* silently ignore other writes */ | ||||
DPRINTF(("vtnet: write to readonly reg %d\n\r", offset)); | DPRINTF(("vtnet: write to readonly reg %d\n\r", offset)); | ||||
Show All 15 Lines | |||||
static void | static void | ||||
pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) | pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) | ||||
{ | { | ||||
struct pci_vtnet_softc *sc = vsc; | struct pci_vtnet_softc *sc = vsc; | ||||
sc->vsc_features = negotiated_features; | sc->vsc_features = negotiated_features; | ||||
if (!(sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF)) { | if (!(negotiated_features & VIRTIO_NET_F_MRG_RXBUF)) { | ||||
sc->rx_merge = 0; | sc->rx_merge = 0; | ||||
/* non-merge rx header is 2 bytes shorter */ | /* Without mergeable rx buffers, virtio-net header is 2 | ||||
sc->rx_vhdrlen -= 2; | * bytes shorter than sizeof(struct virtio_net_rxhdr). */ | ||||
sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; | |||||
} | } | ||||
/* Tell the backend to enable some capabilities it has advertised. */ | |||||
netbe_set_cap(sc->vsc_be, negotiated_features, sc->rx_vhdrlen); | |||||
} | } | ||||
struct pci_devemu pci_de_vnet = { | static struct pci_devemu pci_de_vnet = { | ||||
.pe_emu = "virtio-net", | .pe_emu = "virtio-net", | ||||
.pe_init = pci_vtnet_init, | .pe_init = pci_vtnet_init, | ||||
.pe_barwrite = vi_pci_write, | .pe_barwrite = vi_pci_write, | ||||
.pe_barread = vi_pci_read | .pe_barread = vi_pci_read | ||||
}; | }; | ||||
PCI_EMUL_SET(pci_de_vnet); | PCI_EMUL_SET(pci_de_vnet); |
I think len < 0 must be handled.
Otherwise, we call vq_relchain(vq, idx, (uint32_t)len) with an incorrect length (a negative value converted to uint32_t) and increment used_idx.