Changeset View
Changeset View
Standalone View
Standalone View
usr.sbin/bhyve/virtio.c
Show All 30 Lines | |||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/uio.h> | #include <sys/uio.h> | ||||
#include <machine/atomic.h> | #include <machine/atomic.h> | ||||
#include <machine/vmm_snapshot.h> | #include <machine/vmm_snapshot.h> | ||||
#include <dev/virtio/pci/virtio_pci_legacy_var.h> | |||||
#include <stdio.h> | #include <stdio.h> | ||||
#include <stdint.h> | #include <stdint.h> | ||||
#include <pthread.h> | #include <pthread.h> | ||||
#include <pthread_np.h> | #include <pthread_np.h> | ||||
#include "bhyverun.h" | #include "bhyverun.h" | ||||
#include "debug.h" | #include "debug.h" | ||||
#include "pci_emul.h" | #include "pci_emul.h" | ||||
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines | |||||
* Set I/O BAR (usually 0) to map PCI config registers. | * Set I/O BAR (usually 0) to map PCI config registers. | ||||
*/ | */ | ||||
void | void | ||||
vi_set_io_bar(struct virtio_softc *vs, int barnum) | vi_set_io_bar(struct virtio_softc *vs, int barnum) | ||||
{ | { | ||||
size_t size; | size_t size; | ||||
/* | /* | ||||
* ??? should we use CFG0 if MSI-X is disabled? | * ??? should we use VIRTIO_PCI_CONFIG_OFF(0) if MSI-X is disabled? | ||||
* Existing code did not... | * Existing code did not... | ||||
*/ | */ | ||||
size = VTCFG_R_CFG1 + vs->vs_vc->vc_cfgsize; | size = VIRTIO_PCI_CONFIG_OFF(1) + vs->vs_vc->vc_cfgsize; | ||||
pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_IO, size); | pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_IO, size); | ||||
} | } | ||||
/* | /* | ||||
* Initialize MSI-X vector capabilities if we're to use MSI-X, | * Initialize MSI-X vector capabilities if we're to use MSI-X, | ||||
* or MSI capabilities if not. | * or MSI capabilities if not. | ||||
* | * | ||||
* We assume we want one MSI-X vector per queue, here, plus one | * We assume we want one MSI-X vector per queue, here, plus one | ||||
Show All 35 Lines | vi_vq_init(struct virtio_softc *vs, uint32_t pfn) | ||||
struct vqueue_info *vq; | struct vqueue_info *vq; | ||||
uint64_t phys; | uint64_t phys; | ||||
size_t size; | size_t size; | ||||
char *base; | char *base; | ||||
vq = &vs->vs_queues[vs->vs_curq]; | vq = &vs->vs_queues[vs->vs_curq]; | ||||
vq->vq_pfn = pfn; | vq->vq_pfn = pfn; | ||||
phys = (uint64_t)pfn << VRING_PFN; | phys = (uint64_t)pfn << VRING_PFN; | ||||
size = vring_size(vq->vq_qsize); | size = vring_size_aligned(vq->vq_qsize); | ||||
base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size); | base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size); | ||||
/* First page(s) are descriptors... */ | /* First page(s) are descriptors... */ | ||||
vq->vq_desc = (struct virtio_desc *)base; | vq->vq_desc = (struct vring_desc *)base; | ||||
base += vq->vq_qsize * sizeof(struct virtio_desc); | base += vq->vq_qsize * sizeof(struct vring_desc); | ||||
/* ... immediately followed by "avail" ring (entirely uint16_t's) */ | /* ... immediately followed by "avail" ring (entirely uint16_t's) */ | ||||
vq->vq_avail = (struct vring_avail *)base; | vq->vq_avail = (struct vring_avail *)base; | ||||
base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t); | base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t); | ||||
/* Then it's rounded up to the next page... */ | /* Then it's rounded up to the next page... */ | ||||
base = (char *)roundup2((uintptr_t)base, VRING_ALIGN); | base = (char *)roundup2((uintptr_t)base, VRING_ALIGN); | ||||
/* ... and the last page(s) are the used ring. */ | /* ... and the last page(s) are the used ring. */ | ||||
vq->vq_used = (struct vring_used *)base; | vq->vq_used = (struct vring_used *)base; | ||||
/* Mark queue as allocated, and start at 0 when we use it. */ | /* Mark queue as allocated, and start at 0 when we use it. */ | ||||
vq->vq_flags = VQ_ALLOC; | vq->vq_flags = VQ_ALLOC; | ||||
vq->vq_last_avail = 0; | vq->vq_last_avail = 0; | ||||
vq->vq_next_used = 0; | vq->vq_next_used = 0; | ||||
vq->vq_save_used = 0; | vq->vq_save_used = 0; | ||||
} | } | ||||
/*
 * Helper inline for vq_getchain(): record the i'th "real"
 * descriptor into iov[i]/flags[i], translating the guest-physical
 * buffer address to a host pointer.  Silently drops entries past
 * n_iov so the caller can probe how many descriptors exist with a
 * short (or zero-length) iov array.
 */
static inline void
_vq_record(int i, volatile struct vring_desc *vd, struct vmctx *ctx,
	   struct iovec *iov, int n_iov, uint16_t *flags) {

	/* Out of caller-supplied slots: keep counting, record nothing. */
	if (i >= n_iov)
		return;
	iov[i].iov_base = paddr_guest2host(ctx, vd->addr, vd->len);
	iov[i].iov_len = vd->len;
	/* flags array is optional; used by callers that check WRITE bits. */
	if (flags != NULL)
		flags[i] = vd->flags;
}
#define VQ_MAX_DESCRIPTORS	512	/* see below */

/*
 * Examine the chain of descriptors starting at the "next one" to
 * make sure that they describe a sensible request.  If so, return
 * the number of "real" descriptors that would be needed/used in
 * acting on this request.  This may be smaller than the number of
 * available descriptors, e.g., if there are two available but
 * they are two separate requests, this just returns 1.  Or, it
 * may be larger: if there are indirect descriptors involved,
 * there may only be one descriptor available but it may be an
 * indirect pointing to eight more.  We return 8 in this case,
 * i.e., we do not count the indirect descriptors, only the "real"
 * ones.
 *
 * Basically, this vets the "flags" and "next" field of each
 * descriptor and tells you how many are involved.  Since some may
 * be indirect, this also needs the vmctx (in the pci_devinst
 * at vs->vs_pi) so that it can find indirect descriptors.
 *
 * As we process each descriptor, we copy and adjust it (guest to
 * host address wise, also using the vmtctx) into the given iov[]
 * array (of the given size).  If the array overflows, we stop
 * placing values into the array but keep processing descriptors,
 * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1.
 * So you, the caller, must not assume that iov[] is as big as the
 * return value (you can process the same thing twice to allocate
 * a larger iov array if needed, or supply a zero length to find
 * out how much space is needed).
 *
 * If you want to verify the WRITE flag on each descriptor, pass a
 * non-NULL "flags" pointer to an array of "uint16_t" of the same size
 * as n_iov and we'll copy each "flags" field after unwinding any
 * indirects.
 *
 * If some descriptor(s) are invalid, this prints a diagnostic message
 * and returns -1.  If no descriptors are ready now it simply returns 0.
 *
 * You are assumed to have done a vq_ring_ready() if needed (note
 * that vq_has_descs() does one).
 */
int
vq_getchain(struct vqueue_info *vq, uint16_t *pidx,
	    struct iovec *iov, int n_iov, uint16_t *flags)
{
	int i;
	u_int ndesc, n_indir;
	u_int idx, next;
	volatile struct vring_desc *vdir, *vindir, *vp;
	struct vmctx *ctx;
	struct virtio_softc *vs;
	const char *name;

	vs = vq->vq_vs;
	name = vs->vs_vc->vc_name;

	/*
	 * Note: it's the responsibility of the guest not to
	 * update vq->vq_avail->idx until all of the descriptors
	 * the guest has written are valid (including all their
	 * "next" fields and "flags").
	 *
	 * Compute (vq_avail->idx - last_avail) in integers mod 2**16.  This is
	 * the number of descriptors the device has made available
	 * since the last time we updated vq->vq_last_avail.
	 *
	 * We just need to do the subtraction as an unsigned int,
	 * then trim off excess bits.
	 */
	idx = vq->vq_last_avail;
	ndesc = (uint16_t)((u_int)vq->vq_avail->idx - idx);
	if (ndesc == 0)
		return (0);
	if (ndesc > vq->vq_qsize) {
		/* XXX need better way to diagnose issues */
		EPRINTLN(
		    "%s: ndesc (%u) out of range, driver confused?",
		    name, (u_int)ndesc);
		return (-1);
	}

	/*
	 * Now count/parse "involved" descriptors starting from
	 * the head of the chain.
	 *
	 * To prevent loops, we could be more complicated and
	 * check whether we're re-visiting a previously visited
	 * index, but we just abort if the count gets excessive.
	 */
	ctx = vs->vs_pi->pi_vmctx;
	/* Head of the chain; consume one avail-ring slot. */
	*pidx = next = vq->vq_avail->ring[idx & (vq->vq_qsize - 1)];
	vq->vq_last_avail++;
	for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->next) {
		/* Guest-supplied index: validate before dereferencing. */
		if (next >= vq->vq_qsize) {
			EPRINTLN(
			    "%s: descriptor index %u out of range, "
			    "driver confused?",
			    name, next);
			return (-1);
		}
		vdir = &vq->vq_desc[next];
		if ((vdir->flags & VRING_DESC_F_INDIRECT) == 0) {
			_vq_record(i, vdir, ctx, iov, n_iov, flags);
			i++;
		} else if ((vs->vs_vc->vc_hv_caps &
		    VIRTIO_RING_F_INDIRECT_DESC) == 0) {
			/* Guest used INDIRECT without negotiating it. */
			EPRINTLN(
			    "%s: descriptor has forbidden INDIRECT flag, "
			    "driver confused?",
			    name);
			return (-1);
		} else {
			/*
			 * 16 here is presumably sizeof(struct vring_desc);
			 * the indirect table length must be a non-zero
			 * multiple of it.  (NOTE(review): confirm against
			 * virtio_ring.h.)
			 */
			n_indir = vdir->len / 16;
			if ((vdir->len & 0xf) || n_indir == 0) {
				EPRINTLN(
				    "%s: invalid indir len 0x%x, "
				    "driver confused?",
				    name, (u_int)vdir->len);
				return (-1);
			}
			vindir = paddr_guest2host(ctx,
			    vdir->addr, vdir->len);
			/*
			 * Indirects start at the 0th, then follow
			 * their own embedded "next"s until those run
			 * out.  Each one's indirect flag must be off
			 * (we don't really have to check, could just
			 * ignore errors...).
			 */
			next = 0;
			for (;;) {
				vp = &vindir[next];
				if (vp->flags & VRING_DESC_F_INDIRECT) {
					EPRINTLN(
					    "%s: indirect desc has INDIR flag,"
					    " driver confused?",
					    name);
					return (-1);
				}
				_vq_record(i, vp, ctx, iov, n_iov, flags);
				/* Same global cap guards indirect loops. */
				if (++i > VQ_MAX_DESCRIPTORS)
					goto loopy;
				if ((vp->flags & VRING_DESC_F_NEXT) == 0)
					break;
				next = vp->next;
				if (next >= n_indir) {
					EPRINTLN(
					    "%s: invalid next %u > %u, "
					    "driver confused?",
					    name, (u_int)next, n_indir);
					return (-1);
				}
			}
		}
		/* End of chain: return count of "real" descriptors. */
		if ((vdir->flags & VRING_DESC_F_NEXT) == 0)
			return (i);
	}
loopy:
	/* Chain never terminated within the cap: assume a cycle. */
	EPRINTLN(
	    "%s: descriptor loop? count > %d - driver confused?",
	    name, i);
	return (-1);
}
Show All 10 Lines | vq_retchains(struct vqueue_info *vq, uint16_t n_chains) | ||||
vq->vq_last_avail -= n_chains; | vq->vq_last_avail -= n_chains; | ||||
} | } | ||||
/*
 * Stage one completed chain in the "used" ring without yet making it
 * visible to the guest: fill in the next used-ring element with the
 * chain's head descriptor index and the number of bytes written.
 * vq_relchain_publish() later exposes the updated index.
 */
void
vq_relchain_prepare(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
{
	volatile struct vring_used *vuh;
	volatile struct vring_used_elem *vue;
	uint16_t mask;

	/*
	 * Notes:
	 *  - mask is N-1 where N is a power of 2 so computes x % N
	 *  - vuh points to the "used" data shared with guest
	 *  - vue points to the "used" ring entry we want to update
	 */
	mask = vq->vq_qsize - 1;
	vuh = vq->vq_used;

	vue = &vuh->ring[vq->vq_next_used++ & mask];
	vue->id = idx;		/* head descriptor index of the chain */
	vue->len = iolen;	/* bytes the device wrote into the chain */
}
/*
 * Publish all previously prepared used-ring entries to the guest by
 * storing the private vq_next_used counter into the shared used index.
 */
void
vq_relchain_publish(struct vqueue_info *vq)
{
	/*
	 * Ensure the used descriptor is visible before updating the index.
	 * This is necessary on ISAs with memory ordering less strict than x86
	 * (and even on x86 to act as a compiler barrier).
	 */
	atomic_thread_fence_rel();
	vq->vq_used->idx = vq->vq_next_used;
}
/* | /* | ||||
* Return specified request chain to the guest, setting its I/O length | * Return specified request chain to the guest, setting its I/O length | ||||
* to the provided value. | * to the provided value. | ||||
* | * | ||||
* (This chain is the one you handled when you called vq_getchain() | * (This chain is the one you handled when you called vq_getchain() | ||||
* and used its positive return value.) | * and used its positive return value.) | ||||
Show All 33 Lines | vq_endchains(struct vqueue_info *vq, int used_all_avail) | ||||
* Otherwise interrupt is generated if we added "used" entries, | * Otherwise interrupt is generated if we added "used" entries, | ||||
* but suppressed by VRING_AVAIL_F_NO_INTERRUPT. | * but suppressed by VRING_AVAIL_F_NO_INTERRUPT. | ||||
* | * | ||||
* In any case, though, if NOTIFY_ON_EMPTY is set and the | * In any case, though, if NOTIFY_ON_EMPTY is set and the | ||||
* entire avail was processed, we need to interrupt always. | * entire avail was processed, we need to interrupt always. | ||||
*/ | */ | ||||
vs = vq->vq_vs; | vs = vq->vq_vs; | ||||
old_idx = vq->vq_save_used; | old_idx = vq->vq_save_used; | ||||
vq->vq_save_used = new_idx = vq->vq_used->vu_idx; | vq->vq_save_used = new_idx = vq->vq_used->idx; | ||||
/* | /* | ||||
* Use full memory barrier between vu_idx store from preceding | * Use full memory barrier between "idx" store from preceding | ||||
* vq_relchain() call and the loads from VQ_USED_EVENT_IDX() or | * vq_relchain() call and the loads from VQ_USED_EVENT_IDX() or | ||||
* va_flags below. | * "flags" field below. | ||||
*/ | */ | ||||
atomic_thread_fence_seq_cst(); | atomic_thread_fence_seq_cst(); | ||||
if (used_all_avail && | if (used_all_avail && | ||||
(vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY)) | (vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY)) | ||||
intr = 1; | intr = 1; | ||||
else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) { | else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) { | ||||
event_idx = VQ_USED_EVENT_IDX(vq); | event_idx = VQ_USED_EVENT_IDX(vq); | ||||
/* | /* | ||||
* This calculation is per docs and the kernel | * This calculation is per docs and the kernel | ||||
* (see src/sys/dev/virtio/virtio_ring.h). | * (see src/sys/dev/virtio/virtio_ring.h). | ||||
*/ | */ | ||||
intr = (uint16_t)(new_idx - event_idx - 1) < | intr = (uint16_t)(new_idx - event_idx - 1) < | ||||
(uint16_t)(new_idx - old_idx); | (uint16_t)(new_idx - old_idx); | ||||
} else { | } else { | ||||
intr = new_idx != old_idx && | intr = new_idx != old_idx && | ||||
!(vq->vq_avail->va_flags & VRING_AVAIL_F_NO_INTERRUPT); | !(vq->vq_avail->flags & VRING_AVAIL_F_NO_INTERRUPT); | ||||
} | } | ||||
if (intr) | if (intr) | ||||
vq_interrupt(vs, vq); | vq_interrupt(vs, vq); | ||||
} | } | ||||
/*
 * Table of the legacy virtio PCI config registers: offset, access
 * size, read-only flag, and a name for diagnostics.
 *
 * Note: these are in sorted order to make for a fast search
 * (vi_find_cr() below relies on cr_offset being ascending).
 */
static struct config_reg {
	uint16_t	cr_offset;	/* register offset */
	uint8_t		cr_size;	/* size (bytes) */
	uint8_t		cr_ro;		/* true => reg is read only */
	const char	*cr_name;	/* name of reg */
} config_regs[] = {
	{ VIRTIO_PCI_HOST_FEATURES,	4, 1, "HOST_FEATURES" },
	{ VIRTIO_PCI_GUEST_FEATURES,	4, 0, "GUEST_FEATURES" },
	{ VIRTIO_PCI_QUEUE_PFN,		4, 0, "QUEUE_PFN" },
	{ VIRTIO_PCI_QUEUE_NUM,		2, 1, "QUEUE_NUM" },
	{ VIRTIO_PCI_QUEUE_SEL,		2, 0, "QUEUE_SEL" },
	{ VIRTIO_PCI_QUEUE_NOTIFY,	2, 0, "QUEUE_NOTIFY" },
	{ VIRTIO_PCI_STATUS,		1, 0, "STATUS" },
	{ VIRTIO_PCI_ISR,		1, 0, "ISR" },
	{ VIRTIO_MSI_CONFIG_VECTOR,	2, 0, "CONFIG_VECTOR" },
	{ VIRTIO_MSI_QUEUE_VECTOR,	2, 0, "QUEUE_VECTOR" },
};
static inline struct config_reg * | static inline struct config_reg * | ||||
vi_find_cr(int offset) { | vi_find_cr(int offset) { | ||||
u_int hi, lo, mid; | u_int hi, lo, mid; | ||||
struct config_reg *cr; | struct config_reg *cr; | ||||
lo = 0; | lo = 0; | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, | ||||
vc = vs->vs_vc; | vc = vs->vs_vc; | ||||
name = vc->vc_name; | name = vc->vc_name; | ||||
value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff; | value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff; | ||||
if (size != 1 && size != 2 && size != 4) | if (size != 1 && size != 2 && size != 4) | ||||
goto bad; | goto bad; | ||||
if (pci_msix_enabled(pi)) | virtio_config_size = VIRTIO_PCI_CONFIG_OFF(pci_msix_enabled(pi)); | ||||
virtio_config_size = VTCFG_R_CFG1; | |||||
else | |||||
virtio_config_size = VTCFG_R_CFG0; | |||||
if (offset >= virtio_config_size) { | if (offset >= virtio_config_size) { | ||||
/* | /* | ||||
* Subtract off the standard size (including MSI-X | * Subtract off the standard size (including MSI-X | ||||
* registers if enabled) and dispatch to underlying driver. | * registers if enabled) and dispatch to underlying driver. | ||||
* If that fails, fall into general code. | * If that fails, fall into general code. | ||||
*/ | */ | ||||
newoff = offset - virtio_config_size; | newoff = offset - virtio_config_size; | ||||
Show All 17 Lines | if (cr != NULL) { | ||||
EPRINTLN( | EPRINTLN( | ||||
"%s: read from bad offset/size %jd/%d", | "%s: read from bad offset/size %jd/%d", | ||||
name, (uintmax_t)offset, size); | name, (uintmax_t)offset, size); | ||||
} | } | ||||
goto done; | goto done; | ||||
} | } | ||||
switch (offset) { | switch (offset) { | ||||
case VTCFG_R_HOSTCAP: | case VIRTIO_PCI_HOST_FEATURES: | ||||
value = vc->vc_hv_caps; | value = vc->vc_hv_caps; | ||||
break; | break; | ||||
case VTCFG_R_GUESTCAP: | case VIRTIO_PCI_GUEST_FEATURES: | ||||
value = vs->vs_negotiated_caps; | value = vs->vs_negotiated_caps; | ||||
break; | break; | ||||
case VTCFG_R_PFN: | case VIRTIO_PCI_QUEUE_PFN: | ||||
if (vs->vs_curq < vc->vc_nvq) | if (vs->vs_curq < vc->vc_nvq) | ||||
value = vs->vs_queues[vs->vs_curq].vq_pfn; | value = vs->vs_queues[vs->vs_curq].vq_pfn; | ||||
break; | break; | ||||
case VTCFG_R_QNUM: | case VIRTIO_PCI_QUEUE_NUM: | ||||
value = vs->vs_curq < vc->vc_nvq ? | value = vs->vs_curq < vc->vc_nvq ? | ||||
vs->vs_queues[vs->vs_curq].vq_qsize : 0; | vs->vs_queues[vs->vs_curq].vq_qsize : 0; | ||||
break; | break; | ||||
case VTCFG_R_QSEL: | case VIRTIO_PCI_QUEUE_SEL: | ||||
value = vs->vs_curq; | value = vs->vs_curq; | ||||
break; | break; | ||||
case VTCFG_R_QNOTIFY: | case VIRTIO_PCI_QUEUE_NOTIFY: | ||||
value = 0; /* XXX */ | value = 0; /* XXX */ | ||||
break; | break; | ||||
case VTCFG_R_STATUS: | case VIRTIO_PCI_STATUS: | ||||
value = vs->vs_status; | value = vs->vs_status; | ||||
break; | break; | ||||
case VTCFG_R_ISR: | case VIRTIO_PCI_ISR: | ||||
value = vs->vs_isr; | value = vs->vs_isr; | ||||
vs->vs_isr = 0; /* a read clears this flag */ | vs->vs_isr = 0; /* a read clears this flag */ | ||||
if (value) | if (value) | ||||
pci_lintr_deassert(pi); | pci_lintr_deassert(pi); | ||||
break; | break; | ||||
case VTCFG_R_CFGVEC: | case VIRTIO_MSI_CONFIG_VECTOR: | ||||
value = vs->vs_msix_cfg_idx; | value = vs->vs_msix_cfg_idx; | ||||
break; | break; | ||||
case VTCFG_R_QVEC: | case VIRTIO_MSI_QUEUE_VECTOR: | ||||
value = vs->vs_curq < vc->vc_nvq ? | value = vs->vs_curq < vc->vc_nvq ? | ||||
vs->vs_queues[vs->vs_curq].vq_msix_idx : | vs->vs_queues[vs->vs_curq].vq_msix_idx : | ||||
VIRTIO_MSI_NO_VECTOR; | VIRTIO_MSI_NO_VECTOR; | ||||
break; | break; | ||||
} | } | ||||
done: | done: | ||||
if (vs->vs_mtx) | if (vs->vs_mtx) | ||||
pthread_mutex_unlock(vs->vs_mtx); | pthread_mutex_unlock(vs->vs_mtx); | ||||
Show All 34 Lines | if (vs->vs_mtx) | ||||
pthread_mutex_lock(vs->vs_mtx); | pthread_mutex_lock(vs->vs_mtx); | ||||
vc = vs->vs_vc; | vc = vs->vs_vc; | ||||
name = vc->vc_name; | name = vc->vc_name; | ||||
if (size != 1 && size != 2 && size != 4) | if (size != 1 && size != 2 && size != 4) | ||||
goto bad; | goto bad; | ||||
if (pci_msix_enabled(pi)) | virtio_config_size = VIRTIO_PCI_CONFIG_OFF(pci_msix_enabled(pi)); | ||||
virtio_config_size = VTCFG_R_CFG1; | |||||
else | |||||
virtio_config_size = VTCFG_R_CFG0; | |||||
if (offset >= virtio_config_size) { | if (offset >= virtio_config_size) { | ||||
/* | /* | ||||
* Subtract off the standard size (including MSI-X | * Subtract off the standard size (including MSI-X | ||||
* registers if enabled) and dispatch to underlying driver. | * registers if enabled) and dispatch to underlying driver. | ||||
*/ | */ | ||||
newoff = offset - virtio_config_size; | newoff = offset - virtio_config_size; | ||||
max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000; | max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000; | ||||
Show All 21 Lines | if (cr != NULL) { | ||||
EPRINTLN( | EPRINTLN( | ||||
"%s: write to bad offset/size %jd/%d", | "%s: write to bad offset/size %jd/%d", | ||||
name, (uintmax_t)offset, size); | name, (uintmax_t)offset, size); | ||||
} | } | ||||
goto done; | goto done; | ||||
} | } | ||||
switch (offset) { | switch (offset) { | ||||
case VTCFG_R_GUESTCAP: | case VIRTIO_PCI_GUEST_FEATURES: | ||||
vs->vs_negotiated_caps = value & vc->vc_hv_caps; | vs->vs_negotiated_caps = value & vc->vc_hv_caps; | ||||
if (vc->vc_apply_features) | if (vc->vc_apply_features) | ||||
(*vc->vc_apply_features)(DEV_SOFTC(vs), | (*vc->vc_apply_features)(DEV_SOFTC(vs), | ||||
vs->vs_negotiated_caps); | vs->vs_negotiated_caps); | ||||
break; | break; | ||||
case VTCFG_R_PFN: | case VIRTIO_PCI_QUEUE_PFN: | ||||
if (vs->vs_curq >= vc->vc_nvq) | if (vs->vs_curq >= vc->vc_nvq) | ||||
goto bad_qindex; | goto bad_qindex; | ||||
vi_vq_init(vs, value); | vi_vq_init(vs, value); | ||||
break; | break; | ||||
case VTCFG_R_QSEL: | case VIRTIO_PCI_QUEUE_SEL: | ||||
/* | /* | ||||
* Note that the guest is allowed to select an | * Note that the guest is allowed to select an | ||||
* invalid queue; we just need to return a QNUM | * invalid queue; we just need to return a QNUM | ||||
* of 0 while the bad queue is selected. | * of 0 while the bad queue is selected. | ||||
*/ | */ | ||||
vs->vs_curq = value; | vs->vs_curq = value; | ||||
break; | break; | ||||
case VTCFG_R_QNOTIFY: | case VIRTIO_PCI_QUEUE_NOTIFY: | ||||
if (value >= vc->vc_nvq) { | if (value >= vc->vc_nvq) { | ||||
EPRINTLN("%s: queue %d notify out of range", | EPRINTLN("%s: queue %d notify out of range", | ||||
name, (int)value); | name, (int)value); | ||||
goto done; | goto done; | ||||
} | } | ||||
vq = &vs->vs_queues[value]; | vq = &vs->vs_queues[value]; | ||||
if (vq->vq_notify) | if (vq->vq_notify) | ||||
(*vq->vq_notify)(DEV_SOFTC(vs), vq); | (*vq->vq_notify)(DEV_SOFTC(vs), vq); | ||||
else if (vc->vc_qnotify) | else if (vc->vc_qnotify) | ||||
(*vc->vc_qnotify)(DEV_SOFTC(vs), vq); | (*vc->vc_qnotify)(DEV_SOFTC(vs), vq); | ||||
else | else | ||||
EPRINTLN( | EPRINTLN( | ||||
"%s: qnotify queue %d: missing vq/vc notify", | "%s: qnotify queue %d: missing vq/vc notify", | ||||
name, (int)value); | name, (int)value); | ||||
break; | break; | ||||
case VTCFG_R_STATUS: | case VIRTIO_PCI_STATUS: | ||||
vs->vs_status = value; | vs->vs_status = value; | ||||
if (value == 0) | if (value == 0) | ||||
(*vc->vc_reset)(DEV_SOFTC(vs)); | (*vc->vc_reset)(DEV_SOFTC(vs)); | ||||
break; | break; | ||||
case VTCFG_R_CFGVEC: | case VIRTIO_MSI_CONFIG_VECTOR: | ||||
vs->vs_msix_cfg_idx = value; | vs->vs_msix_cfg_idx = value; | ||||
break; | break; | ||||
case VTCFG_R_QVEC: | case VIRTIO_MSI_QUEUE_VECTOR: | ||||
if (vs->vs_curq >= vc->vc_nvq) | if (vs->vs_curq >= vc->vc_nvq) | ||||
goto bad_qindex; | goto bad_qindex; | ||||
vq = &vs->vs_queues[vs->vs_curq]; | vq = &vs->vs_queues[vs->vs_curq]; | ||||
vq->vq_msix_idx = value; | vq->vq_msix_idx = value; | ||||
break; | break; | ||||
} | } | ||||
goto done; | goto done; | ||||
▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines | for (i = 0; i < vc->vc_nvq; i++) { | ||||
SNAPSHOT_VAR_OR_LEAVE(vq->vq_flags, meta, ret, done); | SNAPSHOT_VAR_OR_LEAVE(vq->vq_flags, meta, ret, done); | ||||
SNAPSHOT_VAR_OR_LEAVE(vq->vq_last_avail, meta, ret, done); | SNAPSHOT_VAR_OR_LEAVE(vq->vq_last_avail, meta, ret, done); | ||||
SNAPSHOT_VAR_OR_LEAVE(vq->vq_next_used, meta, ret, done); | SNAPSHOT_VAR_OR_LEAVE(vq->vq_next_used, meta, ret, done); | ||||
SNAPSHOT_VAR_OR_LEAVE(vq->vq_save_used, meta, ret, done); | SNAPSHOT_VAR_OR_LEAVE(vq->vq_save_used, meta, ret, done); | ||||
SNAPSHOT_VAR_OR_LEAVE(vq->vq_msix_idx, meta, ret, done); | SNAPSHOT_VAR_OR_LEAVE(vq->vq_msix_idx, meta, ret, done); | ||||
SNAPSHOT_VAR_OR_LEAVE(vq->vq_pfn, meta, ret, done); | SNAPSHOT_VAR_OR_LEAVE(vq->vq_pfn, meta, ret, done); | ||||
addr_size = vq->vq_qsize * sizeof(struct virtio_desc); | addr_size = vq->vq_qsize * sizeof(struct vring_desc); | ||||
SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_desc, addr_size, | SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_desc, addr_size, | ||||
false, meta, ret, done); | false, meta, ret, done); | ||||
addr_size = (2 + vq->vq_qsize + 1) * sizeof(uint16_t); | addr_size = (2 + vq->vq_qsize + 1) * sizeof(uint16_t); | ||||
SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_avail, addr_size, | SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_avail, addr_size, | ||||
false, meta, ret, done); | false, meta, ret, done); | ||||
addr_size = (2 + 2 * vq->vq_qsize + 1) * sizeof(uint16_t); | addr_size = (2 + 2 * vq->vq_qsize + 1) * sizeof(uint16_t); | ||||
SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_used, addr_size, | SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_used, addr_size, | ||||
false, meta, ret, done); | false, meta, ret, done); | ||||
SNAPSHOT_BUF_OR_LEAVE(vq->vq_desc, vring_size(vq->vq_qsize), | SNAPSHOT_BUF_OR_LEAVE(vq->vq_desc, | ||||
meta, ret, done); | vring_size_aligned(vq->vq_qsize), meta, ret, done); | ||||
} | } | ||||
done: | done: | ||||
return (ret); | return (ret); | ||||
} | } | ||||
int | int | ||||
vi_pci_snapshot(struct vm_snapshot_meta *meta) | vi_pci_snapshot(struct vm_snapshot_meta *meta) | ||||
Show All 36 Lines |