Index: usr.sbin/bhyve/pci_emul.h =================================================================== --- usr.sbin/bhyve/pci_emul.h +++ usr.sbin/bhyve/pci_emul.h @@ -228,6 +228,8 @@ void pci_callback(void); int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, uint64_t size); +int pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen, + int *capoffp); int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type); void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, Index: usr.sbin/bhyve/pci_emul.c =================================================================== --- usr.sbin/bhyve/pci_emul.c +++ usr.sbin/bhyve/pci_emul.c @@ -727,8 +727,9 @@ } #define CAP_START_OFFSET 0x40 -static int -pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) +int +pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen, + int *capoffp) { int i, capoff, reallen; uint16_t sts; @@ -763,6 +764,9 @@ pi->pi_prevcap = capoff; pi->pi_capend = capoff + reallen - 1; + + if (capoffp != NULL) + *capoffp = capoff; return (0); } @@ -839,7 +843,8 @@ pci_populate_msicap(&msicap, msgnum, 0); - return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap))); + return (pci_emul_add_capability(pi, (u_char *)&msicap, sizeof(msicap), + NULL)); } static void @@ -914,7 +919,7 @@ tab_size + pi->pi_msix.pba_size); return (pci_emul_add_capability(pi, (u_char *)&msixcap, - sizeof(msixcap))); + sizeof(msixcap), NULL)); } static void @@ -1014,7 +1019,8 @@ pciecap.link_status = 0x11; /* gen1, x1 */ } - err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap)); + err = pci_emul_add_capability(pi, (u_char *)&pciecap, sizeof(pciecap), + NULL); return (err); } Index: usr.sbin/bhyve/pci_virtio_9p.c =================================================================== --- usr.sbin/bhyve/pci_virtio_9p.c +++ 
usr.sbin/bhyve/pci_virtio_9p.c @@ -113,7 +113,11 @@ pci_vt9p_cfgread, /* read virtio config */ NULL, /* write virtio config */ pci_vt9p_neg_features, /* apply negotiated features */ - (1 << 0), /* our capabilities */ + (1 << 0), /* our capabilities (legacy) */ + (1 << 0), /* our capabilities (modern) */ + true, /* Enable legacy */ + true, /* Enable modern */ + 2, /* PCI BAR# for modern */ }; @@ -329,7 +333,8 @@ sc->vsc_vq.vq_qsize = VT9P_RINGSZ; /* initialize config space */ - pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_9P); + pci_set_cfgdata16(pi, PCIR_DEVICE, sc->vsc_vs.vs_vc->vc_en_legacy ? + VIRTIO_DEV_9P : vi_get_modern_pci_devid(VIRTIO_ID_9P)); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_9P); @@ -337,7 +342,7 @@ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) return (1); - vi_set_io_bar(&sc->vsc_vs, 0); + vi_setup_pci_bar(&sc->vsc_vs); return (0); } @@ -346,6 +351,8 @@ .pe_emu = "virtio-9p", .pe_legacy_config = pci_vt9p_legacy_config, .pe_init = pci_vt9p_init, + .pe_cfgwrite = vi_pci_cfgwrite, + .pe_cfgread = vi_pci_cfgread, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; Index: usr.sbin/bhyve/pci_virtio_block.c =================================================================== --- usr.sbin/bhyve/pci_virtio_block.c +++ usr.sbin/bhyve/pci_virtio_block.c @@ -216,7 +216,11 @@ pci_vtblk_cfgread, /* read PCI config */ pci_vtblk_cfgwrite, /* write PCI config */ NULL, /* apply negotiated features */ - VTBLK_S_HOSTCAPS, /* our capabilities */ + VTBLK_S_HOSTCAPS, /* our capabilities (legacy) */ + VTBLK_S_HOSTCAPS, /* our capabilities (modern) */ + true, /* Enable legacy */ + true, /* Enable modern */ + 2, /* PCI BAR# for modern */ #ifdef BHYVE_SNAPSHOT pci_vtblk_pause, /* pause blockif threads */ pci_vtblk_resume, /* resume blockif threads */ @@ -485,8 +489,10 @@ } bcopy(&vtblk_vi_consts, &sc->vbsc_consts, sizeof (vtblk_vi_consts)); - if 
(blockif_candelete(sc->bc)) - sc->vbsc_consts.vc_hv_caps |= VTBLK_F_DISCARD; + if (blockif_candelete(sc->bc)) { + sc->vbsc_consts.vc_hv_caps_legacy |= VTBLK_F_DISCARD; + sc->vbsc_consts.vc_hv_caps_modern |= VTBLK_F_DISCARD; + } pthread_mutex_init(&sc->vsc_mtx, NULL); @@ -541,7 +547,8 @@ * have the device, class, and subdev_0 as fields in * the virtio constants structure. */ - pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK); + pci_set_cfgdata16(pi, PCIR_DEVICE, sc->vbsc_consts.vc_en_legacy ? + VIRTIO_DEV_BLOCK : vi_get_modern_pci_devid(VIRTIO_ID_BLOCK)); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_BLOCK); @@ -552,7 +559,7 @@ free(sc); return (1); } - vi_set_io_bar(&sc->vbsc_vs, 0); + vi_setup_pci_bar(&sc->vbsc_vs); blockif_register_resize_callback(sc->bc, pci_vtblk_resized, sc); return (0); } @@ -581,6 +588,8 @@ .pe_emu = "virtio-blk", .pe_init = pci_vtblk_init, .pe_legacy_config = blockif_legacy_config, + .pe_cfgwrite = vi_pci_cfgwrite, + .pe_cfgread = vi_pci_cfgread, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read, #ifdef BHYVE_SNAPSHOT Index: usr.sbin/bhyve/pci_virtio_console.c =================================================================== --- usr.sbin/bhyve/pci_virtio_console.c +++ usr.sbin/bhyve/pci_virtio_console.c @@ -177,7 +177,11 @@ pci_vtcon_cfgread, /* read virtio config */ pci_vtcon_cfgwrite, /* write virtio config */ pci_vtcon_neg_features, /* apply negotiated features */ - VTCON_S_HOSTCAPS, /* our capabilities */ + VTCON_S_HOSTCAPS, /* our capabilities (legacy) */ + VTCON_S_HOSTCAPS, /* our capabilities (modern) */ + true, /* Enable legacy */ + true, /* Enable modern */ + 2, /* PCI BAR# for modern */ }; @@ -709,7 +713,8 @@ } /* initialize config space */ - pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_CONSOLE); + pci_set_cfgdata16(pi, PCIR_DEVICE, sc->vsc_vs.vs_vc->vc_en_legacy ? 
+ VIRTIO_DEV_CONSOLE : vi_get_modern_pci_devid(VIRTIO_ID_CONSOLE)); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_CONSOLE); @@ -717,7 +722,7 @@ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) return (1); - vi_set_io_bar(&sc->vsc_vs, 0); + vi_setup_pci_bar(&sc->vsc_vs); /* create control port */ sc->vsc_control_port.vsp_sc = sc; @@ -753,6 +758,8 @@ struct pci_devemu pci_de_vcon = { .pe_emu = "virtio-console", .pe_init = pci_vtcon_init, + .pe_cfgwrite = vi_pci_cfgwrite, + .pe_cfgread = vi_pci_cfgread, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; Index: usr.sbin/bhyve/pci_virtio_input.c =================================================================== --- usr.sbin/bhyve/pci_virtio_input.c +++ usr.sbin/bhyve/pci_virtio_input.c @@ -465,6 +465,9 @@ /* select/subsel changed, query new config on next cfgread */ sc->vsc_config_valid = 0; + /* notify the guest the device configuration has been changed */ + vq_devcfg_changed(&sc->vsc_vs); + return (0); } @@ -747,7 +750,7 @@ if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) goto failed; /* add virtio register */ - vi_set_io_bar(&sc->vsc_vs, 0); + vi_setup_pci_bar(&sc->vsc_vs); return (0); Index: usr.sbin/bhyve/pci_virtio_net.c =================================================================== --- usr.sbin/bhyve/pci_virtio_net.c +++ usr.sbin/bhyve/pci_virtio_net.c @@ -72,9 +72,12 @@ #define VTNET_MIN_MTU ETHERMIN #define VTNET_MAX_MTU 65535 -#define VTNET_S_HOSTCAPS \ +#define VTNET_S_HOSTCAPS_LEGACY \ ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \ VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) +#define VTNET_S_HOSTCAPS_MODERN \ + ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \ + VIRTIO_RING_F_INDIRECT_DESC) /* * PCI config-space "registers" @@ -153,7 +156,11 @@ pci_vtnet_cfgread, /* read PCI config */ pci_vtnet_cfgwrite, /* write PCI config */ pci_vtnet_neg_features, /* apply negotiated 
features */ - VTNET_S_HOSTCAPS, /* our capabilities */ + VTNET_S_HOSTCAPS_LEGACY, /* our capabilities (legacy) */ + VTNET_S_HOSTCAPS_MODERN, /* our capabilities (modern) */ + true, /* Enable legacy */ + true, /* Enable modern */ + 2, /* PCI BAR# for modern */ #ifdef BHYVE_SNAPSHOT pci_vtnet_pause, /* pause rx/tx threads */ pci_vtnet_resume, /* resume rx/tx threads */ @@ -612,7 +619,8 @@ free(sc); return (err); } - sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MTU; + sc->vsc_consts.vc_hv_caps_legacy |= VIRTIO_NET_F_MTU; + sc->vsc_consts.vc_hv_caps_modern |= VIRTIO_NET_F_MTU; } sc->vsc_config.mtu = mtu; @@ -625,7 +633,9 @@ } } - sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MRG_RXBUF | + sc->vsc_consts.vc_hv_caps_legacy |= VIRTIO_NET_F_MRG_RXBUF | + netbe_get_cap(sc->vsc_be); + sc->vsc_consts.vc_hv_caps_modern |= VIRTIO_NET_F_MRG_RXBUF | netbe_get_cap(sc->vsc_be); /* @@ -635,7 +645,8 @@ sc->vsc_config.max_virtqueue_pairs = 1; /* initialize config space */ - pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); + pci_set_cfgdata16(pi, PCIR_DEVICE, sc->vsc_consts.vc_en_legacy ? 
+ VIRTIO_DEV_NET : vi_get_modern_pci_devid(VIRTIO_ID_NETWORK)); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_NETWORK); @@ -653,8 +664,8 @@ return (1); } - /* use BAR 0 to map config regs in IO space */ - vi_set_io_bar(&sc->vsc_vs, 0); + /* Virtio-legacy: use BAR 0 to map config regs in IO space */ + vi_setup_pci_bar(&sc->vsc_vs); sc->resetting = 0; @@ -691,6 +702,8 @@ */ ptr = &sc->vsc_config.mac[offset]; memcpy(ptr, &value, size); + + vq_devcfg_changed(&sc->vsc_vs); } else { /* silently ignore other writes */ DPRINTF(("vtnet: write to readonly reg %d", offset)); @@ -808,6 +821,8 @@ .pe_emu = "virtio-net", .pe_init = pci_vtnet_init, .pe_legacy_config = netbe_legacy_config, + .pe_cfgwrite = vi_pci_cfgwrite, + .pe_cfgread = vi_pci_cfgread, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read, #ifdef BHYVE_SNAPSHOT Index: usr.sbin/bhyve/pci_virtio_rnd.c =================================================================== --- usr.sbin/bhyve/pci_virtio_rnd.c +++ usr.sbin/bhyve/pci_virtio_rnd.c @@ -92,7 +92,11 @@ NULL, /* read virtio config */ NULL, /* write virtio config */ NULL, /* apply negotiated features */ - 0, /* our capabilities */ + 0, /* our capabilities (legacy) */ + 0, /* our capabilities (modern) */ + true, /* Enable legacy */ + true, /* Enable modern */ + 2, /* PCI BAR# for modern */ }; @@ -187,7 +191,8 @@ sc->vrsc_fd = fd; /* initialize config space */ - pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_RANDOM); + pci_set_cfgdata16(pi, PCIR_DEVICE, sc->vrsc_vs.vs_vc->vc_en_legacy ? 
+ VIRTIO_DEV_RANDOM : vi_get_modern_pci_devid(VIRTIO_ID_ENTROPY)); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_CRYPTO); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_ENTROPY); @@ -195,7 +200,7 @@ if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix())) return (1); - vi_set_io_bar(&sc->vrsc_vs, 0); + vi_setup_pci_bar(&sc->vrsc_vs); return (0); } @@ -204,6 +209,8 @@ struct pci_devemu pci_de_vrnd = { .pe_emu = "virtio-rnd", .pe_init = pci_vtrnd_init, + .pe_cfgwrite = vi_pci_cfgwrite, + .pe_cfgread = vi_pci_cfgread, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read, #ifdef BHYVE_SNAPSHOT Index: usr.sbin/bhyve/pci_virtio_scsi.c =================================================================== --- usr.sbin/bhyve/pci_virtio_scsi.c +++ usr.sbin/bhyve/pci_virtio_scsi.c @@ -256,7 +256,11 @@ pci_vtscsi_cfgread, /* read virtio config */ pci_vtscsi_cfgwrite, /* write virtio config */ pci_vtscsi_neg_features, /* apply negotiated features */ - 0, /* our capabilities */ + 0, /* our capabilities (legacy) */ + 0, /* our capabilities (modern) */ + true, /* Enable legacy */ + true, /* Enable modern */ + 2, /* PCI BAR# for modern */ }; static void * @@ -717,7 +721,8 @@ } /* initialize config space */ - pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_SCSI); + pci_set_cfgdata16(pi, PCIR_DEVICE, sc->vss_vs.vs_vc->vc_en_legacy ? 
+ VIRTIO_DEV_SCSI : vi_get_modern_pci_devid(VIRTIO_ID_SCSI)); pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE); pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_SCSI); @@ -725,7 +730,7 @@ if (vi_intr_init(&sc->vss_vs, 1, fbsdrun_virtio_msix())) return (1); - vi_set_io_bar(&sc->vss_vs, 0); + vi_setup_pci_bar(&sc->vss_vs); return (0); } @@ -735,6 +740,8 @@ .pe_emu = "virtio-scsi", .pe_init = pci_vtscsi_init, .pe_legacy_config = pci_vtscsi_legacy_config, + .pe_cfgwrite = vi_pci_cfgwrite, + .pe_cfgread = vi_pci_cfgread, .pe_barwrite = vi_pci_write, .pe_barread = vi_pci_read }; Index: usr.sbin/bhyve/virtio.h =================================================================== --- usr.sbin/bhyve/virtio.h +++ usr.sbin/bhyve/virtio.h @@ -3,6 +3,10 @@ * * Copyright (c) 2013 Chris Torek * All rights reserved. + * Copyright (c) 2021 The FreeBSD Foundation + * + * Portions of this software were developed by Ka Ho Ng + * under sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -38,14 +42,17 @@ #include /* - * These are derived from several virtio specifications. - * - * Some useful links: + * Virtio legacy support is derived from the specifications below: * https://github.com/rustyrussell/virtio-spec * http://people.redhat.com/pbonzini/virtio-spec.pdf + * + * Virtio modern support follows the specification below: + * https://docs.oasis-open.org/virtio/virtio/v1.1/cs01/virtio-v1.1-cs01.pdf */ /* + * Virtio legacy: + * * A virtual device has zero or more "virtual queues" (virtqueue). * Each virtqueue uses at least two 4096-byte pages, laid out thus: * @@ -130,6 +137,8 @@ #define VRING_ALIGN 4096 /* + * Virtio legacy: + * * The address of any given virtual queue is determined by a single * Page Frame Number register. The guest writes the PFN into the * PCI config space. 
However, a device that has two or more @@ -201,6 +210,8 @@ struct vm_snapshot_meta; /* + * Virtio legacy: + * * A virtual device, with some number (possibly 0) of virtual * queues and some size (possibly 0) of configuration-space * registers private to the device. The virtio_softc should come @@ -229,22 +240,46 @@ * * The BROKED flag ("this thing done gone and broked") is for future * use. + * + * + * Virtio modern: + * + * The DFSELECT_HI flag indicates the device should return the high-part of + * the hypervisor-provided capabilities. The GFSELECT_HI flag indicates the + * device should return the high-part of the guest driver's capabilities. */ #define VIRTIO_USE_MSIX 0x01 #define VIRTIO_EVENT_IDX 0x02 /* use the event-index values */ +#define VIRTIO_DEVCFG_CHG 0x04 /* Device configuration changed */ #define VIRTIO_BROKED 0x08 /* ??? */ +#define VIRTIO_DFSELECT_HI 0x10 /* return high-part of host cap */ +#define VIRTIO_GFSELECT_HI 0x20 /* return high-part of guest cap */ + +struct virtio_pci_cfg { + int c_captype; + int c_baridx; + uint32_t c_offset; + uint32_t c_size; + int c_capoff; + int c_caplen; +}; struct virtio_softc { struct virtio_consts *vs_vc; /* constants (see below) */ int vs_flags; /* VIRTIO_* flags from above */ pthread_mutex_t *vs_mtx; /* POSIX mutex, if any */ struct pci_devinst *vs_pi; /* PCI device instance */ - uint32_t vs_negotiated_caps; /* negotiated capabilities */ + uint64_t vs_negotiated_caps; /* negotiated capabilities */ struct vqueue_info *vs_queues; /* one per vc_nvq */ int vs_curq; /* current queue */ uint8_t vs_status; /* value from last status write */ uint8_t vs_isr; /* ISR flags, if not MSI-X */ uint16_t vs_msix_cfg_idx; /* MSI-X vector for config event */ + uint8_t vs_devcfg_gen; /* Generation of device config space */ + struct virtio_pci_cfg vs_cfgs[5]; /* Configurations */ + struct virtio_pci_cfg *vs_pcicfg; /* PCI configuration access + cap */ + int vs_ncfgs; /* Number of PCI configurations */ }; #define VS_LOCK(vs) \ @@ 
-272,7 +307,13 @@ /* called to write config regs */ void (*vc_apply_features)(void *, uint64_t); /* called to apply negotiated features */ - uint64_t vc_hv_caps; /* hypervisor-provided capabilities */ + uint64_t vc_hv_caps_legacy; + /* hypervisor-provided capabilities (legacy) */ + uint64_t vc_hv_caps_modern; + /* hypervisor-provided capabilities (modern) */ + bool vc_en_legacy; /* enable legacy */ + bool vc_en_modern; /* enable modern */ + char vc_modern_pcibar; /* PCI BAR# for modern */ void (*vc_pause)(void *); /* called to pause device activity */ void (*vc_resume)(void *); /* called to resume device activity */ int (*vc_snapshot)(void *, struct vm_snapshot_meta *); @@ -298,6 +339,7 @@ */ #define VQ_ALLOC 0x01 /* set once we have a pfn */ #define VQ_BROKED 0x02 /* ??? */ +#define VQ_ENABLED 0x04 /* set if the queue was enabled */ struct vqueue_info { uint16_t vq_qsize; /* size of this queue (a power of 2) */ void (*vq_notify)(void *, struct vqueue_info *); @@ -312,19 +354,34 @@ uint16_t vq_save_used; /* saved vq_used->idx; see vq_endchains */ uint16_t vq_msix_idx; /* MSI-X index, or VIRTIO_MSI_NO_VECTOR */ - uint32_t vq_pfn; /* PFN of virt queue (not shifted!) */ + uint64_t vq_desc_gpa; /* PA of virtqueue descriptors ring */ + uint64_t vq_avail_gpa; /* PA of virtqueue avail ring */ + uint64_t vq_used_gpa; /* PA of virtqueue used ring */ volatile struct vring_desc *vq_desc; /* descriptor array */ volatile struct vring_avail *vq_avail; /* the "avail" ring */ volatile struct vring_used *vq_used; /* the "used" ring */ }; -/* as noted above, these are sort of backwards, name-wise */ +/* + * As noted above, these are sort of backwards, name-wise. + * + * Endian helpers must be used when using the following macros. 
+ */ #define VQ_AVAIL_EVENT_IDX(vq) \ (*(volatile uint16_t *)&(vq)->vq_used->ring[(vq)->vq_qsize]) #define VQ_USED_EVENT_IDX(vq) \ ((vq)->vq_avail->ring[(vq)->vq_qsize]) +/* + * Return true if the virtio device is running in modern mode + */ +static inline bool +vi_is_modern(struct virtio_softc *vs) +{ + return (vs->vs_negotiated_caps & VIRTIO_F_VERSION_1) != 0; +} + /* * Is this ring ready for I/O? */ @@ -343,8 +400,7 @@ vq_has_descs(struct vqueue_info *vq) { - return (vq_ring_ready(vq) && vq->vq_last_avail != - vq->vq_avail->idx); + return (vq_ring_ready(vq) && vq->vq_last_avail != vq->vq_avail->idx); } /* @@ -354,15 +410,15 @@ static inline void vi_interrupt(struct virtio_softc *vs, uint8_t isr, uint16_t msix_idx) { + if (!(vs->vs_status & VIRTIO_CONFIG_STATUS_DRIVER_OK)) + return; if (pci_msix_enabled(vs->vs_pi)) pci_generate_msix(vs->vs_pi, msix_idx); else { - VS_LOCK(vs); vs->vs_isr |= isr; pci_generate_msi(vs->vs_pi, 0); pci_lintr_assert(vs->vs_pi); - VS_UNLOCK(vs); } } @@ -377,6 +433,17 @@ vi_interrupt(vs, VIRTIO_PCI_ISR_INTR, vq->vq_msix_idx); } +/* + * Deliver an interrupt to guest on device-specific configuration changes + * (if possible, or a generic MSI interrupt if not using MSI-X). + */ +static inline void +vq_devcfg_changed(struct virtio_softc *vs) +{ + vs->vs_flags |= VIRTIO_DEVCFG_CHG; + vi_interrupt(vs, VIRTIO_PCI_ISR_CONFIG, vs->vs_msix_cfg_idx); +} + static inline void vq_kick_enable(struct vqueue_info *vq) { @@ -397,6 +464,21 @@ vq->vq_used->flags |= VRING_USED_F_NO_NOTIFY; } +static inline uint64_t +vi_hv_features(struct virtio_softc *vs, bool modern) +{ + return (modern ? 
vs->vs_vc->vc_hv_caps_modern | VIRTIO_F_VERSION_1 : + vs->vs_vc->vc_hv_caps_legacy); +} + +static inline uint16_t +vi_get_modern_pci_devid(uint16_t vdid) +{ + return (vdid + VIRTIO_PCI_DEVICEID_MODERN_MIN); +} + +#define VIRTIO_LEGACY_BAR 0 /* BAR # to host virtio legacy cfg regs */ + struct iovec; /* @@ -414,8 +496,8 @@ void *dev_softc, struct pci_devinst *pi, struct vqueue_info *queues); int vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix); +void vi_setup_pci_bar(struct virtio_softc *vs); void vi_reset_dev(struct virtio_softc *); -void vi_set_io_bar(struct virtio_softc *, int); int vq_getchain(struct vqueue_info *vq, struct iovec *iov, int niov, struct vi_req *reqp); @@ -426,6 +508,10 @@ void vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen); void vq_endchains(struct vqueue_info *vq, int used_all_avail); +int vi_pci_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int offset, int bytes, uint32_t *retval); +int vi_pci_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int offset, int bytes, uint32_t val); uint64_t vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset, int size); void vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, Index: usr.sbin/bhyve/virtio.c =================================================================== --- usr.sbin/bhyve/virtio.c +++ usr.sbin/bhyve/virtio.c @@ -4,6 +4,10 @@ * Copyright (c) 2013 Chris Torek * All rights reserved. * Copyright (c) 2019 Joyent, Inc. + * Copyright (c) 2021 The FreeBSD Foundation + * + * Portions of this software were developed by Ka Ho Ng + * under sponsorship of the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -37,7 +41,9 @@ #include #include +#include +#include #include #include #include @@ -52,8 +58,14 @@ /* * Functions for dealing with generalized "virtual devices" as * defined by + * + * The reference for the implementation of virtio modern is on + * */ +/* XXX Make this configurable? */ +#define VQ_NOTIFY_OFF_MULTIPLIER PAGE_SIZE + /* * In case we decide to relax the "virtio softc comes at the * front of virtio-based device softc" constraint, let's use @@ -61,6 +73,11 @@ */ #define DEV_SOFTC(vs) ((void *)(vs)) +static uint64_t vi_modern_pci_read(struct virtio_softc *vs, int vcpu, + int baridx, uint64_t offset, int size); +static void vi_modern_pci_write(struct virtio_softc *vs, int vcpu, + int baridx, uint64_t offset, int size, uint64_t value); + /* * Link a virtio_softc to its constants, the device softc, and * the PCI emulation. @@ -109,7 +126,7 @@ vq->vq_last_avail = 0; vq->vq_next_used = 0; vq->vq_save_used = 0; - vq->vq_pfn = 0; + vq->vq_desc_gpa = vq->vq_avail_gpa = vq->vq_used_gpa = 0; vq->vq_msix_idx = VIRTIO_MSI_NO_VECTOR; } vs->vs_negotiated_caps = 0; @@ -122,10 +139,10 @@ } /* - * Set I/O BAR (usually 0) to map PCI config registers. + * Set I/O BAR (usually 0) to map legacy PCI config registers. 
*/ -void -vi_set_io_bar(struct virtio_softc *vs, int barnum) +static void +vi_legacy_set_io_bar(struct virtio_softc *vs, int barnum) { size_t size; @@ -137,6 +154,153 @@ pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_IO, size); } +/* + * Add modern configuration structure capability + */ +static inline int +vi_modern_add_cfg(struct virtio_softc *vs, struct virtio_pci_cap *cap, + int barnum, uint32_t off, uint32_t length, uint8_t caplen, + uint8_t cfgtype) +{ + int capoff; + + cap->cap_vndr = PCIY_VENDOR; + cap->cap_len = caplen; + cap->cfg_type = cfgtype; + cap->bar = barnum; + cap->offset = off; + cap->length = length; + if (pci_emul_add_capability(vs->vs_pi, (u_char *)cap, caplen, + &capoff) != 0) + return (-1); + + vs->vs_cfgs[vs->vs_ncfgs].c_captype = cfgtype; + vs->vs_cfgs[vs->vs_ncfgs].c_baridx = cap->bar; + vs->vs_cfgs[vs->vs_ncfgs].c_offset = cap->offset; + vs->vs_cfgs[vs->vs_ncfgs].c_size = cap->length; + vs->vs_cfgs[vs->vs_ncfgs].c_capoff = capoff; + vs->vs_cfgs[vs->vs_ncfgs++].c_caplen = caplen; + return (0); +} + +/* + * Add COMMON_CFG configuration structure capability + */ +static void +vi_modern_add_common_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp) +{ + struct virtio_pci_cap cap; + uint32_t cfglen; + + cfglen = roundup2(sizeof(struct virtio_pci_common_cfg), PAGE_SIZE); + + memset(&cap, 0, sizeof(cap)); + vi_modern_add_cfg(vs, &cap, barnum, *offp, cfglen, sizeof(cap), + VIRTIO_PCI_CAP_COMMON_CFG); + *offp += cfglen; +} + +/* + * Add NOTIFY_CFG configuration structure capability + */ +static void +vi_modern_add_notify_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp) +{ + struct virtio_pci_notify_cap cap; + uint32_t cfglen; + + cfglen = roundup2(VQ_NOTIFY_OFF_MULTIPLIER * vs->vs_vc->vc_nvq, PAGE_SIZE); + + memset(&cap, 0, sizeof(cap)); + cap.notify_off_multiplier = VQ_NOTIFY_OFF_MULTIPLIER; + vi_modern_add_cfg(vs, &cap.cap, barnum, *offp, cfglen, sizeof(cap), + VIRTIO_PCI_CAP_NOTIFY_CFG); + *offp += cfglen; +} + +/* + * Add ISR_CFG 
configuration structure capability + */ +static void +vi_modern_add_isr_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp) +{ + struct virtio_pci_cap cap; + + memset(&cap, 0, sizeof(cap)); + vi_modern_add_cfg(vs, &cap, barnum, *offp, PAGE_SIZE, sizeof(cap), + VIRTIO_PCI_CAP_ISR_CFG); + *offp += PAGE_SIZE; +} + +/* + * Add DEV_CFG configuration structure capability + */ +static void +vi_modern_add_dev_cfg(struct virtio_softc *vs, int barnum, uint32_t *offp) +{ + struct virtio_pci_cap cap; + + memset(&cap, 0, sizeof(cap)); + vi_modern_add_cfg(vs, &cap, barnum, *offp, PAGE_SIZE, sizeof(cap), + VIRTIO_PCI_CAP_DEVICE_CFG); + *offp += PAGE_SIZE; +} + +/* + * Add PCI_CFG configuration structure capability + */ +static void +vi_modern_add_pci_cfg(struct virtio_softc *vs) +{ + struct virtio_pci_cfg_cap cap; + + memset(&cap, 0, sizeof(cap)); + memset(cap.pci_cfg_data, 0xff, sizeof(cap.pci_cfg_data)); + if (vi_modern_add_cfg(vs, &cap.cap, 0, 0, 0, + sizeof(cap), VIRTIO_PCI_CAP_PCI_CFG) != 0) + return; + vs->vs_pcicfg = &vs->vs_cfgs[vs->vs_ncfgs - 1]; +} + +/* + * Set up Virtio modern device pci configuration space + */ +static void +vi_modern_setup_mem_bar(struct virtio_softc *vs, int barnum) +{ + uint32_t size; + + size = 0; + + vi_modern_add_common_cfg(vs, barnum, &size); + vi_modern_add_notify_cfg(vs, barnum, &size); + vi_modern_add_dev_cfg(vs, barnum, &size); + vi_modern_add_isr_cfg(vs, barnum, &size); + vi_modern_add_pci_cfg(vs); + pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_MEM64, size); +} + +/* + * Set up Virtio device pci configuration space. + * + * If both modern and legacy are supported (i.e. transitional device), "barnum" + * MUST NOT be 0. 
+ */ +void +vi_setup_pci_bar(struct virtio_softc *vs) +{ + struct virtio_consts *vc; + + vc = vs->vs_vc; + + if (vc->vc_en_legacy) + vi_legacy_set_io_bar(vs, 0); + if (vc->vc_en_modern) { + assert(!vc->vc_en_legacy || vc->vc_modern_pcibar != 0); + vi_modern_setup_mem_bar(vs, vc->vc_modern_pcibar); + } +} + /* * Initialize MSI-X vector capabilities if we're to use MSI-X, * or MSI capabilities if not. @@ -170,12 +334,11 @@ } /* - * Initialize the currently-selected virtio queue (vs->vs_curq). - * The guest just gave us a page frame number, from which we can - * calculate the addresses of the queue. + * Initialize the currently-selected virtio queue (vs->vs_curq) + * for virtio modern device only */ -void -vi_vq_init(struct virtio_softc *vs, uint32_t pfn) +static void +vi_vq_init(struct virtio_softc *vs) { struct vqueue_info *vq; uint64_t phys; @@ -183,23 +346,22 @@ char *base; vq = &vs->vs_queues[vs->vs_curq]; - vq->vq_pfn = pfn; - phys = (uint64_t)pfn << VRING_PFN; - size = vring_size_aligned(vq->vq_qsize); - base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size); - /* First page(s) are descriptors... */ + phys = vq->vq_desc_gpa; + size = vq->vq_qsize * sizeof(struct vring_desc); + base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size); vq->vq_desc = (struct vring_desc *)base; - base += vq->vq_qsize * sizeof(struct vring_desc); - /* ... immediately followed by "avail" ring (entirely uint16_t's) */ + phys = vq->vq_avail_gpa; + size = sizeof(struct vring_avail) + sizeof(uint16_t) + + vq->vq_qsize * sizeof(uint16_t); + base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size); vq->vq_avail = (struct vring_avail *)base; - base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t); - - /* Then it's rounded up to the next page... */ - base = (char *)roundup2((uintptr_t)base, VRING_ALIGN); - /* ... and the last page(s) are the used ring. 
*/ + phys = vq->vq_used_gpa; + size = sizeof(struct vring_used) + sizeof(uint16_t) + + vq->vq_qsize * sizeof(struct vring_used_elem); + base = paddr_guest2host(vs->vs_pi->pi_vmctx, phys, size); vq->vq_used = (struct vring_used *)base; /* Mark queue as allocated, and start at 0 when we use it. */ @@ -209,14 +371,47 @@ vq->vq_save_used = 0; } +/* + * Initialize the currently-selected virtio queue (vs->vs_curq). + * The guest just gave us a page frame number, from which we can + * calculate the addresses of the queue. + */ +static void +vi_legacy_vq_init(struct virtio_softc *vs, uint32_t pfn) +{ + struct vqueue_info *vq; + uint64_t phys; + + vq = &vs->vs_queues[vs->vs_curq]; + phys = (uint64_t)pfn << VRING_PFN; + + /* First page(s) are descriptors... */ + vq->vq_desc_gpa = phys; + phys += vq->vq_qsize * sizeof(struct vring_desc); + /* ... immediately followed by "avail" ring (entirely uint16_t's) */ + vq->vq_avail_gpa = phys; + phys += sizeof(struct vring_avail) + sizeof(uint16_t) + + vq->vq_qsize * sizeof(uint16_t); + /* Then it's rounded up to the next page... */ + phys = roundup2(phys, VRING_ALIGN); + /* ... and the last page(s) are the used ring. */ + vq->vq_used_gpa = phys; + + vi_vq_init(vs); +} + + /* * Helper inline for vq_getchain(): record the i'th "real" * descriptor. 
*/ static inline void -_vq_record(int i, volatile struct vring_desc *vd, - struct vmctx *ctx, struct iovec *iov, int n_iov, - struct vi_req *reqp) { +_vq_record(struct virtio_softc *vs, int i, volatile struct vring_desc *vd, + struct iovec *iov, int n_iov, struct vi_req *reqp) +{ + struct vmctx *ctx; + + ctx = vs->vs_pi->pi_vmctx; if (i >= n_iov) return; @@ -326,9 +521,9 @@ } vdir = &vq->vq_desc[next]; if ((vdir->flags & VRING_DESC_F_INDIRECT) == 0) { - _vq_record(i, vdir, ctx, iov, niov, &req); + _vq_record(vs, i, vdir, iov, niov, &req); i++; - } else if ((vs->vs_vc->vc_hv_caps & + } else if ((vs->vs_negotiated_caps & VIRTIO_RING_F_INDIRECT_DESC) == 0) { EPRINTLN( "%s: descriptor has forbidden INDIRECT flag, " @@ -363,7 +558,7 @@ name); return (-1); } - _vq_record(i, vp, ctx, iov, niov, &req); + _vq_record(vs, i, vp, iov, niov, &req); if (++i > VQ_MAX_DESCRIPTORS) goto loopy; if ((vp->flags & VRING_DESC_F_NEXT) == 0) @@ -519,7 +714,7 @@ uint8_t cr_size; /* size (bytes) */ uint8_t cr_ro; /* true => reg is read only */ const char *cr_name; /* name of reg */ -} config_regs[] = { +} legacy_cfg_regs[] = { { VIRTIO_PCI_HOST_FEATURES, 4, 1, "HOST_FEATURES" }, { VIRTIO_PCI_GUEST_FEATURES, 4, 0, "GUEST_FEATURES" }, { VIRTIO_PCI_QUEUE_PFN, 4, 0, "QUEUE_PFN" }, @@ -530,18 +725,38 @@ { VIRTIO_PCI_ISR, 1, 0, "ISR" }, { VIRTIO_MSI_CONFIG_VECTOR, 2, 0, "CONFIG_VECTOR" }, { VIRTIO_MSI_QUEUE_VECTOR, 2, 0, "QUEUE_VECTOR" }, +}, common_cfg_regs[] = { + { VIRTIO_PCI_COMMON_DFSELECT, 4, 0, "DFSELECT" }, + { VIRTIO_PCI_COMMON_DF, 4, 1, "DF" }, + { VIRTIO_PCI_COMMON_GFSELECT, 4, 0, "GFSELECT" }, + { VIRTIO_PCI_COMMON_GF, 4, 0, "GF" }, + { VIRTIO_PCI_COMMON_MSIX, 2, 0, "MSIX" }, + { VIRTIO_PCI_COMMON_NUMQ, 2, 1, "NUMQ" }, + { VIRTIO_PCI_COMMON_STATUS, 1, 0, "STATUS" }, + { VIRTIO_PCI_COMMON_CFGGENERATION, 1, 1, "CFGGENERATION" }, + { VIRTIO_PCI_COMMON_Q_SELECT, 2, 0, "Q_SELECT" }, + { VIRTIO_PCI_COMMON_Q_SIZE, 2, 0, "Q_SIZE" }, + { VIRTIO_PCI_COMMON_Q_MSIX, 2, 0, "Q_MSIX" }, + { 
VIRTIO_PCI_COMMON_Q_ENABLE, 2, 0, "Q_ENABLE" }, + { VIRTIO_PCI_COMMON_Q_NOFF, 2, 1, "Q_NOFF" }, + { VIRTIO_PCI_COMMON_Q_DESCLO, 4, 0, "Q_DESCLO" }, + { VIRTIO_PCI_COMMON_Q_DESCHI, 4, 0, "Q_DESCHI" }, + { VIRTIO_PCI_COMMON_Q_AVAILLO, 4, 0, "Q_AVAILLO" }, + { VIRTIO_PCI_COMMON_Q_AVAILHI, 4, 0, "Q_AVAILHI" }, + { VIRTIO_PCI_COMMON_Q_USEDLO, 4, 0, "Q_USEDLO" }, + { VIRTIO_PCI_COMMON_Q_USEDHI, 4, 0, "Q_USEDHI" }, }; static inline struct config_reg * -vi_find_cr(int offset) { +vi_find_cr(struct config_reg *regstbl, size_t n, int offset) { u_int hi, lo, mid; struct config_reg *cr; lo = 0; - hi = sizeof(config_regs) / sizeof(*config_regs) - 1; + hi = n - 1; while (hi >= lo) { mid = (hi + lo) >> 1; - cr = &config_regs[mid]; + cr = ®stbl[mid]; if (cr->cr_offset == offset) return (cr); if (cr->cr_offset < offset) @@ -553,16 +768,15 @@ } /* - * Handle pci config space reads. - * If it's to the MSI-X info, do that. - * If it's part of the virtio standard stuff, do that. - * Otherwise dispatch to the actual driver. + * Handle legacy pci config space writes. + * + * If it's part of the legacy virtio config structure, do that. + * Otherwise dispatch to the actual device backend's config read + * callback. 
*/ -uint64_t -vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size) +static uint64_t +vi_legacy_pci_read(struct virtio_softc *vs, int vcpu, uint64_t offset, int size) { - struct virtio_softc *vs = pi->pi_arg; struct virtio_consts *vc; struct config_reg *cr; uint64_t virtio_config_size, max; @@ -571,27 +785,13 @@ uint32_t value; int error; - if (vs->vs_flags & VIRTIO_USE_MSIX) { - if (baridx == pci_msix_table_bar(pi) || - baridx == pci_msix_pba_bar(pi)) { - return (pci_emul_msix_tread(pi, offset, size)); - } - } - - /* XXX probably should do something better than just assert() */ - assert(baridx == 0); - - if (vs->vs_mtx) - pthread_mutex_lock(vs->vs_mtx); + /* Checked by caller */ + assert(size == 1 || size == 2 || size == 4); vc = vs->vs_vc; name = vc->vc_name; value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff; - - if (size != 1 && size != 2 && size != 4) - goto bad; - - virtio_config_size = VIRTIO_PCI_CONFIG_OFF(pci_msix_enabled(pi)); + virtio_config_size = VIRTIO_PCI_CONFIG_OFF(pci_msix_enabled(vs->vs_pi)); if (offset >= virtio_config_size) { /* @@ -612,7 +812,7 @@ } bad: - cr = vi_find_cr(offset); + cr = vi_find_cr(legacy_cfg_regs, nitems(legacy_cfg_regs), offset); if (cr == NULL || cr->cr_size != size) { if (cr != NULL) { /* offset must be OK, so size must be bad */ @@ -629,14 +829,17 @@ switch (offset) { case VIRTIO_PCI_HOST_FEATURES: - value = vc->vc_hv_caps; + /* Caps for legacy PCI configuration layout is only 32bit */ + value = vi_hv_features(vs, false); break; case VIRTIO_PCI_GUEST_FEATURES: value = vs->vs_negotiated_caps; break; case VIRTIO_PCI_QUEUE_PFN: - if (vs->vs_curq < vc->vc_nvq) - value = vs->vs_queues[vs->vs_curq].vq_pfn; + if ((vs->vs_negotiated_caps & VIRTIO_F_VERSION_1) == 0 && + vs->vs_curq < vc->vc_nvq) + value = vs->vs_queues[vs->vs_curq].vq_desc_gpa >> + VRING_PFN; break; case VIRTIO_PCI_QUEUE_NUM: value = vs->vs_curq < vc->vc_nvq ? 
@@ -655,7 +858,7 @@ value = vs->vs_isr; vs->vs_isr = 0; /* a read clears this flag */ if (value) - pci_lintr_deassert(pi); + pci_lintr_deassert(vs->vs_pi); break; case VIRTIO_MSI_CONFIG_VECTOR: value = vs->vs_msix_cfg_idx; @@ -666,23 +869,22 @@ VIRTIO_MSI_NO_VECTOR; break; } + done: - if (vs->vs_mtx) - pthread_mutex_unlock(vs->vs_mtx); return (value); } /* - * Handle pci config space writes. - * If it's to the MSI-X info, do that. - * If it's part of the virtio standard stuff, do that. - * Otherwise dispatch to the actual driver. + * Handle legacy pci config space writes. + * + * If it's part of the legacy virtio config structure, do that. + * Otherwise dispatch to the actual device backend's config write + * callback. */ -void -vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, - int baridx, uint64_t offset, int size, uint64_t value) +static void +vi_legacy_pci_write(struct virtio_softc *vs, int vcpu, + uint64_t offset, int size, uint64_t value) { - struct virtio_softc *vs = pi->pi_arg; struct vqueue_info *vq; struct virtio_consts *vc; struct config_reg *cr; @@ -691,27 +893,12 @@ uint32_t newoff; int error; - if (vs->vs_flags & VIRTIO_USE_MSIX) { - if (baridx == pci_msix_table_bar(pi) || - baridx == pci_msix_pba_bar(pi)) { - pci_emul_msix_twrite(pi, offset, size, value); - return; - } - } - - /* XXX probably should do something better than just assert() */ - assert(baridx == 0); - - if (vs->vs_mtx) - pthread_mutex_lock(vs->vs_mtx); + /* Checked by caller */ + assert(size == 1 || size == 2 || size == 4); vc = vs->vs_vc; name = vc->vc_name; - - if (size != 1 && size != 2 && size != 4) - goto bad; - - virtio_config_size = VIRTIO_PCI_CONFIG_OFF(pci_msix_enabled(pi)); + virtio_config_size = VIRTIO_PCI_CONFIG_OFF(pci_msix_enabled(vs->vs_pi)); if (offset >= virtio_config_size) { /* @@ -727,11 +914,11 @@ else error = 0; if (!error) - goto done; + return; } bad: - cr = vi_find_cr(offset); + cr = vi_find_cr(legacy_cfg_regs, nitems(legacy_cfg_regs), offset); 
if (cr == NULL || cr->cr_size != size || cr->cr_ro) { if (cr != NULL) { /* offset must be OK, wrong size and/or reg is R/O */ @@ -748,12 +935,12 @@ "%s: write to bad offset/size %jd/%d", name, (uintmax_t)offset, size); } - goto done; + return; } switch (offset) { case VIRTIO_PCI_GUEST_FEATURES: - vs->vs_negotiated_caps = value & vc->vc_hv_caps; + vs->vs_negotiated_caps = value & vi_hv_features(vs, false); if (vc->vc_apply_features) (*vc->vc_apply_features)(DEV_SOFTC(vs), vs->vs_negotiated_caps); @@ -761,7 +948,7 @@ case VIRTIO_PCI_QUEUE_PFN: if (vs->vs_curq >= vc->vc_nvq) goto bad_qindex; - vi_vq_init(vs, value); + vi_legacy_vq_init(vs, value); break; case VIRTIO_PCI_QUEUE_SEL: /* @@ -775,7 +962,7 @@ if (value >= vc->vc_nvq) { EPRINTLN("%s: queue %d notify out of range", name, (int)value); - goto done; + break; } vq = &vs->vs_queues[value]; if (vq->vq_notify) @@ -802,61 +989,823 @@ vq->vq_msix_idx = value; break; } - goto done; + + return; bad_qindex: EPRINTLN( "%s: write config reg %s: curq %d >= max %d", name, cr->cr_name, vs->vs_curq, vc->vc_nvq); -done: - if (vs->vs_mtx) - pthread_mutex_unlock(vs->vs_mtx); } -#ifdef BHYVE_SNAPSHOT -int -vi_pci_pause(struct vmctx *ctx, struct pci_devinst *pi) +/* + * Virtio modern: + * Handle pci config space reads to common config structure. + */ +static uint64_t +vi_pci_common_cfg_read(struct virtio_softc *vs, int vcpu, uint64_t offset, + int size) { - struct virtio_softc *vs; + uint64_t mask = size == 1 ? 0xff : size == 2 ? 
0xffff : 0xffffffff; + uint64_t value = -1; struct virtio_consts *vc; + struct vqueue_info *vq; + struct config_reg *cr; + const char *name; - vs = pi->pi_arg; - vc = vs->vs_vc; + /* Checked by caller */ + assert(size == 1 || size == 2 || size == 4); vc = vs->vs_vc; - assert(vc->vc_pause != NULL); - (*vc->vc_pause)(DEV_SOFTC(vs)); + name = vc->vc_name; - return (0); + cr = vi_find_cr(common_cfg_regs, nitems(common_cfg_regs), offset); + if (cr == NULL || cr->cr_size != size) { + /* Strict alignment and access size checking */ + if (cr != NULL) { + EPRINTLN( + "%s: read from %s: bad size %d", + name, cr->cr_name, size); + } else { + EPRINTLN( + "%s: read from bad offset/size %jd/%d", + name, (uintmax_t)offset, size); + } + goto done; + } + + switch (offset) { + case VIRTIO_PCI_COMMON_DFSELECT: + if (vs->vs_flags & VIRTIO_DFSELECT_HI) + value = 1; + else + value = 0; + break; + case VIRTIO_PCI_COMMON_DF: + value = vi_hv_features(vs, true); + if (vs->vs_flags & VIRTIO_DFSELECT_HI) + value >>= 32; + else + value &= 0xffffffff; + break; + case VIRTIO_PCI_COMMON_GFSELECT: + if (vs->vs_flags & VIRTIO_GFSELECT_HI) + value = 1; + else + value = 0; + break; + case VIRTIO_PCI_COMMON_GF: + value = (vs->vs_flags & VIRTIO_GFSELECT_HI) ? + (vs->vs_negotiated_caps >> 32) : + (vs->vs_negotiated_caps & 0xffffffff); + break; + case VIRTIO_PCI_COMMON_MSIX: + value = vs->vs_msix_cfg_idx; + break; + case VIRTIO_PCI_COMMON_NUMQ: + value = vc->vc_nvq; + break; + case VIRTIO_PCI_COMMON_STATUS: + value = vs->vs_status; + break; + case VIRTIO_PCI_COMMON_CFGGENERATION: + if (vs->vs_flags & VIRTIO_DEVCFG_CHG) { + vs->vs_devcfg_gen++; + vs->vs_flags &= ~VIRTIO_DEVCFG_CHG; + } + value = vs->vs_devcfg_gen; + break; + case VIRTIO_PCI_COMMON_Q_SELECT: + value = vs->vs_curq; + break; + case VIRTIO_PCI_COMMON_Q_SIZE: + value = vs->vs_curq < vc->vc_nvq ? 
+ vs->vs_queues[vs->vs_curq].vq_qsize : 0; + break; + case VIRTIO_PCI_COMMON_Q_MSIX: + if (vs->vs_curq < vc->vc_nvq) { + vq = &vs->vs_queues[vs->vs_curq]; + value = vq->vq_msix_idx; + } + break; + case VIRTIO_PCI_COMMON_Q_ENABLE: + value = vs->vs_curq < vc->vc_nvq ? + !!(vs->vs_queues[vs->vs_curq].vq_flags & VQ_ENABLED) : 0; + break; + case VIRTIO_PCI_COMMON_Q_NOFF: + /* queue_notify_off is equal to qid for now */ + value = vs->vs_curq; + break; + case VIRTIO_PCI_COMMON_Q_DESCLO: + if (vs->vs_curq < vc->vc_nvq) + value = vs->vs_queues[vs->vs_curq].vq_desc_gpa & + 0xffffffff; + break; + case VIRTIO_PCI_COMMON_Q_DESCHI: + if (vs->vs_curq < vc->vc_nvq) + value = vs->vs_queues[vs->vs_curq].vq_desc_gpa >> 32; + break; + case VIRTIO_PCI_COMMON_Q_AVAILLO: + if (vs->vs_curq < vc->vc_nvq) + value = vs->vs_queues[vs->vs_curq].vq_avail_gpa & + 0xffffffff; + break; + case VIRTIO_PCI_COMMON_Q_AVAILHI: + if (vs->vs_curq < vc->vc_nvq) + value = vs->vs_queues[vs->vs_curq].vq_avail_gpa >> 32; + break; + case VIRTIO_PCI_COMMON_Q_USEDLO: + if (vs->vs_curq < vc->vc_nvq) + value = vs->vs_queues[vs->vs_curq].vq_used_gpa & + 0xffffffff; + break; + case VIRTIO_PCI_COMMON_Q_USEDHI: + if (vs->vs_curq < vc->vc_nvq) + value = vs->vs_queues[vs->vs_curq].vq_used_gpa >> 32; + break; + } + +done: + value &= mask; + return (value); } -int -vi_pci_resume(struct vmctx *ctx, struct pci_devinst *pi) +/* + * Virtio modern: + * Handle pci config space writes to common config structure. + */ +static void +vi_pci_common_cfg_write(struct virtio_softc *vs, int vcpu, + uint64_t offset, int size, uint64_t value) { - struct virtio_softc *vs; + uint64_t mask = size == 1 ? 0xff : size == 2 ? 
0xffff : 0xffffffff; struct virtio_consts *vc; + struct vqueue_info *vq; + struct config_reg *cr; + const char *name; - vs = pi->pi_arg; - vc = vs->vs_vc; + /* Checked by caller */ + assert(size == 1 || size == 2 || size == 4); vc = vs->vs_vc; - assert(vc->vc_resume != NULL); - (*vc->vc_resume)(DEV_SOFTC(vs)); - - return (0); -} + name = vc->vc_name; + value &= mask; -static int -vi_pci_snapshot_softc(struct virtio_softc *vs, struct vm_snapshot_meta *meta) -{ - int ret; + cr = vi_find_cr(common_cfg_regs, nitems(common_cfg_regs), offset); + if (cr == NULL || cr->cr_size != size) { + /* Strict alignment and access size checking */ + if (cr != NULL) { + EPRINTLN( + "%s: write to %s: bad size %d", + name, cr->cr_name, size); + } else { + EPRINTLN( + "%s: write to bad offset/size %jd/%d", + name, (uintmax_t)offset, size); + } + return; + } - SNAPSHOT_VAR_OR_LEAVE(vs->vs_flags, meta, ret, done); + switch (offset) { + case VIRTIO_PCI_COMMON_DFSELECT: + if (value == 1) + vs->vs_flags |= VIRTIO_DFSELECT_HI; + else if (value == 0) + vs->vs_flags &= ~VIRTIO_DFSELECT_HI; + else { + EPRINTLN( + "%s: writing bad value to device_feature_select", + name); + goto bad_write; + } + break; + case VIRTIO_PCI_COMMON_GFSELECT: + if (value == 1) + vs->vs_flags |= VIRTIO_GFSELECT_HI; + else if (value == 0) + vs->vs_flags &= ~VIRTIO_GFSELECT_HI; + else { + EPRINTLN( + "%s: writing bad value to driver_feature_select", + name); + goto bad_write; + } + break; + case VIRTIO_PCI_COMMON_GF: + value &= vi_hv_features(vs, true) >> ((vs->vs_flags & VIRTIO_GFSELECT_HI) ? 32 : 0); + if (vs->vs_flags & VIRTIO_GFSELECT_HI) + vs->vs_negotiated_caps = (vs->vs_negotiated_caps & 0xffffffff) | + (value << 32); + else + vs->vs_negotiated_caps = + (vs->vs_negotiated_caps & 0xffffffff00000000) | value; + break; + case VIRTIO_PCI_COMMON_MSIX: + vs->vs_msix_cfg_idx = value; + break; + case VIRTIO_PCI_COMMON_STATUS: + if (value == 0) { + (*vc->vc_reset)(DEV_SOFTC(vs)); + vs->vs_status = value; + break; + } + if (!(vs->vs_status & VIRTIO_CONFIG_S_FEATURES_OK) && 
+ value & VIRTIO_CONFIG_S_FEATURES_OK) { + if (vc->vc_apply_features) + (*vc->vc_apply_features)(DEV_SOFTC(vs), + vs->vs_negotiated_caps); + } + vs->vs_status = value; + break; + case VIRTIO_PCI_COMMON_Q_SELECT: + if (value >= vc->vc_nvq) { + EPRINTLN("%s: queue select %d out of range", + name, (int)value); + goto bad_write; + } + vs->vs_curq = value; + break; + case VIRTIO_PCI_COMMON_Q_SIZE: + /* XXX: Check power of 2 */ + if (vs->vs_curq >= vc->vc_nvq) { + EPRINTLN("%s: setting queue size for %d out of range", + name, vs->vs_curq); + goto bad_write; + } + vq = &vs->vs_queues[vs->vs_curq]; + vq->vq_qsize = value; + break; + case VIRTIO_PCI_COMMON_Q_MSIX: + if (vs->vs_curq >= vc->vc_nvq) { + EPRINTLN( + "%s: setting msix vector of queue %d out of range", + name, vs->vs_curq); + goto bad_write; + } + vq = &vs->vs_queues[vs->vs_curq]; + vq->vq_msix_idx = value; + break; + case VIRTIO_PCI_COMMON_Q_ENABLE: + if (vs->vs_curq >= vc->vc_nvq) { + EPRINTLN("%s: enabling queue %d out of range", name, + vs->vs_curq); + goto bad_write; + } + vq = &vs->vs_queues[vs->vs_curq]; + if (!(vq->vq_flags & VQ_ENABLED) && value == 1) { + vi_vq_init(vs); + vq->vq_flags |= VQ_ENABLED; + } else if (!value) + vq->vq_flags &= ~VQ_ENABLED; + break; + case VIRTIO_PCI_COMMON_Q_DESCLO: + if (vs->vs_curq >= vc->vc_nvq) { + EPRINTLN( + "%s: setting desc ring of queue %d out of range", + name, vs->vs_curq); + goto bad_write; + } + vq = &vs->vs_queues[vs->vs_curq]; + vq->vq_desc_gpa = + (vq->vq_desc_gpa & 0xffffffff00000000) | value; + break; + case VIRTIO_PCI_COMMON_Q_DESCHI: + if (vs->vs_curq >= vc->vc_nvq) { + EPRINTLN( + "%s: setting desc ring of queue %d out of range", + name, vs->vs_curq); + goto bad_write; + } + vq = &vs->vs_queues[vs->vs_curq]; + vq->vq_desc_gpa = + (vq->vq_desc_gpa & 0xffffffff) | (value << 32); + break; + case VIRTIO_PCI_COMMON_Q_AVAILLO: + if (vs->vs_curq >= vc->vc_nvq) { + EPRINTLN( + "%s: setting avail ring of queue %d out of range", + name, vs->vs_curq); + goto 
bad_write; + } + vq = &vs->vs_queues[vs->vs_curq]; + vq->vq_avail_gpa = + (vq->vq_avail_gpa & 0xffffffff00000000) | value; + break; + case VIRTIO_PCI_COMMON_Q_AVAILHI: + if (vs->vs_curq >= vc->vc_nvq) { + EPRINTLN( + "%s: setting avail ring of queue %d out of range", + name, vs->vs_curq); + goto bad_write; + } + vq = &vs->vs_queues[vs->vs_curq]; + vq->vq_avail_gpa = + (vq->vq_avail_gpa & 0xffffffff) | (value << 32); + break; + case VIRTIO_PCI_COMMON_Q_USEDLO: + if (vs->vs_curq >= vc->vc_nvq) { + EPRINTLN( + "%s: setting used ring of queue %d out of range", + name, vs->vs_curq); + goto bad_write; + } + vq = &vs->vs_queues[vs->vs_curq]; + vq->vq_used_gpa = + (vq->vq_used_gpa & 0xffffffff00000000) | value; + break; + case VIRTIO_PCI_COMMON_Q_USEDHI: + if (vs->vs_curq >= vc->vc_nvq) { + EPRINTLN( + "%s: setting used ring of queue %d out of range", + name, vs->vs_curq); + goto bad_write; + } + vq = &vs->vs_queues[vs->vs_curq]; + vq->vq_used_gpa = + (vq->vq_used_gpa & 0xffffffff) | (value << 32); + break; + default: + EPRINTLN("%s: write to bad offset/size %jd/%d", name, + (uintmax_t)offset, size); + goto bad_write; + } + + return; + +bad_write: + return; +} + +/* + * Virtio modern: + * Handle pci mmio/pio reads to notification structure. + * + * Reading the structure always returns zero. + */ +static uint64_t +vi_pci_notify_cfg_read(struct virtio_softc *vs, int vcpu, uint64_t offset, + int size) +{ + return (0); +} + +/* + * Virtio modern: + * Handle pci mmio/pio writes to notification structure. + * + * VIRTIO_F_NOTIFICATION_DATA is not presented yet, so only + * consider the case writing vq index into the registers. 
+ */ +static void +vi_pci_notify_cfg_write(struct virtio_softc *vs, int vcpu, uint64_t offset, + int size, uint64_t value) +{ + struct virtio_consts *vc; + struct vqueue_info *vq; + unsigned int qid; + const char *name; + + vc = vs->vs_vc; + name = vc->vc_name; + qid = value; + + if (size != 2) { + EPRINTLN("%s: bad access at offset %" PRIu64, + name, offset); + return; + } + + if (!(vs->vs_status & VIRTIO_CONFIG_STATUS_DRIVER_OK)) + return; + + /* queue_notify_off is equal to qid for now */ + if (offset != qid * VQ_NOTIFY_OFF_MULTIPLIER) { + EPRINTLN( + "%s: queue %u notify does not have matching offset at %" PRIu64, + name, qid, offset); + return; + } + + if (qid >= vc->vc_nvq) { + EPRINTLN("%s: queue %u notify out of range", name, qid); + return; + } + + vq = &vs->vs_queues[qid]; + if (!(vq->vq_flags & VQ_ENABLED)) + return; + if (vq->vq_notify) + (*vq->vq_notify)(DEV_SOFTC(vs), vq); + else if (vc->vc_qnotify) + (*vc->vc_qnotify)(DEV_SOFTC(vs), vq); + else + EPRINTLN( + "%s: qnotify queue %u: missing vq/vc notify", name, qid); +} + +/* + * Virtio modern: + * Handle pci mmio/pio reads to ISR structure. + * + * The ISR structure has a relaxed requirement on alignment. + */ +static uint64_t +vi_pci_isr_cfg_read(struct virtio_softc *vs, int vcpu, uint64_t offset, + int size) +{ + uint64_t value; + + if (offset == 0) + value = vs->vs_isr; + else + value = 0; + + vs->vs_isr = 0; + return (value); +} + +/* + * Virtio modern: + * pci mmio/pio writes to ISR structure are disallowed. + */ +static void +vi_pci_isr_cfg_write(struct virtio_softc *vs, int vcpu, uint64_t offset, + int size, uint64_t value) +{ + const char *name = vs->vs_vc->vc_name; + + EPRINTLN("%s: invalid write into isr cfg", name); +} + +/* + * Virtio modern: + * Handle pci mmio/pio reads to device-specific config structure. 
+ */ +static uint64_t +vi_pci_dev_cfg_read(struct virtio_softc *vs, int vcpu, + uint64_t offset, int size) +{ + uint64_t max; + struct virtio_consts *vc; + uint32_t value; + + vc = vs->vs_vc; + value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff; + + max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000; + if (offset + size > max) + return (value); + + (*vc->vc_cfgread)(DEV_SOFTC(vs), offset, size, &value); + return (value); +} + +/* + * Virtio modern: + * Handle pci mmio/pio writes to device-specific config structure. + */ +static void +vi_pci_dev_cfg_write(struct virtio_softc *vs, int vcpu, uint64_t offset, + int size, uint64_t value) +{ + uint64_t mask = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff; + struct virtio_consts *vc = vs->vs_vc; + uint64_t max; + + value = value & mask; + + max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000; + if (offset + size > max) + return; + if (vc->vc_cfgwrite != NULL) + (*vc->vc_cfgwrite)(DEV_SOFTC(vs), offset, size, value); +} + +/* + * Check if pci config space access should be redirected or dropped. + */ +static bool +vi_pci_should_redirect(struct virtio_softc *vs, int offset, int size) +{ + int i; + + for (i = 0; i < vs->vs_ncfgs; i++) { + if (offset + size > vs->vs_cfgs[i].c_capoff && + (offset < vs->vs_cfgs[i].c_capoff + + vs->vs_cfgs[i].c_caplen)) + return (false); + } + return (true); +} + +/* + * Virtio modern: + * Handle reads to pci config access capability. + */ +int +vi_pci_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int offset, + int bytes, uint32_t *retval) +{ + uint32_t mask = bytes == 1 ? 0xff : bytes == 2 ? 
0xffff : 0xffffffff; + struct virtio_softc *vs = pi->pi_arg; + uint32_t baroff, barlen; + int baridx; + + if (vs->vs_pcicfg == NULL || + (offset != vs->vs_pcicfg->c_capoff + + __offsetof(struct virtio_pci_cfg_cap, pci_cfg_data)) || + (bytes != 1 && bytes != 2 && bytes != 4)) { + return (-1); + } + + if (vs->vs_mtx) + pthread_mutex_lock(vs->vs_mtx); + + baridx = pci_get_cfgdata8(pi, + vs->vs_pcicfg->c_capoff + __offsetof(struct virtio_pci_cap, bar)); + baroff = pci_get_cfgdata32(pi, + vs->vs_pcicfg->c_capoff + __offsetof(struct virtio_pci_cap, offset)); + barlen = pci_get_cfgdata32(pi, + vs->vs_pcicfg->c_capoff + __offsetof(struct virtio_pci_cap, length)); + if (baridx > PCIR_MAX_BAR_0) { + *retval = ~0 & mask; + goto done; + } + *retval = vi_modern_pci_read(vs, vcpu, baridx, baroff, barlen); + +done: + if (vs->vs_mtx) + pthread_mutex_unlock(vs->vs_mtx); + return (0); +} + +/* + * Virtio modern: + * Handle writes to pci config access capability. + */ +int +vi_pci_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int offset, int bytes, uint32_t val) +{ + struct virtio_softc *vs = pi->pi_arg; + uint32_t baroff, barlen; + int baridx; + + if (vs->vs_pcicfg == NULL || + (offset != vs->vs_pcicfg->c_capoff + + __offsetof(struct virtio_pci_cfg_cap, pci_cfg_data)) || + (bytes != 1 && bytes != 2 && bytes != 4)) { + if (vi_pci_should_redirect(vs, offset, bytes)) + /* Dispatch unrelated writes to pci emulation */ + return (-1); + /* Dropped */ + return (0); + } + + if (vs->vs_mtx) + pthread_mutex_lock(vs->vs_mtx); + + baridx = pci_get_cfgdata8(pi, + vs->vs_pcicfg->c_capoff + __offsetof(struct virtio_pci_cap, bar)); + baroff = pci_get_cfgdata32(pi, + vs->vs_pcicfg->c_capoff + __offsetof(struct virtio_pci_cap, offset)); + barlen = pci_get_cfgdata32(pi, + vs->vs_pcicfg->c_capoff + __offsetof(struct virtio_pci_cap, length)); + if (baridx > PCIR_MAX_BAR_0) + goto done; + vi_modern_pci_write(vs, vcpu, baridx, baroff, barlen, val); + +done: + if (vs->vs_mtx) + pthread_mutex_unlock(vs->vs_mtx); + return (0); +} + +/* + * Handle pci config space reads to virtio-related 
structures + */ +static uint64_t +vi_modern_pci_read(struct virtio_softc *vs, int vcpu, + int baridx, uint64_t offset, int size) +{ + uint64_t value = -1ull; + int i; + + for (i = 0; i < vs->vs_ncfgs; i++) { + if ((vs->vs_cfgs[i].c_captype == VIRTIO_PCI_CAP_PCI_CFG) || + (baridx != vs->vs_cfgs[i].c_baridx) || + (offset < vs->vs_cfgs[i].c_offset) || + (offset + size > vs->vs_cfgs[i].c_offset + + vs->vs_cfgs[i].c_size)) + continue; + + offset -= vs->vs_cfgs[i].c_offset; + + switch (vs->vs_cfgs[i].c_captype) { + case VIRTIO_PCI_CAP_COMMON_CFG: + value = vi_pci_common_cfg_read(vs, vcpu, offset, size); + break; + case VIRTIO_PCI_CAP_NOTIFY_CFG: + value = vi_pci_notify_cfg_read(vs, vcpu, offset, size); + break; + case VIRTIO_PCI_CAP_ISR_CFG: + value = vi_pci_isr_cfg_read(vs, vcpu, offset, size); + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + value = vi_pci_dev_cfg_read(vs, vcpu, offset, size); + break; + } + break; + } + + return (value); +} + +/* + * Handle pci config space writes to virtio-related structures + */ +static void +vi_modern_pci_write(struct virtio_softc *vs, int vcpu, + int baridx, uint64_t offset, int size, uint64_t value) +{ + int i; + + for (i = 0; i < vs->vs_ncfgs; i++) { + if ((baridx != vs->vs_cfgs[i].c_baridx) || + (offset < vs->vs_cfgs[i].c_offset) || + (offset + size > vs->vs_cfgs[i].c_offset + + vs->vs_cfgs[i].c_size)) + continue; + + offset -= vs->vs_cfgs[i].c_offset; + + switch (vs->vs_cfgs[i].c_captype) { + case VIRTIO_PCI_CAP_COMMON_CFG: + vi_pci_common_cfg_write(vs, vcpu, offset, size, value); + break; + case VIRTIO_PCI_CAP_NOTIFY_CFG: + vi_pci_notify_cfg_write(vs, vcpu, offset, size, + value); + break; + case VIRTIO_PCI_CAP_ISR_CFG: + vi_pci_isr_cfg_write(vs, vcpu, offset, size, value); + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + vi_pci_dev_cfg_write(vs, vcpu, offset, size, value); + break; + } + break; + } +} + +/* + * Handle virtio bar reads. + * + * If it's to the MSI-X info, dispatch the reads to the msix handling code. 
+ * Otherwise, dispatch the reads to virtio device code. + */ +uint64_t +vi_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int baridx, uint64_t offset, int size) +{ + struct virtio_softc *vs = pi->pi_arg; + struct virtio_consts *vc; + uint64_t value; + + if (vs->vs_flags & VIRTIO_USE_MSIX) { + if (baridx == pci_msix_table_bar(pi) || + baridx == pci_msix_pba_bar(pi)) { + return (pci_emul_msix_tread(pi, offset, size)); + } + } + + if (vs->vs_mtx) + pthread_mutex_lock(vs->vs_mtx); + + vc = vs->vs_vc; + value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff; + + if (size != 1 && size != 2 && size != 4) + goto done; + + if (!baridx) { + value = vi_legacy_pci_read(vs, vcpu, offset, size); + goto done; + } + + value = vi_modern_pci_read(vs, vcpu, baridx, offset, size); + +done: + if (vs->vs_mtx) + pthread_mutex_unlock(vs->vs_mtx); + return (value); +} + +/* + * Handle virtio bar writes. + * + * If it's to the MSI-X info, dispatch the writes to the msix handling code. + * Otherwise, dispatch the writes to virtio device code. 
+ */ +void +vi_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, + int baridx, uint64_t offset, int size, uint64_t value) +{ + struct virtio_softc *vs = pi->pi_arg; + struct virtio_consts *vc; + + vc = vs->vs_vc; + + if (vs->vs_flags & VIRTIO_USE_MSIX) { + if (baridx == pci_msix_table_bar(pi) || + baridx == pci_msix_pba_bar(pi)) { + pci_emul_msix_twrite(pi, offset, size, value); + return; + } + } + + if (vs->vs_mtx) + pthread_mutex_lock(vs->vs_mtx); + + if (size != 1 && size != 2 && size != 4) + goto done; + + if (!baridx) { + vi_legacy_pci_write(vs, vcpu, offset, size, value); + goto done; + } + + vi_modern_pci_write(vs, vcpu, baridx, offset, size, value); + +done: + if (vs->vs_mtx) + pthread_mutex_unlock(vs->vs_mtx); +} + +#ifdef BHYVE_SNAPSHOT +int +vi_pci_pause(struct vmctx *ctx, struct pci_devinst *pi) +{ + struct virtio_softc *vs; + struct virtio_consts *vc; + + vs = pi->pi_arg; + vc = vs->vs_vc; + + vc = vs->vs_vc; + assert(vc->vc_pause != NULL); + (*vc->vc_pause)(DEV_SOFTC(vs)); + + return (0); +} + +int +vi_pci_resume(struct vmctx *ctx, struct pci_devinst *pi) +{ + struct virtio_softc *vs; + struct virtio_consts *vc; + + vs = pi->pi_arg; + vc = vs->vs_vc; + + vc = vs->vs_vc; + assert(vc->vc_resume != NULL); + (*vc->vc_resume)(DEV_SOFTC(vs)); + + return (0); +} + +static int +vi_pci_snapshot_softc(struct virtio_softc *vs, struct vm_snapshot_meta *meta) +{ + int ret, i; + int pcicfg_idx; + + pcicfg_idx = -1; + + SNAPSHOT_VAR_OR_LEAVE(vs->vs_flags, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vs->vs_negotiated_caps, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vs->vs_curq, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vs->vs_status, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vs->vs_isr, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vs->vs_msix_cfg_idx, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vs->vs_devcfg_gen, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vs->vs_ncfgs, meta, ret, done); + for (i = 0; i < vs->vs_ncfgs; i++) { + 
SNAPSHOT_VAR_OR_LEAVE(vs->vs_cfgs[i].c_captype, meta, ret, + done); + SNAPSHOT_VAR_OR_LEAVE(vs->vs_cfgs[i].c_baridx, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vs->vs_cfgs[i].c_offset, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vs->vs_cfgs[i].c_size, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vs->vs_cfgs[i].c_capoff, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vs->vs_cfgs[i].c_caplen, meta, ret, done); + if (vs->vs_cfgs[i].c_captype == VIRTIO_PCI_CAP_PCI_CFG) + pcicfg_idx = i; + } + if (meta->op == VM_SNAPSHOT_RESTORE) { + if (pcicfg_idx != -1) + vs->vs_pcicfg = &vs->vs_cfgs[pcicfg_idx]; + } done: return (ret); @@ -869,7 +1818,10 @@ SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_nvq, meta, ret, done); SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_cfgsize, meta, ret, done); - SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_hv_caps, meta, ret, done); + SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_hv_caps_legacy, meta, ret, done); + SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_hv_caps_modern, meta, ret, done); + SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_en_legacy, meta, ret, done); + SNAPSHOT_VAR_CMP_OR_LEAVE(vc->vc_en_modern, meta, ret, done); done: return (ret); @@ -899,7 +1851,9 @@ SNAPSHOT_VAR_OR_LEAVE(vq->vq_save_used, meta, ret, done); SNAPSHOT_VAR_OR_LEAVE(vq->vq_msix_idx, meta, ret, done); - SNAPSHOT_VAR_OR_LEAVE(vq->vq_pfn, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vq->vq_desc_gpa, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vq->vq_avail_gpa, meta, ret, done); + SNAPSHOT_VAR_OR_LEAVE(vq->vq_used_gpa, meta, ret, done); addr_size = vq->vq_qsize * sizeof(struct vring_desc); SNAPSHOT_GUEST2HOST_ADDR_OR_LEAVE(vq->vq_desc, addr_size,