diff --git a/sys/dev/pci/pcireg.h b/sys/dev/pci/pcireg.h --- a/sys/dev/pci/pcireg.h +++ b/sys/dev/pci/pcireg.h @@ -1098,3 +1098,8 @@ #define PCIM_OSC_CTL_PCIE_PME 0x04 /* PCIe Native Power Mgt Events */ #define PCIM_OSC_CTL_PCIE_AER 0x08 /* PCIe Advanced Error Reporting */ #define PCIM_OSC_CTL_PCIE_CAP_STRUCT 0x10 /* Various Capability Structures */ + +/* + * PCI Vendors + */ +#define PCI_VENDOR_INTEL 0x8086 diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -42,6 +42,7 @@ pci_emul.c \ pci_hda.c \ pci_fbuf.c \ + pci_gvt-d.c \ pci_hostbridge.c \ pci_irq.c \ pci_lpc.c \ diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h --- a/usr.sbin/bhyve/pci_emul.h +++ b/usr.sbin/bhyve/pci_emul.h @@ -76,8 +76,8 @@ struct pci_devinst *pi, int baridx, uint64_t offset, int size); - void (*pe_baraddr)(struct vmctx *ctx, struct pci_devinst *pi, - int baridx, int enabled, uint64_t address); + int (*pe_baraddr)(struct vmctx *ctx, struct pci_devinst *pi, int baridx, + int enabled, uint64_t address); /* Save/restore device state */ int (*pe_snapshot)(struct vm_snapshot_meta *meta); @@ -99,6 +99,7 @@ enum pcibar_type type; /* io or memory */ uint64_t size; uint64_t addr; + uint8_t lobits; }; #define PI_NAMESZ 40 @@ -228,6 +229,7 @@ void pci_callback(void); int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, uint64_t size); +uint64_t pci_emul_alloc_gsm(uint64_t size); int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum); int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type); void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -73,6 +73,8 @@ #define MAXSLOTS (PCI_SLOTMAX + 1) #define MAXFUNCS (PCI_FUNCMAX + 1) +#define GB (1024 * 1024 * 1024UL) + struct funcinfo { nvlist_t *fi_config; struct pci_devemu 
*fi_pde; @@ -102,10 +104,21 @@ SET_DECLARE(pci_devemu_set, struct pci_devemu); static uint64_t pci_emul_iobase; +static uint64_t pci_emul_iolim; static uint64_t pci_emul_membase32; +static uint64_t pci_emul_memlim32; static uint64_t pci_emul_membase64; static uint64_t pci_emul_memlim64; +struct pcibarlist { + struct pci_devinst *pdi; + int idx; + enum pcibar_type type; + uint64_t size; + struct pcibarlist *next; +}; +struct pcibarlist *pci_bars; + #define PCI_EMUL_IOBASE 0x2000 #define PCI_EMUL_IOLIMIT 0x10000 @@ -114,6 +127,7 @@ SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE); #define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE +#define PCI_EMUL_MEMSIZE64 (32 * GB) static struct pci_devemu *pci_emul_finddev(const char *name); static void pci_lintr_route(struct pci_devinst *pi); @@ -512,6 +526,11 @@ struct mem_range mr; pe = pi->pi_d; + if (pe->pe_baraddr != NULL && + (*pe->pe_baraddr)( + pi->pi_vmctx, pi, idx, registration, pi->pi_bar[idx].addr) == 0) + return; + switch (pi->pi_bar[idx].type) { case PCIBAR_IO: bzero(&iop, sizeof(struct inout_port)); @@ -525,9 +544,6 @@ error = register_inout(&iop); } else error = unregister_inout(&iop); - if (pe->pe_baraddr != NULL) - (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, - pi->pi_bar[idx].addr); break; case PCIBAR_MEM32: case PCIBAR_MEM64: @@ -543,9 +559,6 @@ error = register_mem(&mr); } else error = unregister_mem(&mr); - if (pe->pe_baraddr != NULL) - (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration, - pi->pi_bar[idx].addr); break; default: error = EINVAL; @@ -597,8 +610,9 @@ * the address range decoded by the BAR register. 
*/ static void -update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type) +update_bar_address(struct pci_devinst *pi, int idx, uint32_t val) { + int update_idx = idx; int decode; if (pi->pi_bar[idx].type == PCIBAR_IO) @@ -606,38 +620,108 @@ else decode = memen(pi); - if (decode) - unregister_bar(pi, idx); - - switch (type) { + switch (pi->pi_bar[idx].type) { + case PCIBAR_MEMHI64: + --update_idx; case PCIBAR_IO: case PCIBAR_MEM32: - pi->pi_bar[idx].addr = addr; + case PCIBAR_MEM64: { + struct pcibar *bar = &pi->pi_bar[update_idx]; + + if (decode && bar->addr) + unregister_bar(pi, update_idx); + + if (val == ~0U) { + /* guest wants to read size of BAR */ + pci_set_cfgdata32(pi, PCIR_BAR(idx), ~0U); + bar->addr = 0; + break; + } + + /* guest sets address of BAR */ + uint64_t mask; + uint32_t bar_val; + mask = ~(bar->size - 1UL); + if (pi->pi_bar[idx].type == PCIBAR_MEMHI64) + mask >>= 32UL; + bar_val = val & mask; + bar_val |= pi->pi_bar[idx].lobits; + pci_set_cfgdata32(pi, PCIR_BAR(idx), bar_val); + + /* Only register BAR if it contains a valid address */ + uint32_t lo, hi; + lo = pci_get_cfgdata32(pi, PCIR_BAR(update_idx)); + hi = 0; + if (bar->type == PCIBAR_MEM64) + hi = pci_get_cfgdata32(pi, PCIR_BAR(update_idx + 1)); + if (lo == ~0U || hi == ~0U) { + bar->addr = 0; + break; + } + + if (bar->type == PCIBAR_IO) + lo &= PCIM_BAR_IO_BASE; + else + lo &= PCIM_BAR_MEM_BASE; + bar->addr = (uint64_t)lo | ((uint64_t)hi << 32UL); + if (decode) + register_bar(pi, update_idx); + break; - case PCIBAR_MEM64: - pi->pi_bar[idx].addr &= ~0xffffffffUL; - pi->pi_bar[idx].addr |= addr; + } + case PCIBAR_NONE: break; - case PCIBAR_MEMHI64: - pi->pi_bar[idx].addr &= 0xffffffff; - pi->pi_bar[idx].addr |= addr; + default: + assert(0); + } +} + +static uint32_t +read_bar_value(struct pci_devinst *pi, int coff, int bytes) +{ + uint8_t idx; + idx = (coff - PCIR_BAR(0)) / 4; + assert(idx <= PCI_BARMAX); + + uint8_t update_idx = idx; + uint64_t val; + + if 
(pi->pi_bar[idx].type == PCIBAR_MEMHI64) + --update_idx; + + val = pci_get_cfgdata32(pi, PCIR_BAR(idx)); + + /* return size of BAR */ + if (val == ~0U) { + val = ~(pi->pi_bar[update_idx].size - 1); + val |= pi->pi_bar[update_idx].lobits; + if (pi->pi_bar[idx].type == PCIBAR_MEMHI64) + val >>= 32; + } + + switch (bytes) { + case 1: + val = (val >> (8 * (coff & 0x03))) & 0xFF; + break; + case 2: + assert((coff & 0x01) == 0); + val = (val >> (8 * (coff & 0x02))) & 0xFFFF; + break; + case 4: + assert((coff & 0x03) == 0); + val = (uint32_t)val; break; default: assert(0); } - if (decode) - register_bar(pi, idx); + return val; } int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type, uint64_t size) { - int error; - uint64_t *baseptr, limit, addr, mask, lobits, bar; - uint16_t cmd, enbit; - assert(idx >= 0 && idx <= PCI_BARMAX); if ((size & (size - 1)) != 0) @@ -652,6 +736,45 @@ size = 16; } + struct pcibarlist *newBar = malloc(sizeof(struct pcibarlist)); + memset(newBar, 0, sizeof(struct pcibarlist)); + newBar->pdi = pdi; + newBar->idx = idx; + newBar->type = type; + newBar->size = size; + if (pci_bars == NULL) { + /* first BAR */ + pci_bars = newBar; + } else { + struct pcibarlist *bar = pci_bars; + struct pcibarlist *lastBar = NULL; + do { + if (bar->size < size) + break; + lastBar = bar; + bar = bar->next; + } while (bar != NULL); + newBar->next = bar; + if (lastBar != NULL) + lastBar->next = newBar; + else + pci_bars = newBar; + } + return (0); +} + +static int +pci_emul_assign_bar(struct pcibarlist *pci_bar) +{ + struct pci_devinst *pdi = pci_bar->pdi; + int idx = pci_bar->idx; + enum pcibar_type type = pci_bar->type; + uint64_t size = pci_bar->size; + + int error; + uint64_t *baseptr, limit, addr, mask, lobits; + uint16_t cmd, enbit; + switch (type) { case PCIBAR_NONE: baseptr = NULL; @@ -659,7 +782,7 @@ break; case PCIBAR_IO: baseptr = &pci_emul_iobase; - limit = PCI_EMUL_IOLIMIT; + limit = pci_emul_iolim; mask = PCIM_BAR_IO_BASE; lobits = 
PCIM_BAR_IO_SPACE; enbit = PCIM_CMD_PORTEN; @@ -670,25 +793,29 @@ * Some drivers do not work well if the 64-bit BAR is allocated * above 4GB. Allow for this by allocating small requests under * 4GB unless then allocation size is larger than some arbitrary - * number (128MB currently). + * number (256MB currently). */ - if (size > 128 * 1024 * 1024) { + if (size > 256 * 1024 * 1024) { baseptr = &pci_emul_membase64; limit = pci_emul_memlim64; mask = PCIM_BAR_MEM_BASE; lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | PCIM_BAR_MEM_PREFETCH; - } else { - baseptr = &pci_emul_membase32; - limit = PCI_EMUL_MEMLIMIT32; - mask = PCIM_BAR_MEM_BASE; - lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64; + enbit = PCIM_CMD_MEMEN; + break; } - enbit = PCIM_CMD_MEMEN; - break; + /* + * Use 32 bit BARs for small requests: + * Fallthrough into MEM32 case + */ + type = PCIBAR_MEM32; + pdi->pi_bar[idx + 1].type = PCIBAR_NONE; + /* clear 64-bit flag */ + pdi->pi_bar[idx].lobits &= ~PCIM_BAR_MEM_64; + /* [fallthrough] */ case PCIBAR_MEM32: baseptr = &pci_emul_membase32; - limit = PCI_EMUL_MEMLIMIT32; + limit = pci_emul_memlim32; mask = PCIM_BAR_MEM_BASE; lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; enbit = PCIM_CMD_MEMEN; @@ -705,27 +832,51 @@ } pdi->pi_bar[idx].type = type; - pdi->pi_bar[idx].addr = addr; + pdi->pi_bar[idx].addr = 0; pdi->pi_bar[idx].size = size; + /* passthru devices are using same lobits as physical device + * they set this property + */ + if (pdi->pi_bar[idx].lobits != 0) + lobits = pdi->pi_bar[idx].lobits; + else + pdi->pi_bar[idx].lobits = lobits; - /* Initialize the BAR register in config space */ - bar = (addr & mask) | lobits; - pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar); + /* Initialize CMD register in config space */ + cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); + if ((cmd & enbit) != enbit) + pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); + /* Initialize the BAR register in config space */ if (type == PCIBAR_MEM64) { assert(idx + 1 <= PCI_BARMAX); 
pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64; - pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32); + update_bar_address(pdi, idx + 1, addr); } - cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND); - if ((cmd & enbit) != enbit) - pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit); - register_bar(pdi, idx); + update_bar_address(pdi, idx, addr); return (0); } +uint64_t +pci_emul_alloc_gsm(uint64_t size) +{ + uint64_t *baseptr = &pci_emul_membase32; + uint64_t *limptr = &pci_emul_memlim32; + + /* align addr */ + const uint64_t addr = ((*limptr) - size) & ~(size - 1); + + /* if limit < base ==> ENOMEM */ + if ((*limptr) < (*baseptr)) + return 0; + + *limptr = addr; + + return addr; +} + #define CAP_START_OFFSET 0x40 static int pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) @@ -1134,6 +1285,9 @@ #define BUSIO_ROUNDUP 32 #define BUSMEM_ROUNDUP (1024 * 1024) +#define ALIGN_VALUE(Value, Alignment) \ + ((Value) + (((Alignment) - (Value)) & ((Alignment)-1))) + int init_pci(struct vmctx *ctx) { @@ -1146,25 +1300,18 @@ nvlist_t *nvl; const char *emul; size_t lowmem; - uint64_t cpu_maxphysaddr, pci_emul_memresv64; - u_int regs[4]; int bus, slot, func, error; pci_emul_iobase = PCI_EMUL_IOBASE; + pci_emul_iolim = PCI_EMUL_IOLIMIT; + pci_emul_membase32 = vm_get_lowmem_limit(ctx); + pci_emul_memlim32 = PCI_EMUL_MEMLIMIT32; - do_cpuid(0x80000008, regs); - cpu_maxphysaddr = 1ULL << (regs[0] & 0xff); - if (cpu_maxphysaddr > VM_MAXUSER_ADDRESS_LA48) - cpu_maxphysaddr = VM_MAXUSER_ADDRESS_LA48; - pci_emul_memresv64 = cpu_maxphysaddr / 4; - /* - * Max power of 2 that is less then - * cpu_maxphysaddr - pci_emul_memresv64. 
- */ - pci_emul_membase64 = 1ULL << (flsl(cpu_maxphysaddr - - pci_emul_memresv64) - 1); - pci_emul_memlim64 = cpu_maxphysaddr; + pci_emul_membase64 = 4 * GB + vm_get_highmem_size(ctx); + pci_emul_membase64 = ALIGN_VALUE( + pci_emul_membase64, PCI_EMUL_MEMSIZE64); + pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64; for (bus = 0; bus < MAXBUSES; bus++) { snprintf(node_name, sizeof(node_name), "pci.%d", bus); @@ -1182,6 +1329,7 @@ bi->membase32 = pci_emul_membase32; bi->membase64 = pci_emul_membase64; + /* first run: init devices */ for (slot = 0; slot < MAXSLOTS; slot++) { si = &bi->slotinfo[slot]; for (func = 0; func < MAXFUNCS; func++) { @@ -1221,6 +1369,16 @@ } } + /* second run: assign BARs and free BAR list */ + struct pcibarlist *bar = pci_bars; + while (bar != NULL) { + pci_emul_assign_bar(bar); + struct pcibarlist *old = bar; + bar = bar->next; + free(old); + } + pci_bars = NULL; /* every node was freed above; do not leave the list head dangling */ + /* * Add some slop to the I/O and memory resources decoded by * this bus to give a guest some flexibility if it wants to @@ -1865,7 +2022,6 @@ struct pci_devinst *pi; struct pci_devemu *pe; int idx, needcfg; - uint64_t addr, bar, mask; if ((bi = pci_businfo[bus]) != NULL) { si = &bi->slotinfo[slot]; @@ -1917,8 +2073,14 @@ needcfg = 1; } - if (needcfg) - *eax = CFGREAD(pi, coff, bytes); + if (needcfg) { + if (coff >= PCIR_BAR(0) && + coff < PCIR_BAR(PCI_BARMAX + 1)) { + *eax = read_bar_value(pi, coff, bytes); + } else { + *eax = CFGREAD(pi, coff, bytes); + } + } pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax); } else { @@ -1938,54 +2100,7 @@ if (bytes != 4 || (coff & 0x3) != 0) return; idx = (coff - PCIR_BAR(0)) / 4; - mask = ~(pi->pi_bar[idx].size - 1); - switch (pi->pi_bar[idx].type) { - case PCIBAR_NONE: - pi->pi_bar[idx].addr = bar = 0; - break; - case PCIBAR_IO: - addr = *eax & mask; - addr &= 0xffff; - bar = addr | PCIM_BAR_IO_SPACE; - /* - * Register the new BAR value for interception - */ - if (addr != pi->pi_bar[idx].addr) { - update_bar_address(pi, addr, idx, - PCIBAR_IO); - } 
- break; - case PCIBAR_MEM32: - addr = bar = *eax & mask; - bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32; - if (addr != pi->pi_bar[idx].addr) { - update_bar_address(pi, addr, idx, - PCIBAR_MEM32); - } - break; - case PCIBAR_MEM64: - addr = bar = *eax & mask; - bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 | - PCIM_BAR_MEM_PREFETCH; - if (addr != (uint32_t)pi->pi_bar[idx].addr) { - update_bar_address(pi, addr, idx, - PCIBAR_MEM64); - } - break; - case PCIBAR_MEMHI64: - mask = ~(pi->pi_bar[idx - 1].size - 1); - addr = ((uint64_t)*eax << 32) & mask; - bar = addr >> 32; - if (bar != pi->pi_bar[idx - 1].addr >> 32) { - update_bar_address(pi, addr, idx - 1, - PCIBAR_MEMHI64); - } - break; - default: - assert(0); - } - pci_set_cfgdata32(pi, coff, bar); - + update_bar_address(pi, idx, *eax); } else if (pci_emul_iscap(pi, coff)) { pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0); } else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) { diff --git a/usr.sbin/bhyve/pci_fbuf.c b/usr.sbin/bhyve/pci_fbuf.c --- a/usr.sbin/bhyve/pci_fbuf.c +++ b/usr.sbin/bhyve/pci_fbuf.c @@ -216,15 +216,15 @@ return (value); } -static void +static int pci_fbuf_baraddr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, - int enabled, uint64_t address) + int enabled, uint64_t address) { struct pci_fbuf_softc *sc; int prot; if (baridx != 1) - return; + return (-1); sc = pi->pi_arg; if (!enabled && sc->fbaddr != 0) { @@ -237,6 +237,8 @@ EPRINTLN("pci_fbuf: mmap_memseg failed"); sc->fbaddr = address; } + + return (0); } @@ -375,7 +377,7 @@ static int pci_fbuf_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) { - int error, prot; + int error; struct pci_fbuf_softc *sc; if (fbuf_sc != NULL) { @@ -393,6 +395,13 @@ pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_DISPLAY); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_DISPLAY_VGA); + sc->fb_base = vm_create_devmem( + ctx, VM_FRAMEBUFFER, "framebuffer", FB_SIZE); + if (sc->fb_base == MAP_FAILED) { + error = -1; + goto done; + } + error = pci_emul_alloc_bar(pi, 
0, PCIBAR_MEM32, DMEMSZ); assert(error == 0); @@ -402,7 +411,6 @@ error = pci_emul_add_msicap(pi, PCI_FBUF_MSI_MSGS); assert(error == 0); - sc->fbaddr = pi->pi_bar[1].addr; sc->memregs.fbsize = FB_SIZE; sc->memregs.width = COLS_DEFAULT; sc->memregs.height = ROWS_DEFAULT; @@ -423,27 +431,9 @@ goto done; } - sc->fb_base = vm_create_devmem(ctx, VM_FRAMEBUFFER, "framebuffer", FB_SIZE); - if (sc->fb_base == MAP_FAILED) { - error = -1; - goto done; - } DPRINTF(DEBUG_INFO, ("fbuf frame buffer base: %p [sz %lu]", sc->fb_base, FB_SIZE)); - /* - * Map the framebuffer into the guest address space. - * XXX This may fail if the BAR is different than a prior - * run. In this case flag the error. This will be fixed - * when a change_memseg api is available. - */ - prot = PROT_READ | PROT_WRITE; - if (vm_mmap_memseg(ctx, sc->fbaddr, VM_FRAMEBUFFER, 0, FB_SIZE, prot) != 0) { - EPRINTLN("pci_fbuf: mapseg failed - try deleting VM and restarting"); - error = -1; - goto done; - } - console_init(sc->memregs.width, sc->memregs.height, sc->fb_base); console_fb_register(pci_fbuf_render, sc); diff --git a/usr.sbin/bhyve/pci_gvt-d.c b/usr.sbin/bhyve/pci_gvt-d.c new file mode 100644 --- /dev/null +++ b/usr.sbin/bhyve/pci_gvt-d.c @@ -0,0 +1,385 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "inout.h" +#include "pci_passthru.h" + +#define MB (1024 * 1024UL) + +/* + * PCI definitions + */ +#define PCIR_GGC 0x50 /* GMCH Graphics Control register */ +#define PCIR_BDSM 0x5C /* Base Data of Stolen Memory register */ +#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */ +#define PCIM_GEN5_75_GGC_GMS_MASK \ + 0x000000F0 /* Bits 7:4 contain Graphics Mode Select */ +#define PCIM_GEN6_GGC_GMS_MASK \ + 0x000000F8 /* Bits 7:3 contain Graphics Mode Select */ +#define PCIM_GEN8_GGC_GMS_MASK \ + 0x0000FF00 /* Bits 15:8 contain Graphics Mode Select */ +#define PCIM_BDSM_GSM_MASK \ + 0xFFF00000 /* Bits 31:20 contain base address of gsm */ +#define PCIM_ASLS_OPREGION_MASK 0xFFFFF000 /* Opregion is 4k aligned */ +#define GPU_OPREGION_LEN 0x00004000 /* Size of Opregion (16 KB) */ + +/* + * Known device ids for different generations of Intel graphics + * see https://www.graphics-drivers.eu/intel-pci-hardware-id-string.html for + * complete list + */ +/* Westmere & Ironlake */ +static const 
uint16_t igd_devid_gen5_75[] = { 0x0042, 0x0046 }; +/* Sandy Bridge */ +static const uint16_t igd_devid_gen6[] = { 0x0102, 0x0106, 0x010A, 0x0112, + 0x0116, 0x0122, 0x0126 }; +/* Ivy Bridge */ +static const uint16_t igd_devid_gen7[] = { 0x0152, 0x0156, 0x015A, 0x0162, + 0x0166, 0x016A }; +/* Haswell */ +static const uint16_t igd_devid_gen7_5[] = { 0x0402, 0x0406, 0x040A, 0x0412, + 0x0416, 0x041A, 0x041E, 0x0A06, 0x0A0E, 0x0A16, 0x0A1E, 0x0A26, 0x0A2E, + 0x0C02, 0x0C06, 0x0C12, 0x0C16, 0x0C22, 0x0C26, 0x0D06, 0x0D16, 0x0D22, + 0x0D26 }; +/* Broadwell */ +static const uint16_t igd_devid_gen8[] = { 0x1606, 0x160E, 0x1612, 0x1616, + 0x161A, 0x161E, 0x1622, 0x1626, 0x162A, 0x162B }; +/* Skylake */ +static const uint16_t igd_devid_gen9[] = { 0x1902, 0x1906, 0x190B, 0x190E, + 0x1912, 0x1913, 0x1916, 0x1917, 0x191B, 0x191D, 0x191E, 0x1921, 0x1923, + 0x1926, 0x1927, 0x192B, 0x192D, 0x1932, 0x193A, 0x193B, 0x193D }; +/* Kaby Lake & Whiskey Lake & Amber Lake & Coffee Lake & Comet Lake */ +static const uint16_t igd_devid_gen9_5[] = { 0x3E90, 0x3E91, 0x3E92, 0x3E93, + 0x3E94, 0x3E96, 0x3E98, 0x3E99, 0x3E9A, 0x3E9B, 0x3E9C, 0x3EA0, 0x3EA1, + 0x3EA5, 0x3EA6, 0x3EA7, 0x3EA8, 0x3EA9, 0x5902, 0x5906, 0x590B, 0x5912, + 0x5916, 0x5917, 0x591B, 0x591C, 0x591D, 0x591E, 0x5921, 0x5926, 0x5927, + 0x87C0, 0x87CA, 0x9B21, 0x9B41, 0x9BA2, 0x9BA4, 0x9BA5, 0x9BA8, 0x9BAA, + 0x9BAC, 0x9BC2, 0x9BC4, 0x9BC5, 0x9BC6, 0x9BC8, 0x9BCA, 0x9BCC, 0x9BE6, + 0x9BF6 }; + +static int +array_contains(const uint16_t *array, uint64_t elements, uint16_t item) +{ + for (uint64_t i = 0; i < elements; ++i) + if (array[i] == item) + return 1; + return 0; +} + +#define IGD_FUNC_IS_IGD_GEN(gen) \ + static int igd_gen##gen##_is_igd_gen(int devid) \ + { \ + return array_contains(igd_devid_gen##gen, \ + sizeof(igd_devid_gen##gen) / sizeof(uint16_t), devid); \ + } + +/* GVT-d definitions */ +#define GVT_D_MAP_OPREGION 0 +#define GVT_D_MAP_GSM 1 + +/* + * Handler for passthru of igd + * + * Keep it as struct instead of a 
single function pointer, since new + * generations of Intel graphics could need other funcs. + * e.g. Intel Elkhartlake and Intel Tigerlake: + * They will need different handling for GSM and Opregion (See ACRN-Hypervisor + * ) + */ +struct igd_funcs { + int (*is_igd_gen)(int devid); + uint64_t (*get_gsm_len)(struct vmctx *ctx, struct passthru_softc *sc); +}; + +/* Handler for igd of gen5.75 (Westmere & Ironlake) */ +IGD_FUNC_IS_IGD_GEN(5_75); + +static uint64_t +igd_gen5_75_get_gsm_len(struct vmctx *ctx, struct passthru_softc *sc) +{ + uint16_t ggc_val = read_config(&sc->psc_sel, PCIR_GGC, 2); + uint8_t gms_val = (ggc_val & PCIM_GEN5_75_GGC_GMS_MASK) >> + 4; /* Bits 7:4 contain Graphics Mode Select */ + switch (gms_val) { + case 0x05: + return 32 * MB; + case 0x06: + return 48 * MB; + case 0x07: + return 64 * MB; + case 0x08: + return 128 * MB; + case 0x09: + return 256 * MB; + case 0x0A: + return 96 * MB; + case 0x0B: + return 160 * MB; + case 0x0C: + return 224 * MB; + case 0x0D: + return 352 * MB; + } + + warnx("Unknown Graphic Mode (%x)", gms_val); + return 0; +} + +/* Handler for igd of gen6 (Sandy Bridge) */ +IGD_FUNC_IS_IGD_GEN(6); + +static uint64_t +igd_gen6_get_gsm_len(struct vmctx *ctx, struct passthru_softc *sc) +{ + uint16_t ggc_val = read_config(&sc->psc_sel, PCIR_GGC, 2); + uint8_t gms_val = (ggc_val & PCIM_GEN6_GGC_GMS_MASK) >> + 3; /* Bits 7:3 contain Graphics Mode Select */ + if (gms_val <= 0x10) + return gms_val * 32 * MB; + + warnx("Unknown Graphic Mode (%x)", gms_val); + return 0; +} + +/* Handler for igd of gen7 (Ivy Bridge) */ +IGD_FUNC_IS_IGD_GEN(7); + +/* Handler for igd of gen7.5 (Haswell) */ +IGD_FUNC_IS_IGD_GEN(7_5); + +/* Handler for igd of gen8 (Broadwell) */ +IGD_FUNC_IS_IGD_GEN(8); + +static uint64_t +igd_gen8_get_gsm_len(struct vmctx *ctx, struct passthru_softc *sc) +{ + uint16_t ggc_val = read_config(&sc->psc_sel, PCIR_GGC, 2); + uint8_t gms_val = (ggc_val & PCIM_GEN8_GGC_GMS_MASK) >> + 8; /* Bits 15:8 contain Graphics Mode Select 
*/ + if ((gms_val <= 0x10) || (gms_val == 0x20) || (gms_val == 0x30) || + (gms_val == 0x3F)) + return gms_val * 32 * MB; + + warnx("Unknown Graphic Mode (%x)", gms_val); + return 0; +} + +/* Handler for igd of gen9 (Skylake) */ +IGD_FUNC_IS_IGD_GEN(9); + +static uint64_t +igd_gen9_get_gsm_len(struct vmctx *ctx, struct passthru_softc *sc) +{ + uint16_t ggc_val = read_config(&sc->psc_sel, PCIR_GGC, 2); + uint8_t gms_val = (ggc_val & PCIM_GEN8_GGC_GMS_MASK) >> + 8; /* Bits 15:8 contain Graphics Mode Select */ + if ((gms_val <= 0x10) || (gms_val == 0x20) || (gms_val == 0x30) || + (gms_val == 0x40)) + return gms_val * 32 * MB; + else if (gms_val >= 0xF0 && gms_val <= 0xFE) + return (gms_val - 0xF0 + 1) * 4 * MB; /* 0xF0..0xFE: 4 MB steps starting at 4 MB */ + + warnx("Unknown Graphic Mode (%x)", gms_val); + return 0; +} + +/* + * Handler for igd of gen9.5 (Kaby Lake & Whiskey Lake & Amber Lake & Coffee + * Lake & Comet Lake) + */ +IGD_FUNC_IS_IGD_GEN(9_5); + +/* Westmere & Ironlake */ +static const struct igd_funcs igd_gen5_75 = { + .is_igd_gen = igd_gen5_75_is_igd_gen, + .get_gsm_len = igd_gen5_75_get_gsm_len +}; +/* Sandy Bridge */ +static const struct igd_funcs igd_gen6 = { .is_igd_gen = igd_gen6_is_igd_gen, + .get_gsm_len = igd_gen6_get_gsm_len }; +/* Ivy Bridge */ +static const struct igd_funcs igd_gen7 = { .is_igd_gen = igd_gen7_is_igd_gen, + .get_gsm_len = igd_gen6_get_gsm_len }; +/* Haswell */ +static const struct igd_funcs igd_gen7_5 = { + .is_igd_gen = igd_gen7_5_is_igd_gen, + .get_gsm_len = igd_gen6_get_gsm_len +}; +/* Broadwell */ +static const struct igd_funcs igd_gen8 = { .is_igd_gen = igd_gen8_is_igd_gen, + .get_gsm_len = igd_gen8_get_gsm_len }; +/* Skylake */ +static const struct igd_funcs igd_gen9 = { .is_igd_gen = igd_gen9_is_igd_gen, + .get_gsm_len = igd_gen9_get_gsm_len }; +/* Kaby Lake & Whiskey Lake & Amber Lake & Coffee Lake & Comet Lake */ +static const struct igd_funcs igd_gen9_5 = { + .is_igd_gen = igd_gen9_5_is_igd_gen, + .get_gsm_len = igd_gen9_get_gsm_len +}; + +static const struct igd_funcs 
*igd_gen_map[] = { &igd_gen5_75, &igd_gen6, + &igd_gen7, &igd_gen7_5, &igd_gen8, &igd_gen9, &igd_gen9_5 }; + +static const struct igd_funcs * +get_igd_funcs(const uint16_t devid) +{ + for (int i = 0; i < sizeof(igd_gen_map) / sizeof(struct igd_funcs *); + ++i) { + if (igd_gen_map[i]->is_igd_gen(devid)) + return igd_gen_map[i]; + } + return NULL; +} + +int +gvt_d_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) +{ + int error; + struct passthru_softc *sc; + + sc = pi->pi_arg; + + /* check vendor == Intel */ + const uint16_t dev_vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 2); + if (dev_vendor != 0x8086) { + warnx("Unknown vendor (%x) of igd", dev_vendor); + return -ENODEV; + } + + /* check if device is a display device */ + if (read_config(&sc->psc_sel, PCIR_CLASS, 1) != PCIC_DISPLAY) { + warnx("%s is no display device", pi->pi_name); + return -ENODEV; + } + + /* Get IGD funcs */ + const struct igd_funcs *igd = get_igd_funcs( + read_config(&sc->psc_sel, PCIR_DEVICE, 2)); + if (igd == NULL) { + warnx("Unsupported igd-device (%x)", + read_config(&sc->psc_sel, PCIR_DEVICE, 2)); + return -ENODEV; + } + + struct passthru_mmio_mapping *opregion = + &sc->psc_mmio_map[GVT_D_MAP_OPREGION]; + struct passthru_mmio_mapping *gsm = &sc->psc_mmio_map[GVT_D_MAP_GSM]; + + /* Get Opregion length */ + opregion->len = GPU_OPREGION_LEN; + /* Get Opregion HPA */ + opregion->hpa = read_config(&sc->psc_sel, PCIR_ASLS_CTL, 4) & + PCIM_ASLS_OPREGION_MASK; + /* Get Graphics Stolen Memory len */ + gsm->len = igd->get_gsm_len(ctx, sc); + /* Get Graphics Stolen Memory HPA */ + gsm->hpa = read_config(&sc->psc_sel, PCIR_BDSM, 4) & PCIM_BDSM_GSM_MASK; + + if (opregion->len == 0 || gsm->len == 0) { + warnx("Could not determine size of opregion or gsm"); + return -ENODEV; + } + + /* Allocate Opregion and GSM in guest space */ + gsm->gpa = pci_emul_alloc_gsm(gsm->len); + opregion->gpa = pci_emul_alloc_gsm(opregion->len); + if (opregion->gpa == 0 || gsm->gpa == 0) { + error = -ENOMEM; 
+ goto failed_opregion; + } + + /* Write address of Opregion and GSM into PCI register */ + /* Set Opregion GPA */ + uint32_t asls_val = read_config(&sc->psc_sel, PCIR_ASLS_CTL, 4); + pci_set_cfgdata32(sc->psc_pi, PCIR_ASLS_CTL, + opregion->gpa | (asls_val & ~PCIM_ASLS_OPREGION_MASK)); + /* Set Graphics Stolen Memory GPA */ + uint32_t bdsm_val = read_config(&sc->psc_sel, PCIR_BDSM, 4); + pci_set_cfgdata32( + sc->psc_pi, PCIR_BDSM, gsm->gpa | (bdsm_val & ~PCIM_BDSM_GSM_MASK)); + + /* Map Opregion and GSM into guest space */ + if ((error = passthru_modify_pptdev_mmio( + ctx, sc, opregion, PT_MAP_PPTDEV_MMIO)) != 0) + goto failed_opregion; + if ((error = passthru_modify_pptdev_mmio( + ctx, sc, gsm, PT_MAP_PPTDEV_MMIO)) != 0) + goto failed_gsm; + + /* Protect PCI register */ + set_pcir_prot(sc, PCIR_ASLS_CTL, 0x04, PPT_PCIR_PROT_NA); + set_pcir_prot(sc, PCIR_BDSM, 0x04, PPT_PCIR_PROT_NA); + + return (0); + +failed_opregion: + opregion->gpa = 0; +failed_gsm: + gsm->gpa = 0; + return error; +} + +void +gvt_d_deinit(struct vmctx *ctx, struct pci_devinst *pi) +{ + struct passthru_softc *sc; + + sc = pi->pi_arg; + + struct passthru_mmio_mapping *gsm = &sc->psc_mmio_map[GVT_D_MAP_GSM]; + struct passthru_mmio_mapping *opregion = + &sc->psc_mmio_map[GVT_D_MAP_OPREGION]; + + /* GPA is only set, if it's initialized */ + if (gsm->gpa) + passthru_modify_pptdev_mmio(ctx, sc, gsm, PT_UNMAP_PPTDEV_MMIO); + if (opregion->gpa) + passthru_modify_pptdev_mmio( + ctx, sc, opregion, PT_UNMAP_PPTDEV_MMIO); +} diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c --- a/usr.sbin/bhyve/pci_lpc.c +++ b/usr.sbin/bhyve/pci_lpc.c @@ -33,9 +33,13 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include +#include +#include +#include #include #include #include @@ -85,6 +89,29 @@ "COM1", "COM2", "COM3", "COM4" }; +#ifndef _PATH_DEVPCI +#define _PATH_DEVPCI "/dev/pci" +#endif + +static int pcifd = -1; + +static uint32_t +read_config(struct pcisel *sel, long reg, int width) +{ + struct 
pci_io pi; + pi.pi_sel.pc_domain = sel->pc_domain; + pi.pi_sel.pc_bus = sel->pc_bus; + pi.pi_sel.pc_dev = sel->pc_dev; + pi.pi_sel.pc_func = sel->pc_func; + pi.pi_reg = reg; + pi.pi_width = width; + + if (ioctl(pcifd, PCIOCREAD, &pi) < 0) + return (0); + + return (pi.pi_data); +} + /* * LPC device configuration is in the following form: * [,] @@ -452,6 +479,40 @@ pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE); pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA); + /* open host device */ + if (pcifd < 0) { + pcifd = open(_PATH_DEVPCI, O_RDWR, 0); + if (pcifd < 0) { + warn("failed to open %s", _PATH_DEVPCI); + return (-1); + } + } + + /* on Intel systems lpc is always connected to 0:1f.0 */ + struct pcisel sel; + sel.pc_domain = 0; + sel.pc_bus = 0; + sel.pc_dev = 0x1f; + sel.pc_func = 0; + + if (read_config(&sel, PCIR_VENDOR, 2) == PCI_VENDOR_INTEL) { + /* + * The VID, DID, REVID, SUBVID and SUBDID of igd-lpc need to be + * aligned with the physical ones. Without these physical + * values, GVT-d GOP driver couldn't work. + */ + pci_set_cfgdata16( + pi, PCIR_DEVICE, read_config(&sel, PCIR_DEVICE, 2)); + pci_set_cfgdata16( + pi, PCIR_VENDOR, read_config(&sel, PCIR_VENDOR, 2)); + pci_set_cfgdata8( + pi, PCIR_REVID, read_config(&sel, PCIR_REVID, 1)); + pci_set_cfgdata16( + pi, PCIR_SUBVEND_0, read_config(&sel, PCIR_SUBVEND_0, 2)); + pci_set_cfgdata16( + pi, PCIR_SUBDEV_0, read_config(&sel, PCIR_SUBDEV_0, 2)); + } + lpc_bridge = pi; return (0); diff --git a/usr.sbin/bhyve/pci_passthru.h b/usr.sbin/bhyve/pci_passthru.h new file mode 100644 --- /dev/null +++ b/usr.sbin/bhyve/pci_passthru.h @@ -0,0 +1,80 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#pragma once + +#include + +#include + +#include "pci_emul.h" + +struct passthru_mmio_mapping { + uint64_t gpa; + uint64_t len; + uint64_t hpa; +}; + +struct passthru_softc { + struct pci_devinst *psc_pi; + struct pcibar psc_bar[PCI_BARMAX + 1]; + struct { + int capoff; + int msgctrl; + int emulated; + } psc_msi; + struct { + int capoff; + } psc_msix; + struct pcisel psc_sel; + + struct passthru_mmio_mapping psc_mmio_map[2]; + uint8_t psc_pcir_prot_map[(PCI_REGMAX + 1) / 4]; +}; + +#define PT_MAP_PPTDEV_MMIO 1 +#define PT_UNMAP_PPTDEV_MMIO 0 + +#define PPT_PCIR_PROT_NA 0 /* No Access to physical values */ +#define PPT_PCIR_PROT_RO 1 /* Read Only access to physical values */ +#define PPT_PCIR_PROT_WO 2 /* Write Only access to physical values */ +#define PPT_PCIR_PROT_RW \ + (PPT_PCIR_PROT_RO | \ + PPT_PCIR_PROT_WO) /* Read/Write access to physical values */ +#define PPT_PCIR_PROT_MASK 0x03 + +int passthru_modify_pptdev_mmio(struct vmctx *ctx, struct passthru_softc *sc, + struct passthru_mmio_mapping *map, int registration); +uint32_t read_config(const struct pcisel *sel, long reg, int width); +void write_config(const struct pcisel *sel, long reg, int width, uint32_t data); +int set_pcir_prot( + struct passthru_softc *sc, uint32_t reg, uint32_t len, uint8_t prot); +int gvt_d_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl); +void gvt_d_deinit(struct vmctx *ctx, struct pci_devinst *pi); diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c --- a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -48,22 +48,23 @@ #ifndef WITHOUT_CAPSICUM #include #endif -#include -#include -#include +#include + #include #include #include +#include +#include +#include #include #include #include -#include #include "config.h" #include "debug.h" -#include "pci_emul.h" #include "mem.h" +#include "pci_passthru.h" #ifndef _PATH_DEVPCI #define _PATH_DEVPCI "/dev/pci" @@ -86,20 +87,6 @@ static int iofd = -1; static 
int memfd = -1; -struct passthru_softc { - struct pci_devinst *psc_pi; - struct pcibar psc_bar[PCI_BARMAX + 1]; - struct { - int capoff; - int msgctrl; - int emulated; - } psc_msi; - struct { - int capoff; - } psc_msix; - struct pcisel psc_sel; -}; - static int msi_caplen(int msgctrl) { @@ -122,7 +109,7 @@ return (len); } -static uint32_t +uint32_t read_config(const struct pcisel *sel, long reg, int width) { struct pci_io pi; @@ -138,7 +125,7 @@ return (pi.pi_data); } -static void +void write_config(const struct pcisel *sel, long reg, int width, uint32_t data) { struct pci_io pi; @@ -152,6 +139,20 @@ (void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */ } +int +passthru_modify_pptdev_mmio(struct vmctx *ctx, struct passthru_softc *sc, + struct passthru_mmio_mapping *map, int registration) +{ /* Map the window described by map when registration == PT_MAP_PPTDEV_MMIO, otherwise unmap it; the device is selected by sc->psc_sel and the vmmapi call's result is returned unchanged. */ + if (registration == PT_MAP_PPTDEV_MMIO) + return vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, + sc->psc_sel.pc_dev, sc->psc_sel.pc_func, map->gpa, map->len, + map->hpa); + else + return vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus, + sc->psc_sel.pc_dev, sc->psc_sel.pc_func, map->gpa, + map->len); +} + #ifdef LEGACY_SUPPORT static int passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr) @@ -559,12 +560,23 @@ sc->psc_bar[i].type = bartype; sc->psc_bar[i].size = size; sc->psc_bar[i].addr = base; + sc->psc_bar[i].lobits = 0; /* Allocate the BAR in the guest I/O or MMIO space */ error = pci_emul_alloc_bar(pi, i, bartype, size); if (error) return (-1); + /* Use the same lobits as the physical BAR: the bits of its first byte that lie outside the address mask (PCIM_BAR_MEM_BASE or PCIM_BAR_IO_BASE) */ + uint8_t lobits = read_config(&sc->psc_sel, PCIR_BAR(i), 0x01); + if (bartype == PCIBAR_MEM32 || bartype == PCIBAR_MEM64) { + lobits &= ~PCIM_BAR_MEM_BASE; + } else { + lobits &= ~PCIM_BAR_IO_BASE; + } + sc->psc_bar[i].lobits = lobits; + pi->pi_bar[i].lobits = lobits; + /* The MSI-X table needs special handling */ if (i == pci_msix_table_bar(pi)) { error = init_msix_table(ctx, sc, base); @@ -610,14 +622,94 @@ goto done; } - pci_set_cfgdata16(pi, PCIR_COMMAND, read_config(&sc->psc_sel, - 
PCIR_COMMAND, 2)); + write_config( + &sc->psc_sel, PCIR_COMMAND, 2, pci_get_cfgdata16(pi, PCIR_COMMAND)); error = 0; /* success */ done: return (error); } +#define PPT_PCIR_PROT(reg) \ + ((sc->psc_pcir_prot_map[(reg) / 4] >> (((reg) & 0x03) * 2)) & PPT_PCIR_PROT_MASK) +/* psc_pcir_prot_map packs one 2-bit PPT_PCIR_PROT_* value per config register, four registers per byte: register r lives in byte r / 4 at bit offset (r & 0x03) * 2. PPT_PCIR_PROT() reads one entry and expects a local named sc. set_pcir_prot() stores prot for the len registers starting at reg; it returns -1 without changing anything if the range leaves [0, PCI_REGMAX], else 0. */ +int +set_pcir_prot( + struct passthru_softc *sc, uint32_t reg, uint32_t len, uint8_t prot) +{ + if (reg > PCI_REGMAX || len > PCI_REGMAX + 1 - reg) /* written so that reg + len cannot wrap */ + return (-1); + + prot &= PPT_PCIR_PROT_MASK; + + for (uint32_t i = reg; i < reg + len; ++i) { + /* delete old prot value */ + sc->psc_pcir_prot_map[i / 4] &= ~( + PPT_PCIR_PROT_MASK << ((i & 0x03) * 2)); + /* set new prot value */ + sc->psc_pcir_prot_map[i / 4] |= prot << ((i & 0x03) * 2); + } + + return (0); +} +/* Nonzero iff reg is a valid config register whose protection value includes PPT_PCIR_PROT_WO. */ +static int +is_pcir_writable(struct passthru_softc *sc, uint32_t reg) +{ + if (reg > PCI_REGMAX) + return (0); + + return ((PPT_PCIR_PROT(reg) & PPT_PCIR_PROT_WO) != 0); +} +/* Nonzero iff reg is a valid config register whose protection value includes PPT_PCIR_PROT_RO. */ +static int +is_pcir_readable(struct passthru_softc *sc, uint32_t reg) +{ + if (reg > PCI_REGMAX) + return (0); + + return ((PPT_PCIR_PROT(reg) & PPT_PCIR_PROT_RO) != 0); +} +/* Vendor-specific setup: only Intel display-class devices are handled (gvt_d_init(), whose result is returned); every other device yields 0. */ +static int +passthru_init_quirks(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) +{ + struct passthru_softc *sc = pi->pi_arg; + + uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02); + uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01); + + /* currently only display devices have quirks */ + if (class != PCIC_DISPLAY) + return (0); + + if (vendor == PCI_VENDOR_INTEL) + return (gvt_d_init(ctx, pi, nvl)); + + return (0); +} +/* Counterpart of passthru_init_quirks(): calls gvt_d_deinit() for Intel display-class devices; does nothing when pi->pi_arg is NULL. */ +static void +passthru_deinit_quirks(struct vmctx *ctx, struct pci_devinst *pi) +{ + struct passthru_softc *sc = pi->pi_arg; + + if (sc == NULL) + return; + + uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02); + uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01); + + /* currently only display devices have quirks */ + if (class != PCIC_DISPLAY) + return; + + if (vendor == PCI_VENDOR_INTEL) + gvt_d_deinit(ctx, pi); /* void function: call it, do not "return" its (non-)value */ + + return; +} + static int 
passthru_legacy_config(nvlist_t *nvl, const char *opts) { @@ -736,9 +828,21 @@ sc->psc_pi = pi; /* initialize config space */ - error = cfginit(ctx, pi, bus, slot, func); + if ((error = cfginit(ctx, pi, bus, slot, func)) != 0) + goto done; + + /* allow access to all PCI registers: start with every config register marked read/write */ + if ((error = set_pcir_prot(sc, 0, PCI_REGMAX + 1, PPT_PCIR_PROT_RW)) != + 0) + goto done; + + if ((error = passthru_init_quirks(ctx, pi, nvl)) != 0) + goto done; + + error = 0; /* success */ done: if (error) { + passthru_deinit_quirks(ctx, pi); /* NOTE(review): also reached when cfginit() or set_pcir_prot() failed, i.e. before passthru_init_quirks() ran -- confirm the quirk deinit tolerates that */ free(sc); vm_unassign_pptdev(ctx, bus, slot, func); } @@ -788,6 +892,10 @@ sc = pi->pi_arg; + /* skip for protected PCI registers: only the first byte (coff) of the access is checked */ + if (!is_pcir_readable(sc, coff)) + return (-1); + /* * PCI BARs and MSI capability is emulated. */ @@ -834,6 +942,10 @@ sc = pi->pi_arg; + /* skip for protected PCI registers: only the first byte (coff) of the access is checked */ + if (!is_pcir_writable(sc, coff)) + return (-1); + /* * PCI BARs are emulated */ @@ -958,7 +1070,7 @@ static void passthru_msix_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, - int enabled, uint64_t address) + int enabled, uint64_t address) { struct passthru_softc *sc; size_t remaining; @@ -966,21 +1078,15 @@ sc = pi->pi_arg; table_offset = rounddown2(pi->pi_msix.table_offset, 4096); + + struct passthru_mmio_mapping map; + if (table_offset > 0) { - if (!enabled) { - if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, address, - table_offset) != 0) - warnx("pci_passthru: unmap_pptdev_mmio failed"); - } else { - if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, address, - table_offset, - sc->psc_bar[baridx].addr) != 0) - warnx("pci_passthru: map_pptdev_mmio failed"); - } + map.gpa = address; + map.len = table_offset; /* BAR bytes in front of the 4096-aligned MSI-X table offset */ + map.hpa = sc->psc_bar[baridx].addr; + if (passthru_modify_pptdev_mmio(ctx, sc, &map, enabled) != 0) + warnx("pci_passthru: modify_pptdev_mmio failed"); } table_size = pi->pi_msix.table_offset - table_offset; table_size += 
pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; @@ -988,58 +1094,42 @@ remaining = pi->pi_bar[baridx].size - table_offset - table_size; if (remaining > 0) { address += table_offset + table_size; - if (!enabled) { - if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, address, - remaining) != 0) - warnx("pci_passthru: unmap_pptdev_mmio failed"); - } else { - if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, address, - remaining, - sc->psc_bar[baridx].addr + - table_offset + table_size) != 0) - warnx("pci_passthru: map_pptdev_mmio failed"); - } + map.gpa = address; /* address was advanced past table_offset + table_size above */ + map.len = remaining; + map.hpa = sc->psc_bar[baridx].addr + table_offset + table_size; + if (passthru_modify_pptdev_mmio(ctx, sc, &map, enabled) != 0) + warnx("pci_passthru: modify_pptdev_mmio failed"); } } static void passthru_mmio_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, - int enabled, uint64_t address) + int enabled, uint64_t address) { struct passthru_softc *sc; sc = pi->pi_arg; - if (!enabled) { - if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, address, - sc->psc_bar[baridx].size) != 0) - warnx("pci_passthru: unmap_pptdev_mmio failed"); - } else { - if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, address, - sc->psc_bar[baridx].size, - sc->psc_bar[baridx].addr) != 0) - warnx("pci_passthru: map_pptdev_mmio failed"); - } + + struct passthru_mmio_mapping map; /* the whole BAR: address -> sc->psc_bar[baridx].addr, psc_bar[baridx].size bytes */ + map.gpa = address; + map.len = sc->psc_bar[baridx].size; + map.hpa = sc->psc_bar[baridx].addr; + + if (passthru_modify_pptdev_mmio(ctx, sc, &map, enabled) != 0) + warnx("pci_passthru: modify_pptdev_mmio failed"); } -static void +static int passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, - int enabled, uint64_t address) + int enabled, uint64_t address) { - if (pi->pi_bar[baridx].type == PCIBAR_IO) - return; + return (-1); /* I/O BARs are not mapped by this callback */ if (baridx 
== pci_msix_table_bar(pi)) passthru_msix_addr(ctx, pi, baridx, enabled, address); else passthru_mmio_addr(ctx, pi, baridx, enabled, address); + return (0); /* the two helpers return void and only warnx() on map/unmap failure, so 0 is returned regardless */ } struct pci_devemu passthru = {