Page MenuHomeFreeBSD

D26209.id88259.diff
No OneTemporary

D26209.id88259.diff

diff --git a/sys/dev/pci/pcireg.h b/sys/dev/pci/pcireg.h
--- a/sys/dev/pci/pcireg.h
+++ b/sys/dev/pci/pcireg.h
@@ -1098,3 +1098,8 @@
#define PCIM_OSC_CTL_PCIE_PME 0x04 /* PCIe Native Power Mgt Events */
#define PCIM_OSC_CTL_PCIE_AER 0x08 /* PCIe Advanced Error Reporting */
#define PCIM_OSC_CTL_PCIE_CAP_STRUCT 0x10 /* Various Capability Structures */
+
+/*
+ * PCI Vendors
+ */
+#define PCI_VENDOR_INTEL 0x8086
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -42,6 +42,7 @@
pci_emul.c \
pci_hda.c \
pci_fbuf.c \
+ pci_gvt-d.c \
pci_hostbridge.c \
pci_irq.c \
pci_lpc.c \
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -1403,6 +1403,12 @@
fbsdrun_set_capabilities(ctx, BSP);
+ /*
+ * This function may adjust lowmem_limit;
+ * therefore, call it before vm_setup_memory().
+ */
+ pci_early_quirks(ctx);
+
memflags = 0;
if (get_config_bool_default("memory.wired", false))
memflags |= VM_MEM_F_WIRED;
diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -51,6 +51,9 @@
struct pci_devemu {
char *pe_emu; /* Name of device emulation */
+ /* Quirks; used chiefly by GVT-d to adjust Top of Low Usable DRAM (lowmem_limit). */
+ int (*pe_early_quirks)(struct vmctx *, const nvlist_t *nvl);
+
/* instance creation */
int (*pe_init)(struct vmctx *, struct pci_devinst *,
nvlist_t *);
@@ -76,8 +79,8 @@
struct pci_devinst *pi, int baridx,
uint64_t offset, int size);
- void (*pe_baraddr)(struct vmctx *ctx, struct pci_devinst *pi,
- int baridx, int enabled, uint64_t address);
+ int (*pe_baraddr)(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
+ int enabled, uint64_t address);
/* Save/restore device state */
int (*pe_snapshot)(struct vm_snapshot_meta *meta);
@@ -99,6 +102,7 @@
enum pcibar_type type; /* io or memory */
uint64_t size;
uint64_t addr;
+ uint8_t lobits;
};
#define PI_NAMESZ 40
@@ -224,10 +228,13 @@
typedef void (*pci_lintr_cb)(int b, int s, int pin, int pirq_pin,
int ioapic_irq, void *arg);
+int pci_early_quirks(struct vmctx *ctx);
int init_pci(struct vmctx *ctx);
void pci_callback(void);
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
enum pcibar_type type, uint64_t size);
+int pci_emul_adjust_gsmbase(struct vmctx *ctx, uint64_t size);
+uint64_t pci_emul_alloc_gsm(uint64_t size);
int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type);
void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes,
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -73,6 +73,11 @@
#define MAXSLOTS (PCI_SLOTMAX + 1)
#define MAXFUNCS (PCI_FUNCMAX + 1)
+#define GB (1024 * 1024 * 1024UL)
+
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
struct funcinfo {
nvlist_t *fi_config;
struct pci_devemu *fi_pde;
@@ -102,10 +107,23 @@
SET_DECLARE(pci_devemu_set, struct pci_devemu);
static uint64_t pci_emul_iobase;
+static uint64_t pci_emul_iolim;
+static uint64_t pci_emul_gsmbase;
+static uint64_t pci_emul_gsmlim;
static uint64_t pci_emul_membase32;
+static uint64_t pci_emul_memlim32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;
+struct pcibarlist {
+ struct pci_devinst *pdi;
+ int idx;
+ enum pcibar_type type;
+ uint64_t size;
+ struct pcibarlist *next;
+};
+struct pcibarlist *pci_bars;
+
#define PCI_EMUL_IOBASE 0x2000
#define PCI_EMUL_IOLIMIT 0x10000
@@ -113,7 +131,9 @@
#define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
+#define PCI_EMUL_MEMBASE32 0xC0000000
#define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE
+#define PCI_EMUL_MEMSIZE64 (32 * GB)
static struct pci_devemu *pci_emul_finddev(const char *name);
static void pci_lintr_route(struct pci_devinst *pi);
@@ -512,6 +532,11 @@
struct mem_range mr;
pe = pi->pi_d;
+ if (pe->pe_baraddr != NULL &&
+ (*pe->pe_baraddr)(
+ pi->pi_vmctx, pi, idx, registration, pi->pi_bar[idx].addr) == 0)
+ return;
+
switch (pi->pi_bar[idx].type) {
case PCIBAR_IO:
bzero(&iop, sizeof(struct inout_port));
@@ -525,9 +550,6 @@
error = register_inout(&iop);
} else
error = unregister_inout(&iop);
- if (pe->pe_baraddr != NULL)
- (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
- pi->pi_bar[idx].addr);
break;
case PCIBAR_MEM32:
case PCIBAR_MEM64:
@@ -543,9 +565,6 @@
error = register_mem(&mr);
} else
error = unregister_mem(&mr);
- if (pe->pe_baraddr != NULL)
- (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
- pi->pi_bar[idx].addr);
break;
default:
error = EINVAL;
@@ -597,8 +616,9 @@
* the address range decoded by the BAR register.
*/
static void
-update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
+update_bar_address(struct pci_devinst *pi, int idx, uint32_t val)
{
+ int update_idx = idx;
int decode;
if (pi->pi_bar[idx].type == PCIBAR_IO)
@@ -606,38 +626,108 @@
else
decode = memen(pi);
- if (decode)
- unregister_bar(pi, idx);
-
- switch (type) {
+ switch (pi->pi_bar[idx].type) {
+ case PCIBAR_MEMHI64:
+ --update_idx;
case PCIBAR_IO:
case PCIBAR_MEM32:
- pi->pi_bar[idx].addr = addr;
+ case PCIBAR_MEM64: {
+ struct pcibar *bar = &pi->pi_bar[update_idx];
+
+ if (decode && bar->addr)
+ unregister_bar(pi, update_idx);
+
+ if (val == ~0U) {
+ /* guest wants to read size of BAR */
+ pci_set_cfgdata32(pi, PCIR_BAR(idx), ~0U);
+ bar->addr = 0;
+ break;
+ }
+
+ /* guest sets address of BAR */
+ uint64_t mask;
+ uint32_t bar_val;
+ mask = ~(bar->size - 1UL);
+ if (pi->pi_bar[idx].type == PCIBAR_MEMHI64)
+ mask >>= 32UL;
+ bar_val = val & mask;
+ bar_val |= pi->pi_bar[idx].lobits;
+ pci_set_cfgdata32(pi, PCIR_BAR(idx), bar_val);
+
+ /* Only register BAR if it contains a valid address */
+ uint32_t lo, hi;
+ lo = pci_get_cfgdata32(pi, PCIR_BAR(update_idx));
+ hi = 0;
+ if (bar->type == PCIBAR_MEM64)
+ hi = pci_get_cfgdata32(pi, PCIR_BAR(update_idx + 1));
+ if (lo == ~0U || hi == ~0U) {
+ bar->addr = 0;
+ break;
+ }
+
+ if (bar->type == PCIBAR_IO)
+ lo &= PCIM_BAR_IO_BASE;
+ else
+ lo &= PCIM_BAR_MEM_BASE;
+ bar->addr = (uint64_t)lo | ((uint64_t)hi << 32UL);
+ if (decode)
+ register_bar(pi, update_idx);
+
break;
- case PCIBAR_MEM64:
- pi->pi_bar[idx].addr &= ~0xffffffffUL;
- pi->pi_bar[idx].addr |= addr;
+ }
+ case PCIBAR_NONE:
break;
- case PCIBAR_MEMHI64:
- pi->pi_bar[idx].addr &= 0xffffffff;
- pi->pi_bar[idx].addr |= addr;
+ default:
+ assert(0);
+ }
+}
+
+static uint32_t
+read_bar_value(struct pci_devinst *pi, int coff, int bytes)
+{
+ uint8_t idx;
+ idx = (coff - PCIR_BAR(0)) / 4;
+ assert(idx <= PCI_BARMAX);
+
+ uint8_t update_idx = idx;
+ uint64_t val;
+
+ if (pi->pi_bar[idx].type == PCIBAR_MEMHI64)
+ --update_idx;
+
+ val = pci_get_cfgdata32(pi, PCIR_BAR(idx));
+
+ /* return size of BAR */
+ if (val == ~0U) {
+ val = ~(pi->pi_bar[update_idx].size - 1);
+ val |= pi->pi_bar[update_idx].lobits;
+ if (pi->pi_bar[idx].type == PCIBAR_MEMHI64)
+ val >>= 32;
+ }
+
+ switch (bytes) {
+ case 1:
+ val = (val >> (8 * (coff & 0x03))) & 0xFF;
+ break;
+ case 2:
+ assert((coff & 0x01) == 0);
+ val = (val >> (8 * (coff & 0x02))) & 0xFFFF;
+ break;
+ case 4:
+ assert((coff & 0x03) == 0);
+ val = (uint32_t)val;
break;
default:
assert(0);
}
- if (decode)
- register_bar(pi, idx);
+ return val;
}
int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
uint64_t size)
{
- int error;
- uint64_t *baseptr, limit, addr, mask, lobits, bar;
- uint16_t cmd, enbit;
-
assert(idx >= 0 && idx <= PCI_BARMAX);
if ((size & (size - 1)) != 0)
@@ -652,6 +742,45 @@
size = 16;
}
+	struct pcibarlist *newBar = calloc(1, sizeof(struct pcibarlist));
+	assert(newBar != NULL); /* allocation was previously unchecked */
+	newBar->pdi = pdi;
+	newBar->idx = idx;
+	newBar->type = type;
+	newBar->size = size;
+	if (pci_bars == NULL) {
+		/* first BAR */
+		pci_bars = newBar;
+	} else {
+		struct pcibarlist *bar = pci_bars;
+		struct pcibarlist *lastBar = NULL;
+		do {
+			if (bar->size < size)
+				break;
+			lastBar = bar;
+			bar = bar->next;
+		} while (bar != NULL);
+		newBar->next = bar;
+		if (lastBar != NULL)
+			lastBar->next = newBar;
+		else
+			pci_bars = newBar;
+	}
+	return (0);
+}
+
+static int
+pci_emul_assign_bar(struct pcibarlist *pci_bar)
+{
+ struct pci_devinst *pdi = pci_bar->pdi;
+ int idx = pci_bar->idx;
+ enum pcibar_type type = pci_bar->type;
+ uint64_t size = pci_bar->size;
+
+ int error;
+ uint64_t *baseptr, limit, addr, mask, lobits;
+ uint16_t cmd, enbit;
+
switch (type) {
case PCIBAR_NONE:
baseptr = NULL;
@@ -659,7 +788,7 @@
break;
case PCIBAR_IO:
baseptr = &pci_emul_iobase;
- limit = PCI_EMUL_IOLIMIT;
+ limit = pci_emul_iolim;
mask = PCIM_BAR_IO_BASE;
lobits = PCIM_BAR_IO_SPACE;
enbit = PCIM_CMD_PORTEN;
@@ -670,25 +799,29 @@
* Some drivers do not work well if the 64-bit BAR is allocated
* above 4GB. Allow for this by allocating small requests under
* 4GB unless then allocation size is larger than some arbitrary
- * number (128MB currently).
+ * number (256MB currently).
*/
- if (size > 128 * 1024 * 1024) {
+ if (size > 256 * 1024 * 1024) {
baseptr = &pci_emul_membase64;
limit = pci_emul_memlim64;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
PCIM_BAR_MEM_PREFETCH;
- } else {
- baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
- mask = PCIM_BAR_MEM_BASE;
- lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
+ enbit = PCIM_CMD_MEMEN;
+ break;
}
- enbit = PCIM_CMD_MEMEN;
- break;
+ /*
+ * Use 32 bit BARs for small requests:
+ * Fallthrough into MEM32 case
+ */
+ type = PCIBAR_MEM32;
+ pdi->pi_bar[idx + 1].type = PCIBAR_NONE;
+ /* clear 64-bit flag */
+ pdi->pi_bar[idx].lobits &= ~PCIM_BAR_MEM_64;
+ /* [fallthrough] */
case PCIBAR_MEM32:
baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
+ limit = pci_emul_memlim32;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
enbit = PCIM_CMD_MEMEN;
@@ -705,27 +838,60 @@
}
pdi->pi_bar[idx].type = type;
- pdi->pi_bar[idx].addr = addr;
+ pdi->pi_bar[idx].addr = 0;
pdi->pi_bar[idx].size = size;
+ /* Passthru devices use the same lobits as the physical device;
+ * they set this property themselves.
+ */
+ if (pdi->pi_bar[idx].lobits != 0)
+ lobits = pdi->pi_bar[idx].lobits;
+ else
+ pdi->pi_bar[idx].lobits = lobits;
- /* Initialize the BAR register in config space */
- bar = (addr & mask) | lobits;
- pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);
+ /* Initialize CMD register in config space */
+ cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
+ if ((cmd & enbit) != enbit)
+ pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
+ /* Initialize the BAR register in config space */
if (type == PCIBAR_MEM64) {
assert(idx + 1 <= PCI_BARMAX);
pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
- pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
+ update_bar_address(pdi, idx + 1, addr);
}
- cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
- if ((cmd & enbit) != enbit)
- pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
- register_bar(pdi, idx);
+ update_bar_address(pdi, idx, addr);
return (0);
}
+#define ALIGNED_SUBTRACT(base, size) ((base - size) & ~(size - 1))
+
+int
+pci_emul_adjust_gsmbase(struct vmctx *ctx, uint64_t size)
+{
+ if (size > pci_emul_gsmbase)
+ return (-1);
+ pci_emul_gsmbase = ALIGNED_SUBTRACT(pci_emul_gsmbase, size);
+ vm_set_lowmem_limit(
+ ctx, min(vm_get_lowmem_limit(ctx), pci_emul_gsmbase));
+
+ return (0);
+}
+
+uint64_t
+pci_emul_alloc_gsm(uint64_t size)
+{
+ if (size > pci_emul_gsmlim)
+ return 0;
+ uint64_t addr = ALIGNED_SUBTRACT(pci_emul_gsmlim, size);
+ if (addr < pci_emul_gsmbase)
+ return 0;
+ pci_emul_gsmlim = addr;
+
+ return addr;
+}
+
#define CAP_START_OFFSET 0x40
static int
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
@@ -1131,9 +1297,51 @@
return (PCI_EMUL_ECFG_BASE);
}
+int
+pci_early_quirks(struct vmctx *ctx)
+{
+ pci_emul_gsmbase = PCI_EMUL_MEMBASE32;
+ pci_emul_gsmlim = PCI_EMUL_MEMBASE32;
+
+ for (int bus = 0; bus < MAXBUSES; ++bus) {
+ for (int slot = 0; slot < MAXSLOTS; ++slot) {
+ for (int func = 0; func < MAXFUNCS; ++func) {
+ char node_name[sizeof("pci.XXX.XX.X")];
+ snprintf(node_name, sizeof(node_name),
+ "pci.%d.%d.%d", bus, slot, func);
+ nvlist_t *nvl = find_config_node(node_name);
+ if (nvl == NULL)
+ continue;
+
+ const char *emul = get_config_value_node(
+ nvl, "device");
+ struct pci_devemu *pde = pci_emul_finddev(emul);
+ if (pde == NULL) {
+ EPRINTLN("pci slot %d:%d:%d: unknown "
+ "device \"%s\"",
+ bus, slot, func, emul);
+ return (EINVAL);
+ }
+
+ if (pde->pe_early_quirks) {
+ const int error = pde->pe_early_quirks(
+ ctx, nvl);
+ if (error)
+ return (error);
+ }
+ }
+ }
+ }
+
+ return (0);
+}
+
#define BUSIO_ROUNDUP 32
#define BUSMEM_ROUNDUP (1024 * 1024)
+#define ALIGN_VALUE(Value, Alignment) \
+ ((Value) + (((Alignment) - (Value)) & ((Alignment)-1)))
+
int
init_pci(struct vmctx *ctx)
{
@@ -1146,25 +1354,18 @@
nvlist_t *nvl;
const char *emul;
size_t lowmem;
- uint64_t cpu_maxphysaddr, pci_emul_memresv64;
- u_int regs[4];
int bus, slot, func, error;
pci_emul_iobase = PCI_EMUL_IOBASE;
- pci_emul_membase32 = vm_get_lowmem_limit(ctx);
+ pci_emul_iolim = PCI_EMUL_IOLIMIT;
- do_cpuid(0x80000008, regs);
- cpu_maxphysaddr = 1ULL << (regs[0] & 0xff);
- if (cpu_maxphysaddr > VM_MAXUSER_ADDRESS_LA48)
- cpu_maxphysaddr = VM_MAXUSER_ADDRESS_LA48;
- pci_emul_memresv64 = cpu_maxphysaddr / 4;
- /*
- * Max power of 2 that is less then
- * cpu_maxphysaddr - pci_emul_memresv64.
- */
- pci_emul_membase64 = 1ULL << (flsl(cpu_maxphysaddr -
- pci_emul_memresv64) - 1);
- pci_emul_memlim64 = cpu_maxphysaddr;
+ pci_emul_membase32 = PCI_EMUL_MEMBASE32;
+ pci_emul_memlim32 = PCI_EMUL_MEMLIMIT32;
+
+ pci_emul_membase64 = 4 * GB + vm_get_highmem_size(ctx);
+ pci_emul_membase64 = ALIGN_VALUE(
+ pci_emul_membase64, PCI_EMUL_MEMSIZE64);
+ pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64;
for (bus = 0; bus < MAXBUSES; bus++) {
snprintf(node_name, sizeof(node_name), "pci.%d", bus);
@@ -1182,6 +1383,7 @@
bi->membase32 = pci_emul_membase32;
bi->membase64 = pci_emul_membase64;
+ /* first run: init devices */
for (slot = 0; slot < MAXSLOTS; slot++) {
si = &bi->slotinfo[slot];
for (func = 0; func < MAXFUNCS; func++) {
@@ -1221,6 +1423,15 @@
}
}
+ /* second run: assign BARs and free BAR list */
+ struct pcibarlist *bar = pci_bars;
+ while (bar != NULL) {
+ pci_emul_assign_bar(bar);
+ struct pcibarlist *old = bar;
+ bar = bar->next;
+ free(old);
+ }
+
/*
* Add some slop to the I/O and memory resources decoded by
* this bus to give a guest some flexibility if it wants to
@@ -1266,7 +1477,8 @@
* The guest physical memory map looks like the following:
* [0, lowmem) guest system memory
* [lowmem, lowmem_limit) memory hole (may be absent)
- * [lowmem_limit, 0xE0000000) PCI hole (32-bit BAR allocation)
+ * [lowmem_limit, 0xC0000000) Graphics Stolen Memory (may be absent)
+ * [0xC0000000, 0xE0000000) PCI hole (32-bit BAR allocation)
* [0xE0000000, 0xF0000000) PCI extended config window
* [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware
* [4GB, 4GB + highmem)
@@ -1865,7 +2077,6 @@
struct pci_devinst *pi;
struct pci_devemu *pe;
int idx, needcfg;
- uint64_t addr, bar, mask;
if ((bi = pci_businfo[bus]) != NULL) {
si = &bi->slotinfo[slot];
@@ -1917,8 +2128,14 @@
needcfg = 1;
}
- if (needcfg)
- *eax = CFGREAD(pi, coff, bytes);
+ if (needcfg) {
+ if (coff >= PCIR_BAR(0) &&
+ coff < PCIR_BAR(PCI_BARMAX + 1)) {
+ *eax = read_bar_value(pi, coff, bytes);
+ } else {
+ *eax = CFGREAD(pi, coff, bytes);
+ }
+ }
pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax);
} else {
@@ -1938,54 +2155,7 @@
if (bytes != 4 || (coff & 0x3) != 0)
return;
idx = (coff - PCIR_BAR(0)) / 4;
- mask = ~(pi->pi_bar[idx].size - 1);
- switch (pi->pi_bar[idx].type) {
- case PCIBAR_NONE:
- pi->pi_bar[idx].addr = bar = 0;
- break;
- case PCIBAR_IO:
- addr = *eax & mask;
- addr &= 0xffff;
- bar = addr | PCIM_BAR_IO_SPACE;
- /*
- * Register the new BAR value for interception
- */
- if (addr != pi->pi_bar[idx].addr) {
- update_bar_address(pi, addr, idx,
- PCIBAR_IO);
- }
- break;
- case PCIBAR_MEM32:
- addr = bar = *eax & mask;
- bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
- if (addr != pi->pi_bar[idx].addr) {
- update_bar_address(pi, addr, idx,
- PCIBAR_MEM32);
- }
- break;
- case PCIBAR_MEM64:
- addr = bar = *eax & mask;
- bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
- PCIM_BAR_MEM_PREFETCH;
- if (addr != (uint32_t)pi->pi_bar[idx].addr) {
- update_bar_address(pi, addr, idx,
- PCIBAR_MEM64);
- }
- break;
- case PCIBAR_MEMHI64:
- mask = ~(pi->pi_bar[idx - 1].size - 1);
- addr = ((uint64_t)*eax << 32) & mask;
- bar = addr >> 32;
- if (bar != pi->pi_bar[idx - 1].addr >> 32) {
- update_bar_address(pi, addr, idx - 1,
- PCIBAR_MEMHI64);
- }
- break;
- default:
- assert(0);
- }
- pci_set_cfgdata32(pi, coff, bar);
-
+ update_bar_address(pi, idx, *eax);
} else if (pci_emul_iscap(pi, coff)) {
pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0);
} else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
diff --git a/usr.sbin/bhyve/pci_fbuf.c b/usr.sbin/bhyve/pci_fbuf.c
--- a/usr.sbin/bhyve/pci_fbuf.c
+++ b/usr.sbin/bhyve/pci_fbuf.c
@@ -216,15 +216,15 @@
return (value);
}
-static void
+static int
pci_fbuf_baraddr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
- int enabled, uint64_t address)
+ int enabled, uint64_t address)
{
struct pci_fbuf_softc *sc;
int prot;
if (baridx != 1)
- return;
+ return (-1);
sc = pi->pi_arg;
if (!enabled && sc->fbaddr != 0) {
@@ -237,6 +237,8 @@
EPRINTLN("pci_fbuf: mmap_memseg failed");
sc->fbaddr = address;
}
+
+ return (0);
}
@@ -375,7 +377,7 @@
static int
pci_fbuf_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
{
- int error, prot;
+ int error;
struct pci_fbuf_softc *sc;
if (fbuf_sc != NULL) {
@@ -393,6 +395,13 @@
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_DISPLAY);
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_DISPLAY_VGA);
+ sc->fb_base = vm_create_devmem(
+ ctx, VM_FRAMEBUFFER, "framebuffer", FB_SIZE);
+ if (sc->fb_base == MAP_FAILED) {
+ error = -1;
+ goto done;
+ }
+
error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM32, DMEMSZ);
assert(error == 0);
@@ -402,7 +411,6 @@
error = pci_emul_add_msicap(pi, PCI_FBUF_MSI_MSGS);
assert(error == 0);
- sc->fbaddr = pi->pi_bar[1].addr;
sc->memregs.fbsize = FB_SIZE;
sc->memregs.width = COLS_DEFAULT;
sc->memregs.height = ROWS_DEFAULT;
@@ -423,27 +431,9 @@
goto done;
}
- sc->fb_base = vm_create_devmem(ctx, VM_FRAMEBUFFER, "framebuffer", FB_SIZE);
- if (sc->fb_base == MAP_FAILED) {
- error = -1;
- goto done;
- }
DPRINTF(DEBUG_INFO, ("fbuf frame buffer base: %p [sz %lu]",
sc->fb_base, FB_SIZE));
- /*
- * Map the framebuffer into the guest address space.
- * XXX This may fail if the BAR is different than a prior
- * run. In this case flag the error. This will be fixed
- * when a change_memseg api is available.
- */
- prot = PROT_READ | PROT_WRITE;
- if (vm_mmap_memseg(ctx, sc->fbaddr, VM_FRAMEBUFFER, 0, FB_SIZE, prot) != 0) {
- EPRINTLN("pci_fbuf: mapseg failed - try deleting VM and restarting");
- error = -1;
- goto done;
- }
-
console_init(sc->memregs.width, sc->memregs.height, sc->fb_base);
console_fb_register(pci_fbuf_render, sc);
diff --git a/usr.sbin/bhyve/pci_gvt-d.c b/usr.sbin/bhyve/pci_gvt-d.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_gvt-d.c
@@ -0,0 +1,407 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <machine/vmm.h>
+
+#include <dev/pci/pcireg.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "inout.h"
+#include "pci_passthru.h"
+
+#define MB (1024 * 1024UL)
+
+/*
+ * PCI definitions
+ */
+#define PCIR_GGC 0x50 /* GMCH Graphics Control register */
+#define PCIR_BDSM 0x5C /* Base Data of Stolen Memory register */
+#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */
+#define PCIM_GEN5_75_GGC_GMS_MASK \
+ 0x000000F0 /* Bits 7:4 contain Graphics Mode Select */
+#define PCIM_GEN6_GGC_GMS_MASK \
+ 0x000000F8 /* Bits 7:3 contain Graphics Mode Select */
+#define PCIM_GEN8_GGC_GMS_MASK \
+ 0x0000FF00 /* Bits 15:8 contain Graphics Mode Select */
+#define PCIM_BDSM_GSM_MASK \
+ 0xFFF00000 /* Bits 31:20 contain base address of gsm */
+#define PCIM_ASLS_OPREGION_MASK 0xFFFFF000 /* Opregion is 4k aligned */
+#define GPU_OPREGION_LEN 0x00004000 /* Size of Opregion (16 KB) */
+
+/*
+ * Known device ids for different generations of Intel graphics
+ * see https://www.graphics-drivers.eu/intel-pci-hardware-id-string.html for
+ * complete list
+ */
+/* Westmere & Ironlake */
+static const uint16_t igd_devid_gen5_75[] = { 0x0042, 0x0046 };
+/* Sandy Bridge */
+static const uint16_t igd_devid_gen6[] = { 0x0102, 0x0106, 0x010A, 0x0112,
+ 0x0116, 0x0122, 0x0126 };
+/* Ivy Bridge */
+static const uint16_t igd_devid_gen7[] = { 0x0152, 0x0156, 0x015A, 0x0162,
+ 0x0166, 0x016A };
+/* Haswell */
+static const uint16_t igd_devid_gen7_5[] = { 0x0402, 0x0406, 0x040A, 0x0412,
+ 0x0416, 0x041A, 0x041E, 0x0A06, 0x0A0E, 0x0A16, 0x0A1E, 0x0A26, 0x0A2E,
+ 0x0C02, 0x0C06, 0x0C12, 0x0C16, 0x0C22, 0x0C26, 0x0D06, 0x0D16, 0x0D22,
+ 0x0D26 };
+/* Broadwell */
+static const uint16_t igd_devid_gen8[] = { 0x1606, 0x160E, 0x1612, 0x1616,
+ 0x161A, 0x161E, 0x1622, 0x1626, 0x162A, 0x162B };
+/* Skylake */
+static const uint16_t igd_devid_gen9[] = { 0x1902, 0x1906, 0x190B, 0x190E,
+ 0x1912, 0x1913, 0x1916, 0x1917, 0x191B, 0x191D, 0x191E, 0x1921, 0x1923,
+ 0x1926, 0x1927, 0x192B, 0x192D, 0x1932, 0x193A, 0x193B, 0x193D };
+/* Kaby Lake & Whiskey Lake & Amber Lake & Coffee Lake & Comet Lake */
+static const uint16_t igd_devid_gen9_5[] = { 0x3E90, 0x3E91, 0x3E92, 0x3E93,
+ 0x3E94, 0x3E96, 0x3E98, 0x3E99, 0x3E9A, 0x3E9B, 0x3E9C, 0x3EA0, 0x3EA1,
+ 0x3EA5, 0x3EA6, 0x3EA7, 0x3EA8, 0x3EA9, 0x5902, 0x5906, 0x590B, 0x5912,
+ 0x5916, 0x5917, 0x591B, 0x591C, 0x591D, 0x591E, 0x5921, 0x5926, 0x5927,
+ 0x87C0, 0x87CA, 0x9B21, 0x9B41, 0x9BA2, 0x9BA4, 0x9BA5, 0x9BA8, 0x9BAA,
+ 0x9BAC, 0x9BC2, 0x9BC4, 0x9BC5, 0x9BC6, 0x9BC8, 0x9BCA, 0x9BCC, 0x9BE6,
+ 0x9BF6 };
+
+static int
+array_contains(const uint16_t *array, uint64_t elements, uint16_t item)
+{
+ for (uint64_t i = 0; i < elements; ++i)
+ if (array[i] == item)
+ return 1;
+ return 0;
+}
+
+#define IGD_FUNC_IS_IGD_GEN(gen) \
+ static int igd_gen##gen##_is_igd_gen(int devid) \
+ { \
+ return array_contains(igd_devid_gen##gen, \
+ sizeof(igd_devid_gen##gen) / sizeof(uint16_t), devid); \
+ }
+
+/* GVT-d definitions */
+#define GVT_D_MAP_OPREGION 0
+#define GVT_D_MAP_GSM 1
+
+/*
+ * Handler for passthru of igd
+ *
+ * Keep it as struct instead of a single function pointer, since new
+ * generations of Intel graphics could need other funcs.
+ * e.g. Intel Elkhartlake and Intel Tigerlake:
+ * They will need different handling for GSM and Opregion (See ACRN-Hypervisor
+ * <https://github.com/projectacrn/acrn-hypervisor/blob/master/devicemodel/hw/pci/passthrough.c>)
+ */
+struct igd_funcs {
+ int (*is_igd_gen)(int devid);
+ uint64_t (*get_gsm_len)(struct vmctx *ctx, struct pcisel *sel);
+};
+
+/* Handler for igd of gen5.75 (Westmere & Ironlake) */
+IGD_FUNC_IS_IGD_GEN(5_75);
+
+static uint64_t
+igd_gen5_75_get_gsm_len(struct vmctx *ctx, struct pcisel *sel)
+{
+ uint16_t ggc_val = read_config(sel, PCIR_GGC, 2);
+ uint8_t gms_val = (ggc_val & PCIM_GEN5_75_GGC_GMS_MASK) >>
+ 4; /* Bits 7:4 contain Graphics Mode Select */
+ switch (gms_val) {
+ case 0x05:
+ return 32 * MB;
+ case 0x06:
+ return 48 * MB;
+ case 0x07:
+ return 64 * MB;
+ case 0x08:
+ return 128 * MB;
+ case 0x09:
+ return 256 * MB;
+ case 0x0A:
+ return 96 * MB;
+ case 0x0B:
+ return 160 * MB;
+ case 0x0C:
+ return 224 * MB;
+ case 0x0D:
+ return 352 * MB;
+ }
+
+ warnx("Unknown Graphic Mode (%x)", gms_val);
+ return 0;
+}
+
+/* Handler for igd of gen6 (Sandy Bridge) */
+IGD_FUNC_IS_IGD_GEN(6);
+
+static uint64_t
+igd_gen6_get_gsm_len(struct vmctx *ctx, struct pcisel *sel)
+{
+ uint16_t ggc_val = read_config(sel, PCIR_GGC, 2);
+ uint8_t gms_val = (ggc_val & PCIM_GEN6_GGC_GMS_MASK) >>
+ 3; /* Bits 7:3 contain Graphics Mode Select */
+ if (gms_val <= 0x10)
+ return gms_val * 32 * MB;
+
+ warnx("Unknown Graphic Mode (%x)", gms_val);
+ return 0;
+}
+
+/* Handler for igd of gen7 (Ivy Bridge) */
+IGD_FUNC_IS_IGD_GEN(7);
+
+/* Handler for igd of gen7.5 (Haswell) */
+IGD_FUNC_IS_IGD_GEN(7_5);
+
+/* Handler for igd of gen8 (Broadwell) */
+IGD_FUNC_IS_IGD_GEN(8);
+
+static uint64_t
+igd_gen8_get_gsm_len(struct vmctx *ctx, struct pcisel *sel)
+{
+ uint16_t ggc_val = read_config(sel, PCIR_GGC, 2);
+ uint8_t gms_val = (ggc_val & PCIM_GEN8_GGC_GMS_MASK) >>
+ 8; /* Bits 15:8 contain Graphics Mode Select */
+ if ((gms_val <= 0x10) || (gms_val == 0x20) || (gms_val == 0x30) ||
+ (gms_val == 0x3F))
+ return gms_val * 32 * MB;
+
+ warnx("Unknown Graphic Mode (%x)", gms_val);
+ return 0;
+}
+
+/* Handler for igd of gen9 (Skylake) */
+IGD_FUNC_IS_IGD_GEN(9);
+
+static uint64_t
+igd_gen9_get_gsm_len(struct vmctx *ctx, struct pcisel *sel)
+{
+	uint16_t ggc_val = read_config(sel, PCIR_GGC, 2);
+	uint8_t gms_val = (ggc_val & PCIM_GEN8_GGC_GMS_MASK) >>
+	    8; /* Bits 15:8 contain Graphics Mode Select */
+	if ((gms_val <= 0x10) || (gms_val == 0x20) || (gms_val == 0x30) ||
+	    (gms_val == 0x40))
+		return gms_val * 32 * MB;
+	else if (gms_val >= 0xF0 && gms_val <= 0xFE)
+		/* 0xF0..0xFE encode 4MB..60MB in 4MB steps */
+		return ((uint64_t)gms_val - 0xF0 + 1) * 4 * MB;
+
+	warnx("Unknown Graphic Mode (%x)", gms_val);
+	return 0;
+}
+
+/*
+ * Handler for igd of gen9.5 (Kaby Lake & Whiskey Lake & Amber Lake & Coffee
+ * Lake & Comet Lake)
+ */
+IGD_FUNC_IS_IGD_GEN(9_5);
+
+/* Westmere & Ironlake */
+static const struct igd_funcs igd_gen5_75 = {
+ .is_igd_gen = igd_gen5_75_is_igd_gen,
+ .get_gsm_len = igd_gen5_75_get_gsm_len
+};
+/* Sandy Bridge */
+static const struct igd_funcs igd_gen6 = { .is_igd_gen = igd_gen6_is_igd_gen,
+ .get_gsm_len = igd_gen6_get_gsm_len };
+/* Ivy Bridge */
+static const struct igd_funcs igd_gen7 = { .is_igd_gen = igd_gen7_is_igd_gen,
+ .get_gsm_len = igd_gen6_get_gsm_len };
+/* Haswell */
+static const struct igd_funcs igd_gen7_5 = {
+ .is_igd_gen = igd_gen7_5_is_igd_gen,
+ .get_gsm_len = igd_gen6_get_gsm_len
+};
+/* Broadwell */
+static const struct igd_funcs igd_gen8 = { .is_igd_gen = igd_gen8_is_igd_gen,
+ .get_gsm_len = igd_gen8_get_gsm_len };
+/* Skylake */
+static const struct igd_funcs igd_gen9 = { .is_igd_gen = igd_gen9_is_igd_gen,
+ .get_gsm_len = igd_gen9_get_gsm_len };
+/* Kaby Lake & Whiskey Lake & Amber Lake & Coffee Lake & Comet Lake */
+static const struct igd_funcs igd_gen9_5 = {
+ .is_igd_gen = igd_gen9_5_is_igd_gen,
+ .get_gsm_len = igd_gen9_get_gsm_len
+};
+
+static const struct igd_funcs *igd_gen_map[] = { &igd_gen5_75, &igd_gen6,
+ &igd_gen7, &igd_gen7_5, &igd_gen8, &igd_gen9, &igd_gen9_5 };
+
+static const struct igd_funcs *
+get_igd_funcs(const uint16_t devid)
+{
+ for (int i = 0; i < sizeof(igd_gen_map) / sizeof(struct igd_funcs *);
+ ++i) {
+ if (igd_gen_map[i]->is_igd_gen(devid))
+ return igd_gen_map[i];
+ }
+ return NULL;
+}
+
+int
+gvt_d_early_quirks(struct vmctx *ctx, struct pcisel *sel)
+{
+ /* Get IGD funcs */
+ const uint16_t dev = read_config(sel, PCIR_DEVICE, 2);
+ const struct igd_funcs *const igd = get_igd_funcs(dev);
+ if (igd == NULL) {
+ warnx("Unsupported igd-device (%x)", dev);
+ return -ENODEV;
+ }
+
+ /* Get Graphics Stolen Memory len */
+ const uint64_t gsm_len = igd->get_gsm_len(ctx, sel);
+ /* Get Opregion length */
+ const uint64_t opregion_len = GPU_OPREGION_LEN;
+
+ pci_emul_adjust_gsmbase(ctx, gsm_len);
+ pci_emul_adjust_gsmbase(ctx, opregion_len);
+
+ return (0);
+}
+
+int
+gvt_d_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
+{
+ int error;
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ /* check vendor == Intel */
+ const uint16_t dev_vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 2);
+ if (dev_vendor != 0x8086) {
+ warnx("Unknown vendor (%x) of igd", dev_vendor);
+ return -ENODEV;
+ }
+
+ /* check if device is a display device */
+ if (read_config(&sc->psc_sel, PCIR_CLASS, 1) != PCIC_DISPLAY) {
+ warnx("%s is no display device", pi->pi_name);
+ return -ENODEV;
+ }
+
+ /* Get IGD funcs */
+ const struct igd_funcs *igd = get_igd_funcs(
+ read_config(&sc->psc_sel, PCIR_DEVICE, 2));
+ if (igd == NULL) {
+ warnx("Unsupported igd-device (%x)",
+ read_config(&sc->psc_sel, PCIR_DEVICE, 2));
+ return -ENODEV;
+ }
+
+ struct passthru_mmio_mapping *opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+ struct passthru_mmio_mapping *gsm = &sc->psc_mmio_map[GVT_D_MAP_GSM];
+
+ /* Get Opregion length */
+ opregion->len = GPU_OPREGION_LEN;
+ /* Get Opregion HPA */
+ opregion->hpa = read_config(&sc->psc_sel, PCIR_ASLS_CTL, 4) &
+ PCIM_ASLS_OPREGION_MASK;
+ /* Get Graphics Stolen Memory len */
+ gsm->len = igd->get_gsm_len(ctx, &sc->psc_sel);
+ /* Get Graphics Stolen Memory HPA */
+ gsm->hpa = read_config(&sc->psc_sel, PCIR_BDSM, 4) & PCIM_BDSM_GSM_MASK;
+
+ if (opregion->len == 0 || gsm->len == 0) {
+ warnx("Could not determine size of opregion or gsm");
+ return -ENODEV;
+ }
+
+ /* Allocate Opregion and GSM in guest space */
+ gsm->gpa = pci_emul_alloc_gsm(gsm->len);
+ opregion->gpa = pci_emul_alloc_gsm(opregion->len);
+ if (opregion->gpa == 0 || gsm->gpa == 0) {
+ error = -ENOMEM;
+ goto failed_opregion;
+ }
+
+ /* Write address of Opregion and GSM into PCI register */
+ /* Set Opregion GPA */
+ uint32_t asls_val = read_config(&sc->psc_sel, PCIR_ASLS_CTL, 4);
+ pci_set_cfgdata32(sc->psc_pi, PCIR_ASLS_CTL,
+ opregion->gpa | (asls_val & ~PCIM_ASLS_OPREGION_MASK));
+ /* Set Graphics Stolen Memory GPA */
+ uint32_t bdsm_val = read_config(&sc->psc_sel, PCIR_BDSM, 4);
+ pci_set_cfgdata32(
+ sc->psc_pi, PCIR_BDSM, gsm->gpa | (bdsm_val & ~PCIM_BDSM_GSM_MASK));
+
+ /* Map Opregion and GSM into guest space */
+ if ((error = passthru_modify_pptdev_mmio(
+ ctx, sc, opregion, PT_MAP_PPTDEV_MMIO)) != 0)
+ goto failed_opregion;
+ if ((error = passthru_modify_pptdev_mmio(
+ ctx, sc, gsm, PT_MAP_PPTDEV_MMIO)) != 0)
+ goto failed_gsm;
+
+ /* Protect PCI register */
+ set_pcir_prot(sc, PCIR_ASLS_CTL, 0x04, PPT_PCIR_PROT_NA);
+ set_pcir_prot(sc, PCIR_BDSM, 0x04, PPT_PCIR_PROT_NA);
+
+ return (0);
+
+failed_opregion:
+ opregion->gpa = 0;
+failed_gsm:
+ gsm->gpa = 0;
+ return error;
+}
+
+void
+gvt_d_deinit(struct vmctx *ctx, struct pci_devinst *pi)
+{
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *gsm = &sc->psc_mmio_map[GVT_D_MAP_GSM];
+ struct passthru_mmio_mapping *opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+ /* GPA is only set, if it's initialized */
+ if (gsm->gpa)
+ passthru_modify_pptdev_mmio(ctx, sc, gsm, PT_UNMAP_PPTDEV_MMIO);
+ if (opregion->gpa)
+ passthru_modify_pptdev_mmio(
+ ctx, sc, opregion, PT_UNMAP_PPTDEV_MMIO);
+}
diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c
--- a/usr.sbin/bhyve/pci_lpc.c
+++ b/usr.sbin/bhyve/pci_lpc.c
@@ -33,9 +33,13 @@
__FBSDID("$FreeBSD$");
#include <sys/types.h>
+#include <sys/pciio.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -85,6 +89,29 @@
"COM1", "COM2", "COM3", "COM4"
};
+#ifndef _PATH_DEVPCI
+#define _PATH_DEVPCI "/dev/pci"
+#endif
+
+static int pcifd = -1;
+
+static uint32_t
+read_config(struct pcisel *sel, long reg, int width)
+{ /* Read width bytes of host PCI config space at reg via /dev/pci; NOTE(review): returns 0 on ioctl failure, indistinguishable from a register that really reads 0 */
+ struct pci_io pi;
+ pi.pi_sel.pc_domain = sel->pc_domain;
+ pi.pi_sel.pc_bus = sel->pc_bus;
+ pi.pi_sel.pc_dev = sel->pc_dev;
+ pi.pi_sel.pc_func = sel->pc_func;
+ pi.pi_reg = reg;
+ pi.pi_width = width;
+
+ if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
+ return (0);
+
+ return (pi.pi_data);
+}
+
/*
* LPC device configuration is in the following form:
* <lpc_device_name>[,<options>]
@@ -452,6 +479,40 @@
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA);
+ /* open host device */
+ if (pcifd < 0) {
+ pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
+ if (pcifd < 0) {
+ warn("failed to open %s", _PATH_DEVPCI);
+ return (-1);
+ }
+ }
+
+ /* on Intel systems lpc is always connected to 0:1f.0 */
+ struct pcisel sel;
+ sel.pc_domain = 0;
+ sel.pc_bus = 0;
+ sel.pc_dev = 0x1f;
+ sel.pc_func = 0;
+
+ if (read_config(&sel, PCIR_VENDOR, 2) == PCI_VENDOR_INTEL) {
+ /*
+ * The VID, DID, REVID, SUBVID and SUBDID of igd-lpc need to be
+ * aligned with the physical ones. Without these physical
+ * values, GVT-d GOP driver couldn't work.
+ */
+ pci_set_cfgdata16(
+ pi, PCIR_DEVICE, read_config(&sel, PCIR_DEVICE, 2));
+ pci_set_cfgdata16(
+ pi, PCIR_VENDOR, read_config(&sel, PCIR_VENDOR, 2));
+ pci_set_cfgdata8(
+ pi, PCIR_REVID, read_config(&sel, PCIR_REVID, 1));
+ pci_set_cfgdata16(
+ pi, PCIR_SUBVEND_0, read_config(&sel, PCIR_SUBVEND_0, 2));
+ pci_set_cfgdata16(
+ pi, PCIR_SUBDEV_0, read_config(&sel, PCIR_SUBDEV_0, 2));
+ }
+
lpc_bridge = pi;
return (0);
diff --git a/usr.sbin/bhyve/pci_passthru.h b/usr.sbin/bhyve/pci_passthru.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_passthru.h
@@ -0,0 +1,93 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#pragma once
+
+#include <sys/pciio.h>
+
+#include <vmmapi.h>
+
+#include "config.h"
+#include "pci_emul.h"
+
+struct passthru_mmio_mapping {
+ uint64_t gpa;
+ uint64_t len;
+ uint64_t hpa;
+};
+
+struct passthru_softc {
+ struct pci_devinst *psc_pi;
+ struct pcibar psc_bar[PCI_BARMAX + 1];
+ struct {
+ int capoff;
+ int msgctrl;
+ int emulated;
+ } psc_msi;
+ struct {
+ int capoff;
+ } psc_msix;
+ struct pcisel psc_sel;
+
+ struct passthru_mmio_mapping psc_mmio_map[2];
+ uint8_t psc_pcir_prot_map[(PCI_REGMAX + 1) / 4];
+};
+
+#define PT_MAP_PPTDEV_MMIO 1
+#define PT_UNMAP_PPTDEV_MMIO 0
+
+#define PPT_PCIR_PROT_NA 0 /* No Access to physical values */
+#define PPT_PCIR_PROT_RO 1 /* Read Only access to physical values */
+#define PPT_PCIR_PROT_WO 2 /* Write Only access to physical values */
+#define PPT_PCIR_PROT_RW \
+ (PPT_PCIR_PROT_RO | \
+ PPT_PCIR_PROT_WO) /* Read/Write access to physical values */
+#define PPT_PCIR_PROT_MASK 0x03
+
+#define GET_INT_CONFIG(var, name) \
+ do { \
+ const char *value = get_config_value_node(nvl, name); \
+ if (value == NULL) { \
+ EPRINTLN( \
+ "passthru: missing required %s setting", name); \
+ return (1); \
+ } \
+ var = atoi(value); \
+ } while (0)
+
+int passthru_modify_pptdev_mmio(struct vmctx *ctx, struct passthru_softc *sc,
+ struct passthru_mmio_mapping *map, int registration);
+uint32_t read_config(const struct pcisel *sel, long reg, int width);
+void write_config(const struct pcisel *sel, long reg, int width, uint32_t data);
+int set_pcir_prot(
+ struct passthru_softc *sc, uint32_t reg, uint32_t len, uint8_t prot);
+int gvt_d_early_quirks(struct vmctx *ctx, struct pcisel *sel);
+int gvt_d_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl);
+void gvt_d_deinit(struct vmctx *ctx, struct pci_devinst *pi);
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -60,10 +60,10 @@
#include <machine/vmm.h>
#include <vmmapi.h>
-#include "config.h"
#include "debug.h"
#include "pci_emul.h"
#include "mem.h"
+#include "pci_passthru.h"
#ifndef _PATH_DEVPCI
#define _PATH_DEVPCI "/dev/pci"
@@ -82,24 +82,21 @@
#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
#define MSIX_CAPLEN 12
+#define GET_INT_CONFIG(var, name) \
+ do { \
+ const char *value = get_config_value_node(nvl, name); \
+ if (value == NULL) { \
+ EPRINTLN( \
+ "passthru: missing required %s setting", name); \
+ return (1); \
+ } \
+ var = atoi(value); \
+ } while (0)
+
static int pcifd = -1;
static int iofd = -1;
static int memfd = -1;
-struct passthru_softc {
- struct pci_devinst *psc_pi;
- struct pcibar psc_bar[PCI_BARMAX + 1];
- struct {
- int capoff;
- int msgctrl;
- int emulated;
- } psc_msi;
- struct {
- int capoff;
- } psc_msix;
- struct pcisel psc_sel;
-};
-
static int
msi_caplen(int msgctrl)
{
@@ -122,7 +119,7 @@
return (len);
}
-static uint32_t
+uint32_t
read_config(const struct pcisel *sel, long reg, int width)
{
struct pci_io pi;
@@ -138,7 +135,7 @@
return (pi.pi_data);
}
-static void
+void
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
{
struct pci_io pi;
@@ -152,6 +149,51 @@
(void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */
}
+int
+passthru_modify_pptdev_mmio(struct vmctx *ctx, struct passthru_softc *sc,
+ struct passthru_mmio_mapping *map, int registration)
+{ /* Map (PT_MAP_PPTDEV_MMIO) or unmap the host MMIO range in *map for sc's device; returns the vmmapi call's status */
+ if (registration == PT_MAP_PPTDEV_MMIO)
+ return vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
+ sc->psc_sel.pc_dev, sc->psc_sel.pc_func, map->gpa, map->len,
+ map->hpa);
+ else
+ return vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
+ sc->psc_sel.pc_dev, sc->psc_sel.pc_func, map->gpa,
+ map->len);
+}
+
+static int
+passthru_early_quirks(struct vmctx *ctx, const nvlist_t *nvl)
+{ /* Early quirk hook, run before vm_setup_memory (may adjust lowmem_limit for GVT-d); 0 on success */
+ int bus, slot, func;
+ GET_INT_CONFIG(bus, "bus");
+ GET_INT_CONFIG(slot, "slot");
+ GET_INT_CONFIG(func, "func");
+
+ if (pcifd < 0) {
+ pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
+ if (pcifd < 0) {
+ warn("failed to open %s", _PATH_DEVPCI);
+ return (-1);
+ }
+ }
+
+ struct pcisel sel = { .pc_bus = bus, .pc_dev = slot, .pc_func = func }; /* pc_domain zero-initialized by designated init */
+
+ uint16_t vendor = read_config(&sel, PCIR_VENDOR, 0x02); /* read_config yields 0 on failure, which matches no vendor below */
+ uint8_t class = read_config(&sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return (0);
+
+ if (vendor == PCI_VENDOR_INTEL)
+ return gvt_d_early_quirks(ctx, &sel);
+
+ return (0);
+}
+
#ifdef LEGACY_SUPPORT
static int
passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
@@ -559,12 +601,23 @@
sc->psc_bar[i].type = bartype;
sc->psc_bar[i].size = size;
sc->psc_bar[i].addr = base;
+ sc->psc_bar[i].lobits = 0;
/* Allocate the BAR in the guest I/O or MMIO space */
error = pci_emul_alloc_bar(pi, i, bartype, size);
if (error)
return (-1);
+ /* Use same lobits as physical bar */
+ uint8_t lobits = read_config(&sc->psc_sel, PCIR_BAR(i), 0x01);
+ if (bartype == PCIBAR_MEM32 || bartype == PCIBAR_MEM64) {
+ lobits &= ~PCIM_BAR_MEM_BASE;
+ } else {
+ lobits &= ~PCIM_BAR_IO_BASE;
+ }
+ sc->psc_bar[i].lobits = lobits;
+ pi->pi_bar[i].lobits = lobits;
+
/* The MSI-X table needs special handling */
if (i == pci_msix_table_bar(pi)) {
error = init_msix_table(ctx, sc, base);
@@ -610,14 +663,55 @@
goto done;
}
- pci_set_cfgdata16(pi, PCIR_COMMAND, read_config(&sc->psc_sel,
- PCIR_COMMAND, 2));
+ write_config(
+ &sc->psc_sel, PCIR_COMMAND, 2, pci_get_cfgdata16(pi, PCIR_COMMAND));
error = 0; /* success */
done:
return (error);
}
+#define PPT_PCIR_PROT(reg) \
+ ((sc->psc_pcir_prot_map[(reg) / 4] >> (((reg) & 0x03) * 2)) & PPT_PCIR_PROT_MASK)
+
+int
+set_pcir_prot(
+ struct passthru_softc *sc, uint32_t reg, uint32_t len, uint8_t prot)
+{ /* Set access protection for cfg-space range [reg, reg+len): 2 bits per register, 4 registers per map byte */
+ if (reg > PCI_REGMAX || len > PCI_REGMAX + 1 - reg)
+ return (-1);
+
+ prot &= PPT_PCIR_PROT_MASK;
+
+ for (uint32_t i = reg; i < reg + len; ++i) {
+ /* delete old prot value; each register's field is 2 bits wide */
+ sc->psc_pcir_prot_map[i / 4] &= ~(
+ PPT_PCIR_PROT_MASK << ((i & 0x03) * 2));
+ /* set new prot value */
+ sc->psc_pcir_prot_map[i / 4] |= prot << ((i & 0x03) * 2);
+ }
+
+ return (0);
+}
+
+static int
+is_pcir_writable(struct passthru_softc *sc, uint32_t reg)
+{ /* Nonzero if guest writes to cfg register reg may reach the host device */
+ if (reg > PCI_REGMAX)
+ return (0);
+
+ return ((PPT_PCIR_PROT(reg) & PPT_PCIR_PROT_WO) != 0);
+}
+
+static int
+is_pcir_readable(struct passthru_softc *sc, uint32_t reg)
+{ /* Nonzero if guest reads of cfg register reg may see host values */
+ if (reg > PCI_REGMAX)
+ return (0);
+
+ return ((PPT_PCIR_PROT(reg) & PPT_PCIR_PROT_RO) != 0);
+}
+
static int
passthru_legacy_config(nvlist_t *nvl, const char *opts)
{
@@ -641,12 +735,50 @@
return (0);
}
+static int
+passthru_init_quirks(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
+{ /* Vendor/class-specific setup after generic passthru init; 0 on success or when no quirk applies */
+ struct passthru_softc *sc = pi->pi_arg;
+
+ uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02);
+ uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return (0);
+
+ if (vendor == PCI_VENDOR_INTEL)
+ return gvt_d_init(ctx, pi, nvl);
+
+ return (0);
+}
+
+static void
+passthru_deinit_quirks(struct vmctx *ctx, struct pci_devinst *pi)
+{ /* Undo quirk state from passthru_init_quirks; safe to call even if init never completed */
+ struct passthru_softc *sc = pi->pi_arg;
+
+ if (sc == NULL)
+ return;
+
+ uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02);
+ uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return;
+
+ /* ISO C forbids `return <void expression>;` in a void function (C11 6.8.6.4) */
+ if (vendor == PCI_VENDOR_INTEL)
+ gvt_d_deinit(ctx, pi);
+
+}
+
static int
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
{
int bus, slot, func, error, memflags;
struct passthru_softc *sc;
- const char *value;
#ifndef WITHOUT_CAPSICUM
cap_rights_t rights;
cap_ioctl_t pci_ioctls[] = { PCIOCREAD, PCIOCWRITE, PCIOCGETBAR };
@@ -711,15 +843,6 @@
errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif
-#define GET_INT_CONFIG(var, name) do { \
- value = get_config_value_node(nvl, name); \
- if (value == NULL) { \
- EPRINTLN("passthru: missing required %s setting", name); \
- return (error); \
- } \
- var = atoi(value); \
-} while (0)
-
GET_INT_CONFIG(bus, "bus");
GET_INT_CONFIG(slot, "slot");
GET_INT_CONFIG(func, "func");
@@ -736,9 +859,21 @@
sc->psc_pi = pi;
/* initialize config space */
- error = cfginit(ctx, pi, bus, slot, func);
+ if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
+ goto done;
+
+ /* allow access to all PCI registers */
+ if ((error = set_pcir_prot(sc, 0, PCI_REGMAX + 1, PPT_PCIR_PROT_RW)) !=
+ 0)
+ goto done;
+
+ if ((error = passthru_init_quirks(ctx, pi, nvl)) != 0)
+ goto done;
+
+ error = 0; /* success */
done:
if (error) {
+ passthru_deinit_quirks(ctx, pi);
free(sc);
vm_unassign_pptdev(ctx, bus, slot, func);
}
@@ -788,6 +923,10 @@
sc = pi->pi_arg;
+ /* skip for protected PCI registers */
+ if (!is_pcir_readable(sc, coff))
+ return (-1);
+
/*
* PCI BARs and MSI capability is emulated.
*/
@@ -834,6 +973,10 @@
sc = pi->pi_arg;
+ /* skip for protected PCI registers */
+ if (!is_pcir_writable(sc, coff))
+ return (-1);
+
/*
* PCI BARs are emulated
*/
@@ -958,7 +1101,7 @@
static void
passthru_msix_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
- int enabled, uint64_t address)
+ int enabled, uint64_t address)
{
struct passthru_softc *sc;
size_t remaining;
@@ -966,21 +1109,15 @@
sc = pi->pi_arg;
table_offset = rounddown2(pi->pi_msix.table_offset, 4096);
+
+ struct passthru_mmio_mapping map;
+
if (table_offset > 0) {
- if (!enabled) {
- if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- table_offset) != 0)
- warnx("pci_passthru: unmap_pptdev_mmio failed");
- } else {
- if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- table_offset,
- sc->psc_bar[baridx].addr) != 0)
- warnx("pci_passthru: map_pptdev_mmio failed");
- }
+ map.gpa = address;
+ map.len = table_offset;
+ map.hpa = sc->psc_bar[baridx].addr;
+ if (passthru_modify_pptdev_mmio(ctx, sc, &map, enabled) != 0)
+ warnx("pci_passthru: modify_pptdev_mmio failed");
}
table_size = pi->pi_msix.table_offset - table_offset;
table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
@@ -988,62 +1125,47 @@
remaining = pi->pi_bar[baridx].size - table_offset - table_size;
if (remaining > 0) {
address += table_offset + table_size;
- if (!enabled) {
- if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- remaining) != 0)
- warnx("pci_passthru: unmap_pptdev_mmio failed");
- } else {
- if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- remaining,
- sc->psc_bar[baridx].addr +
- table_offset + table_size) != 0)
- warnx("pci_passthru: map_pptdev_mmio failed");
- }
+ map.gpa = address;
+ map.len = remaining;
+ map.hpa = sc->psc_bar[baridx].addr + table_offset + table_size;
+ if (passthru_modify_pptdev_mmio(ctx, sc, &map, enabled) != 0)
+ warnx("pci_passthru: modify_pptdev_mmio failed");
}
}
static void
passthru_mmio_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
- int enabled, uint64_t address)
+ int enabled, uint64_t address)
{
struct passthru_softc *sc;
sc = pi->pi_arg;
- if (!enabled) {
- if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- sc->psc_bar[baridx].size) != 0)
- warnx("pci_passthru: unmap_pptdev_mmio failed");
- } else {
- if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- sc->psc_bar[baridx].size,
- sc->psc_bar[baridx].addr) != 0)
- warnx("pci_passthru: map_pptdev_mmio failed");
- }
+
+ struct passthru_mmio_mapping map;
+ map.gpa = address;
+ map.len = sc->psc_bar[baridx].size;
+ map.hpa = sc->psc_bar[baridx].addr;
+
+ if (passthru_modify_pptdev_mmio(ctx, sc, &map, enabled) != 0)
+ warnx("pci_passthru: modify_pptdev_mmio failed");
}
-static void
+static int
passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
- int enabled, uint64_t address)
+ int enabled, uint64_t address)
{
-
if (pi->pi_bar[baridx].type == PCIBAR_IO)
- return;
+ return (-1);
if (baridx == pci_msix_table_bar(pi))
passthru_msix_addr(ctx, pi, baridx, enabled, address);
else
passthru_mmio_addr(ctx, pi, baridx, enabled, address);
+ return (0);
}
struct pci_devemu passthru = {
.pe_emu = "passthru",
+ .pe_early_quirks = passthru_early_quirks,
.pe_init = passthru_init,
.pe_legacy_config = passthru_legacy_config,
.pe_cfgwrite = passthru_cfgwrite,

File Metadata

Mime Type
text/plain
Expires
Thu, Apr 30, 1:57 AM (6 h, 39 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32429688
Default Alt Text
D26209.id88259.diff (50 KB)

Event Timeline