Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F154622537
D26209.id88259.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
50 KB
Referenced Files
None
Subscribers
None
D26209.id88259.diff
View Options
diff --git a/sys/dev/pci/pcireg.h b/sys/dev/pci/pcireg.h
--- a/sys/dev/pci/pcireg.h
+++ b/sys/dev/pci/pcireg.h
@@ -1098,3 +1098,8 @@
#define PCIM_OSC_CTL_PCIE_PME 0x04 /* PCIe Native Power Mgt Events */
#define PCIM_OSC_CTL_PCIE_AER 0x08 /* PCIe Advanced Error Reporting */
#define PCIM_OSC_CTL_PCIE_CAP_STRUCT 0x10 /* Various Capability Structures */
+
+/*
+ * PCI Vendors
+ */
+#define PCI_VENDOR_INTEL 0x8086
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -42,6 +42,7 @@
pci_emul.c \
pci_hda.c \
pci_fbuf.c \
+ pci_gvt-d.c \
pci_hostbridge.c \
pci_irq.c \
pci_lpc.c \
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -1403,6 +1403,12 @@
fbsdrun_set_capabilities(ctx, BSP);
+ /*
+ * This function may adjust lowmem_limit.
+ * Therefore, call it before vm_setup_memory().
+ */
+ pci_early_quirks(ctx);
+
memflags = 0;
if (get_config_bool_default("memory.wired", false))
memflags |= VM_MEM_F_WIRED;
diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -51,6 +51,9 @@
struct pci_devemu {
char *pe_emu; /* Name of device emulation */
+ /* Early device quirks; GVT-d uses this hook to adjust the Top of Low Usable DRAM (lowmem_limit). */
+ int (*pe_early_quirks)(struct vmctx *, const nvlist_t *nvl);
+
/* instance creation */
int (*pe_init)(struct vmctx *, struct pci_devinst *,
nvlist_t *);
@@ -76,8 +79,8 @@
struct pci_devinst *pi, int baridx,
uint64_t offset, int size);
- void (*pe_baraddr)(struct vmctx *ctx, struct pci_devinst *pi,
- int baridx, int enabled, uint64_t address);
+ int (*pe_baraddr)(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
+ int enabled, uint64_t address);
/* Save/restore device state */
int (*pe_snapshot)(struct vm_snapshot_meta *meta);
@@ -99,6 +102,7 @@
enum pcibar_type type; /* io or memory */
uint64_t size;
uint64_t addr;
+ uint8_t lobits;
};
#define PI_NAMESZ 40
@@ -224,10 +228,13 @@
typedef void (*pci_lintr_cb)(int b, int s, int pin, int pirq_pin,
int ioapic_irq, void *arg);
+int pci_early_quirks(struct vmctx *ctx);
int init_pci(struct vmctx *ctx);
void pci_callback(void);
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
enum pcibar_type type, uint64_t size);
+int pci_emul_adjust_gsmbase(struct vmctx *ctx, uint64_t size);
+uint64_t pci_emul_alloc_gsm(uint64_t size);
int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
int pci_emul_add_pciecap(struct pci_devinst *pi, int pcie_device_type);
void pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes,
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -73,6 +73,11 @@
#define MAXSLOTS (PCI_SLOTMAX + 1)
#define MAXFUNCS (PCI_FUNCMAX + 1)
+#define GB (1024 * 1024 * 1024UL)
+
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
struct funcinfo {
nvlist_t *fi_config;
struct pci_devemu *fi_pde;
@@ -102,10 +107,23 @@
SET_DECLARE(pci_devemu_set, struct pci_devemu);
static uint64_t pci_emul_iobase;
+static uint64_t pci_emul_iolim;
+static uint64_t pci_emul_gsmbase;
+static uint64_t pci_emul_gsmlim;
static uint64_t pci_emul_membase32;
+static uint64_t pci_emul_memlim32;
static uint64_t pci_emul_membase64;
static uint64_t pci_emul_memlim64;
+struct pcibarlist {
+ struct pci_devinst *pdi;
+ int idx;
+ enum pcibar_type type;
+ uint64_t size;
+ struct pcibarlist *next;
+};
+struct pcibarlist *pci_bars;
+
#define PCI_EMUL_IOBASE 0x2000
#define PCI_EMUL_IOLIMIT 0x10000
@@ -113,7 +131,9 @@
#define PCI_EMUL_ECFG_SIZE (MAXBUSES * 1024 * 1024) /* 1MB per bus */
SYSRES_MEM(PCI_EMUL_ECFG_BASE, PCI_EMUL_ECFG_SIZE);
+#define PCI_EMUL_MEMBASE32 0xC0000000
#define PCI_EMUL_MEMLIMIT32 PCI_EMUL_ECFG_BASE
+#define PCI_EMUL_MEMSIZE64 (32 * GB)
static struct pci_devemu *pci_emul_finddev(const char *name);
static void pci_lintr_route(struct pci_devinst *pi);
@@ -512,6 +532,11 @@
struct mem_range mr;
pe = pi->pi_d;
+ if (pe->pe_baraddr != NULL &&
+ (*pe->pe_baraddr)(
+ pi->pi_vmctx, pi, idx, registration, pi->pi_bar[idx].addr) == 0)
+ return;
+
switch (pi->pi_bar[idx].type) {
case PCIBAR_IO:
bzero(&iop, sizeof(struct inout_port));
@@ -525,9 +550,6 @@
error = register_inout(&iop);
} else
error = unregister_inout(&iop);
- if (pe->pe_baraddr != NULL)
- (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
- pi->pi_bar[idx].addr);
break;
case PCIBAR_MEM32:
case PCIBAR_MEM64:
@@ -543,9 +565,6 @@
error = register_mem(&mr);
} else
error = unregister_mem(&mr);
- if (pe->pe_baraddr != NULL)
- (*pe->pe_baraddr)(pi->pi_vmctx, pi, idx, registration,
- pi->pi_bar[idx].addr);
break;
default:
error = EINVAL;
@@ -597,8 +616,9 @@
* the address range decoded by the BAR register.
*/
static void
-update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
+update_bar_address(struct pci_devinst *pi, int idx, uint32_t val)
{
+ int update_idx = idx;
int decode;
if (pi->pi_bar[idx].type == PCIBAR_IO)
@@ -606,38 +626,108 @@
else
decode = memen(pi);
- if (decode)
- unregister_bar(pi, idx);
-
- switch (type) {
+ switch (pi->pi_bar[idx].type) {
+ case PCIBAR_MEMHI64:
+ --update_idx;
case PCIBAR_IO:
case PCIBAR_MEM32:
- pi->pi_bar[idx].addr = addr;
+ case PCIBAR_MEM64: {
+ struct pcibar *bar = &pi->pi_bar[update_idx];
+
+ if (decode && bar->addr)
+ unregister_bar(pi, update_idx);
+
+ if (val == ~0U) {
+ /* guest wants to read size of BAR */
+ pci_set_cfgdata32(pi, PCIR_BAR(idx), ~0U);
+ bar->addr = 0;
+ break;
+ }
+
+ /* guest sets address of BAR */
+ uint64_t mask;
+ uint32_t bar_val;
+ mask = ~(bar->size - 1UL);
+ if (pi->pi_bar[idx].type == PCIBAR_MEMHI64)
+ mask >>= 32UL;
+ bar_val = val & mask;
+ bar_val |= pi->pi_bar[idx].lobits;
+ pci_set_cfgdata32(pi, PCIR_BAR(idx), bar_val);
+
+ /* Only register BAR if it contains a valid address */
+ uint32_t lo, hi;
+ lo = pci_get_cfgdata32(pi, PCIR_BAR(update_idx));
+ hi = 0;
+ if (bar->type == PCIBAR_MEM64)
+ hi = pci_get_cfgdata32(pi, PCIR_BAR(update_idx + 1));
+ if (lo == ~0U || hi == ~0U) {
+ bar->addr = 0;
+ break;
+ }
+
+ if (bar->type == PCIBAR_IO)
+ lo &= PCIM_BAR_IO_BASE;
+ else
+ lo &= PCIM_BAR_MEM_BASE;
+ bar->addr = (uint64_t)lo | ((uint64_t)hi << 32UL);
+ if (decode)
+ register_bar(pi, update_idx);
+
break;
- case PCIBAR_MEM64:
- pi->pi_bar[idx].addr &= ~0xffffffffUL;
- pi->pi_bar[idx].addr |= addr;
+ }
+ case PCIBAR_NONE:
break;
- case PCIBAR_MEMHI64:
- pi->pi_bar[idx].addr &= 0xffffffff;
- pi->pi_bar[idx].addr |= addr;
+ default:
+ assert(0);
+ }
+}
+
+static uint32_t
+read_bar_value(struct pci_devinst *pi, int coff, int bytes)
+{
+ uint8_t idx;
+ idx = (coff - PCIR_BAR(0)) / 4;
+ assert(idx <= PCI_BARMAX);
+
+ uint8_t update_idx = idx;
+ uint64_t val;
+
+ if (pi->pi_bar[idx].type == PCIBAR_MEMHI64)
+ --update_idx;
+
+ val = pci_get_cfgdata32(pi, PCIR_BAR(idx));
+
+ /* return size of BAR */
+ if (val == ~0U) {
+ val = ~(pi->pi_bar[update_idx].size - 1);
+ val |= pi->pi_bar[update_idx].lobits;
+ if (pi->pi_bar[idx].type == PCIBAR_MEMHI64)
+ val >>= 32;
+ }
+
+ switch (bytes) {
+ case 1:
+ val = (val >> (8 * (coff & 0x03))) & 0xFF;
+ break;
+ case 2:
+ assert((coff & 0x01) == 0);
+ val = (val >> (8 * (coff & 0x02))) & 0xFFFF;
+ break;
+ case 4:
+ assert((coff & 0x03) == 0);
+ val = (uint32_t)val;
break;
default:
assert(0);
}
- if (decode)
- register_bar(pi, idx);
+ return val;
}
int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
uint64_t size)
{
- int error;
- uint64_t *baseptr, limit, addr, mask, lobits, bar;
- uint16_t cmd, enbit;
-
assert(idx >= 0 && idx <= PCI_BARMAX);
if ((size & (size - 1)) != 0)
@@ -652,6 +742,45 @@
size = 16;
}
+ struct pcibarlist *newBar = malloc(sizeof(struct pcibarlist));
+ memset(newBar, 0, sizeof(struct pcibarlist));
+ newBar->pdi = pdi;
+ newBar->idx = idx;
+ newBar->type = type;
+ newBar->size = size;
+ if (pci_bars == NULL) {
+ /* first BAR */
+ pci_bars = newBar;
+ } else {
+ struct pcibarlist *bar = pci_bars;
+ struct pcibarlist *lastBar = NULL;
+ do {
+ if (bar->size < size)
+ break;
+ lastBar = bar;
+ bar = bar->next;
+ } while (bar != NULL);
+ newBar->next = bar;
+ if (lastBar != NULL)
+ lastBar->next = newBar;
+ else
+ pci_bars = newBar;
+ }
+ return (0);
+}
+
+static int
+pci_emul_assign_bar(struct pcibarlist *pci_bar)
+{
+ struct pci_devinst *pdi = pci_bar->pdi;
+ int idx = pci_bar->idx;
+ enum pcibar_type type = pci_bar->type;
+ uint64_t size = pci_bar->size;
+
+ int error;
+ uint64_t *baseptr, limit, addr, mask, lobits;
+ uint16_t cmd, enbit;
+
switch (type) {
case PCIBAR_NONE:
baseptr = NULL;
@@ -659,7 +788,7 @@
break;
case PCIBAR_IO:
baseptr = &pci_emul_iobase;
- limit = PCI_EMUL_IOLIMIT;
+ limit = pci_emul_iolim;
mask = PCIM_BAR_IO_BASE;
lobits = PCIM_BAR_IO_SPACE;
enbit = PCIM_CMD_PORTEN;
@@ -670,25 +799,29 @@
* Some drivers do not work well if the 64-bit BAR is allocated
* above 4GB. Allow for this by allocating small requests under
* 4GB unless then allocation size is larger than some arbitrary
- * number (128MB currently).
+ * number (256MB currently).
*/
- if (size > 128 * 1024 * 1024) {
+ if (size > 256 * 1024 * 1024) {
baseptr = &pci_emul_membase64;
limit = pci_emul_memlim64;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
PCIM_BAR_MEM_PREFETCH;
- } else {
- baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
- mask = PCIM_BAR_MEM_BASE;
- lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
+ enbit = PCIM_CMD_MEMEN;
+ break;
}
- enbit = PCIM_CMD_MEMEN;
- break;
+ /*
+ * Use 32-bit BARs for small requests:
+ * fall through into the MEM32 case.
+ */
+ type = PCIBAR_MEM32;
+ pdi->pi_bar[idx + 1].type = PCIBAR_NONE;
+ /* clear 64-bit flag */
+ pdi->pi_bar[idx].lobits &= ~PCIM_BAR_MEM_64;
+ /* [fallthrough] */
case PCIBAR_MEM32:
baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
+ limit = pci_emul_memlim32;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
enbit = PCIM_CMD_MEMEN;
@@ -705,27 +838,60 @@
}
pdi->pi_bar[idx].type = type;
- pdi->pi_bar[idx].addr = addr;
+ pdi->pi_bar[idx].addr = 0;
pdi->pi_bar[idx].size = size;
+ /* Passthru devices use the same lobits as the physical device;
+ * they set this property themselves.
+ */
+ if (pdi->pi_bar[idx].lobits != 0)
+ lobits = pdi->pi_bar[idx].lobits;
+ else
+ pdi->pi_bar[idx].lobits = lobits;
- /* Initialize the BAR register in config space */
- bar = (addr & mask) | lobits;
- pci_set_cfgdata32(pdi, PCIR_BAR(idx), bar);
+ /* Initialize CMD register in config space */
+ cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
+ if ((cmd & enbit) != enbit)
+ pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
+ /* Initialize the BAR register in config space */
if (type == PCIBAR_MEM64) {
assert(idx + 1 <= PCI_BARMAX);
pdi->pi_bar[idx + 1].type = PCIBAR_MEMHI64;
- pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
+ update_bar_address(pdi, idx + 1, addr);
}
- cmd = pci_get_cfgdata16(pdi, PCIR_COMMAND);
- if ((cmd & enbit) != enbit)
- pci_set_cfgdata16(pdi, PCIR_COMMAND, cmd | enbit);
- register_bar(pdi, idx);
+ update_bar_address(pdi, idx, addr);
return (0);
}
+#define ALIGNED_SUBTRACT(base, size) ((base - size) & ~(size - 1))
+
+int
+pci_emul_adjust_gsmbase(struct vmctx *ctx, uint64_t size)
+{
+ if (size > pci_emul_gsmbase)
+ return (-1);
+ pci_emul_gsmbase = ALIGNED_SUBTRACT(pci_emul_gsmbase, size);
+ vm_set_lowmem_limit(
+ ctx, min(vm_get_lowmem_limit(ctx), pci_emul_gsmbase));
+
+ return (0);
+}
+
+uint64_t
+pci_emul_alloc_gsm(uint64_t size)
+{
+ if (size > pci_emul_gsmlim)
+ return 0;
+ uint64_t addr = ALIGNED_SUBTRACT(pci_emul_gsmlim, size);
+ if (addr < pci_emul_gsmbase)
+ return 0;
+ pci_emul_gsmlim = addr;
+
+ return addr;
+}
+
#define CAP_START_OFFSET 0x40
static int
pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen)
@@ -1131,9 +1297,51 @@
return (PCI_EMUL_ECFG_BASE);
}
+int
+pci_early_quirks(struct vmctx *ctx)
+{
+ pci_emul_gsmbase = PCI_EMUL_MEMBASE32;
+ pci_emul_gsmlim = PCI_EMUL_MEMBASE32;
+
+ for (int bus = 0; bus < MAXBUSES; ++bus) {
+ for (int slot = 0; slot < MAXSLOTS; ++slot) {
+ for (int func = 0; func < MAXFUNCS; ++func) {
+ char node_name[sizeof("pci.XXX.XX.X")];
+ snprintf(node_name, sizeof(node_name),
+ "pci.%d.%d.%d", bus, slot, func);
+ nvlist_t *nvl = find_config_node(node_name);
+ if (nvl == NULL)
+ continue;
+
+ const char *emul = get_config_value_node(
+ nvl, "device");
+ struct pci_devemu *pde = pci_emul_finddev(emul);
+ if (pde == NULL) {
+ EPRINTLN("pci slot %d:%d:%d: unknown "
+ "device \"%s\"",
+ bus, slot, func, emul);
+ return (EINVAL);
+ }
+
+ if (pde->pe_early_quirks) {
+ const int error = pde->pe_early_quirks(
+ ctx, nvl);
+ if (error)
+ return (error);
+ }
+ }
+ }
+ }
+
+ return (0);
+}
+
#define BUSIO_ROUNDUP 32
#define BUSMEM_ROUNDUP (1024 * 1024)
+#define ALIGN_VALUE(Value, Alignment) \
+ ((Value) + (((Alignment) - (Value)) & ((Alignment)-1)))
+
int
init_pci(struct vmctx *ctx)
{
@@ -1146,25 +1354,18 @@
nvlist_t *nvl;
const char *emul;
size_t lowmem;
- uint64_t cpu_maxphysaddr, pci_emul_memresv64;
- u_int regs[4];
int bus, slot, func, error;
pci_emul_iobase = PCI_EMUL_IOBASE;
- pci_emul_membase32 = vm_get_lowmem_limit(ctx);
+ pci_emul_iolim = PCI_EMUL_IOLIMIT;
- do_cpuid(0x80000008, regs);
- cpu_maxphysaddr = 1ULL << (regs[0] & 0xff);
- if (cpu_maxphysaddr > VM_MAXUSER_ADDRESS_LA48)
- cpu_maxphysaddr = VM_MAXUSER_ADDRESS_LA48;
- pci_emul_memresv64 = cpu_maxphysaddr / 4;
- /*
- * Max power of 2 that is less then
- * cpu_maxphysaddr - pci_emul_memresv64.
- */
- pci_emul_membase64 = 1ULL << (flsl(cpu_maxphysaddr -
- pci_emul_memresv64) - 1);
- pci_emul_memlim64 = cpu_maxphysaddr;
+ pci_emul_membase32 = PCI_EMUL_MEMBASE32;
+ pci_emul_memlim32 = PCI_EMUL_MEMLIMIT32;
+
+ pci_emul_membase64 = 4 * GB + vm_get_highmem_size(ctx);
+ pci_emul_membase64 = ALIGN_VALUE(
+ pci_emul_membase64, PCI_EMUL_MEMSIZE64);
+ pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64;
for (bus = 0; bus < MAXBUSES; bus++) {
snprintf(node_name, sizeof(node_name), "pci.%d", bus);
@@ -1182,6 +1383,7 @@
bi->membase32 = pci_emul_membase32;
bi->membase64 = pci_emul_membase64;
+ /* first run: init devices */
for (slot = 0; slot < MAXSLOTS; slot++) {
si = &bi->slotinfo[slot];
for (func = 0; func < MAXFUNCS; func++) {
@@ -1221,6 +1423,15 @@
}
}
+ /* second run: assign BARs and free BAR list */
+ struct pcibarlist *bar = pci_bars;
+ while (bar != NULL) {
+ pci_emul_assign_bar(bar);
+ struct pcibarlist *old = bar;
+ bar = bar->next;
+ free(old);
+ }
+
/*
* Add some slop to the I/O and memory resources decoded by
* this bus to give a guest some flexibility if it wants to
@@ -1266,7 +1477,8 @@
* The guest physical memory map looks like the following:
* [0, lowmem) guest system memory
* [lowmem, lowmem_limit) memory hole (may be absent)
- * [lowmem_limit, 0xE0000000) PCI hole (32-bit BAR allocation)
+ * [lowmem_limit, 0xC0000000) Graphics Stolen Memory (may be absent)
+ * [0xC0000000, 0xE0000000) PCI hole (32-bit BAR allocation)
* [0xE0000000, 0xF0000000) PCI extended config window
* [0xF0000000, 4GB) LAPIC, IOAPIC, HPET, firmware
* [4GB, 4GB + highmem)
@@ -1865,7 +2077,6 @@
struct pci_devinst *pi;
struct pci_devemu *pe;
int idx, needcfg;
- uint64_t addr, bar, mask;
if ((bi = pci_businfo[bus]) != NULL) {
si = &bi->slotinfo[slot];
@@ -1917,8 +2128,14 @@
needcfg = 1;
}
- if (needcfg)
- *eax = CFGREAD(pi, coff, bytes);
+ if (needcfg) {
+ if (coff >= PCIR_BAR(0) &&
+ coff < PCIR_BAR(PCI_BARMAX + 1)) {
+ *eax = read_bar_value(pi, coff, bytes);
+ } else {
+ *eax = CFGREAD(pi, coff, bytes);
+ }
+ }
pci_emul_hdrtype_fixup(bus, slot, coff, bytes, eax);
} else {
@@ -1938,54 +2155,7 @@
if (bytes != 4 || (coff & 0x3) != 0)
return;
idx = (coff - PCIR_BAR(0)) / 4;
- mask = ~(pi->pi_bar[idx].size - 1);
- switch (pi->pi_bar[idx].type) {
- case PCIBAR_NONE:
- pi->pi_bar[idx].addr = bar = 0;
- break;
- case PCIBAR_IO:
- addr = *eax & mask;
- addr &= 0xffff;
- bar = addr | PCIM_BAR_IO_SPACE;
- /*
- * Register the new BAR value for interception
- */
- if (addr != pi->pi_bar[idx].addr) {
- update_bar_address(pi, addr, idx,
- PCIBAR_IO);
- }
- break;
- case PCIBAR_MEM32:
- addr = bar = *eax & mask;
- bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
- if (addr != pi->pi_bar[idx].addr) {
- update_bar_address(pi, addr, idx,
- PCIBAR_MEM32);
- }
- break;
- case PCIBAR_MEM64:
- addr = bar = *eax & mask;
- bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
- PCIM_BAR_MEM_PREFETCH;
- if (addr != (uint32_t)pi->pi_bar[idx].addr) {
- update_bar_address(pi, addr, idx,
- PCIBAR_MEM64);
- }
- break;
- case PCIBAR_MEMHI64:
- mask = ~(pi->pi_bar[idx - 1].size - 1);
- addr = ((uint64_t)*eax << 32) & mask;
- bar = addr >> 32;
- if (bar != pi->pi_bar[idx - 1].addr >> 32) {
- update_bar_address(pi, addr, idx - 1,
- PCIBAR_MEMHI64);
- }
- break;
- default:
- assert(0);
- }
- pci_set_cfgdata32(pi, coff, bar);
-
+ update_bar_address(pi, idx, *eax);
} else if (pci_emul_iscap(pi, coff)) {
pci_emul_capwrite(pi, coff, bytes, *eax, 0, 0);
} else if (coff >= PCIR_COMMAND && coff < PCIR_REVID) {
diff --git a/usr.sbin/bhyve/pci_fbuf.c b/usr.sbin/bhyve/pci_fbuf.c
--- a/usr.sbin/bhyve/pci_fbuf.c
+++ b/usr.sbin/bhyve/pci_fbuf.c
@@ -216,15 +216,15 @@
return (value);
}
-static void
+static int
pci_fbuf_baraddr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
- int enabled, uint64_t address)
+ int enabled, uint64_t address)
{
struct pci_fbuf_softc *sc;
int prot;
if (baridx != 1)
- return;
+ return (-1);
sc = pi->pi_arg;
if (!enabled && sc->fbaddr != 0) {
@@ -237,6 +237,8 @@
EPRINTLN("pci_fbuf: mmap_memseg failed");
sc->fbaddr = address;
}
+
+ return (0);
}
@@ -375,7 +377,7 @@
static int
pci_fbuf_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
{
- int error, prot;
+ int error;
struct pci_fbuf_softc *sc;
if (fbuf_sc != NULL) {
@@ -393,6 +395,13 @@
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_DISPLAY);
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_DISPLAY_VGA);
+ sc->fb_base = vm_create_devmem(
+ ctx, VM_FRAMEBUFFER, "framebuffer", FB_SIZE);
+ if (sc->fb_base == MAP_FAILED) {
+ error = -1;
+ goto done;
+ }
+
error = pci_emul_alloc_bar(pi, 0, PCIBAR_MEM32, DMEMSZ);
assert(error == 0);
@@ -402,7 +411,6 @@
error = pci_emul_add_msicap(pi, PCI_FBUF_MSI_MSGS);
assert(error == 0);
- sc->fbaddr = pi->pi_bar[1].addr;
sc->memregs.fbsize = FB_SIZE;
sc->memregs.width = COLS_DEFAULT;
sc->memregs.height = ROWS_DEFAULT;
@@ -423,27 +431,9 @@
goto done;
}
- sc->fb_base = vm_create_devmem(ctx, VM_FRAMEBUFFER, "framebuffer", FB_SIZE);
- if (sc->fb_base == MAP_FAILED) {
- error = -1;
- goto done;
- }
DPRINTF(DEBUG_INFO, ("fbuf frame buffer base: %p [sz %lu]",
sc->fb_base, FB_SIZE));
- /*
- * Map the framebuffer into the guest address space.
- * XXX This may fail if the BAR is different than a prior
- * run. In this case flag the error. This will be fixed
- * when a change_memseg api is available.
- */
- prot = PROT_READ | PROT_WRITE;
- if (vm_mmap_memseg(ctx, sc->fbaddr, VM_FRAMEBUFFER, 0, FB_SIZE, prot) != 0) {
- EPRINTLN("pci_fbuf: mapseg failed - try deleting VM and restarting");
- error = -1;
- goto done;
- }
-
console_init(sc->memregs.width, sc->memregs.height, sc->fb_base);
console_fb_register(pci_fbuf_render, sc);
diff --git a/usr.sbin/bhyve/pci_gvt-d.c b/usr.sbin/bhyve/pci_gvt-d.c
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_gvt-d.c
@@ -0,0 +1,407 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/param.h>
+
+#include <machine/vmm.h>
+
+#include <dev/pci/pcireg.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "inout.h"
+#include "pci_passthru.h"
+
+#define MB (1024 * 1024UL)
+
+/*
+ * PCI definitions
+ */
+#define PCIR_GGC 0x50 /* GMCH Graphics Control register */
+#define PCIR_BDSM 0x5C /* Base Data of Stolen Memory register */
+#define PCIR_ASLS_CTL 0xFC /* Opregion start address register */
+#define PCIM_GEN5_75_GGC_GMS_MASK \
+ 0x000000F0 /* Bits 7:4 contain Graphics Mode Select */
+#define PCIM_GEN6_GGC_GMS_MASK \
+ 0x000000F8 /* Bits 7:3 contain Graphics Mode Select */
+#define PCIM_GEN8_GGC_GMS_MASK \
+ 0x0000FF00 /* Bits 15:8 contain Graphics Mode Select */
+#define PCIM_BDSM_GSM_MASK \
+ 0xFFF00000 /* Bits 31:20 contain base address of gsm */
+#define PCIM_ASLS_OPREGION_MASK 0xFFFFF000 /* Opregion is 4k aligned */
+#define GPU_OPREGION_LEN 0x00004000 /* Size of Opregion (16 KB) */
+
+/*
+ * Known device ids for different generations of Intel graphics
+ * see https://www.graphics-drivers.eu/intel-pci-hardware-id-string.html for
+ * complete list
+ */
+/* Westmere & Ironlake */
+static const uint16_t igd_devid_gen5_75[] = { 0x0042, 0x0046 };
+/* Sandy Bridge */
+static const uint16_t igd_devid_gen6[] = { 0x0102, 0x0106, 0x010A, 0x0112,
+ 0x0116, 0x0122, 0x0126 };
+/* Ivy Bridge */
+static const uint16_t igd_devid_gen7[] = { 0x0152, 0x0156, 0x015A, 0x0162,
+ 0x0166, 0x016A };
+/* Haswell */
+static const uint16_t igd_devid_gen7_5[] = { 0x0402, 0x0406, 0x040A, 0x0412,
+ 0x0416, 0x041A, 0x041E, 0x0A06, 0x0A0E, 0x0A16, 0x0A1E, 0x0A26, 0x0A2E,
+ 0x0C02, 0x0C06, 0x0C12, 0x0C16, 0x0C22, 0x0C26, 0x0D06, 0x0D16, 0x0D22,
+ 0x0D26 };
+/* Broadwell */
+static const uint16_t igd_devid_gen8[] = { 0x1606, 0x160E, 0x1612, 0x1616,
+ 0x161A, 0x161E, 0x1622, 0x1626, 0x162A, 0x162B };
+/* Skylake */
+static const uint16_t igd_devid_gen9[] = { 0x1902, 0x1906, 0x190B, 0x190E,
+ 0x1912, 0x1913, 0x1916, 0x1917, 0x191B, 0x191D, 0x191E, 0x1921, 0x1923,
+ 0x1926, 0x1927, 0x192B, 0x192D, 0x1932, 0x193A, 0x193B, 0x193D };
+/* Kaby Lake & Whiskey Lake & Amber Lake & Coffee Lake & Comet Lake */
+static const uint16_t igd_devid_gen9_5[] = { 0x3E90, 0x3E91, 0x3E92, 0x3E93,
+ 0x3E94, 0x3E96, 0x3E98, 0x3E99, 0x3E9A, 0x3E9B, 0x3E9C, 0x3EA0, 0x3EA1,
+ 0x3EA5, 0x3EA6, 0x3EA7, 0x3EA8, 0x3EA9, 0x5902, 0x5906, 0x590B, 0x5912,
+ 0x5916, 0x5917, 0x591B, 0x591C, 0x591D, 0x591E, 0x5921, 0x5926, 0x5927,
+ 0x87C0, 0x87CA, 0x9B21, 0x9B41, 0x9BA2, 0x9BA4, 0x9BA5, 0x9BA8, 0x9BAA,
+ 0x9BAC, 0x9BC2, 0x9BC4, 0x9BC5, 0x9BC6, 0x9BC8, 0x9BCA, 0x9BCC, 0x9BE6,
+ 0x9BF6 };
+
+static int
+array_contains(const uint16_t *array, uint64_t elements, uint16_t item)
+{
+ for (uint64_t i = 0; i < elements; ++i)
+ if (array[i] == item)
+ return 1;
+ return 0;
+}
+
+#define IGD_FUNC_IS_IGD_GEN(gen) \
+ static int igd_gen##gen##_is_igd_gen(int devid) \
+ { \
+ return array_contains(igd_devid_gen##gen, \
+ sizeof(igd_devid_gen##gen) / sizeof(uint16_t), devid); \
+ }
+
+/* GVT-d definitions */
+#define GVT_D_MAP_OPREGION 0
+#define GVT_D_MAP_GSM 1
+
+/*
+ * Handler for passthru of igd
+ *
+ * Keep it as struct instead of a single function pointer, since new
+ * generations of Intel graphics could need other funcs.
+ * e.g. Intel Elkhartlake and Intel Tigerlake:
+ * They will need different handling for GSM and Opregion (See ACRN-Hypervisor
+ * <https://github.com/projectacrn/acrn-hypervisor/blob/master/devicemodel/hw/pci/passthrough.c>)
+ */
+struct igd_funcs {
+ int (*is_igd_gen)(int devid);
+ uint64_t (*get_gsm_len)(struct vmctx *ctx, struct pcisel *sel);
+};
+
+/* Handler for igd of gen5.75 (Westmere & Ironlake) */
+IGD_FUNC_IS_IGD_GEN(5_75);
+
+static uint64_t
+igd_gen5_75_get_gsm_len(struct vmctx *ctx, struct pcisel *sel)
+{
+ uint16_t ggc_val = read_config(sel, PCIR_GGC, 2);
+ uint8_t gms_val = (ggc_val & PCIM_GEN5_75_GGC_GMS_MASK) >>
+ 4; /* Bits 7:4 contain Graphics Mode Select */
+ switch (gms_val) {
+ case 0x05:
+ return 32 * MB;
+ case 0x06:
+ return 48 * MB;
+ case 0x07:
+ return 64 * MB;
+ case 0x08:
+ return 128 * MB;
+ case 0x09:
+ return 256 * MB;
+ case 0x0A:
+ return 96 * MB;
+ case 0x0B:
+ return 160 * MB;
+ case 0x0C:
+ return 224 * MB;
+ case 0x0D:
+ return 352 * MB;
+ }
+
+ warnx("Unknown Graphic Mode (%x)", gms_val);
+ return 0;
+}
+
+/* Handler for igd of gen6 (Sandy Bridge) */
+IGD_FUNC_IS_IGD_GEN(6);
+
+static uint64_t
+igd_gen6_get_gsm_len(struct vmctx *ctx, struct pcisel *sel)
+{
+ uint16_t ggc_val = read_config(sel, PCIR_GGC, 2);
+ uint8_t gms_val = (ggc_val & PCIM_GEN6_GGC_GMS_MASK) >>
+ 3; /* Bits 7:3 contain Graphics Mode Select */
+ if (gms_val <= 0x10)
+ return gms_val * 32 * MB;
+
+ warnx("Unknown Graphic Mode (%x)", gms_val);
+ return 0;
+}
+
+/* Handler for igd of gen7 (Ivy Bridge) */
+IGD_FUNC_IS_IGD_GEN(7);
+
+/* Handler for igd of gen7.5 (Haswell) */
+IGD_FUNC_IS_IGD_GEN(7_5);
+
+/* Handler for igd of gen8 (Broadwell) */
+IGD_FUNC_IS_IGD_GEN(8);
+
+static uint64_t
+igd_gen8_get_gsm_len(struct vmctx *ctx, struct pcisel *sel)
+{
+ uint16_t ggc_val = read_config(sel, PCIR_GGC, 2);
+ uint8_t gms_val = (ggc_val & PCIM_GEN8_GGC_GMS_MASK) >>
+ 8; /* Bits 15:8 contain Graphics Mode Select */
+ if ((gms_val <= 0x10) || (gms_val == 0x20) || (gms_val == 0x30) ||
+ (gms_val == 0x3F))
+ return gms_val * 32 * MB;
+
+ warnx("Unknown Graphic Mode (%x)", gms_val);
+ return 0;
+}
+
+/* Handler for igd of gen9 (Skylake) */
+IGD_FUNC_IS_IGD_GEN(9);
+
+static uint64_t
+igd_gen9_get_gsm_len(struct vmctx *ctx, struct pcisel *sel)
+{
+ uint16_t ggc_val = read_config(sel, PCIR_GGC, 2);
+ uint8_t gms_val = (ggc_val & PCIM_GEN8_GGC_GMS_MASK) >>
+ 8; /* Bits 15:8 contain Graphics Mode Select */
+ if ((gms_val <= 0x10) || (gms_val == 0x20) || (gms_val == 0x30) ||
+ (gms_val == 0x40))
+ return gms_val * 32 * MB;
+ else if (gms_val >= 0xF0 && gms_val <= 0xFE)
+ return gms_val * 4 * MB;
+
+ warnx("Unknown Graphic Mode (%x)", gms_val);
+ return 0;
+}
+
+/*
+ * Handler for igd of gen9.5 (Kaby Lake & Whiskey Lake & Amber Lake & Coffee
+ * Lake & Comet Lake)
+ */
+IGD_FUNC_IS_IGD_GEN(9_5);
+
+/* Westmere & Ironlake */
+static const struct igd_funcs igd_gen5_75 = {
+ .is_igd_gen = igd_gen5_75_is_igd_gen,
+ .get_gsm_len = igd_gen5_75_get_gsm_len
+};
+/* Sandy Bridge */
+static const struct igd_funcs igd_gen6 = { .is_igd_gen = igd_gen6_is_igd_gen,
+ .get_gsm_len = igd_gen6_get_gsm_len };
+/* Ivy Bridge */
+static const struct igd_funcs igd_gen7 = { .is_igd_gen = igd_gen7_is_igd_gen,
+ .get_gsm_len = igd_gen6_get_gsm_len };
+/* Haswell */
+static const struct igd_funcs igd_gen7_5 = {
+ .is_igd_gen = igd_gen7_5_is_igd_gen,
+ .get_gsm_len = igd_gen6_get_gsm_len
+};
+/* Broadwell */
+static const struct igd_funcs igd_gen8 = { .is_igd_gen = igd_gen8_is_igd_gen,
+ .get_gsm_len = igd_gen8_get_gsm_len };
+/* Skylake */
+static const struct igd_funcs igd_gen9 = { .is_igd_gen = igd_gen9_is_igd_gen,
+ .get_gsm_len = igd_gen9_get_gsm_len };
+/* Kaby Lake & Whiskey Lake & Amber Lake & Coffee Lake & Comet Lake */
+static const struct igd_funcs igd_gen9_5 = {
+ .is_igd_gen = igd_gen9_5_is_igd_gen,
+ .get_gsm_len = igd_gen9_get_gsm_len
+};
+
+static const struct igd_funcs *igd_gen_map[] = { &igd_gen5_75, &igd_gen6,
+ &igd_gen7, &igd_gen7_5, &igd_gen8, &igd_gen9, &igd_gen9_5 };
+
+static const struct igd_funcs *
+get_igd_funcs(const uint16_t devid)
+{
+ for (int i = 0; i < sizeof(igd_gen_map) / sizeof(struct igd_funcs *);
+ ++i) {
+ if (igd_gen_map[i]->is_igd_gen(devid))
+ return igd_gen_map[i];
+ }
+ return NULL;
+}
+
+int
+gvt_d_early_quirks(struct vmctx *ctx, struct pcisel *sel)
+{
+ /* Get IGD funcs */
+ const uint16_t dev = read_config(sel, PCIR_DEVICE, 2);
+ const struct igd_funcs *const igd = get_igd_funcs(dev);
+ if (igd == NULL) {
+ warnx("Unsupported igd-device (%x)", dev);
+ return -ENODEV;
+ }
+
+ /* Get Graphics Stolen Memory len */
+ const uint64_t gsm_len = igd->get_gsm_len(ctx, sel);
+ /* Get Opregion length */
+ const uint64_t opregion_len = GPU_OPREGION_LEN;
+
+ pci_emul_adjust_gsmbase(ctx, gsm_len);
+ pci_emul_adjust_gsmbase(ctx, opregion_len);
+
+ return (0);
+}
+
+int
+gvt_d_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
+{
+ int error;
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ /* check vendor == Intel */
+ const uint16_t dev_vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 2);
+ if (dev_vendor != 0x8086) {
+ warnx("Unknown vendor (%x) of igd", dev_vendor);
+ return -ENODEV;
+ }
+
+ /* check if device is a display device */
+ if (read_config(&sc->psc_sel, PCIR_CLASS, 1) != PCIC_DISPLAY) {
+ warnx("%s is no display device", pi->pi_name);
+ return -ENODEV;
+ }
+
+ /* Get IGD funcs */
+ const struct igd_funcs *igd = get_igd_funcs(
+ read_config(&sc->psc_sel, PCIR_DEVICE, 2));
+ if (igd == NULL) {
+ warnx("Unsupported igd-device (%x)",
+ read_config(&sc->psc_sel, PCIR_DEVICE, 2));
+ return -ENODEV;
+ }
+
+ struct passthru_mmio_mapping *opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+ struct passthru_mmio_mapping *gsm = &sc->psc_mmio_map[GVT_D_MAP_GSM];
+
+ /* Get Opregion length */
+ opregion->len = GPU_OPREGION_LEN;
+ /* Get Opregion HPA */
+ opregion->hpa = read_config(&sc->psc_sel, PCIR_ASLS_CTL, 4) &
+ PCIM_ASLS_OPREGION_MASK;
+ /* Get Graphics Stolen Memory len */
+ gsm->len = igd->get_gsm_len(ctx, &sc->psc_sel);
+ /* Get Graphics Stolen Memory HPA */
+ gsm->hpa = read_config(&sc->psc_sel, PCIR_BDSM, 4) & PCIM_BDSM_GSM_MASK;
+
+ if (opregion->len == 0 || gsm->len == 0) {
+ warnx("Could not determine size of opregion or gsm");
+ return -ENODEV;
+ }
+
+ /* Allocate Opregion and GSM in guest space */
+ gsm->gpa = pci_emul_alloc_gsm(gsm->len);
+ opregion->gpa = pci_emul_alloc_gsm(opregion->len);
+ if (opregion->gpa == 0 || gsm->gpa == 0) {
+ error = -ENOMEM;
+ goto failed_opregion;
+ }
+
+ /* Write address of Opregion and GSM into PCI register */
+ /* Set Opregion GPA */
+ uint32_t asls_val = read_config(&sc->psc_sel, PCIR_ASLS_CTL, 4);
+ pci_set_cfgdata32(sc->psc_pi, PCIR_ASLS_CTL,
+ opregion->gpa | (asls_val & ~PCIM_ASLS_OPREGION_MASK));
+ /* Set Graphics Stolen Memory GPA */
+ uint32_t bdsm_val = read_config(&sc->psc_sel, PCIR_BDSM, 4);
+ pci_set_cfgdata32(
+ sc->psc_pi, PCIR_BDSM, gsm->gpa | (bdsm_val & ~PCIM_BDSM_GSM_MASK));
+
+ /* Map Opregion and GSM into guest space */
+ if ((error = passthru_modify_pptdev_mmio(
+ ctx, sc, opregion, PT_MAP_PPTDEV_MMIO)) != 0)
+ goto failed_opregion;
+ if ((error = passthru_modify_pptdev_mmio(
+ ctx, sc, gsm, PT_MAP_PPTDEV_MMIO)) != 0)
+ goto failed_gsm;
+
+ /* Protect PCI register */
+ set_pcir_prot(sc, PCIR_ASLS_CTL, 0x04, PPT_PCIR_PROT_NA);
+ set_pcir_prot(sc, PCIR_BDSM, 0x04, PPT_PCIR_PROT_NA);
+
+ return (0);
+
+failed_opregion:
+ opregion->gpa = 0;
+failed_gsm:
+ gsm->gpa = 0;
+ return error;
+}
+
+void
+gvt_d_deinit(struct vmctx *ctx, struct pci_devinst *pi)
+{
+ struct passthru_softc *sc;
+
+ sc = pi->pi_arg;
+
+ struct passthru_mmio_mapping *gsm = &sc->psc_mmio_map[GVT_D_MAP_GSM];
+ struct passthru_mmio_mapping *opregion =
+ &sc->psc_mmio_map[GVT_D_MAP_OPREGION];
+
+ /* The GPA is only set if the mapping was initialized */
+ if (gsm->gpa)
+ passthru_modify_pptdev_mmio(ctx, sc, gsm, PT_UNMAP_PPTDEV_MMIO);
+ if (opregion->gpa)
+ passthru_modify_pptdev_mmio(
+ ctx, sc, opregion, PT_UNMAP_PPTDEV_MMIO);
+}
diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c
--- a/usr.sbin/bhyve/pci_lpc.c
+++ b/usr.sbin/bhyve/pci_lpc.c
@@ -33,9 +33,14 @@
__FBSDID("$FreeBSD$");
#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/pciio.h>
#include <machine/vmm.h>
#include <machine/vmm_snapshot.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -85,6 +89,29 @@
"COM1", "COM2", "COM3", "COM4"
};
+#ifndef _PATH_DEVPCI
+#define _PATH_DEVPCI "/dev/pci"
+#endif
+
+static int pcifd = -1;
+
+static uint32_t
+read_config(struct pcisel *sel, long reg, int width)
+{
+ struct pci_io pi = { 0 };
+ pi.pi_sel.pc_domain = sel->pc_domain;
+ pi.pi_sel.pc_bus = sel->pc_bus;
+ pi.pi_sel.pc_dev = sel->pc_dev;
+ pi.pi_sel.pc_func = sel->pc_func;
+ pi.pi_reg = reg;
+ pi.pi_width = width;
+
+ if (ioctl(pcifd, PCIOCREAD, &pi) < 0)
+ return (0);
+
+ return (pi.pi_data);
+}
+
/*
* LPC device configuration is in the following form:
* <lpc_device_name>[,<options>]
@@ -452,6 +479,40 @@
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA);
+ /* open host device */
+ if (pcifd < 0) {
+ pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
+ if (pcifd < 0) {
+ warn("failed to open %s", _PATH_DEVPCI);
+ return (-1);
+ }
+ }
+
+ /* on Intel systems lpc is always connected to 0:1f.0 */
+ struct pcisel sel;
+ sel.pc_domain = 0;
+ sel.pc_bus = 0;
+ sel.pc_dev = 0x1f;
+ sel.pc_func = 0;
+
+ if (read_config(&sel, PCIR_VENDOR, 2) == PCI_VENDOR_INTEL) {
+ /*
+ * The VID, DID, REVID, SUBVID and SUBDID of igd-lpc need to be
+ * aligned with the physical ones. Without these physical
+ * values, GVT-d GOP driver couldn't work.
+ */
+ pci_set_cfgdata16(
+ pi, PCIR_DEVICE, read_config(&sel, PCIR_DEVICE, 2));
+ pci_set_cfgdata16(
+ pi, PCIR_VENDOR, read_config(&sel, PCIR_VENDOR, 2));
+ pci_set_cfgdata8(
+ pi, PCIR_REVID, read_config(&sel, PCIR_REVID, 1));
+ pci_set_cfgdata16(
+ pi, PCIR_SUBVEND_0, read_config(&sel, PCIR_SUBVEND_0, 2));
+ pci_set_cfgdata16(
+ pi, PCIR_SUBDEV_0, read_config(&sel, PCIR_SUBDEV_0, 2));
+ }
+
lpc_bridge = pi;
return (0);
diff --git a/usr.sbin/bhyve/pci_passthru.h b/usr.sbin/bhyve/pci_passthru.h
new file mode 100644
--- /dev/null
+++ b/usr.sbin/bhyve/pci_passthru.h
@@ -0,0 +1,93 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Beckhoff Automation GmbH & Co. KG
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#pragma once
+
+#include <sys/pciio.h>
+
+#include <vmmapi.h>
+
+#include "config.h"
+#include "pci_emul.h"
+
+struct passthru_mmio_mapping {
+ uint64_t gpa;
+ uint64_t len;
+ uint64_t hpa;
+};
+
+struct passthru_softc {
+ struct pci_devinst *psc_pi;
+ struct pcibar psc_bar[PCI_BARMAX + 1];
+ struct {
+ int capoff;
+ int msgctrl;
+ int emulated;
+ } psc_msi;
+ struct {
+ int capoff;
+ } psc_msix;
+ struct pcisel psc_sel;
+
+ struct passthru_mmio_mapping psc_mmio_map[2];
+ uint8_t psc_pcir_prot_map[(PCI_REGMAX + 1) / 4];
+};
+
+#define PT_MAP_PPTDEV_MMIO 1
+#define PT_UNMAP_PPTDEV_MMIO 0
+
+#define PPT_PCIR_PROT_NA 0 /* No Access to physical values */
+#define PPT_PCIR_PROT_RO 1 /* Read Only access to physical values */
+#define PPT_PCIR_PROT_WO 2 /* Write Only access to physical values */
+#define PPT_PCIR_PROT_RW \
+ (PPT_PCIR_PROT_RO | \
+ PPT_PCIR_PROT_WO) /* Read/Write access to physical values */
+#define PPT_PCIR_PROT_MASK 0x03
+
+#define GET_INT_CONFIG(var, name) \
+ do { \
+ const char *value = get_config_value_node(nvl, name); \
+ if (value == NULL) { \
+ EPRINTLN( \
+ "passthru: missing required %s setting", name); \
+ return (1); \
+ } \
+ var = atoi(value); \
+ } while (0)
+
+int passthru_modify_pptdev_mmio(struct vmctx *ctx, struct passthru_softc *sc,
+ struct passthru_mmio_mapping *map, int registration);
+uint32_t read_config(const struct pcisel *sel, long reg, int width);
+void write_config(const struct pcisel *sel, long reg, int width, uint32_t data);
+int set_pcir_prot(
+ struct passthru_softc *sc, uint32_t reg, uint32_t len, uint8_t prot);
+int gvt_d_early_quirks(struct vmctx *ctx, struct pcisel *sel);
+int gvt_d_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl);
+void gvt_d_deinit(struct vmctx *ctx, struct pci_devinst *pi);
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -60,10 +60,10 @@
#include <machine/vmm.h>
#include <vmmapi.h>
-#include "config.h"
#include "debug.h"
#include "pci_emul.h"
#include "mem.h"
+#include "pci_passthru.h"
#ifndef _PATH_DEVPCI
#define _PATH_DEVPCI "/dev/pci"
@@ -82,24 +82,21 @@
#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1)
#define MSIX_CAPLEN 12
+#define GET_INT_CONFIG(var, name) \
+ do { \
+ const char *value = get_config_value_node(nvl, name); \
+ if (value == NULL) { \
+ EPRINTLN( \
+ "passthru: missing required %s setting", name); \
+ return (1); \
+ } \
+ var = atoi(value); \
+ } while (0)
+
static int pcifd = -1;
static int iofd = -1;
static int memfd = -1;
-struct passthru_softc {
- struct pci_devinst *psc_pi;
- struct pcibar psc_bar[PCI_BARMAX + 1];
- struct {
- int capoff;
- int msgctrl;
- int emulated;
- } psc_msi;
- struct {
- int capoff;
- } psc_msix;
- struct pcisel psc_sel;
-};
-
static int
msi_caplen(int msgctrl)
{
@@ -122,7 +119,7 @@
return (len);
}
-static uint32_t
+uint32_t
read_config(const struct pcisel *sel, long reg, int width)
{
struct pci_io pi;
@@ -138,7 +135,7 @@
return (pi.pi_data);
}
-static void
+void
write_config(const struct pcisel *sel, long reg, int width, uint32_t data)
{
struct pci_io pi;
@@ -152,6 +149,51 @@
(void)ioctl(pcifd, PCIOCWRITE, &pi); /* XXX */
}
+int
+passthru_modify_pptdev_mmio(struct vmctx *ctx, struct passthru_softc *sc,
+ struct passthru_mmio_mapping *map, int registration)
+{
+ if (registration == PT_MAP_PPTDEV_MMIO)
+ return vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
+ sc->psc_sel.pc_dev, sc->psc_sel.pc_func, map->gpa, map->len,
+ map->hpa);
+ else
+ return vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
+ sc->psc_sel.pc_dev, sc->psc_sel.pc_func, map->gpa,
+ map->len);
+}
+
+static int
+passthru_early_quirks(struct vmctx *ctx, const nvlist_t *nvl)
+{
+ int bus, slot, func;
+ GET_INT_CONFIG(bus, "bus");
+ GET_INT_CONFIG(slot, "slot");
+ GET_INT_CONFIG(func, "func");
+
+ if (pcifd < 0) {
+ pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
+ if (pcifd < 0) {
+ warn("failed to open %s", _PATH_DEVPCI);
+ return (-1);
+ }
+ }
+
+ struct pcisel sel = { .pc_bus = bus, .pc_dev = slot, .pc_func = func };
+
+ uint16_t vendor = read_config(&sel, PCIR_VENDOR, 0x02);
+ uint8_t class = read_config(&sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return (0);
+
+ if (vendor == PCI_VENDOR_INTEL)
+ return gvt_d_early_quirks(ctx, &sel);
+
+ return (0);
+}
+
#ifdef LEGACY_SUPPORT
static int
passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
@@ -559,12 +601,23 @@
sc->psc_bar[i].type = bartype;
sc->psc_bar[i].size = size;
sc->psc_bar[i].addr = base;
+ sc->psc_bar[i].lobits = 0;
/* Allocate the BAR in the guest I/O or MMIO space */
error = pci_emul_alloc_bar(pi, i, bartype, size);
if (error)
return (-1);
+ /* Use same lobits as physical bar */
+ uint8_t lobits = read_config(&sc->psc_sel, PCIR_BAR(i), 0x01);
+ if (bartype == PCIBAR_MEM32 || bartype == PCIBAR_MEM64) {
+ lobits &= ~PCIM_BAR_MEM_BASE;
+ } else {
+ lobits &= ~PCIM_BAR_IO_BASE;
+ }
+ sc->psc_bar[i].lobits = lobits;
+ pi->pi_bar[i].lobits = lobits;
+
/* The MSI-X table needs special handling */
if (i == pci_msix_table_bar(pi)) {
error = init_msix_table(ctx, sc, base);
@@ -610,14 +663,55 @@
goto done;
}
- pci_set_cfgdata16(pi, PCIR_COMMAND, read_config(&sc->psc_sel,
- PCIR_COMMAND, 2));
+ write_config(
+ &sc->psc_sel, PCIR_COMMAND, 2, pci_get_cfgdata16(pi, PCIR_COMMAND));
error = 0; /* success */
done:
return (error);
}
+#define PPT_PCIR_PROT(reg) \
+ ((sc->psc_pcir_prot_map[(reg) / 4] >> (((reg) & 0x03) * 2)) & PPT_PCIR_PROT_MASK)
+
+int
+set_pcir_prot(
+ struct passthru_softc *sc, uint32_t reg, uint32_t len, uint8_t prot)
+{
+ if (reg > PCI_REGMAX || len > PCI_REGMAX + 1 - reg)
+ return (-1);
+
+ prot &= PPT_PCIR_PROT_MASK;
+
+ for (uint32_t i = reg; i < reg + len; ++i) {
+ /* delete old 2-bit prot value (each register uses 2 bits) */
+ sc->psc_pcir_prot_map[i / 4] &= ~(
+ PPT_PCIR_PROT_MASK << ((i & 0x03) * 2));
+ /* set new 2-bit prot value */
+ sc->psc_pcir_prot_map[i / 4] |= prot << ((i & 0x03) * 2);
+ }
+
+ return (0);
+}
+
+static int
+is_pcir_writable(struct passthru_softc *sc, uint32_t reg)
+{
+ if (reg > PCI_REGMAX)
+ return (0);
+
+ return ((PPT_PCIR_PROT(reg) & PPT_PCIR_PROT_WO) != 0);
+}
+
+static int
+is_pcir_readable(struct passthru_softc *sc, uint32_t reg)
+{
+ if (reg > PCI_REGMAX)
+ return (0);
+
+ return ((PPT_PCIR_PROT(reg) & PPT_PCIR_PROT_RO) != 0);
+}
+
static int
passthru_legacy_config(nvlist_t *nvl, const char *opts)
{
@@ -641,12 +735,50 @@
return (0);
}
+static int
+passthru_init_quirks(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
+{
+ struct passthru_softc *sc = pi->pi_arg;
+
+ uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02);
+ uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return (0);
+
+ if (vendor == PCI_VENDOR_INTEL)
+ return gvt_d_init(ctx, pi, nvl);
+
+ return (0);
+}
+
+static void
+passthru_deinit_quirks(struct vmctx *ctx, struct pci_devinst *pi)
+{
+ struct passthru_softc *sc = pi->pi_arg;
+
+ if (sc == NULL)
+ return;
+
+ uint16_t vendor = read_config(&sc->psc_sel, PCIR_VENDOR, 0x02);
+ uint8_t class = read_config(&sc->psc_sel, PCIR_CLASS, 0x01);
+
+ /* currently only display devices have quirks */
+ if (class != PCIC_DISPLAY)
+ return;
+
+ if (vendor == PCI_VENDOR_INTEL)
+ gvt_d_deinit(ctx, pi);
+
+ return;
+}
+
static int
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
{
int bus, slot, func, error, memflags;
struct passthru_softc *sc;
- const char *value;
#ifndef WITHOUT_CAPSICUM
cap_rights_t rights;
cap_ioctl_t pci_ioctls[] = { PCIOCREAD, PCIOCWRITE, PCIOCGETBAR };
@@ -711,15 +843,6 @@
errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif
-#define GET_INT_CONFIG(var, name) do { \
- value = get_config_value_node(nvl, name); \
- if (value == NULL) { \
- EPRINTLN("passthru: missing required %s setting", name); \
- return (error); \
- } \
- var = atoi(value); \
-} while (0)
-
GET_INT_CONFIG(bus, "bus");
GET_INT_CONFIG(slot, "slot");
GET_INT_CONFIG(func, "func");
@@ -736,9 +859,21 @@
sc->psc_pi = pi;
/* initialize config space */
- error = cfginit(ctx, pi, bus, slot, func);
+ if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
+ goto done;
+
+ /* allow access to all PCI registers */
+ if ((error = set_pcir_prot(sc, 0, PCI_REGMAX + 1, PPT_PCIR_PROT_RW)) !=
+ 0)
+ goto done;
+
+ if ((error = passthru_init_quirks(ctx, pi, nvl)) != 0)
+ goto done;
+
+ error = 0; /* success */
done:
if (error) {
+ passthru_deinit_quirks(ctx, pi);
free(sc);
vm_unassign_pptdev(ctx, bus, slot, func);
}
@@ -788,6 +923,10 @@
sc = pi->pi_arg;
+ /* skip for protected PCI registers */
+ if (!is_pcir_readable(sc, coff))
+ return (-1);
+
/*
* PCI BARs and MSI capability is emulated.
*/
@@ -834,6 +973,10 @@
sc = pi->pi_arg;
+ /* skip for protected PCI registers */
+ if (!is_pcir_writable(sc, coff))
+ return (-1);
+
/*
* PCI BARs are emulated
*/
@@ -958,7 +1101,7 @@
static void
passthru_msix_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
- int enabled, uint64_t address)
+ int enabled, uint64_t address)
{
struct passthru_softc *sc;
size_t remaining;
@@ -966,21 +1109,15 @@
sc = pi->pi_arg;
table_offset = rounddown2(pi->pi_msix.table_offset, 4096);
+
+ struct passthru_mmio_mapping map;
+
if (table_offset > 0) {
- if (!enabled) {
- if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- table_offset) != 0)
- warnx("pci_passthru: unmap_pptdev_mmio failed");
- } else {
- if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- table_offset,
- sc->psc_bar[baridx].addr) != 0)
- warnx("pci_passthru: map_pptdev_mmio failed");
- }
+ map.gpa = address;
+ map.len = table_offset;
+ map.hpa = sc->psc_bar[baridx].addr;
+ if (passthru_modify_pptdev_mmio(ctx, sc, &map, enabled) != 0)
+ warnx("pci_passthru: modify_pptdev_mmio failed");
}
table_size = pi->pi_msix.table_offset - table_offset;
table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
@@ -988,62 +1125,47 @@
remaining = pi->pi_bar[baridx].size - table_offset - table_size;
if (remaining > 0) {
address += table_offset + table_size;
- if (!enabled) {
- if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- remaining) != 0)
- warnx("pci_passthru: unmap_pptdev_mmio failed");
- } else {
- if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- remaining,
- sc->psc_bar[baridx].addr +
- table_offset + table_size) != 0)
- warnx("pci_passthru: map_pptdev_mmio failed");
- }
+ map.gpa = address;
+ map.len = remaining;
+ map.hpa = sc->psc_bar[baridx].addr + table_offset + table_size;
+ if (passthru_modify_pptdev_mmio(ctx, sc, &map, enabled) != 0)
+ warnx("pci_passthru: modify_pptdev_mmio failed");
}
}
static void
passthru_mmio_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
- int enabled, uint64_t address)
+ int enabled, uint64_t address)
{
struct passthru_softc *sc;
sc = pi->pi_arg;
- if (!enabled) {
- if (vm_unmap_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- sc->psc_bar[baridx].size) != 0)
- warnx("pci_passthru: unmap_pptdev_mmio failed");
- } else {
- if (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, address,
- sc->psc_bar[baridx].size,
- sc->psc_bar[baridx].addr) != 0)
- warnx("pci_passthru: map_pptdev_mmio failed");
- }
+
+ struct passthru_mmio_mapping map;
+ map.gpa = address;
+ map.len = sc->psc_bar[baridx].size;
+ map.hpa = sc->psc_bar[baridx].addr;
+
+ if (passthru_modify_pptdev_mmio(ctx, sc, &map, enabled) != 0)
+ warnx("pci_passthru: modify_pptdev_mmio failed");
}
-static void
+static int
passthru_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx,
- int enabled, uint64_t address)
+ int enabled, uint64_t address)
{
-
if (pi->pi_bar[baridx].type == PCIBAR_IO)
- return;
+ return (-1);
if (baridx == pci_msix_table_bar(pi))
passthru_msix_addr(ctx, pi, baridx, enabled, address);
else
passthru_mmio_addr(ctx, pi, baridx, enabled, address);
+ return (0);
}
struct pci_devemu passthru = {
.pe_emu = "passthru",
+ .pe_early_quirks = passthru_early_quirks,
.pe_init = passthru_init,
.pe_legacy_config = passthru_legacy_config,
.pe_cfgwrite = passthru_cfgwrite,
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Apr 30, 1:57 AM (6 h, 39 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32429688
Default Alt Text
D26209.id88259.diff (50 KB)
Attached To
Mode
D26209: GVT-d support for bhyve
Attached
Detach File
Event Timeline
Log In to Comment