Page MenuHomeFreeBSD

D54641.id172185.diff
No OneTemporary

D54641.id172185.diff

diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -92,13 +92,22 @@
PCIBAR_MEM64,
PCIBAR_MEMHI64,
PCIBAR_ROM,
+ PCIBAR_MAX
};
+#define PCIBAR_MEM64_MEM32_ADDR 0x1
+
struct pcibar {
enum pcibar_type type; /* io or memory */
uint64_t size;
uint64_t addr;
+ enum pcibar_addr_state {
+ PCIBAR_ADDR_INVALID,
+ PCIBAR_ADDR_ASSIGNED,
+ PCIBAR_ADDR_PARTIAL
+ } state;
uint8_t lobits;
+ uint8_t flags;
};
#define PI_NAMESZ 40
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -29,6 +29,8 @@
#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/mman.h>
+#include <sys/nv.h>
+#include <sys/vmem.h>
#include <ctype.h>
#include <err.h>
@@ -95,19 +97,16 @@
uint32_t membase32, memlimit32; /* mmio window below 4GB */
uint64_t membase64, memlimit64; /* mmio window above 4GB */
struct slotinfo slotinfo[MAXSLOTS];
+ vmem_t *resources[PCIBAR_MAX];
};
static struct businfo *pci_businfo[MAXBUSES];
SET_DECLARE(pci_devemu_set, struct pci_devemu);
-static uint64_t pci_emul_iobase;
static uint8_t *pci_emul_rombase;
static uint64_t pci_emul_romoffset;
static uint8_t *pci_emul_romlim;
-static uint64_t pci_emul_membase32;
-static uint64_t pci_emul_membase64;
-static uint64_t pci_emul_memlim64;
struct pci_bar_allocation {
TAILQ_ENTRY(pci_bar_allocation) chain;
@@ -610,25 +609,6 @@
return (0);
}
-
-static int
-pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
- uint64_t *addr)
-{
- uint64_t base;
-
- assert((size & (size - 1)) == 0); /* must be a power of 2 */
-
- base = roundup2(*baseptr, size);
-
- if (base + size <= limit) {
- *addr = base;
- *baseptr = base + size;
- return (0);
- } else
- return (-1);
-}
-
/*
* Register (or unregister) the MMIO or I/O region associated with the BAR
* register 'idx' of an emulated pci device.
@@ -753,18 +733,99 @@
return (cmd & PCIM_CMD_MEMEN);
}
+/*
+ * Search every emulated PCI bus for an assigned BAR whose range contains
+ * 'addr'. Store it in '*res' and return 0, or return ENOENT if none matches.
+ */
+static int
+find_assigned_bar(uint64_t addr, struct pcibar **res)
+{
+ struct pcibar *bp;
+ struct businfo *bi;
+ struct funcinfo *fi;
+ struct slotinfo *si;
+ int i, bus, slot, func;
+ struct pci_devinst *pdi;
+
+ for (bus = 0; bus < MAXBUSES; bus++) {
+ if ((bi = pci_businfo[bus]) == NULL)
+ continue; /* no businfo for this bus number */
+ for (slot = 0; slot < MAXSLOTS; slot++) {
+ si = &bi->slotinfo[slot];
+ for (func = 0; func < MAXFUNCS; func++) {
+ fi = &si->si_funcs[func];
+ if (fi->fi_devi == NULL)
+ continue; /* no device instance for this function */
+ pdi = fi->fi_devi;
+ for (i = 0; i <= PCI_BARMAX; i++) {
+ bp = &pdi->pi_bar[i];
+
+ if (bp->type == PCIBAR_NONE ||
+ bp->state != PCIBAR_ADDR_ASSIGNED)
+ continue; /* only BARs in the 'assigned' state */
+ if (addr >= bp->addr &&
+ addr < (bp->addr + bp->size)) {
+ *res = bp; /* first match wins */
+ return (0);
+ }
+ }
+ }
+ }
+ }
+
+ return (ENOENT);
+}
+
/*
* Update the MMIO or I/O address that is decoded by the BAR register.
*
+ * The lifecycle of a BAR address is tracked using the following state machine:
+ * +----------+
+ * +> | invalid | -+
+ * | +----------+ |
+ * | | |
+ * | | |
+ * | v |
+ * | +----------+ |
+ * | | partial | |
+ * | +----------+ |
+ * | | |
+ * | | |
+ * | v |
+ * | +----------+ |
+ * +- | assigned | <+
+ * +----------+
+ * ^ |
+ * +------+
+ * The 'assigned' state means that the BAR's address was allocated from the
+ * appropriate vmem arena, while the 'invalid' state means that the BAR does not
+ * have a valid address. The 'partial' state covers the two-step process with
+ * which a 64-bit BAR address is constructed. A guest will first update the
+ * lower 32 bits of the address, moving from an 'invalid' to a 'partial' state.
+ * Updating the upper 32 bits will then move it to the 'assigned' state. Note
+ * that a guest may also move a 64-bit BAR address directly from
+ * 'invalid' to 'assigned' by modifying the upper 32 bits only. A guest may also
+ * move an 'assigned' address to another valid address, effectively performing a
+ * self-referencing transition.
+ *
* If the pci device has enabled the address space decoding then intercept
* the address range decoded by the BAR register.
*/
static void
update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
{
- int decode;
+ bool alloc;
+ vmem_t *arena;
+ int decode, error;
+ uint64_t new_addr;
+ struct businfo *bi;
+ struct pcibar *bp, *bp2;
+ uint64_t mask, old_addr;
+
+ bi = pci_businfo[pi->pi_bus];
+ bp = &pi->pi_bar[idx];
- if (pi->pi_bar[idx].type == PCIBAR_IO)
+ if (bp->type == PCIBAR_IO)
decode = porten(pi);
else
decode = memen(pi);
@@ -772,23 +833,136 @@
if (decode)
unregister_bar(pi, idx);
+ old_addr = bp->addr;
+ mask = ~(bp->size - 1);
switch (type) {
case PCIBAR_IO:
+ bp->addr = addr;
+ alloc = addr != ((uint16_t)-1 & mask);
+ break;
case PCIBAR_MEM32:
- pi->pi_bar[idx].addr = addr;
+ bp->addr = addr;
+ alloc = addr != ((uint32_t)-1 & mask);
break;
case PCIBAR_MEM64:
- pi->pi_bar[idx].addr &= ~0xffffffffUL;
- pi->pi_bar[idx].addr |= addr;
+ bp->addr &= ~0xffffffffUL;
+ bp->addr |= addr;
+ alloc = addr != ((uint32_t)-1 & mask);
+ if (alloc) {
+ assert(bp->state == PCIBAR_ADDR_INVALID);
+ bp->state = PCIBAR_ADDR_PARTIAL;
+ /*
+ * Skip operating on a partial address since the
+ * guest has currently only set the lower 32 bits.
+ */
+ type = PCIBAR_NONE;
+ }
break;
case PCIBAR_MEMHI64:
- pi->pi_bar[idx].addr &= 0xffffffff;
- pi->pi_bar[idx].addr |= addr;
+ bp->addr &= 0xffffffff;
+ bp->addr |= addr;
+ alloc = addr != ((uint64_t)-1 & ~0xffffffffUL);
+ if (alloc)
+ type = PCIBAR_MEM64;
+ else {
+ /*
+ * Skip operating on a partial address since the
+ * guest is currently clearing the upper 32 bits.
+ */
+ type = PCIBAR_NONE;
+ }
break;
default:
assert(0);
}
+ arena = bi->resources[type];
+ if (arena == NULL) {
+ assert(bp->state != PCIBAR_ADDR_ASSIGNED);
+ goto done;
+ }
+ if (!alloc) {
+ assert(bp->state == PCIBAR_ADDR_ASSIGNED);
+ if ((bp->flags & PCIBAR_MEM64_MEM32_ADDR) != 0) {
+ /*
+ * We're dealing with a MEM64 address that was allocated
+ * from the MEM32 pool. Clear the corresponding
+ * flag and release it to the MEM32 pool.
+ */
+ assert(bp->type == PCIBAR_MEM64);
+ bp->flags &= ~PCIBAR_MEM64_MEM32_ADDR;
+ arena = bi->resources[PCIBAR_MEM32];
+ }
+ vmem_xfree(arena, old_addr, bp->size);
+ bp->state = PCIBAR_ADDR_INVALID;
+ } else {
+ if (bp->state == PCIBAR_ADDR_ASSIGNED) {
+ /*
+ * This BAR's address is already assigned and the guest
+ * wants to move it elsewhere ('assigned' ->
+ * 'assigned'). Start the process by releasing the
+ * current address first.
+ */
+ if ((bp->flags & PCIBAR_MEM64_MEM32_ADDR) != 0) {
+ /*
+ * Same as in the '!alloc' case above.
+ */
+ assert(bp->type == PCIBAR_MEM64);
+ bp->flags &= ~PCIBAR_MEM64_MEM32_ADDR;
+ vmem_xfree(bi->resources[PCIBAR_MEM32],
+ old_addr, bp->size);
+ } else
+ vmem_xfree(arena, old_addr, bp->size);
+ bp->state = PCIBAR_ADDR_INVALID;
+ }
+
+ /*
+ * We're about to allocate a new BAR address so
+ * the existing one must not be valid.
+ */
+ assert(bp->state != PCIBAR_ADDR_ASSIGNED);
+ new_addr = bp->addr;
+ if (bp->type == PCIBAR_MEM64 && new_addr < 4 * GB) {
+ /*
+ * Comply with the remark in 'pci_emul_assign_bar'
+ * and allocate this BAR address from the MEM32 pool.
+ */
+ bp->flags |= PCIBAR_MEM64_MEM32_ADDR;
+ arena = bi->resources[PCIBAR_MEM32];
+ }
+ error = vmem_xalloc(arena, bp->size, bp->size, 0, 0, new_addr,
+ new_addr + bp->size, M_BESTFIT | M_NOWAIT, &bp->addr);
+ if (error != 0) {
+ /*
+ * The allocation failed, meaning that another BAR is
+ * currently residing at the target address. Handle this
+ * by finding the offending BAR, releasing its address
+ * into the appropriate pool, and retrying the
+ * allocation.
+ */
+
+ error = find_assigned_bar(new_addr, &bp2);
+ assert(error == 0);
+ if ((bp2->flags & PCIBAR_MEM64_MEM32_ADDR) != 0) {
+ /* Same as the '!alloc' case above. */
+ assert(bp2->type == PCIBAR_MEM64);
+ bp2->flags &= ~PCIBAR_MEM64_MEM32_ADDR;
+ vmem_xfree(bi->resources[PCIBAR_MEM32],
+ bp2->addr, bp2->size);
+ } else
+ vmem_xfree(arena, bp2->addr, bp2->size);
+ bp2->state = PCIBAR_ADDR_INVALID;
+
+ error = vmem_xalloc(arena, bp->size, bp->size, 0, 0,
+ new_addr, new_addr + bp->size, M_BESTFIT | M_NOWAIT,
+ &bp->addr);
+ }
+ assert(error == 0);
+ assert(bp->addr == new_addr);
+ bp->state = PCIBAR_ADDR_ASSIGNED;
+ }
+
+done:
if (decode)
register_bar(pi, idx);
}
@@ -892,17 +1066,16 @@
pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx,
const enum pcibar_type type, const uint64_t size)
{
- int error;
- uint64_t *baseptr, limit, addr, mask, lobits, bar;
+ uint64_t addr, mask, lobits, bar;
+ struct businfo *bi;
+ vmem_t *arena;
+ bi = pci_businfo[pdi->pi_bus];
+ arena = bi->resources[type];
switch (type) {
case PCIBAR_NONE:
- baseptr = NULL;
- addr = mask = lobits = 0;
- break;
+ return (0);
case PCIBAR_IO:
- baseptr = &pci_emul_iobase;
- limit = PCI_EMUL_IOLIMIT;
mask = PCIM_BAR_IO_BASE;
lobits = PCIM_BAR_IO_SPACE;
break;
@@ -915,47 +1088,41 @@
* number (128MB currently).
*/
if (size > 128 * 1024 * 1024) {
- baseptr = &pci_emul_membase64;
- limit = pci_emul_memlim64;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
PCIM_BAR_MEM_PREFETCH;
} else {
- baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
+ pdi->pi_bar[idx].flags = PCIBAR_MEM64_MEM32_ADDR;
+ arena = bi->resources[PCIBAR_MEM32];
}
break;
case PCIBAR_MEM32:
- baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
break;
case PCIBAR_ROM:
/* do not claim memory for ROM. OVMF will do it for us. */
- baseptr = NULL;
- limit = 0;
+ addr = 0;
mask = PCIM_BIOS_ADDR_MASK;
lobits = 0;
break;
default:
- printf("pci_emul_alloc_base: invalid bar type %d\n", type);
- assert(0);
+ printf("%s: invalid bar type %d\n", __func__, type);
+ return (-1);
}
- if (baseptr != NULL) {
- error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
- if (error != 0)
- return (error);
- } else {
- addr = 0;
- }
+ assert((size & (size - 1)) == 0); /* must be a power of 2 */
+ if (arena != NULL &&
+ vmem_xalloc(arena, size, size, 0, 0, 0, ~0ul, M_BESTFIT, &addr) != 0)
+ return (-1);
pdi->pi_bar[idx].type = type;
pdi->pi_bar[idx].addr = addr;
pdi->pi_bar[idx].size = size;
+ pdi->pi_bar[idx].state = PCIBAR_ADDR_ASSIGNED;
+
/*
* passthru devices are using same lobits as physical device they set
* this property
@@ -1516,6 +1683,9 @@
int
init_pci(struct vmctx *ctx)
{
+ size_t io_range_size, mem32_range_size, mem64_range_size;
+ uint64_t pci_emul_membase32, pci_emul_membase64;
+ uint64_t pci_emul_iobase, pci_emul_memlim64;
char node_name[sizeof("pci.XXX.XX.X")];
struct mem_range mr;
struct pci_devemu *pde;
@@ -1526,11 +1696,20 @@
const char *emul;
size_t lowmem;
int bus, slot, func;
- int error;
+ int error, nbuses;
if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32)
errx(EX_OSERR, "Invalid lowmem limit");
+ nbuses = 0;
+ for (bus = 0; bus < MAXBUSES; bus++) {
+ snprintf(node_name, sizeof(node_name), "pci.%d", bus);
+ nvl = find_config_node(node_name);
+ if (nvl == NULL)
+ continue;
+ nbuses++;
+ }
+
pci_emul_iobase = PCI_EMUL_IOBASE;
pci_emul_membase32 = PCI_EMUL_MEMBASE32;
@@ -1539,6 +1718,10 @@
pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64);
pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64;
+ io_range_size = (PCI_EMUL_IOLIMIT - pci_emul_iobase) / nbuses;
+ mem32_range_size = (PCI_EMUL_MEMLIMIT32 - pci_emul_membase32) / nbuses;
+ mem64_range_size = (pci_emul_memlim64 - pci_emul_membase64) / nbuses;
+
TAILQ_INIT(&boot_devices);
for (bus = 0; bus < MAXBUSES; bus++) {
@@ -1557,6 +1740,24 @@
bi->membase32 = pci_emul_membase32;
bi->membase64 = pci_emul_membase64;
+ pci_emul_iobase += io_range_size;
+ pci_emul_membase32 += mem32_range_size;
+ pci_emul_membase64 += mem64_range_size;
+
+ bi->iolimit = pci_emul_iobase - 1;
+ bi->memlimit32 = pci_emul_membase32 - 1;
+ bi->memlimit64 = pci_emul_membase64 - 1;
+
+ bi->resources[PCIBAR_IO] = vmem_create("io", bi->iobase,
+ io_range_size, 0, 0, 0);
+ assert(bi->resources[PCIBAR_IO] != NULL);
+ bi->resources[PCIBAR_MEM32] = vmem_create("mem32",
+ bi->membase32, mem32_range_size, 0, 0, 0);
+ assert(bi->resources[PCIBAR_MEM32] != NULL);
+ bi->resources[PCIBAR_MEM64] = vmem_create("mem64",
+ bi->membase64, mem64_range_size, 0, 0, 0);
+ assert(bi->resources[PCIBAR_MEM64] != NULL);
+
/* first run: init devices */
for (slot = 0; slot < MAXSLOTS; slot++) {
si = &bi->slotinfo[slot];
@@ -1606,25 +1807,6 @@
free(bar);
}
TAILQ_INIT(&pci_bars);
-
- /*
- * Add some slop to the I/O and memory resources decoded by
- * this bus to give a guest some flexibility if it wants to
- * reprogram the BARs.
- */
- pci_emul_iobase += BUSIO_ROUNDUP;
- pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
- bi->iolimit = pci_emul_iobase;
-
- pci_emul_membase32 += BUSMEM32_ROUNDUP;
- pci_emul_membase32 = roundup2(pci_emul_membase32,
- BUSMEM32_ROUNDUP);
- bi->memlimit32 = pci_emul_membase32;
-
- pci_emul_membase64 += BUSMEM64_ROUNDUP;
- pci_emul_membase64 = roundup2(pci_emul_membase64,
- BUSMEM64_ROUNDUP);
- bi->memlimit64 = pci_emul_membase64;
}
/*
@@ -1785,6 +1967,9 @@
#ifdef __amd64__
if (bus == 0) {
+ int error;
+ vmem_t *arena;
+
dsdt_indent(3);
dsdt_fixed_ioport(0xCF8, 8);
dsdt_unindent(3);
@@ -1813,6 +1998,13 @@
dsdt_line(" })");
goto done;
}
+
+ /*
+ * Register the bus's IO BAR address range.
+ */
+ arena = bi->resources[PCIBAR_IO];
+ error = vmem_add(arena, 0x0D00, PCI_EMUL_IOBASE - 0x0D00, 0);
+ assert(error == 0);
}
#endif
assert(bi != NULL);

File Metadata

Mime Type
text/plain
Expires
Fri, May 15, 12:34 AM (4 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28885837
Default Alt Text
D54641.id172185.diff (14 KB)

Event Timeline