Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F156485197
D54641.id172185.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
14 KB
Referenced Files
None
Subscribers
None
D54641.id172185.diff
View Options
diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -92,13 +92,22 @@
PCIBAR_MEM64,
PCIBAR_MEMHI64,
PCIBAR_ROM,
+ PCIBAR_MAX
};
+#define PCIBAR_MEM64_MEM32_ADDR 0x1
+
struct pcibar {
enum pcibar_type type; /* io or memory */
uint64_t size;
uint64_t addr;
+ enum pcibar_addr_state {
+ PCIBAR_ADDR_INVALID,
+ PCIBAR_ADDR_ASSIGNED,
+ PCIBAR_ADDR_PARTIAL
+ } state;
uint8_t lobits;
+ uint8_t flags;
};
#define PI_NAMESZ 40
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -29,6 +29,8 @@
#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/mman.h>
+#include <sys/nv.h>
+#include <sys/vmem.h>
#include <ctype.h>
#include <err.h>
@@ -95,19 +97,16 @@
uint32_t membase32, memlimit32; /* mmio window below 4GB */
uint64_t membase64, memlimit64; /* mmio window above 4GB */
struct slotinfo slotinfo[MAXSLOTS];
+ vmem_t *resources[PCIBAR_MAX];
};
static struct businfo *pci_businfo[MAXBUSES];
SET_DECLARE(pci_devemu_set, struct pci_devemu);
-static uint64_t pci_emul_iobase;
static uint8_t *pci_emul_rombase;
static uint64_t pci_emul_romoffset;
static uint8_t *pci_emul_romlim;
-static uint64_t pci_emul_membase32;
-static uint64_t pci_emul_membase64;
-static uint64_t pci_emul_memlim64;
struct pci_bar_allocation {
TAILQ_ENTRY(pci_bar_allocation) chain;
@@ -610,25 +609,6 @@
return (0);
}
-
-static int
-pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
- uint64_t *addr)
-{
- uint64_t base;
-
- assert((size & (size - 1)) == 0); /* must be a power of 2 */
-
- base = roundup2(*baseptr, size);
-
- if (base + size <= limit) {
- *addr = base;
- *baseptr = base + size;
- return (0);
- } else
- return (-1);
-}
-
/*
* Register (or unregister) the MMIO or I/O region associated with the BAR
* register 'idx' of an emulated pci device.
@@ -753,18 +733,99 @@
return (cmd & PCIM_CMD_MEMEN);
}
+/*
+ * Search every emulated PCI bus/slot/function for an assigned BAR that
+ * contains 'addr'; on a hit store it in '*res' and return 0, else ENOENT.
+ */
+static int
+find_assigned_bar(uint64_t addr, struct pcibar **res)
+{
+ struct pcibar *bp;
+ struct businfo *bi;
+ struct funcinfo *fi;
+ struct slotinfo *si;
+ int i, bus, slot, func;
+ struct pci_devinst *pdi;
+
+ for (bus = 0; bus < MAXBUSES; bus++) {
+ if ((bi = pci_businfo[bus]) == NULL)
+ continue; /* no businfo recorded for this bus number */
+ for (slot = 0; slot < MAXSLOTS; slot++) {
+ si = &bi->slotinfo[slot];
+ for (func = 0; func < MAXFUNCS; func++) {
+ fi = &si->si_funcs[func];
+ if (fi->fi_devi == NULL)
+ continue; /* no device instance at this function */
+ pdi = fi->fi_devi;
+ for (i = 0; i <= PCI_BARMAX; i++) {
+ bp = &pdi->pi_bar[i];
+
+ if (bp->type == PCIBAR_NONE ||
+ bp->state != PCIBAR_ADDR_ASSIGNED)
+ continue; /* only BARs in the 'assigned' state are candidates */
+ if (addr >= bp->addr &&
+ addr < (bp->addr + bp->size)) { /* half-open range [addr, addr + size) */
+ *res = bp;
+ return (0); /* first match wins */
+ }
+ }
+ }
+ }
+ }
+
+ return (ENOENT); /* no match; '*res' is left unmodified */
+}
+
/*
* Update the MMIO or I/O address that is decoded by the BAR register.
*
+ * The lifecycle of a BAR address is tracked using the following state machine:
+ * +----------+
+ * +> | invalid | -+
+ * | +----------+ |
+ * | | |
+ * | | |
+ * | v |
+ * | +----------+ |
+ * | | partial | |
+ * | +----------+ |
+ * | | |
+ * | | |
+ * | v |
+ * | +----------+ |
+ * +- | assigned | <+
+ * +----------+
+ * ^ |
+ * +------+
+ * The 'assigned' state means that the BAR's address was allocated from the
+ * appropriate vmem arena, while the 'invalid' state means that the BAR does not
+ * have a valid address. The 'partial' state covers the two-step process with
+ * which a 64-bit BAR address is constructed. A guest will first update the
+ * lower 32 bits of the address, moving from an 'invalid' to a 'partial' state.
+ * Updating the upper 32 bits will then move it to the 'assigned' state. Note
+ * that a guest may also move a 64-bit BAR address directly from
+ * 'invalid' to 'assigned' by modifying the upper 32 bits only. A guest may also
+ * move an 'assigned' address to another valid address, effectively performing a
+ * self-referencing transition.
+ *
* If the pci device has enabled the address space decoding then intercept
* the address range decoded by the BAR register.
*/
static void
update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
{
- int decode;
+ bool alloc;
+ vmem_t *arena;
+ int decode, error;
+ uint64_t new_addr;
+ struct businfo *bi;
+ struct pcibar *bp, *bp2;
+ uint64_t mask, old_addr;
+
+ bi = pci_businfo[pi->pi_bus];
+ bp = &pi->pi_bar[idx];
- if (pi->pi_bar[idx].type == PCIBAR_IO)
+ if (bp->type == PCIBAR_IO)
decode = porten(pi);
else
decode = memen(pi);
@@ -772,23 +833,136 @@
if (decode)
unregister_bar(pi, idx);
+ old_addr = bp->addr;
+ mask = ~(bp->size - 1);
switch (type) {
case PCIBAR_IO:
+ bp->addr = addr;
+ alloc = addr != ((uint16_t)-1 & mask);
+ break;
case PCIBAR_MEM32:
- pi->pi_bar[idx].addr = addr;
+ bp->addr = addr;
+ alloc = addr != ((uint32_t)-1 & mask);
break;
case PCIBAR_MEM64:
- pi->pi_bar[idx].addr &= ~0xffffffffUL;
- pi->pi_bar[idx].addr |= addr;
+ bp->addr &= ~0xffffffffUL;
+ bp->addr |= addr;
+ alloc = addr != ((uint32_t)-1 & mask);
+ if (alloc) {
+ assert(bp->state == PCIBAR_ADDR_INVALID);
+ bp->state = PCIBAR_ADDR_PARTIAL;
+ /*
+ * Skip operating on a partial address since the
+ * guest has currently only set the lower 32 bits.
+ */
+ type = PCIBAR_NONE;
+ }
break;
case PCIBAR_MEMHI64:
- pi->pi_bar[idx].addr &= 0xffffffff;
- pi->pi_bar[idx].addr |= addr;
+ bp->addr &= 0xffffffff;
+ bp->addr |= addr;
+ alloc = addr != ((uint64_t)-1 & ~0xffffffffUL);
+ if (alloc)
+ type = PCIBAR_MEM64;
+ else {
+ /*
+ * Skip operating on a partial address since the
+ * guest is currently clearing the upper 32 bits.
+ */
+ type = PCIBAR_NONE;
+ }
break;
default:
assert(0);
}
+ arena = bi->resources[type];
+ if (arena == NULL) {
+ assert(bp->state != PCIBAR_ADDR_ASSIGNED);
+ goto done;
+ }
+ if (!alloc) {
+ assert(bp->state == PCIBAR_ADDR_ASSIGNED);
+ if ((bp->flags & PCIBAR_MEM64_MEM32_ADDR) != 0) {
+ /*
+ * We're dealing with a MEM64 address that was allocated
+ * from the MEM32 pool. Clear the corresponding
+ * flag and release it to the MEM32 pool.
+ */
+ assert(bp->type == PCIBAR_MEM64);
+ bp->flags &= ~PCIBAR_MEM64_MEM32_ADDR;
+ arena = bi->resources[PCIBAR_MEM32];
+ }
+ vmem_xfree(arena, old_addr, bp->size);
+ bp->state = PCIBAR_ADDR_INVALID;
+ } else {
+ if (bp->state == PCIBAR_ADDR_ASSIGNED) {
+ /*
+ * This BAR's address is already assigned and the guest
+ * wants to move it elsewhere ('assigned' ->
+ * 'assigned'). Start the process by releasing the
+ * current address first.
+ */
+ if ((bp->flags & PCIBAR_MEM64_MEM32_ADDR) != 0) {
+ /*
+ * Same as in the '!alloc' case above.
+ */
+ assert(bp->type == PCIBAR_MEM64);
+ bp->flags &= ~PCIBAR_MEM64_MEM32_ADDR;
+ vmem_xfree(bi->resources[PCIBAR_MEM32],
+ old_addr, bp->size);
+ } else
+ vmem_xfree(arena, old_addr, bp->size);
+ bp->state = PCIBAR_ADDR_INVALID;
+ }
+
+ /*
+ * We're about to allocate a new BAR address so
+ * the existing one must not be valid.
+ */
+ assert(bp->state != PCIBAR_ADDR_ASSIGNED);
+ new_addr = bp->addr;
+ if (bp->type == PCIBAR_MEM64 && new_addr < 4 * GB) {
+ /*
+ * Comply with the remark in 'pci_emul_assign_bar'
+ * and allocate this BAR address from the MEM32 pool.
+ */
+ bp->flags |= PCIBAR_MEM64_MEM32_ADDR;
+ arena = bi->resources[PCIBAR_MEM32];
+ }
+ error = vmem_xalloc(arena, bp->size, bp->size, 0, 0, new_addr,
+ new_addr + bp->size, M_BESTFIT | M_NOWAIT, &bp->addr);
+ if (error != 0) {
+ /*
+ * The allocation failed, meaning that another BAR is
+ * currently residing at the target address. Handle this
+ * by finding the offending BAR, releasing its address
+ * into the appropriate pool, and retrying the
+ * allocation.
+ */
+
+ error = find_assigned_bar(new_addr, &bp2);
+ assert(error == 0);
+ if ((bp2->flags & PCIBAR_MEM64_MEM32_ADDR) != 0) {
+ /* Same as the '!alloc' case above. */
+ assert(bp2->type == PCIBAR_MEM64);
+ bp2->flags &= ~PCIBAR_MEM64_MEM32_ADDR;
+ vmem_xfree(bi->resources[PCIBAR_MEM32],
+ bp2->addr, bp2->size);
+ } else
+ vmem_xfree(arena, bp2->addr, bp2->size);
+ bp2->state = PCIBAR_ADDR_INVALID;
+
+ error = vmem_xalloc(arena, bp->size, bp->size, 0, 0,
+ new_addr, new_addr + bp->size, M_BESTFIT | M_NOWAIT,
+ &bp->addr);
+ }
+ assert(error == 0);
+ assert(bp->addr == new_addr);
+ bp->state = PCIBAR_ADDR_ASSIGNED;
+ }
+
+done:
if (decode)
register_bar(pi, idx);
}
@@ -892,17 +1066,16 @@
pci_emul_assign_bar(struct pci_devinst *const pdi, const int idx,
const enum pcibar_type type, const uint64_t size)
{
- int error;
- uint64_t *baseptr, limit, addr, mask, lobits, bar;
+ uint64_t addr, mask, lobits, bar;
+ struct businfo *bi;
+ vmem_t *arena;
+ bi = pci_businfo[pdi->pi_bus];
+ arena = bi->resources[type];
switch (type) {
case PCIBAR_NONE:
- baseptr = NULL;
- addr = mask = lobits = 0;
- break;
+ return (0);
case PCIBAR_IO:
- baseptr = &pci_emul_iobase;
- limit = PCI_EMUL_IOLIMIT;
mask = PCIM_BAR_IO_BASE;
lobits = PCIM_BAR_IO_SPACE;
break;
@@ -915,47 +1088,41 @@
* number (128MB currently).
*/
if (size > 128 * 1024 * 1024) {
- baseptr = &pci_emul_membase64;
- limit = pci_emul_memlim64;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
PCIM_BAR_MEM_PREFETCH;
} else {
- baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64;
+ pdi->pi_bar[idx].flags = PCIBAR_MEM64_MEM32_ADDR;
+ arena = bi->resources[PCIBAR_MEM32];
}
break;
case PCIBAR_MEM32:
- baseptr = &pci_emul_membase32;
- limit = PCI_EMUL_MEMLIMIT32;
mask = PCIM_BAR_MEM_BASE;
lobits = PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
break;
case PCIBAR_ROM:
/* do not claim memory for ROM. OVMF will do it for us. */
- baseptr = NULL;
- limit = 0;
+ addr = 0;
mask = PCIM_BIOS_ADDR_MASK;
lobits = 0;
break;
default:
- printf("pci_emul_alloc_base: invalid bar type %d\n", type);
- assert(0);
+ printf("%s: invalid bar type %d\n", __func__, type);
+ return (-1);
}
- if (baseptr != NULL) {
- error = pci_emul_alloc_resource(baseptr, limit, size, &addr);
- if (error != 0)
- return (error);
- } else {
- addr = 0;
- }
+ assert((size & (size - 1)) == 0); /* must be a power of 2 */
+ if (arena != NULL &&
+ vmem_xalloc(arena, size, size, 0, 0, 0, ~0ul, M_BESTFIT, &addr) != 0)
+ return (-1);
pdi->pi_bar[idx].type = type;
pdi->pi_bar[idx].addr = addr;
pdi->pi_bar[idx].size = size;
+ pdi->pi_bar[idx].state = PCIBAR_ADDR_ASSIGNED;
+
/*
* passthru devices are using same lobits as physical device they set
* this property
@@ -1516,6 +1683,9 @@
int
init_pci(struct vmctx *ctx)
{
+ size_t io_range_size, mem32_range_size, mem64_range_size;
+ uint64_t pci_emul_membase32, pci_emul_membase64;
+ uint64_t pci_emul_iobase, pci_emul_memlim64;
char node_name[sizeof("pci.XXX.XX.X")];
struct mem_range mr;
struct pci_devemu *pde;
@@ -1526,11 +1696,20 @@
const char *emul;
size_t lowmem;
int bus, slot, func;
- int error;
+ int error, nbuses;
if (vm_get_lowmem_limit(ctx) > PCI_EMUL_MEMBASE32)
errx(EX_OSERR, "Invalid lowmem limit");
+ nbuses = 0;
+ for (bus = 0; bus < MAXBUSES; bus++) {
+ snprintf(node_name, sizeof(node_name), "pci.%d", bus);
+ nvl = find_config_node(node_name);
+ if (nvl == NULL)
+ continue;
+ nbuses++;
+ }
+
pci_emul_iobase = PCI_EMUL_IOBASE;
pci_emul_membase32 = PCI_EMUL_MEMBASE32;
@@ -1539,6 +1718,10 @@
pci_emul_membase64 = roundup2(pci_emul_membase64, PCI_EMUL_MEMSIZE64);
pci_emul_memlim64 = pci_emul_membase64 + PCI_EMUL_MEMSIZE64;
+ io_range_size = (PCI_EMUL_IOLIMIT - pci_emul_iobase) / nbuses;
+ mem32_range_size = (PCI_EMUL_MEMLIMIT32 - pci_emul_membase32) / nbuses;
+ mem64_range_size = (pci_emul_memlim64 - pci_emul_membase64) / nbuses;
+
TAILQ_INIT(&boot_devices);
for (bus = 0; bus < MAXBUSES; bus++) {
@@ -1557,6 +1740,24 @@
bi->membase32 = pci_emul_membase32;
bi->membase64 = pci_emul_membase64;
+ pci_emul_iobase += io_range_size;
+ pci_emul_membase32 += mem32_range_size;
+ pci_emul_membase64 += mem64_range_size;
+
+ bi->iolimit = pci_emul_iobase - 1;
+ bi->memlimit32 = pci_emul_membase32 - 1;
+ bi->memlimit64 = pci_emul_membase64 - 1;
+
+ bi->resources[PCIBAR_IO] = vmem_create("io", bi->iobase,
+ io_range_size, 0, 0, 0);
+ assert(bi->resources[PCIBAR_IO] != NULL);
+ bi->resources[PCIBAR_MEM32] = vmem_create("mem32",
+ bi->membase32, mem32_range_size, 0, 0, 0);
+ assert(bi->resources[PCIBAR_MEM32] != NULL);
+ bi->resources[PCIBAR_MEM64] = vmem_create("mem64",
+ bi->membase64, mem64_range_size, 0, 0, 0);
+ assert(bi->resources[PCIBAR_MEM64] != NULL);
+
/* first run: init devices */
for (slot = 0; slot < MAXSLOTS; slot++) {
si = &bi->slotinfo[slot];
@@ -1606,25 +1807,6 @@
free(bar);
}
TAILQ_INIT(&pci_bars);
-
- /*
- * Add some slop to the I/O and memory resources decoded by
- * this bus to give a guest some flexibility if it wants to
- * reprogram the BARs.
- */
- pci_emul_iobase += BUSIO_ROUNDUP;
- pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP);
- bi->iolimit = pci_emul_iobase;
-
- pci_emul_membase32 += BUSMEM32_ROUNDUP;
- pci_emul_membase32 = roundup2(pci_emul_membase32,
- BUSMEM32_ROUNDUP);
- bi->memlimit32 = pci_emul_membase32;
-
- pci_emul_membase64 += BUSMEM64_ROUNDUP;
- pci_emul_membase64 = roundup2(pci_emul_membase64,
- BUSMEM64_ROUNDUP);
- bi->memlimit64 = pci_emul_membase64;
}
/*
@@ -1785,6 +1967,9 @@
#ifdef __amd64__
if (bus == 0) {
+ int error;
+ vmem_t *arena;
+
dsdt_indent(3);
dsdt_fixed_ioport(0xCF8, 8);
dsdt_unindent(3);
@@ -1813,6 +1998,13 @@
dsdt_line(" })");
goto done;
}
+
+ /*
+ * Register the bus's IO BAR address range.
+ */
+ arena = bi->resources[PCIBAR_IO];
+ error = vmem_add(arena, 0x0D00, PCI_EMUL_IOBASE - 0x0D00, 0);
+ assert(error == 0);
}
#endif
assert(bi != NULL);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, May 15, 12:34 AM (4 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28885837
Default Alt Text
D54641.id172185.diff (14 KB)
Attached To
Mode
D54641: bhyve/pci_emul: Use vmem to track BAR allocations
Attached
Detach File
Event Timeline
Log In to Comment