diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -97,6 +97,7 @@
 	uint64_t	size;
 	uint64_t	addr;
 	uint8_t		lobits;
+	uint64_t	mapped_size;	/* bytes mapped for passthru; may exceed size */
 };
 
 #define PI_NAMESZ	40
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -77,6 +77,8 @@
 
 static int pcifd = -1;
 
+static void ppt_build_occupied_pages(void);
+
 SET_DECLARE(passthru_dev_set, struct passthru_dev);
 
 struct passthru_softc {
@@ -564,6 +566,173 @@
 	return (0);
 }
 
+/*
+ * One host page covered by a memory BAR of some PCI device.  The table is
+ * sorted by (page, bdfd) before it is searched.
+ */
+struct occ_entry {
+	uint64_t page;
+	uint32_t bdfd;
+	uint8_t bar_idx;
+};
+
+static int g_occ_built = 0;
+static struct occ_entry *g_occ;
+static size_t g_occ_n, g_occ_cap;
+
+/* Encode a PCI selector (domain, bus, device, function) as one key. */
+static inline uint32_t
+pack_bdfd(struct pcisel s)
+{
+	return ((uint32_t)s.pc_domain << 16) | ((uint32_t)s.pc_bus << 8) |
+	    ((uint32_t)s.pc_dev << 3) | (uint32_t)s.pc_func;
+}
+
+/* Append one entry to the occupancy table, growing it geometrically. */
+static void
+occ_push(uint64_t page, uint32_t bdfd, uint8_t bar_idx)
+{
+	if (g_occ_n == g_occ_cap) {
+		size_t nc = g_occ_cap ? g_occ_cap * 2 : 256;
+		void *nv = realloc(g_occ, nc * sizeof(*g_occ));
+
+		/* A failed realloc must not be stored and dereferenced. */
+		if (nv == NULL)
+			err(1, "realloc");
+		g_occ = nv;
+		g_occ_cap = nc;
+	}
+	g_occ[g_occ_n++] = (struct occ_entry) {
+		.page = page,
+		.bdfd = bdfd,
+		.bar_idx = bar_idx
+	};
+}
+
+/* qsort comparator: order by page, then by owning device. */
+static int
+cmp_occ(const void *a, const void *b)
+{
+	const struct occ_entry *x = a, *y = b;
+
+	if (x->page < y->page)
+		return -1;
+	if (x->page > y->page)
+		return 1;
+	if (x->bdfd < y->bdfd)
+		return -1;
+	if (x->bdfd > y->bdfd)
+		return 1;
+	return 0;
+}
+
+/*
+ * Return 1 if any device other than self_bdfd has a memory BAR covering
+ * page, 0 otherwise.  Requires the table to be sorted.
+ */
+static int
+page_has_other_dev(uint64_t page, uint32_t self_bdfd)
+{
+	size_t lo = 0, hi = g_occ_n;
+
+	/* Lower bound: first entry whose page is >= the requested one. */
+	while (lo < hi) {
+		size_t mid = lo + (hi - lo) / 2;
+		if (g_occ[mid].page < page)
+			lo = mid + 1;
+		else
+			hi = mid;
+	}
+	for (size_t i = lo; i < g_occ_n && g_occ[i].page == page; ++i) {
+		if (g_occ[i].bdfd != self_bdfd)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Record every host page covered by a memory BAR of any PCI device the
+ * kernel reports, then sort the table for page_has_other_dev().  One entry
+ * is stored per page, so the table grows with the total BAR size.
+ */
+static void
+ppt_build_occupied_pages(void)
+{
+	struct pci_conf_io cio;
+	struct pci_conf *buf;
+	const size_t step = 256;
+
+	buf = calloc(step, sizeof(*buf));
+	if (buf == NULL)
+		err(1, "calloc");
+
+	bzero(&cio, sizeof(cio));
+	cio.matches = buf;
+	cio.match_buf_len = step * sizeof(*buf);
+
+	do {
+		/*
+		 * The kernel advances cio.offset and cio.generation itself,
+		 * so the request is passed back unmodified on each pass.
+		 */
+		if (ioctl(pcifd, PCIOCGETCONF, &cio) < 0)
+			err(1, "PCIOCGETCONF");
+		if (cio.status == PCI_GETCONF_ERROR ||
+		    cio.status == PCI_GETCONF_LIST_CHANGED)
+			errx(1, "PCIOCGETCONF: device enumeration failed");
+
+		for (unsigned i = 0; i < cio.num_matches; i++) {
+			struct pci_bar_io bar;
+			uint32_t bdfd = pack_bdfd(buf[i].pc_sel);
+
+			for (int r = 0; r <= PCI_BARMAX; r++) {
+				int bar_idx = r;
+
+				bzero(&bar, sizeof(bar));
+				bar.pbi_sel = buf[i].pc_sel;
+				bar.pbi_reg = PCIR_BAR(r);
+				if (ioctl(pcifd, PCIOCGETBAR, &bar) < 0)
+					continue;
+
+				if (PCI_BAR_IO(bar.pbi_base))
+					continue;
+
+				/* A 64-bit BAR also consumes the next slot. */
+				if ((bar.pbi_base & PCIM_BAR_MEM_TYPE) ==
+				    PCIM_BAR_MEM_64)
+					r++;
+
+				uint64_t base = bar.pbi_base &
+				    PCIM_BAR_MEM_BASE;
+				uint64_t size = bar.pbi_length;
+				if (size == 0)
+					continue;
+
+				uint64_t start = base & ~PAGE_MASK;
+				uint64_t end = (base + size + PAGE_MASK) &
+				    ~PAGE_MASK;
+				for (uint64_t p = start; p < end;
+				    p += PAGE_SIZE)
+					occ_push(p, bdfd, (uint8_t)bar_idx);
+			}
+		}
+	} while (cio.status == PCI_GETCONF_MORE_DEVS);
+
+	if (g_occ_n > 1)
+		qsort(g_occ, g_occ_n, sizeof(g_occ[0]), cmp_occ);
+	free(buf);
+}
+
+/* Build the occupancy table on first use only. */
+static inline void
+build_occ_map(void)
+{
+	if (!g_occ_built) {
+		ppt_build_occupied_pages();
+		g_occ_built = 1;
+	}
+}
+
 static int
 cfginitbar(struct passthru_softc *sc)
 {
@@ -572,9 +741,12 @@
 	struct pci_bar_io bar;
 	enum pcibar_type bartype;
 	uint64_t base, size;
+	uint32_t self_bdf;
 
 	pi = sc->psc_pi;
+	/* Same key encoding as the occupancy table, domain included. */
+	self_bdf = pack_bdfd(sc->psc_sel);
 
 	/*
 	 * Initialize BAR registers
 	 */
@@ -604,21 +776,54 @@
 		}
 		size = bar.pbi_length;
 
-		if (bartype != PCIBAR_IO) {
-			if (((base | size) & PAGE_MASK) != 0) {
-				warnx("passthru device %d/%d/%d BAR %d: "
-				    "base %#lx or size %#lx not page aligned\n",
-				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
-				    sc->psc_sel.pc_func, i, base, size);
-				return (-1);
-			}
-		}
-
 		/* Cache information about the "real" BAR */
 		sc->psc_bar[i].type = bartype;
 		sc->psc_bar[i].size = size;
 		sc->psc_bar[i].addr = base;
 		sc->psc_bar[i].lobits = 0;
+		sc->psc_bar[i].mapped_size = size;
+
+		if (bartype != PCIBAR_IO) {
+			if ((base & PAGE_MASK) != 0) {
+				warnx("passthru device %d/%d/%d BAR %d: base %#lx not page aligned",
+				    sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
+				    sc->psc_sel.pc_func, i, base);
+				return (-1);
+			}
+			if ((size & PAGE_MASK) != 0) {
+				if (size < PAGE_SIZE) {
+					uint64_t page = base & ~PAGE_MASK;
+
+					/*
+					 * A sub-page BAR is mapped as a whole
+					 * page, which is refused when another
+					 * device has a BAR on that page.
+					 *
+					 * NOTE(review): the guest BAR is still
+					 * allocated with the raw size; confirm
+					 * its guest address is page aligned.
+					 */
+					build_occ_map();
+					if (page_has_other_dev(page, self_bdf)) {
+						warnx("passthru device %d/%d/%d BAR %d: "
+						    "size %#lx <4K but page %#lx shared by other device",
+						    sc->psc_sel.pc_bus,
+						    sc->psc_sel.pc_dev,
+						    sc->psc_sel.pc_func, i,
+						    size, page);
+						return (-1);
+					}
+					sc->psc_bar[i].mapped_size = PAGE_SIZE;
+				} else {
+					warnx("passthru device %d/%d/%d BAR %d: "
+					    "size %#lx not multiple of page size",
+					    sc->psc_sel.pc_bus,
+					    sc->psc_sel.pc_dev,
+					    sc->psc_sel.pc_func, i, size);
+					return (-1);
+				}
+			}
+		}
 
 		/* Allocate the BAR in the guest I/O or MMIO space */
 		error = pci_emul_alloc_bar(pi, i, bartype, size);
@@ -1283,13 +1488,13 @@
 			if (vm_unmap_pptdev_mmio(pi->pi_vmctx, sc->psc_sel.pc_bus,
 						 sc->psc_sel.pc_dev,
 						 sc->psc_sel.pc_func, address,
-						 sc->psc_bar[baridx].size) != 0)
+						 sc->psc_bar[baridx].mapped_size) != 0)
 				warnx("pci_passthru: unmap_pptdev_mmio failed");
 		} else {
 			if (vm_map_pptdev_mmio(pi->pi_vmctx, sc->psc_sel.pc_bus,
 					       sc->psc_sel.pc_dev,
 					       sc->psc_sel.pc_func, address,
-					       sc->psc_bar[baridx].size,
+					       sc->psc_bar[baridx].mapped_size,
 					       sc->psc_bar[baridx].addr) != 0)
 				warnx("pci_passthru: map_pptdev_mmio failed");
 		}