Changeset View
Changeset View
Standalone View
Standalone View
usr.sbin/bhyve/pci_passthru.c
Show All 37 Lines | |||||||||
#include <sys/types.h> | #include <sys/types.h> | ||||||||
#include <sys/mman.h> | #include <sys/mman.h> | ||||||||
#include <sys/pciio.h> | #include <sys/pciio.h> | ||||||||
#include <sys/ioctl.h> | #include <sys/ioctl.h> | ||||||||
#include <dev/io/iodev.h> | #include <dev/io/iodev.h> | ||||||||
#include <dev/pci/pcireg.h> | #include <dev/pci/pcireg.h> | ||||||||
#include <vm/vm.h> | |||||||||
#include <machine/iodev.h> | #include <machine/iodev.h> | ||||||||
#include <machine/vm.h> | |||||||||
#ifndef WITHOUT_CAPSICUM | #ifndef WITHOUT_CAPSICUM | ||||||||
#include <capsicum_helpers.h> | #include <capsicum_helpers.h> | ||||||||
#endif | #endif | ||||||||
#include <stdio.h> | #include <stdio.h> | ||||||||
#include <stdlib.h> | #include <stdlib.h> | ||||||||
#include <string.h> | #include <string.h> | ||||||||
#include <err.h> | #include <err.h> | ||||||||
Show All 9 Lines | |||||||||
#include "debug.h" | #include "debug.h" | ||||||||
#include "pci_emul.h" | #include "pci_emul.h" | ||||||||
#include "mem.h" | #include "mem.h" | ||||||||
#ifndef _PATH_DEVPCI | #ifndef _PATH_DEVPCI | ||||||||
#define _PATH_DEVPCI "/dev/pci" | #define _PATH_DEVPCI "/dev/pci" | ||||||||
#endif | #endif | ||||||||
#ifndef _PATH_MEM | |||||||||
#define _PATH_MEM "/dev/mem" | |||||||||
#endif | |||||||||
#define LEGACY_SUPPORT 1 | #define LEGACY_SUPPORT 1 | ||||||||
#define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1) | #define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1) | ||||||||
#define MSIX_CAPLEN 12 | #define MSIX_CAPLEN 12 | ||||||||
static int pcifd = -1; | static int pcifd = -1; | ||||||||
static int memfd = -1; | |||||||||
struct passthru_softc { | struct passthru_softc { | ||||||||
struct pci_devinst *psc_pi; | struct pci_devinst *psc_pi; | ||||||||
struct pcibar psc_bar[PCI_BARMAX + 1]; | struct pcibar psc_bar[PCI_BARMAX + 1]; | ||||||||
struct { | struct { | ||||||||
int capoff; | int capoff; | ||||||||
int msgctrl; | int msgctrl; | ||||||||
int emulated; | int emulated; | ||||||||
▲ Show 20 Lines • Show All 194 Lines • ▼ Show 20 Lines | msix_table_read(struct passthru_softc *sc, uint64_t offset, int size) | ||||||||
struct pci_devinst *pi; | struct pci_devinst *pi; | ||||||||
struct msix_table_entry *entry; | struct msix_table_entry *entry; | ||||||||
uint8_t *src8; | uint8_t *src8; | ||||||||
uint16_t *src16; | uint16_t *src16; | ||||||||
uint32_t *src32; | uint32_t *src32; | ||||||||
uint64_t *src64; | uint64_t *src64; | ||||||||
uint64_t data; | uint64_t data; | ||||||||
size_t entry_offset; | size_t entry_offset; | ||||||||
int index; | uint32_t table_offset; | ||||||||
int index, table_count; | |||||||||
pi = sc->psc_pi; | pi = sc->psc_pi; | ||||||||
if (pi->pi_msix.pba_page != NULL && offset >= pi->pi_msix.pba_offset && | |||||||||
offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { | table_offset = pi->pi_msix.table_offset; | ||||||||
table_count = pi->pi_msix.table_count; | |||||||||
if (offset < table_offset || | |||||||||
offset >= table_offset + table_count * MSIX_TABLE_ENTRY_SIZE) { | |||||||||
switch(size) { | switch (size) { | ||||||||
case 1: | case 1: | ||||||||
src8 = (uint8_t *)(pi->pi_msix.pba_page + offset - | src8 = (uint8_t *)(pi->pi_msix.mapped_addr + offset); | ||||||||
pi->pi_msix.pba_page_offset); | |||||||||
data = *src8; | data = *src8; | ||||||||
break; | break; | ||||||||
case 2: | case 2: | ||||||||
src16 = (uint16_t *)(pi->pi_msix.pba_page + offset - | src16 = (uint16_t *)(pi->pi_msix.mapped_addr + offset); | ||||||||
pi->pi_msix.pba_page_offset); | |||||||||
data = *src16; | data = *src16; | ||||||||
break; | break; | ||||||||
case 4: | case 4: | ||||||||
src32 = (uint32_t *)(pi->pi_msix.pba_page + offset - | src32 = (uint32_t *)(pi->pi_msix.mapped_addr + offset); | ||||||||
pi->pi_msix.pba_page_offset); | |||||||||
data = *src32; | data = *src32; | ||||||||
break; | break; | ||||||||
case 8: | case 8: | ||||||||
src64 = (uint64_t *)(pi->pi_msix.pba_page + offset - | src64 = (uint64_t *)(pi->pi_msix.mapped_addr + offset); | ||||||||
pi->pi_msix.pba_page_offset); | |||||||||
data = *src64; | data = *src64; | ||||||||
break; | break; | ||||||||
default: | default: | ||||||||
return (-1); | return (-1); | ||||||||
} | } | ||||||||
return (data); | return (data); | ||||||||
} | } | ||||||||
if (offset < pi->pi_msix.table_offset) | offset -= table_offset; | ||||||||
return (-1); | |||||||||
offset -= pi->pi_msix.table_offset; | |||||||||
index = offset / MSIX_TABLE_ENTRY_SIZE; | index = offset / MSIX_TABLE_ENTRY_SIZE; | ||||||||
if (index >= pi->pi_msix.table_count) | assert(index < table_count); | ||||||||
jhb: You could perhaps keep the 'return (data)' here so that the else case below remains unindented. | |||||||||
return (-1); | |||||||||
entry = &pi->pi_msix.table[index]; | entry = &pi->pi_msix.table[index]; | ||||||||
entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; | entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; | ||||||||
switch(size) { | switch (size) { | ||||||||
case 1: | case 1: | ||||||||
src8 = (uint8_t *)((void *)entry + entry_offset); | src8 = (uint8_t *)((uint8_t *)entry + entry_offset); | ||||||||
data = *src8; | data = *src8; | ||||||||
break; | break; | ||||||||
case 2: | case 2: | ||||||||
src16 = (uint16_t *)((void *)entry + entry_offset); | src16 = (uint16_t *)((uint8_t *)entry + entry_offset); | ||||||||
data = *src16; | data = *src16; | ||||||||
break; | break; | ||||||||
case 4: | case 4: | ||||||||
src32 = (uint32_t *)((void *)entry + entry_offset); | src32 = (uint32_t *)((uint8_t *)entry + entry_offset); | ||||||||
data = *src32; | data = *src32; | ||||||||
break; | break; | ||||||||
case 8: | case 8: | ||||||||
src64 = (uint64_t *)((void *)entry + entry_offset); | src64 = (uint64_t *)((uint8_t *)entry + entry_offset); | ||||||||
data = *src64; | data = *src64; | ||||||||
break; | break; | ||||||||
default: | default: | ||||||||
return (-1); | return (-1); | ||||||||
} | } | ||||||||
return (data); | return (data); | ||||||||
} | } | ||||||||
static void | static void | ||||||||
msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, | msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, | ||||||||
uint64_t offset, int size, uint64_t data) | uint64_t offset, int size, uint64_t data) | ||||||||
{ | { | ||||||||
struct pci_devinst *pi; | struct pci_devinst *pi; | ||||||||
struct msix_table_entry *entry; | struct msix_table_entry *entry; | ||||||||
uint8_t *dest8; | uint8_t *dest8; | ||||||||
uint16_t *dest16; | uint16_t *dest16; | ||||||||
uint32_t *dest32; | uint32_t *dest32; | ||||||||
uint64_t *dest64; | uint64_t *dest64; | ||||||||
size_t entry_offset; | size_t entry_offset; | ||||||||
uint32_t vector_control; | uint32_t table_offset, vector_control; | ||||||||
int index; | int index, table_count; | ||||||||
pi = sc->psc_pi; | pi = sc->psc_pi; | ||||||||
if (pi->pi_msix.pba_page != NULL && offset >= pi->pi_msix.pba_offset && | |||||||||
offset < pi->pi_msix.pba_offset + pi->pi_msix.pba_size) { | table_offset = pi->pi_msix.table_offset; | ||||||||
table_count = pi->pi_msix.table_count; | |||||||||
if (offset < table_offset || | |||||||||
offset >= table_offset + table_count * MSIX_TABLE_ENTRY_SIZE) { | |||||||||
switch(size) { | switch (size) { | ||||||||
case 1: | case 1: | ||||||||
dest8 = (uint8_t *)(pi->pi_msix.pba_page + offset - | dest8 = (uint8_t *)(pi->pi_msix.mapped_addr + offset); | ||||||||
pi->pi_msix.pba_page_offset); | |||||||||
*dest8 = data; | *dest8 = data; | ||||||||
break; | break; | ||||||||
case 2: | case 2: | ||||||||
dest16 = (uint16_t *)(pi->pi_msix.pba_page + offset - | dest16 = (uint16_t *)(pi->pi_msix.mapped_addr + offset); | ||||||||
pi->pi_msix.pba_page_offset); | |||||||||
*dest16 = data; | *dest16 = data; | ||||||||
break; | break; | ||||||||
case 4: | case 4: | ||||||||
dest32 = (uint32_t *)(pi->pi_msix.pba_page + offset - | dest32 = (uint32_t *)(pi->pi_msix.mapped_addr + offset); | ||||||||
pi->pi_msix.pba_page_offset); | |||||||||
*dest32 = data; | *dest32 = data; | ||||||||
break; | break; | ||||||||
case 8: | case 8: | ||||||||
dest64 = (uint64_t *)(pi->pi_msix.pba_page + offset - | dest64 = (uint64_t *)(pi->pi_msix.mapped_addr + offset); | ||||||||
pi->pi_msix.pba_page_offset); | |||||||||
*dest64 = data; | *dest64 = data; | ||||||||
break; | break; | ||||||||
default: | |||||||||
break; | |||||||||
} | } | ||||||||
return; | return; | ||||||||
} | } | ||||||||
if (offset < pi->pi_msix.table_offset) | offset -= table_offset; | ||||||||
return; | |||||||||
offset -= pi->pi_msix.table_offset; | |||||||||
index = offset / MSIX_TABLE_ENTRY_SIZE; | index = offset / MSIX_TABLE_ENTRY_SIZE; | ||||||||
if (index >= pi->pi_msix.table_count) | assert(index < table_count); | ||||||||
return; | |||||||||
entry = &pi->pi_msix.table[index]; | entry = &pi->pi_msix.table[index]; | ||||||||
entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; | entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; | ||||||||
/* Only 4 byte naturally-aligned writes are supported */ | /* Only 4 byte naturally-aligned writes are supported */ | ||||||||
assert(size == 4); | assert(size == 4); | ||||||||
assert(entry_offset % 4 == 0); | assert(entry_offset % 4 == 0); | ||||||||
Show All 11 Lines | if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || | ||||||||
entry->msg_data, entry->vector_control); | entry->msg_data, entry->vector_control); | ||||||||
} | } | ||||||||
} | } | ||||||||
} | } | ||||||||
static int | static int | ||||||||
init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) | init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) | ||||||||
{ | { | ||||||||
struct pci_devinst *pi = sc->psc_pi; | |||||||||
struct pci_bar_mmap pbm; | |||||||||
int b, s, f; | int b, s, f; | ||||||||
int idx; | |||||||||
size_t remaining; | |||||||||
uint32_t table_size, table_offset; | uint32_t table_size, table_offset; | ||||||||
uint32_t pba_size, pba_offset; | |||||||||
vm_paddr_t start; | |||||||||
struct pci_devinst *pi = sc->psc_pi; | |||||||||
assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0); | assert(pci_msix_table_bar(pi) >= 0 && pci_msix_pba_bar(pi) >= 0); | ||||||||
b = sc->psc_sel.pc_bus; | b = sc->psc_sel.pc_bus; | ||||||||
s = sc->psc_sel.pc_dev; | s = sc->psc_sel.pc_dev; | ||||||||
f = sc->psc_sel.pc_func; | f = sc->psc_sel.pc_func; | ||||||||
/* | /* | ||||||||
* If the MSI-X table BAR maps memory intended for | * Map the region of the BAR containing the MSI-X table. This is | ||||||||
* other uses, it is at least assured that the table | * necessary for two reasons: | ||||||||
* either resides in its own page within the region, | * 1. The PBA may reside in the first or last page containing the MSI-X | ||||||||
* or it resides in a page shared with only the PBA. | * table. | ||||||||
* 2. While PCI devices are not supposed to use the page(s) containing | |||||||||
* the MSI-X table for other purposes, some do in practice. | |||||||||
*/ | */ | ||||||||
memset(&pbm, 0, sizeof(pbm)); | |||||||||
pbm.pbm_sel = sc->psc_sel; | |||||||||
pbm.pbm_flags = PCIIO_BAR_MMAP_RW; | |||||||||
pbm.pbm_reg = PCIR_BAR(pi->pi_msix.pba_bar); | |||||||||
andy_omniosce.org (inline comment, marked Not Done): I'm in the process of porting this change to illumos and, looking at this, should it be pi->pi_msix.table_bar rather than pi->pi_msix.pba_bar? | |||||||||
markj (inline comment, marked Done): I think you're right, thanks. An earlier version would only create this mapping if the PBA and MSI-X table resided in the same BAR, and that was true for the devices I was testing with. :( | |||||||||
markj (inline comment, marked Done): https://reviews.freebsd.org/D33739 | |||||||||
pbm.pbm_memattr = VM_MEMATTR_DEVICE; | |||||||||
if (ioctl(pcifd, PCIOCBARMMAP, &pbm) != 0) { | |||||||||
warn("Failed to map MSI-X table BAR on %d/%d/%d", b, s, f); | |||||||||
jhb (inline comment, marked Done): Minor pedanticism since the PBA can be in a different BAR. | |||||||||
return (-1); | |||||||||
} | |||||||||
assert(pbm.pbm_bar_off == 0); | |||||||||
pi->pi_msix.mapped_addr = (uint8_t *)(uintptr_t)pbm.pbm_map_base; | |||||||||
pi->pi_msix.mapped_size = pbm.pbm_map_length; | |||||||||
table_offset = rounddown2(pi->pi_msix.table_offset, 4096); | table_offset = rounddown2(pi->pi_msix.table_offset, 4096); | ||||||||
table_size = pi->pi_msix.table_offset - table_offset; | table_size = pi->pi_msix.table_offset - table_offset; | ||||||||
table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; | table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; | ||||||||
table_size = roundup2(table_size, 4096); | table_size = roundup2(table_size, 4096); | ||||||||
idx = pi->pi_msix.table_bar; | |||||||||
start = pi->pi_bar[idx].addr; | |||||||||
remaining = pi->pi_bar[idx].size; | |||||||||
if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) { | |||||||||
pba_offset = pi->pi_msix.pba_offset; | |||||||||
pba_size = pi->pi_msix.pba_size; | |||||||||
if (pba_offset >= table_offset + table_size || | |||||||||
table_offset >= pba_offset + pba_size) { | |||||||||
/* | /* | ||||||||
* If the PBA does not share a page with the MSI-x | * Unmap any pages not covered by the table, we do not need to emulate | ||||||||
* tables, no PBA emulation is required. | * accesses to them. Avoid releasing address space to help ensure that | ||||||||
* a buggy out-of-bounds access causes a crash. | |||||||||
*/ | */ | ||||||||
pi->pi_msix.pba_page = NULL; | if (table_offset != 0) | ||||||||
pi->pi_msix.pba_page_offset = 0; | if (mprotect(pi->pi_msix.mapped_addr, table_offset, | ||||||||
} else { | PROT_NONE) != 0) | ||||||||
/* | warn("Failed to unmap MSI-X table BAR region"); | ||||||||
* The PBA overlaps with either the first or last | if (table_offset + table_size != pi->pi_msix.mapped_size) | ||||||||
* page of the MSI-X table region. Map the | if (mprotect(pi->pi_msix.mapped_addr, | ||||||||
* appropriate page. | pi->pi_msix.mapped_size - (table_offset + table_size), | ||||||||
*/ | PROT_NONE) != 0) | ||||||||
if (pba_offset <= table_offset) | warn("Failed to unmap MSI-X table BAR region"); | ||||||||
pi->pi_msix.pba_page_offset = table_offset; | |||||||||
else | |||||||||
pi->pi_msix.pba_page_offset = table_offset + | |||||||||
table_size - 4096; | |||||||||
pi->pi_msix.pba_page = mmap(NULL, 4096, PROT_READ | | |||||||||
PROT_WRITE, MAP_SHARED, memfd, start + | |||||||||
pi->pi_msix.pba_page_offset); | |||||||||
if (pi->pi_msix.pba_page == MAP_FAILED) { | |||||||||
warn( | |||||||||
"Failed to map PBA page for MSI-X on %d/%d/%d", | |||||||||
b, s, f); | |||||||||
return (-1); | |||||||||
} | |||||||||
} | |||||||||
} | |||||||||
return (0); | return (0); | ||||||||
} | } | ||||||||
static int | static int | ||||||||
cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) | cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) | ||||||||
{ | { | ||||||||
int i, error; | int i, error; | ||||||||
▲ Show 20 Lines • Show All 131 Lines • ▼ Show 20 Lines | |||||||||
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) | passthru_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl) | ||||||||
{ | { | ||||||||
int bus, slot, func, error, memflags; | int bus, slot, func, error, memflags; | ||||||||
struct passthru_softc *sc; | struct passthru_softc *sc; | ||||||||
const char *value; | const char *value; | ||||||||
#ifndef WITHOUT_CAPSICUM | #ifndef WITHOUT_CAPSICUM | ||||||||
cap_rights_t rights; | cap_rights_t rights; | ||||||||
cap_ioctl_t pci_ioctls[] = | cap_ioctl_t pci_ioctls[] = | ||||||||
{ PCIOCREAD, PCIOCWRITE, PCIOCGETBAR, PCIOCBARIO }; | { PCIOCREAD, PCIOCWRITE, PCIOCGETBAR, PCIOCBARIO, PCIOCBARMMAP }; | ||||||||
#endif | #endif | ||||||||
sc = NULL; | sc = NULL; | ||||||||
error = 1; | error = 1; | ||||||||
#ifndef WITHOUT_CAPSICUM | #ifndef WITHOUT_CAPSICUM | ||||||||
cap_rights_init(&rights, CAP_IOCTL, CAP_READ, CAP_WRITE); | cap_rights_init(&rights, CAP_IOCTL, CAP_READ, CAP_WRITE); | ||||||||
#endif | #endif | ||||||||
Show All 11 Lines | if (pcifd < 0) { | ||||||||
return (error); | return (error); | ||||||||
} | } | ||||||||
} | } | ||||||||
#ifndef WITHOUT_CAPSICUM | #ifndef WITHOUT_CAPSICUM | ||||||||
if (caph_rights_limit(pcifd, &rights) == -1) | if (caph_rights_limit(pcifd, &rights) == -1) | ||||||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | errx(EX_OSERR, "Unable to apply rights for sandbox"); | ||||||||
if (caph_ioctls_limit(pcifd, pci_ioctls, nitems(pci_ioctls)) == -1) | if (caph_ioctls_limit(pcifd, pci_ioctls, nitems(pci_ioctls)) == -1) | ||||||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | |||||||||
#endif | |||||||||
if (memfd < 0) { | |||||||||
memfd = open(_PATH_MEM, O_RDWR, 0); | |||||||||
if (memfd < 0) { | |||||||||
warn("failed to open %s", _PATH_MEM); | |||||||||
return (error); | |||||||||
} | |||||||||
} | |||||||||
#ifndef WITHOUT_CAPSICUM | |||||||||
cap_rights_clear(&rights, CAP_IOCTL); | |||||||||
cap_rights_set(&rights, CAP_MMAP_RW); | |||||||||
if (caph_rights_limit(memfd, &rights) == -1) | |||||||||
errx(EX_OSERR, "Unable to apply rights for sandbox"); | errx(EX_OSERR, "Unable to apply rights for sandbox"); | ||||||||
#endif | #endif | ||||||||
#define GET_INT_CONFIG(var, name) do { \ | #define GET_INT_CONFIG(var, name) do { \ | ||||||||
value = get_config_value_node(nvl, name); \ | value = get_config_value_node(nvl, name); \ | ||||||||
if (value == NULL) { \ | if (value == NULL) { \ | ||||||||
EPRINTLN("passthru: missing required %s setting", name); \ | EPRINTLN("passthru: missing required %s setting", name); \ | ||||||||
return (error); \ | return (error); \ | ||||||||
▲ Show 20 Lines • Show All 346 Lines • Show Last 20 Lines |
jhb: You could perhaps keep the 'return (data)' here so that the else case below remains unindented. I don't really mind either way, but the write function below keeps the table case unindented, and it seems useful to keep the two functions consistent in their general layout.