diff --git a/sys/amd64/vmm/intel/vtd.c b/sys/amd64/vmm/intel/vtd.c index 8f06dc823364..21e81223f6ee 100644 --- a/sys/amd64/vmm/intel/vtd.c +++ b/sys/amd64/vmm/intel/vtd.c @@ -1,776 +1,778 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "io/iommu.h" /* * Documented in the "Intel Virtualization Technology for Directed I/O", * Architecture Spec, September 2008. 
*/ #define VTD_DRHD_INCLUDE_PCI_ALL(Flags) (((Flags) >> 0) & 0x1) /* Section 10.4 "Register Descriptions" */ struct vtdmap { volatile uint32_t version; volatile uint32_t res0; volatile uint64_t cap; volatile uint64_t ext_cap; volatile uint32_t gcr; volatile uint32_t gsr; volatile uint64_t rta; volatile uint64_t ccr; }; #define VTD_CAP_SAGAW(cap) (((cap) >> 8) & 0x1F) #define VTD_CAP_ND(cap) ((cap) & 0x7) #define VTD_CAP_CM(cap) (((cap) >> 7) & 0x1) #define VTD_CAP_SPS(cap) (((cap) >> 34) & 0xF) #define VTD_CAP_RWBF(cap) (((cap) >> 4) & 0x1) #define VTD_ECAP_DI(ecap) (((ecap) >> 2) & 0x1) #define VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1) #define VTD_ECAP_IRO(ecap) (((ecap) >> 8) & 0x3FF) #define VTD_GCR_WBF (1 << 27) #define VTD_GCR_SRTP (1 << 30) #define VTD_GCR_TE (1U << 31) #define VTD_GSR_WBFS (1 << 27) #define VTD_GSR_RTPS (1 << 30) #define VTD_GSR_TES (1U << 31) #define VTD_CCR_ICC (1UL << 63) /* invalidate context cache */ #define VTD_CCR_CIRG_GLOBAL (1UL << 61) /* global invalidation */ #define VTD_IIR_IVT (1UL << 63) /* invalidation IOTLB */ #define VTD_IIR_IIRG_GLOBAL (1ULL << 60) /* global IOTLB invalidation */ #define VTD_IIR_IIRG_DOMAIN (2ULL << 60) /* domain IOTLB invalidation */ #define VTD_IIR_IIRG_PAGE (3ULL << 60) /* page IOTLB invalidation */ #define VTD_IIR_DRAIN_READS (1ULL << 49) /* drain pending DMA reads */ #define VTD_IIR_DRAIN_WRITES (1ULL << 48) /* drain pending DMA writes */ #define VTD_IIR_DOMAIN_P 32 #define VTD_ROOT_PRESENT 0x1 #define VTD_CTX_PRESENT 0x1 #define VTD_CTX_TT_ALL (1UL << 2) #define VTD_PTE_RD (1UL << 0) #define VTD_PTE_WR (1UL << 1) #define VTD_PTE_SUPERPAGE (1UL << 7) #define VTD_PTE_ADDR_M (0x000FFFFFFFFFF000UL) #define VTD_RID2IDX(rid) (((rid) & 0xff) * 2) struct domain { uint64_t *ptp; /* first level page table page */ int pt_levels; /* number of page table levels */ int addrwidth; /* 'AW' field in context entry */ int spsmask; /* supported super page sizes */ u_int id; /* domain id */ vm_paddr_t maxaddr; /* highest address to be mapped */ SLIST_ENTRY(domain) next; }; static SLIST_HEAD(, domain) domhead; #define DRHD_MAX_UNITS 8 static ACPI_DMAR_HARDWARE_UNIT *drhds[DRHD_MAX_UNITS]; static int drhd_num; static struct vtdmap *vtdmaps[DRHD_MAX_UNITS]; static int max_domains; typedef int (*drhd_ident_func_t)(void); static uint64_t root_table[PAGE_SIZE / sizeof(uint64_t)] __aligned(4096); static uint64_t ctx_tables[256][PAGE_SIZE / sizeof(uint64_t)] __aligned(4096); static MALLOC_DEFINE(M_VTD, "vtd", "vtd"); static int vtd_max_domains(struct vtdmap *vtdmap) { int nd; nd = VTD_CAP_ND(vtdmap->cap); switch (nd) { case 0: return (16); case 1: return (64); case 2: return (256); case 3: return (1024); case 4: return (4 * 1024); case 5: return (16 * 1024); case 6: return (64 * 1024); default: panic("vtd_max_domains: invalid value of nd (0x%0x)", nd); } } static u_int domain_id(void) { u_int id; struct domain *dom; /* Skip domain id 0 - it is reserved when Caching Mode field is set */ for (id = 1; id < max_domains; id++) { SLIST_FOREACH(dom, &domhead, next) { if (dom->id == id) break; } if (dom == NULL) break; /* found it */ } if (id >= max_domains) panic("domain ids exhausted"); return (id); } static struct vtdmap * vtd_device_scope(uint16_t rid) { int i, remaining, pathremaining; char *end, *pathend; struct vtdmap *vtdmap; ACPI_DMAR_HARDWARE_UNIT *drhd; ACPI_DMAR_DEVICE_SCOPE *device_scope; ACPI_DMAR_PCI_PATH *path; for (i = 0; i < drhd_num; i++) { drhd = drhds[i]; if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) { /* * From Intel VT-d arch spec, version 3.0: 
* If a DRHD structure with INCLUDE_PCI_ALL flag Set is reported * for a Segment, it must be enumerated by BIOS after all other * DRHD structures for the same Segment. */ vtdmap = vtdmaps[i]; return(vtdmap); } end = (char *)drhd + drhd->Header.Length; remaining = drhd->Header.Length - sizeof(ACPI_DMAR_HARDWARE_UNIT); while (remaining > sizeof(ACPI_DMAR_DEVICE_SCOPE)) { device_scope = (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining); remaining -= device_scope->Length; switch (device_scope->EntryType){ /* 0x01 and 0x02 are PCI device entries */ case 0x01: case 0x02: break; default: continue; } if (PCI_RID2BUS(rid) != device_scope->Bus) continue; pathend = (char *)device_scope + device_scope->Length; pathremaining = device_scope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE); while (pathremaining >= sizeof(ACPI_DMAR_PCI_PATH)) { path = (ACPI_DMAR_PCI_PATH *)(pathend - pathremaining); pathremaining -= sizeof(ACPI_DMAR_PCI_PATH); if (PCI_RID2SLOT(rid) != path->Device) continue; if (PCI_RID2FUNC(rid) != path->Function) continue; vtdmap = vtdmaps[i]; return (vtdmap); } } } /* No matching scope */ return (NULL); } static void vtd_wbflush(struct vtdmap *vtdmap) { if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0) pmap_invalidate_cache(); if (VTD_CAP_RWBF(vtdmap->cap)) { vtdmap->gcr = VTD_GCR_WBF; while ((vtdmap->gsr & VTD_GSR_WBFS) != 0) ; } } static void vtd_ctx_global_invalidate(struct vtdmap *vtdmap) { vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL; while ((vtdmap->ccr & VTD_CCR_ICC) != 0) ; } static void vtd_iotlb_global_invalidate(struct vtdmap *vtdmap) { int offset; volatile uint64_t *iotlb_reg, val; vtd_wbflush(vtdmap); offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16; iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8); *iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL | VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES; while (1) { val = *iotlb_reg; if ((val & VTD_IIR_IVT) == 0) break; } } static void vtd_translation_enable(struct vtdmap *vtdmap) { vtdmap->gcr = VTD_GCR_TE; while ((vtdmap->gsr & VTD_GSR_TES) == 0) ; } static void vtd_translation_disable(struct vtdmap *vtdmap) { vtdmap->gcr = 0; while ((vtdmap->gsr & VTD_GSR_TES) != 0) ; } static int vtd_init(void) { int i, units, remaining, tmp; struct vtdmap *vtdmap; vm_paddr_t ctx_paddr; char *end, envname[32]; unsigned long mapaddr; ACPI_STATUS status; ACPI_TABLE_DMAR *dmar; ACPI_DMAR_HEADER *hdr; ACPI_DMAR_HARDWARE_UNIT *drhd; /* * Allow the user to override the ACPI DMAR table by specifying the * physical address of each remapping unit. * * The following example specifies two remapping units at * physical addresses 0xfed90000 and 0xfeda0000 respectively. * set vtd.regmap.0.addr=0xfed90000 * set vtd.regmap.1.addr=0xfeda0000 */ for (units = 0; units < DRHD_MAX_UNITS; units++) { snprintf(envname, sizeof(envname), "vtd.regmap.%d.addr", units); if (getenv_ulong(envname, &mapaddr) == 0) break; vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr); } if (units > 0) goto skip_dmar; /* Search for DMAR table. */ status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar); if (ACPI_FAILURE(status)) return (ENXIO); end = (char *)dmar + dmar->Header.Length; remaining = dmar->Header.Length - sizeof(ACPI_TABLE_DMAR); while (remaining > sizeof(ACPI_DMAR_HEADER)) { hdr = (ACPI_DMAR_HEADER *)(end - remaining); if (hdr->Length > remaining) break; /* * From Intel VT-d arch spec, version 1.3: * BIOS implementations must report mapping structures * in numerical order, i.e. 
All remapping structures of * type 0 (DRHD) enumerated before remapping structures of * type 1 (RMRR) and so forth. */ if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT) break; drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr; drhds[units] = drhd; vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address); if (++units >= DRHD_MAX_UNITS) break; remaining -= hdr->Length; } if (units <= 0) return (ENXIO); skip_dmar: drhd_num = units; max_domains = 64 * 1024; /* maximum valid value */ for (i = 0; i < drhd_num; i++){ vtdmap = vtdmaps[i]; if (VTD_CAP_CM(vtdmap->cap) != 0) panic("vtd_init: invalid caching mode"); /* take most compatible (minimum) value */ if ((tmp = vtd_max_domains(vtdmap)) < max_domains) max_domains = tmp; } /* * Set up the root-table to point to the context-entry tables */ for (i = 0; i < 256; i++) { ctx_paddr = vtophys(ctx_tables[i]); if (ctx_paddr & PAGE_MASK) panic("ctx table (0x%0lx) not page aligned", ctx_paddr); root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT; } return (0); } static void vtd_cleanup(void) { } static void vtd_enable(void) { int i; struct vtdmap *vtdmap; for (i = 0; i < drhd_num; i++) { vtdmap = vtdmaps[i]; vtd_wbflush(vtdmap); /* Update the root table address */ vtdmap->rta = vtophys(root_table); vtdmap->gcr = VTD_GCR_SRTP; while ((vtdmap->gsr & VTD_GSR_RTPS) == 0) ; vtd_ctx_global_invalidate(vtdmap); vtd_iotlb_global_invalidate(vtdmap); vtd_translation_enable(vtdmap); } } static void vtd_disable(void) { int i; struct vtdmap *vtdmap; for (i = 0; i < drhd_num; i++) { vtdmap = vtdmaps[i]; vtd_translation_disable(vtdmap); } } static void vtd_add_device(void *arg, uint16_t rid) { int idx; uint64_t *ctxp; struct domain *dom = arg; vm_paddr_t pt_paddr; struct vtdmap *vtdmap; uint8_t bus; + KASSERT(dom != NULL, ("domain is NULL")); + bus = PCI_RID2BUS(rid); ctxp = ctx_tables[bus]; pt_paddr = vtophys(dom->ptp); idx = VTD_RID2IDX(rid); if (ctxp[idx] & VTD_CTX_PRESENT) { panic("vtd_add_device: device %x is already owned by " "domain %d", rid, (uint16_t)(ctxp[idx + 1] >> 8)); } if ((vtdmap = vtd_device_scope(rid)) == NULL) panic("vtd_add_device: device %x is not in scope for " "any DMA remapping unit", rid); /* * Order is important. The 'present' bit is set only after all fields * of the context pointer are initialized. */ ctxp[idx + 1] = dom->addrwidth | (dom->id << 8); if (VTD_ECAP_DI(vtdmap->ext_cap)) ctxp[idx] = VTD_CTX_TT_ALL; else ctxp[idx] = 0; ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT; /* * 'Not Present' entries are not cached in either the Context Cache * or in the IOTLB, so there is no need to invalidate either of them. */ } static void vtd_remove_device(void *arg, uint16_t rid) { int i, idx; uint64_t *ctxp; struct vtdmap *vtdmap; uint8_t bus; bus = PCI_RID2BUS(rid); ctxp = ctx_tables[bus]; idx = VTD_RID2IDX(rid); /* * Order is important. The 'present' bit is must be cleared first. */ ctxp[idx] = 0; ctxp[idx + 1] = 0; /* * Invalidate the Context Cache and the IOTLB. 
* * XXX use device-selective invalidation for Context Cache * XXX use domain-selective invalidation for IOTLB */ for (i = 0; i < drhd_num; i++) { vtdmap = vtdmaps[i]; vtd_ctx_global_invalidate(vtdmap); vtd_iotlb_global_invalidate(vtdmap); } } #define CREATE_MAPPING 0 #define REMOVE_MAPPING 1 static uint64_t vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len, int remove) { struct domain *dom; int i, spshift, ptpshift, ptpindex, nlevels; uint64_t spsize, *ptp; dom = arg; ptpindex = 0; ptpshift = 0; KASSERT(gpa + len > gpa, ("%s: invalid gpa range %#lx/%#lx", __func__, gpa, len)); KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %#lx/%#lx beyond " "domain maxaddr %#lx", __func__, gpa, len, dom->maxaddr)); if (gpa & PAGE_MASK) panic("vtd_create_mapping: unaligned gpa 0x%0lx", gpa); if (hpa & PAGE_MASK) panic("vtd_create_mapping: unaligned hpa 0x%0lx", hpa); if (len & PAGE_MASK) panic("vtd_create_mapping: unaligned len 0x%0lx", len); /* * Compute the size of the mapping that we can accommodate. * * This is based on three factors: * - supported super page size * - alignment of the region starting at 'gpa' and 'hpa' * - length of the region 'len' */ spshift = 48; for (i = 3; i >= 0; i--) { spsize = 1UL << spshift; if ((dom->spsmask & (1 << i)) != 0 && (gpa & (spsize - 1)) == 0 && (hpa & (spsize - 1)) == 0 && (len >= spsize)) { break; } spshift -= 9; } ptp = dom->ptp; nlevels = dom->pt_levels; while (--nlevels >= 0) { ptpshift = 12 + nlevels * 9; ptpindex = (gpa >> ptpshift) & 0x1FF; /* We have reached the leaf mapping */ if (spshift >= ptpshift) { break; } /* * We are working on a non-leaf page table page. * * Create a downstream page table page if necessary and point * to it from the current page table. */ if (ptp[ptpindex] == 0) { void *nlp = malloc(PAGE_SIZE, M_VTD, M_WAITOK | M_ZERO); ptp[ptpindex] = vtophys(nlp)| VTD_PTE_RD | VTD_PTE_WR; } ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M); } if ((gpa & ((1UL << ptpshift) - 1)) != 0) panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift); /* * Update the 'gpa' -> 'hpa' mapping */ if (remove) { ptp[ptpindex] = 0; } else { ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR; if (nlevels > 0) ptp[ptpindex] |= VTD_PTE_SUPERPAGE; } return (1UL << ptpshift); } static uint64_t vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) { return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING)); } static uint64_t vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len) { return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING)); } static void vtd_invalidate_tlb(void *dom) { int i; struct vtdmap *vtdmap; /* * Invalidate the IOTLB. * XXX use domain-selective invalidation for IOTLB */ for (i = 0; i < drhd_num; i++) { vtdmap = vtdmaps[i]; vtd_iotlb_global_invalidate(vtdmap); } } static void * vtd_create_domain(vm_paddr_t maxaddr) { struct domain *dom; vm_paddr_t addr; int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth; struct vtdmap *vtdmap; if (drhd_num <= 0) panic("vtd_create_domain: no dma remapping hardware available"); /* * Calculate AGAW. * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec. */ addr = 0; for (gaw = 0; addr < maxaddr; gaw++) addr = 1ULL << gaw; res = (gaw - 12) % 9; if (res == 0) agaw = gaw; else agaw = gaw + 9 - res; if (agaw > 64) agaw = 64; /* * Select the smallest Supported AGAW and the corresponding number * of page table levels. 
*/ pt_levels = 2; sagaw = 30; addrwidth = 0; tmp = ~0; for (i = 0; i < drhd_num; i++) { vtdmap = vtdmaps[i]; /* take most compatible value */ tmp &= VTD_CAP_SAGAW(vtdmap->cap); } for (i = 0; i < 5; i++) { if ((tmp & (1 << i)) != 0 && sagaw >= agaw) break; pt_levels++; addrwidth++; sagaw += 9; if (sagaw > 64) sagaw = 64; } if (i >= 5) { panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d", tmp, agaw); } dom = malloc(sizeof(struct domain), M_VTD, M_ZERO | M_WAITOK); dom->pt_levels = pt_levels; dom->addrwidth = addrwidth; dom->id = domain_id(); dom->maxaddr = maxaddr; dom->ptp = malloc(PAGE_SIZE, M_VTD, M_ZERO | M_WAITOK); if ((uintptr_t)dom->ptp & PAGE_MASK) panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp); #ifdef notyet /* * XXX superpage mappings for the iommu do not work correctly. * * By default all physical memory is mapped into the host_domain. * When a VM is allocated wired memory the pages belonging to it * are removed from the host_domain and added to the vm's domain. * * If the page being removed was mapped using a superpage mapping * in the host_domain then we need to demote the mapping before * removing the page. * * There is not any code to deal with the demotion at the moment * so we disable superpage mappings altogether. */ dom->spsmask = ~0; for (i = 0; i < drhd_num; i++) { vtdmap = vtdmaps[i]; /* take most compatible value */ dom->spsmask &= VTD_CAP_SPS(vtdmap->cap); } #endif SLIST_INSERT_HEAD(&domhead, dom, next); return (dom); } static void vtd_free_ptp(uint64_t *ptp, int level) { int i; uint64_t *nlp; if (level > 1) { for (i = 0; i < 512; i++) { if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0) continue; if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0) continue; nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] & VTD_PTE_ADDR_M); vtd_free_ptp(nlp, level - 1); } } bzero(ptp, PAGE_SIZE); free(ptp, M_VTD); } static void vtd_destroy_domain(void *arg) { struct domain *dom; dom = arg; SLIST_REMOVE(&domhead, dom, domain, next); vtd_free_ptp(dom->ptp, dom->pt_levels); free(dom, M_VTD); } const struct iommu_ops iommu_ops_intel = { .init = vtd_init, .cleanup = vtd_cleanup, .enable = vtd_enable, .disable = vtd_disable, .create_domain = vtd_create_domain, .destroy_domain = vtd_destroy_domain, .create_mapping = vtd_create_mapping, .remove_mapping = vtd_remove_mapping, .add_device = vtd_add_device, .remove_device = vtd_remove_device, .invalidate_tlb = vtd_invalidate_tlb, }; diff --git a/sys/amd64/vmm/io/iommu.c b/sys/amd64/vmm/io/iommu.c index 6a589f153815..01ce29539ec2 100644 --- a/sys/amd64/vmm/io/iommu.c +++ b/sys/amd64/vmm/io/iommu.c @@ -1,342 +1,343 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "vmm_util.h" #include "vmm_mem.h" #include "iommu.h" SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "bhyve iommu parameters"); static int iommu_avail; SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, initialized, CTLFLAG_RD, &iommu_avail, 0, "bhyve iommu initialized?"); static int iommu_enable = 1; SYSCTL_INT(_hw_vmm_iommu, OID_AUTO, enable, CTLFLAG_RDTUN, &iommu_enable, 0, "Enable use of I/O MMU (required for PCI passthrough)."); static const struct iommu_ops *ops; static void *host_domain; static eventhandler_tag add_tag, delete_tag; static __inline int IOMMU_INIT(void) { if (ops != NULL) return ((*ops->init)()); else return (ENXIO); } static __inline void IOMMU_CLEANUP(void) { if (ops != NULL && iommu_avail) (*ops->cleanup)(); } static __inline void * IOMMU_CREATE_DOMAIN(vm_paddr_t maxaddr) { if (ops != NULL && iommu_avail) return ((*ops->create_domain)(maxaddr)); else return (NULL); } static __inline void IOMMU_DESTROY_DOMAIN(void *dom) { if (ops != NULL && iommu_avail) (*ops->destroy_domain)(dom); } static __inline uint64_t IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) { if (ops != NULL && iommu_avail) return ((*ops->create_mapping)(domain, gpa, hpa, len)); else return (len); /* XXX */ } static __inline uint64_t IOMMU_REMOVE_MAPPING(void *domain, vm_paddr_t gpa, uint64_t len) { if (ops != NULL && iommu_avail) return ((*ops->remove_mapping)(domain, gpa, len)); else return (len); /* XXX */ } static __inline void IOMMU_ADD_DEVICE(void *domain, uint16_t rid) { if (ops != NULL && iommu_avail) (*ops->add_device)(domain, rid); } static __inline void IOMMU_REMOVE_DEVICE(void *domain, uint16_t rid) { if (ops != NULL && iommu_avail) (*ops->remove_device)(domain, rid); } static __inline void IOMMU_INVALIDATE_TLB(void *domain) { if (ops != NULL && iommu_avail) (*ops->invalidate_tlb)(domain); } static __inline void IOMMU_ENABLE(void) { if (ops != NULL && iommu_avail) (*ops->enable)(); } static __inline void IOMMU_DISABLE(void) { if (ops != NULL && iommu_avail) (*ops->disable)(); } static void iommu_pci_add(void *arg, device_t dev) { /* Add new devices to the host domain. 
*/ iommu_add_device(host_domain, pci_get_rid(dev)); } static void iommu_pci_delete(void *arg, device_t dev) { iommu_remove_device(host_domain, pci_get_rid(dev)); } static void iommu_init(void) { int error, bus, slot, func; vm_paddr_t maxaddr; devclass_t dc; device_t dev; if (!iommu_enable) return; if (vmm_is_intel()) ops = &iommu_ops_intel; else if (vmm_is_svm()) ops = &iommu_ops_amd; else ops = NULL; error = IOMMU_INIT(); if (error) return; iommu_avail = 1; /* * Create a domain for the devices owned by the host */ maxaddr = vmm_mem_maxaddr(); host_domain = IOMMU_CREATE_DOMAIN(maxaddr); if (host_domain == NULL) { printf("iommu_init: unable to create a host domain"); IOMMU_CLEANUP(); ops = NULL; iommu_avail = 0; return; } /* * Create 1:1 mappings from '0' to 'maxaddr' for devices assigned to * the host */ iommu_create_mapping(host_domain, 0, 0, maxaddr); add_tag = EVENTHANDLER_REGISTER(pci_add_device, iommu_pci_add, NULL, 0); delete_tag = EVENTHANDLER_REGISTER(pci_delete_device, iommu_pci_delete, NULL, 0); dc = devclass_find("ppt"); for (bus = 0; bus <= PCI_BUSMAX; bus++) { for (slot = 0; slot <= PCI_SLOTMAX; slot++) { for (func = 0; func <= PCI_FUNCMAX; func++) { dev = pci_find_dbsf(0, bus, slot, func); if (dev == NULL) continue; /* Skip passthrough devices. */ if (dc != NULL && device_get_devclass(dev) == dc) continue; /* * Everything else belongs to the host * domain. */ iommu_add_device(host_domain, pci_get_rid(dev)); } } } IOMMU_ENABLE(); } void iommu_cleanup(void) { if (add_tag != NULL) { EVENTHANDLER_DEREGISTER(pci_add_device, add_tag); add_tag = NULL; } if (delete_tag != NULL) { EVENTHANDLER_DEREGISTER(pci_delete_device, delete_tag); delete_tag = NULL; } IOMMU_DISABLE(); IOMMU_DESTROY_DOMAIN(host_domain); + host_domain = NULL; IOMMU_CLEANUP(); } void * iommu_create_domain(vm_paddr_t maxaddr) { static volatile int iommu_initted; if (iommu_initted < 2) { if (atomic_cmpset_int(&iommu_initted, 0, 1)) { iommu_init(); atomic_store_rel_int(&iommu_initted, 2); } else while (iommu_initted == 1) cpu_spinwait(); } return (IOMMU_CREATE_DOMAIN(maxaddr)); } void iommu_destroy_domain(void *dom) { IOMMU_DESTROY_DOMAIN(dom); } void iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len) { uint64_t mapped, remaining; remaining = len; while (remaining > 0) { mapped = IOMMU_CREATE_MAPPING(dom, gpa, hpa, remaining); gpa += mapped; hpa += mapped; remaining -= mapped; } } void iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len) { uint64_t unmapped, remaining; remaining = len; while (remaining > 0) { unmapped = IOMMU_REMOVE_MAPPING(dom, gpa, remaining); gpa += unmapped; remaining -= unmapped; } } void * iommu_host_domain(void) { return (host_domain); } void iommu_add_device(void *dom, uint16_t rid) { IOMMU_ADD_DEVICE(dom, rid); } void iommu_remove_device(void *dom, uint16_t rid) { IOMMU_REMOVE_DEVICE(dom, rid); } void iommu_invalidate_tlb(void *domain) { IOMMU_INVALIDATE_TLB(domain); } diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index edb65a3ac07b..26dad1832b10 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -1,762 +1,764 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmm_lapic.h" #include "vmm_ktr.h" #include "iommu.h" #include "ppt.h" /* XXX locking */ #define MAX_MSIMSGS 32 /* * If the MSI-X table is located in the middle of a BAR then that MMIO * region gets split into two segments - one segment above the MSI-X table * and the other segment below the MSI-X table - with a hole in place of * the MSI-X table so accesses to it can be trapped and emulated. * * So, allocate a MMIO segment for each BAR register + 1 additional segment. */ #define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1) MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources"); struct pptintr_arg { /* pptintr(pptintr_arg) */ struct pptdev *pptdev; uint64_t addr; uint64_t msg_data; }; struct pptseg { vm_paddr_t gpa; size_t len; int wired; }; struct pptdev { device_t dev; struct vm *vm; /* owner of this device */ TAILQ_ENTRY(pptdev) next; struct pptseg mmio[MAX_MMIOSEGS]; struct { int num_msgs; /* guest state */ int startrid; /* host state */ struct resource *res[MAX_MSIMSGS]; void *cookie[MAX_MSIMSGS]; struct pptintr_arg arg[MAX_MSIMSGS]; } msi; struct { int num_msgs; int startrid; int msix_table_rid; int msix_pba_rid; struct resource *msix_table_res; struct resource *msix_pba_res; struct resource **res; void **cookie; struct pptintr_arg *arg; } msix; }; SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "bhyve passthru devices"); static int num_pptdevs; SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0, "number of pci passthru devices"); static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list); static int ppt_probe(device_t dev) { int bus, slot, func; struct pci_devinfo *dinfo; dinfo = (struct pci_devinfo *)device_get_ivars(dev); bus = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); /* * To qualify as a pci passthrough device a device must: * - be allowed by administrator to be used in this role * - be an endpoint device */ if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL) return (ENXIO); else if (vmm_is_pptdev(bus, slot, func)) return (0); else /* * Returning BUS_PROBE_NOWILDCARD here matches devices that the * SR-IOV infrastructure specified as "ppt" passthrough devices. * All normal devices that did not have "ppt" specified as their * driver will not be matched by this. 
*/ return (BUS_PROBE_NOWILDCARD); } static int ppt_attach(device_t dev) { struct pptdev *ppt; ppt = device_get_softc(dev); iommu_remove_device(iommu_host_domain(), pci_get_rid(dev)); num_pptdevs++; TAILQ_INSERT_TAIL(&pptdev_list, ppt, next); ppt->dev = dev; if (bootverbose) device_printf(dev, "attached\n"); return (0); } static int ppt_detach(device_t dev) { struct pptdev *ppt; ppt = device_get_softc(dev); if (ppt->vm != NULL) return (EBUSY); num_pptdevs--; TAILQ_REMOVE(&pptdev_list, ppt, next); pci_disable_busmaster(dev); - iommu_add_device(iommu_host_domain(), pci_get_rid(dev)); + + if (iommu_host_domain() != NULL) + iommu_add_device(iommu_host_domain(), pci_get_rid(dev)); return (0); } static device_method_t ppt_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ppt_probe), DEVMETHOD(device_attach, ppt_attach), DEVMETHOD(device_detach, ppt_detach), {0, 0} }; DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev)); DRIVER_MODULE(ppt, pci, ppt_driver, NULL, NULL); static int ppt_find(struct vm *vm, int bus, int slot, int func, struct pptdev **pptp) { device_t dev; struct pptdev *ppt; int b, s, f; TAILQ_FOREACH(ppt, &pptdev_list, next) { dev = ppt->dev; b = pci_get_bus(dev); s = pci_get_slot(dev); f = pci_get_function(dev); if (bus == b && slot == s && func == f) break; } if (ppt == NULL) return (ENOENT); if (ppt->vm != vm) /* Make sure we own this device */ return (EBUSY); *pptp = ppt; return (0); } static void ppt_unmap_all_mmio(struct vm *vm, struct pptdev *ppt) { int i; struct pptseg *seg; for (i = 0; i < MAX_MMIOSEGS; i++) { seg = &ppt->mmio[i]; if (seg->len == 0) continue; (void)vm_unmap_mmio(vm, seg->gpa, seg->len); bzero(seg, sizeof(struct pptseg)); } } static void ppt_teardown_msi(struct pptdev *ppt) { int i, rid; void *cookie; struct resource *res; if (ppt->msi.num_msgs == 0) return; for (i = 0; i < ppt->msi.num_msgs; i++) { rid = ppt->msi.startrid + i; res = ppt->msi.res[i]; cookie = ppt->msi.cookie[i]; if (cookie != NULL) bus_teardown_intr(ppt->dev, res, cookie); if (res != NULL) bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); ppt->msi.res[i] = NULL; ppt->msi.cookie[i] = NULL; } if (ppt->msi.startrid == 1) pci_release_msi(ppt->dev); ppt->msi.num_msgs = 0; } static void ppt_teardown_msix_intr(struct pptdev *ppt, int idx) { int rid; struct resource *res; void *cookie; rid = ppt->msix.startrid + idx; res = ppt->msix.res[idx]; cookie = ppt->msix.cookie[idx]; if (cookie != NULL) bus_teardown_intr(ppt->dev, res, cookie); if (res != NULL) bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); ppt->msix.res[idx] = NULL; ppt->msix.cookie[idx] = NULL; } static void ppt_teardown_msix(struct pptdev *ppt) { int i; if (ppt->msix.num_msgs == 0) return; for (i = 0; i < ppt->msix.num_msgs; i++) ppt_teardown_msix_intr(ppt, i); free(ppt->msix.res, M_PPTMSIX); free(ppt->msix.cookie, M_PPTMSIX); free(ppt->msix.arg, M_PPTMSIX); pci_release_msi(ppt->dev); if (ppt->msix.msix_table_res) { bus_release_resource(ppt->dev, SYS_RES_MEMORY, ppt->msix.msix_table_rid, ppt->msix.msix_table_res); ppt->msix.msix_table_res = NULL; ppt->msix.msix_table_rid = 0; } if (ppt->msix.msix_pba_res) { bus_release_resource(ppt->dev, SYS_RES_MEMORY, ppt->msix.msix_pba_rid, ppt->msix.msix_pba_res); ppt->msix.msix_pba_res = NULL; ppt->msix.msix_pba_rid = 0; } ppt->msix.num_msgs = 0; } int ppt_avail_devices(void) { return (num_pptdevs); } int ppt_assigned_devices(struct vm *vm) { struct pptdev *ppt; int num; num = 0; TAILQ_FOREACH(ppt, &pptdev_list, next) { if (ppt->vm == vm) num++; } return (num); } 
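/*
 * Editor's note -- illustrative sketch only, not part of the patch.
 * The MAX_MMIOSEGS comment earlier in this file explains that a BAR
 * containing the MSI-X table is exposed to the guest as two MMIO
 * segments with a page-aligned hole over the table, so that accesses
 * to the table can be trapped and emulated.  The helper below sketches
 * that split arithmetic under assumed inputs; the names (mmio_window,
 * split_bar_around_msix) are hypothetical and the real segments are
 * the 'struct pptseg' entries installed via ppt_map_mmio().
 */
struct mmio_window {
	vm_paddr_t	base;	/* guest-physical start of the segment */
	size_t		len;	/* length in bytes, 0 if the segment is empty */
};

static int
split_bar_around_msix(vm_paddr_t bar_base, size_t bar_len,
    size_t table_offset, size_t table_len, struct mmio_window seg[2])
{
	vm_paddr_t hole_start, hole_end;

	/* Widen the hole to page boundaries; trapping is per page. */
	hole_start = rounddown2(bar_base + table_offset, PAGE_SIZE);
	hole_end = roundup2(bar_base + table_offset + table_len, PAGE_SIZE);
	if (hole_start < bar_base || hole_end > bar_base + bar_len)
		return (EINVAL);

	/* Segment below the MSI-X table. */
	seg[0].base = bar_base;
	seg[0].len = hole_start - bar_base;

	/* Segment above the MSI-X table. */
	seg[1].base = hole_end;
	seg[1].len = bar_base + bar_len - hole_end;

	return (0);
}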
bool ppt_is_mmio(struct vm *vm, vm_paddr_t gpa) { int i; struct pptdev *ppt; struct pptseg *seg; TAILQ_FOREACH(ppt, &pptdev_list, next) { if (ppt->vm != vm) continue; for (i = 0; i < MAX_MMIOSEGS; i++) { seg = &ppt->mmio[i]; if (seg->len == 0) continue; if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) return (true); } } return (false); } static void ppt_pci_reset(device_t dev) { if (pcie_flr(dev, max(pcie_get_max_completion_timeout(dev) / 1000, 10), true)) return; pci_power_reset(dev); } int ppt_assign_device(struct vm *vm, int bus, int slot, int func) { struct pptdev *ppt; int error; /* Passing NULL requires the device to be unowned. */ error = ppt_find(NULL, bus, slot, func, &ppt); if (error) return (error); pci_save_state(ppt->dev); ppt_pci_reset(ppt->dev); pci_restore_state(ppt->dev); ppt->vm = vm; iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); return (0); } int ppt_unassign_device(struct vm *vm, int bus, int slot, int func) { struct pptdev *ppt; int error; error = ppt_find(vm, bus, slot, func, &ppt); if (error) return (error); pci_save_state(ppt->dev); ppt_pci_reset(ppt->dev); pci_restore_state(ppt->dev); ppt_unmap_all_mmio(vm, ppt); ppt_teardown_msi(ppt); ppt_teardown_msix(ppt); iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); ppt->vm = NULL; return (0); } int ppt_unassign_all(struct vm *vm) { struct pptdev *ppt; int bus, slot, func; device_t dev; TAILQ_FOREACH(ppt, &pptdev_list, next) { if (ppt->vm == vm) { dev = ppt->dev; bus = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); vm_unassign_pptdev(vm, bus, slot, func); } } return (0); } static bool ppt_valid_bar_mapping(struct pptdev *ppt, vm_paddr_t hpa, size_t len) { struct pci_map *pm; pci_addr_t base, size; for (pm = pci_first_bar(ppt->dev); pm != NULL; pm = pci_next_bar(pm)) { if (!PCI_BAR_MEM(pm->pm_value)) continue; base = pm->pm_value & PCIM_BAR_MEM_BASE; size = (pci_addr_t)1 << pm->pm_size; if (hpa >= base && hpa + len <= base + size) return (true); } return (false); } int ppt_map_mmio(struct vm *vm, int bus, int slot, int func, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { int i, error; struct pptseg *seg; struct pptdev *ppt; if (len % PAGE_SIZE != 0 || len == 0 || gpa % PAGE_SIZE != 0 || hpa % PAGE_SIZE != 0 || gpa + len < gpa || hpa + len < hpa) return (EINVAL); error = ppt_find(vm, bus, slot, func, &ppt); if (error) return (error); if (!ppt_valid_bar_mapping(ppt, hpa, len)) return (EINVAL); for (i = 0; i < MAX_MMIOSEGS; i++) { seg = &ppt->mmio[i]; if (seg->len == 0) { error = vm_map_mmio(vm, gpa, len, hpa); if (error == 0) { seg->gpa = gpa; seg->len = len; } return (error); } } return (ENOSPC); } int ppt_unmap_mmio(struct vm *vm, int bus, int slot, int func, vm_paddr_t gpa, size_t len) { int i, error; struct pptseg *seg; struct pptdev *ppt; error = ppt_find(vm, bus, slot, func, &ppt); if (error) return (error); for (i = 0; i < MAX_MMIOSEGS; i++) { seg = &ppt->mmio[i]; if (seg->gpa == gpa && seg->len == len) { error = vm_unmap_mmio(vm, seg->gpa, seg->len); if (error == 0) { seg->gpa = 0; seg->len = 0; } return (error); } } return (ENOENT); } static int pptintr(void *arg) { struct pptdev *ppt; struct pptintr_arg *pptarg; pptarg = arg; ppt = pptarg->pptdev; if (ppt->vm != NULL) lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data); else { /* * XXX * This is not expected to happen - panic? */ } /* * For legacy interrupts give other filters a chance in case * the interrupt was not generated by the passthrough device. 
*/ if (ppt->msi.startrid == 0) return (FILTER_STRAY); else return (FILTER_HANDLED); } int ppt_setup_msi(struct vm *vm, int bus, int slot, int func, uint64_t addr, uint64_t msg, int numvec) { int i, rid, flags; int msi_count, startrid, error, tmp; struct pptdev *ppt; if (numvec < 0 || numvec > MAX_MSIMSGS) return (EINVAL); error = ppt_find(vm, bus, slot, func, &ppt); if (error) return (error); /* Reject attempts to enable MSI while MSI-X is active. */ if (ppt->msix.num_msgs != 0 && numvec != 0) return (EBUSY); /* Free any allocated resources */ ppt_teardown_msi(ppt); if (numvec == 0) /* nothing more to do */ return (0); flags = RF_ACTIVE; msi_count = pci_msi_count(ppt->dev); if (msi_count == 0) { startrid = 0; /* legacy interrupt */ msi_count = 1; flags |= RF_SHAREABLE; } else startrid = 1; /* MSI */ /* * The device must be capable of supporting the number of vectors * the guest wants to allocate. */ if (numvec > msi_count) return (EINVAL); /* * Make sure that we can allocate all the MSI vectors that are needed * by the guest. */ if (startrid == 1) { tmp = numvec; error = pci_alloc_msi(ppt->dev, &tmp); if (error) return (error); else if (tmp != numvec) { pci_release_msi(ppt->dev); return (ENOSPC); } else { /* success */ } } ppt->msi.startrid = startrid; /* * Allocate the irq resource and attach it to the interrupt handler. */ for (i = 0; i < numvec; i++) { ppt->msi.num_msgs = i + 1; ppt->msi.cookie[i] = NULL; rid = startrid + i; ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, &rid, flags); if (ppt->msi.res[i] == NULL) break; ppt->msi.arg[i].pptdev = ppt; ppt->msi.arg[i].addr = addr; ppt->msi.arg[i].msg_data = msg + i; error = bus_setup_intr(ppt->dev, ppt->msi.res[i], INTR_TYPE_NET | INTR_MPSAFE, pptintr, NULL, &ppt->msi.arg[i], &ppt->msi.cookie[i]); if (error != 0) break; } if (i < numvec) { ppt_teardown_msi(ppt); return (ENXIO); } return (0); } int ppt_setup_msix(struct vm *vm, int bus, int slot, int func, int idx, uint64_t addr, uint64_t msg, uint32_t vector_control) { struct pptdev *ppt; struct pci_devinfo *dinfo; int numvec, alloced, rid, error; size_t res_size, cookie_size, arg_size; error = ppt_find(vm, bus, slot, func, &ppt); if (error) return (error); /* Reject attempts to enable MSI-X while MSI is active. 
*/ if (ppt->msi.num_msgs != 0) return (EBUSY); dinfo = device_get_ivars(ppt->dev); if (!dinfo) return (ENXIO); /* * First-time configuration: * Allocate the MSI-X table * Allocate the IRQ resources * Set up some variables in ppt->msix */ if (ppt->msix.num_msgs == 0) { numvec = pci_msix_count(ppt->dev); if (numvec <= 0) return (EINVAL); ppt->msix.startrid = 1; ppt->msix.num_msgs = numvec; res_size = numvec * sizeof(ppt->msix.res[0]); cookie_size = numvec * sizeof(ppt->msix.cookie[0]); arg_size = numvec * sizeof(ppt->msix.arg[0]); ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO); ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX, M_WAITOK | M_ZERO); ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO); rid = dinfo->cfg.msix.msix_table_bar; ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (ppt->msix.msix_table_res == NULL) { ppt_teardown_msix(ppt); return (ENOSPC); } ppt->msix.msix_table_rid = rid; if (dinfo->cfg.msix.msix_table_bar != dinfo->cfg.msix.msix_pba_bar) { rid = dinfo->cfg.msix.msix_pba_bar; ppt->msix.msix_pba_res = bus_alloc_resource_any( ppt->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (ppt->msix.msix_pba_res == NULL) { ppt_teardown_msix(ppt); return (ENOSPC); } ppt->msix.msix_pba_rid = rid; } alloced = numvec; error = pci_alloc_msix(ppt->dev, &alloced); if (error || alloced != numvec) { ppt_teardown_msix(ppt); return (error == 0 ? ENOSPC: error); } } if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { /* Tear down the IRQ if it's already set up */ ppt_teardown_msix_intr(ppt, idx); /* Allocate the IRQ resource */ ppt->msix.cookie[idx] = NULL; rid = ppt->msix.startrid + idx; ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (ppt->msix.res[idx] == NULL) return (ENXIO); ppt->msix.arg[idx].pptdev = ppt; ppt->msix.arg[idx].addr = addr; ppt->msix.arg[idx].msg_data = msg; /* Setup the MSI-X interrupt */ error = bus_setup_intr(ppt->dev, ppt->msix.res[idx], INTR_TYPE_NET | INTR_MPSAFE, pptintr, NULL, &ppt->msix.arg[idx], &ppt->msix.cookie[idx]); if (error != 0) { bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]); ppt->msix.cookie[idx] = NULL; ppt->msix.res[idx] = NULL; return (ENXIO); } } else { /* Masked, tear it down if it's already been set up */ ppt_teardown_msix_intr(ppt, idx); } return (0); } int ppt_disable_msix(struct vm *vm, int bus, int slot, int func) { struct pptdev *ppt; int error; error = ppt_find(vm, bus, slot, func, &ppt); if (error) return (error); ppt_teardown_msix(ppt); return (0); }
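/*
 * Editor's note -- illustrative sketch only, not part of the patch.
 * ppt_setup_msi() and ppt_setup_msix() accept the guest-programmed MSI
 * 'addr' and 'msg' values verbatim, and pptintr() later hands them back
 * to the guest's local APIC via lapic_intr_msi().  For reference, this
 * is the x86 MSI encoding those values follow (Intel SDM, Message
 * Signalled Interrupts); the macro and helper names here are
 * hypothetical and chosen to avoid clashing with the kernel's own
 * apicreg.h definitions.
 */
#define	SKETCH_MSI_ADDR_BASE		0xfee00000u	/* bits 31:20 = 0xFEE */
#define	SKETCH_MSI_ADDR_DEST_SHIFT	12		/* bits 19:12 = dest APIC ID */
#define	SKETCH_MSI_DATA_DELMODE_FIXED	(0 << 8)	/* bits 10:8 = delivery mode */
#define	SKETCH_MSI_DATA_TRGMODE_EDGE	(0 << 15)	/* bit 15 = trigger mode */

static inline void
sketch_msi_encode_fixed_edge(uint8_t dest_apic_id, uint8_t vector,
    uint64_t *addr, uint64_t *data)
{
	/* Fixed delivery, physical destination mode, edge triggered. */
	*addr = SKETCH_MSI_ADDR_BASE |
	    ((uint64_t)dest_apic_id << SKETCH_MSI_ADDR_DEST_SHIFT);
	*data = SKETCH_MSI_DATA_DELMODE_FIXED | SKETCH_MSI_DATA_TRGMODE_EDGE |
	    vector;
}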