diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -11838,7 +11838,7 @@ continue; } if (PMAP_ADDRESS_IN_LARGEMAP(sva) && - vm_phys_paddr_to_vm_page(pa) == NULL) { + vm_phys_paddr_to_vm_page(pa, NULL) == NULL) { /* * Page table pages for the large map may be * freed. Validate the next-level address @@ -11866,7 +11866,8 @@ continue; } if (PMAP_ADDRESS_IN_LARGEMAP(sva) && - vm_phys_paddr_to_vm_page(pa) == NULL) { + vm_phys_paddr_to_vm_page(pa, NULL) == + NULL) { /* * Page table pages for the large map * may be freed. Validate the diff --git a/sys/amd64/vmm/amd/amdvi_hw.c b/sys/amd64/vmm/amd/amdvi_hw.c --- a/sys/amd64/vmm/amd/amdvi_hw.c +++ b/sys/amd64/vmm/amd/amdvi_hw.c @@ -1005,7 +1005,7 @@ } static void * -amdvi_create_domain(vm_paddr_t maxaddr) +amdvi_create_domain(vm_paddr_t maxaddr, bool host_domain __unused) { struct amdvi_domain *dom; @@ -1158,9 +1158,9 @@ return (mapped); } -static uint64_t +static int amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, - uint64_t len) + uint64_t len, uint64_t *res_len) { struct amdvi_domain *domain; @@ -1168,7 +1168,7 @@ if (domain->id && !domain->ptp) { printf("ptp is NULL"); - return (-1); + return (EINVAL); } /* @@ -1176,13 +1176,14 @@ * table set-up. */ if (domain->ptp) - return (amdvi_update_mapping(domain, gpa, hpa, len, true)); + *res_len = amdvi_update_mapping(domain, gpa, hpa, len, true); else - return (len); + *res_len = len; + return (0); } -static uint64_t -amdvi_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len) +static int +amdvi_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len, uint64_t *res_len) { struct amdvi_domain *domain; @@ -1192,9 +1193,10 @@ * table set-up. */ if (domain->ptp) - return (amdvi_update_mapping(domain, gpa, 0, len, false)); - return - (len); + *res_len = amdvi_update_mapping(domain, gpa, 0, len, false); + else + *res_len = len; + return (0); } static struct amdvi_softc * @@ -1271,8 +1273,8 @@ amdvi_wait(softc); } -static void -amdvi_add_device(void *arg, uint16_t devid) +static int +amdvi_add_device(void *arg, device_t dev __unused, uint16_t devid) { struct amdvi_domain *domain; struct amdvi_softc *softc; @@ -1285,13 +1287,14 @@ #endif softc = amdvi_find_iommu(devid); if (softc == NULL) - return; + return (ENXIO); amdvi_set_dte(domain, softc, devid, true); amdvi_inv_device(softc, devid); + return (0); } -static void -amdvi_remove_device(void *arg, uint16_t devid) +static int +amdvi_remove_device(void *arg, device_t dev __unused, uint16_t devid) { struct amdvi_domain *domain; struct amdvi_softc *softc; @@ -1303,9 +1306,10 @@ #endif softc = amdvi_find_iommu(devid); if (softc == NULL) - return; + return (ENXIO); amdvi_set_dte(domain, softc, devid, false); amdvi_inv_device(softc, devid); + return (0); } static void @@ -1360,7 +1364,7 @@ } } -static void +static int amdvi_invalidate_tlb(void *arg) { struct amdvi_domain *domain; @@ -1368,6 +1372,7 @@ domain = (struct amdvi_domain *)arg; KASSERT(domain, ("domain is NULL")); amdvi_do_inv_domain(domain->id, false); + return (0); } const struct iommu_ops iommu_ops_amd = { @@ -1381,5 +1386,5 @@ .remove_mapping = amdvi_remove_mapping, .add_device = amdvi_add_device, .remove_device = amdvi_remove_device, - .invalidate_tlb = amdvi_invalidate_tlb + .invalidate_tlb = amdvi_invalidate_tlb, }; diff --git a/sys/amd64/vmm/intel/dmar_iommu.c b/sys/amd64/vmm/intel/dmar_iommu.c new file mode 100644 --- /dev/null +++ b/sys/amd64/vmm/intel/dmar_iommu.c @@ -0,0 +1,337 @@ +/*- + * 
SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "io/iommu.h" + +static MALLOC_DEFINE(M_DMAR_VMM, "dmar_vmm", "DMAR VMM memory"); + +struct dmar_vmm_domain { + bool host_domain; + struct sx lock; + LIST_HEAD(, dmar_domain) dmar_domains; +}; + +static int +dmar_init(void) +{ + + return (dmar_is_running()); +} + +static void +dmar_cleanup(void) +{ + + /* XXXKIB */ +} + +static void +dmar_enable(void) +{ + + /* XXXKIB */ +} + +static void +dmar_disable(void) +{ + + /* XXXKIB */ +} + +static void * +dmar_create_domain(vm_paddr_t maxaddr, bool host_domain) +{ + struct dmar_vmm_domain *vmm_dom; + + vmm_dom = malloc(sizeof(struct dmar_vmm_domain), M_DMAR_VMM, + M_WAITOK | M_ZERO); + LIST_INIT(&vmm_dom->dmar_domains); + sx_init(&vmm_dom->lock, "vmmdom"); + vmm_dom->host_domain = host_domain; + return (vmm_dom); +} + +static void +dmar_destroy_domain(void *domain) +{ + struct dmar_vmm_domain *vmm_dom; + + vmm_dom = domain; +#if 0 + LIST_FOREACH_SAFE() { + } +#endif + sx_destroy(&vmm_dom->lock); + free(vmm_dom, M_DMAR_VMM); +} + +static int +dmar_create_mapping_onedmar(struct dmar_domain *dom, vm_paddr_t gpa, + vm_paddr_t hpa, uint64_t len) +{ + struct iommu_map_entry *e; + vm_page_t fm, m, *ma; + vm_paddr_t pa; + iommu_gaddr_t glen, gtotal_len; + u_long cnt_after; + int error; + + fm = NULL; + for (pa = hpa, gtotal_len = 0; gtotal_len < len; pa += glen, + gpa += glen, gtotal_len += glen) { + e = iommu_gas_alloc_entry(&dom->iodom, IOMMU_PGF_WAITOK); + e->start = gpa; + m = vm_page_phys_to_vm_page(pa, &cnt_after); + if (m == NULL) { + if (fm == NULL) + fm = vm_page_getfake(pa, VM_MEMATTR_DEFAULT); + else + vm_page_updatefake(fm, pa, VM_MEMATTR_DEFAULT); + ma = &fm; + glen = PAGE_SIZE; + } else { + ma = &m; + glen = MIN(len - gtotal_len, ptoa(cnt_after + 1)); + 
} + e->end = gpa + glen; + error = iommu_gas_map_region(&dom->iodom, e, + IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE, + IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT | IOMMU_MF_VMM, ma); + if (error != 0) + break; + glen = e->end - e->start; + } + if (fm != NULL) + vm_page_putfake(fm); + return (error); +} + +static int +dmar_create_mapping(void *dom1, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len, + uint64_t *res_len) +{ + struct dmar_vmm_domain *vmm_dom; + struct dmar_domain *dom; + int error; + + vmm_dom = dom1; + error = 0; +//printf("dmar_create_mapping (%d): gpa %#lx hpa %#lx len %#lx\n", vmm_dom->host_domain, gpa, hpa, len); + if (vmm_dom->host_domain) { + sx_xlock(&vmm_dom->lock); + LIST_FOREACH(dom, &vmm_dom->dmar_domains, vmm_dom_link) { + error = dmar_create_mapping_onedmar(dom, gpa, hpa, len); + if (error != 0) + break; + } + sx_xunlock(&vmm_dom->lock); + } + *res_len = len; + return (error); +} + +static int +dmar_create_mapping_bulk_onedmar(struct dmar_domain *dom, vm_paddr_t gpa, + struct vm_page **ma, uint64_t len) +{ + struct iommu_map_entry *e; + iommu_gaddr_t gtotal_len, glen; + int error; + + for (gtotal_len = 0; gtotal_len < len; gpa += glen, + gtotal_len += glen) { + e = iommu_gas_alloc_entry(&dom->iodom, IOMMU_PGF_WAITOK); + glen = len - gtotal_len; + e->start = gpa; + e->end = gpa + glen; + error = iommu_gas_map_region(&dom->iodom, e, + IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE, + IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT | IOMMU_MF_VMM, + ma + atop(gtotal_len)); + if (error != 0) + break; + glen = e->end - e->start; + } + return (error); +} + +static int +dmar_create_mapping_bulk(void *dom1, vm_paddr_t gpa, struct vm_page **ma, + uint64_t len) +{ + struct dmar_vmm_domain *vmm_dom; + struct dmar_domain *dom; + int error; + + vmm_dom = dom1; + error = 0; + if (!vmm_dom->host_domain) { + sx_xlock(&vmm_dom->lock); + LIST_FOREACH(dom, &vmm_dom->dmar_domains, vmm_dom_link) { + error = dmar_create_mapping_bulk_onedmar(dom, gpa, + ma, len); + if (error != 0) + break; + } + sx_xunlock(&vmm_dom->lock); + } + return (error); +} + +static int +dmar_remove_mapping(void *dom1, vm_paddr_t gpa, uint64_t len, uint64_t *res_len) +{ + struct dmar_vmm_domain *vmm_dom; + struct dmar_domain *dom; + + vmm_dom = dom1; + if (!vmm_dom->host_domain) { + sx_xlock(&vmm_dom->lock); + LIST_FOREACH(dom, &vmm_dom->dmar_domains, vmm_dom_link) + iommu_gas_remove(&dom->iodom, gpa, gpa + len); + sx_xunlock(&vmm_dom->lock); + } + *res_len = len; + return (0); +} + +static int +dmar_add_device(void *dom1, device_t dev, uint16_t rid) +{ + struct dmar_vmm_domain *vmm_dom; + struct dmar_unit *dmar; + struct dmar_domain *domain, *rdomain; + int error; + + vmm_dom = dom1; + if (vmm_dom->host_domain) + return (0); + error = 0; + sx_xlock(&vmm_dom->lock); + dmar = dmar_find(dev, true); + if (dmar == NULL) { + error = ENOTTY; + goto done; + } + LIST_FOREACH(domain, &vmm_dom->dmar_domains, vmm_dom_link) { + if (domain->dmar == dmar) + break; + } + rdomain = dmar_vmm_domain_add_dev(&dmar->iommu, domain, dev, rid); + if (rdomain == NULL) { + error = EBUSY; + goto done; + } + if (domain == NULL || domain != rdomain) { + LIST_INSERT_HEAD(&vmm_dom->dmar_domains, rdomain, + vmm_dom_link); + } +done: + sx_xunlock(&vmm_dom->lock); + return (error); +} + +static int +dmar_remove_device(void *dom1, device_t dev, uint16_t rid) +{ + struct dmar_vmm_domain *vmm_dom; + + vmm_dom = dom1; + if (vmm_dom->host_domain) + return (0); + /* XXXKIB */ + return (0); +} + +static int +dmar_invalidate_tlb(void *dom) +{ + + /* XXXKIB: do nothing, 
rely on map/unmap ? */ + return (0); +} + +static bool +dmar_get_swcaps(enum iommu_swcaps cap) +{ + + switch (cap) { + case IOMMU_CAP_BULK: + return (true); + default: + return (false); + } +} + +const struct iommu_ops iommu_ops_dmar = { + .init = dmar_init, + .cleanup = dmar_cleanup, + .enable = dmar_enable, + .disable = dmar_disable, + .create_domain = dmar_create_domain, + .destroy_domain = dmar_destroy_domain, + .create_mapping = dmar_create_mapping, + .create_mapping_bulk = dmar_create_mapping_bulk, + .remove_mapping = dmar_remove_mapping, + .add_device = dmar_add_device, + .remove_device = dmar_remove_device, + .invalidate_tlb = dmar_invalidate_tlb, + .get_swcaps = dmar_get_swcaps, +}; diff --git a/sys/amd64/vmm/intel/vtd.c b/sys/amd64/vmm/intel/vtd.c --- a/sys/amd64/vmm/intel/vtd.c +++ b/sys/amd64/vmm/intel/vtd.c @@ -436,8 +436,8 @@ } } -static void -vtd_add_device(void *arg, uint16_t rid) +static int +vtd_add_device(void *arg, device_t dev __unused, uint16_t rid) { int idx; uint64_t *ctxp; @@ -478,10 +478,11 @@ * 'Not Present' entries are not cached in either the Context Cache * or in the IOTLB, so there is no need to invalidate either of them. */ + return (0); } -static void -vtd_remove_device(void *arg, uint16_t rid) +static int +vtd_remove_device(void *arg, device_t dev __unused, uint16_t rid) { int i, idx; uint64_t *ctxp; @@ -509,6 +510,7 @@ vtd_ctx_global_invalidate(vtdmap); vtd_iotlb_global_invalidate(vtdmap); } + return (0); } #define CREATE_MAPPING 0 @@ -603,21 +605,24 @@ return (1UL << ptpshift); } -static uint64_t -vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) +static int +vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len, + uint64_t *res_len) { - return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING)); + *res_len = vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING); + return (0); } -static uint64_t -vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len) +static int +vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len, uint64_t *res_len) { - return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING)); + *res_len = vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING); + return (0); } -static void +static int vtd_invalidate_tlb(void *dom) { int i; @@ -631,10 +636,11 @@ vtdmap = vtdmaps[i]; vtd_iotlb_global_invalidate(vtdmap); } + return (0); } static void * -vtd_create_domain(vm_paddr_t maxaddr) +vtd_create_domain(vm_paddr_t maxaddr, bool host_domain __unused) { struct domain *dom; vm_paddr_t addr; diff --git a/sys/amd64/vmm/io/iommu.h b/sys/amd64/vmm/io/iommu.h --- a/sys/amd64/vmm/io/iommu.h +++ b/sys/amd64/vmm/io/iommu.h @@ -31,19 +31,26 @@ #ifndef _IO_IOMMU_H_ #define _IO_IOMMU_H_ +enum iommu_swcaps { + IOMMU_CAP_BULK, +}; + typedef int (*iommu_init_func_t)(void); typedef void (*iommu_cleanup_func_t)(void); typedef void (*iommu_enable_func_t)(void); typedef void (*iommu_disable_func_t)(void); -typedef void *(*iommu_create_domain_t)(vm_paddr_t maxaddr); +typedef void *(*iommu_create_domain_t)(vm_paddr_t maxaddr, bool host_domain); typedef void (*iommu_destroy_domain_t)(void *domain); -typedef uint64_t (*iommu_create_mapping_t)(void *domain, vm_paddr_t gpa, - vm_paddr_t hpa, uint64_t len); -typedef uint64_t (*iommu_remove_mapping_t)(void *domain, vm_paddr_t gpa, - uint64_t len); -typedef void (*iommu_add_device_t)(void *domain, uint16_t rid); -typedef void (*iommu_remove_device_t)(void *dom, uint16_t rid); -typedef void (*iommu_invalidate_tlb_t)(void *dom); +typedef int 
(*iommu_create_mapping_t)(void *domain, vm_paddr_t gpa, + vm_paddr_t hpa, uint64_t len, uint64_t *res_len); +typedef int (*iommu_create_mapping_bulk_t)(void *domain, vm_paddr_t gpa, + struct vm_page **ma, uint64_t len); +typedef int (*iommu_remove_mapping_t)(void *domain, vm_paddr_t gpa, + uint64_t len, uint64_t *res_len); +typedef int (*iommu_add_device_t)(void *domain, device_t dev, uint16_t rid); +typedef int (*iommu_remove_device_t)(void *dom, device_t dev, uint16_t rid); +typedef int (*iommu_invalidate_tlb_t)(void *dom); +typedef bool (*iommu_get_swcaps_t)(enum iommu_swcaps); struct iommu_ops { iommu_init_func_t init; /* module wide */ @@ -54,23 +61,29 @@ iommu_create_domain_t create_domain; /* domain-specific */ iommu_destroy_domain_t destroy_domain; iommu_create_mapping_t create_mapping; + iommu_create_mapping_bulk_t create_mapping_bulk; iommu_remove_mapping_t remove_mapping; iommu_add_device_t add_device; iommu_remove_device_t remove_device; iommu_invalidate_tlb_t invalidate_tlb; + iommu_get_swcaps_t get_swcaps; }; -extern const struct iommu_ops iommu_ops_intel; extern const struct iommu_ops iommu_ops_amd; +extern const struct iommu_ops iommu_ops_dmar; +extern const struct iommu_ops iommu_ops_intel; void iommu_cleanup(void); void *iommu_host_domain(void); void *iommu_create_domain(vm_paddr_t maxaddr); void iommu_destroy_domain(void *dom); -void iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, - size_t len); -void iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len); -void iommu_add_device(void *dom, uint16_t rid); -void iommu_remove_device(void *dom, uint16_t rid); -void iommu_invalidate_tlb(void *domain); +int iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, + size_t len); +int iommu_create_mapping_bulk(void *dom, vm_paddr_t gpa, + struct vm_page **ma, size_t len); +int iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len); +int iommu_add_device(void *dom, device_t dev, uint16_t rid); +int iommu_remove_device(void *dom, device_t dev, uint16_t rid); +int iommu_invalidate_tlb(void *domain); +bool iommu_get_swcaps(enum iommu_swcaps cap); #endif diff --git a/sys/amd64/vmm/io/iommu.c b/sys/amd64/vmm/io/iommu.c --- a/sys/amd64/vmm/io/iommu.c +++ b/sys/amd64/vmm/io/iommu.c @@ -80,11 +80,11 @@ } static __inline void * -IOMMU_CREATE_DOMAIN(vm_paddr_t maxaddr) +IOMMU_CREATE_DOMAIN(vm_paddr_t maxaddr, bool host_domain) { if (ops != NULL && iommu_avail) - return ((*ops->create_domain)(maxaddr)); + return ((*ops->create_domain)(maxaddr, host_domain)); else return (NULL); } @@ -97,48 +97,61 @@ (*ops->destroy_domain)(dom); } -static __inline uint64_t -IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) +static __inline int +IOMMU_CREATE_MAPPING(void *domain, vm_paddr_t gpa, vm_paddr_t hpa, + uint64_t len, uint64_t *res_len) { if (ops != NULL && iommu_avail) - return ((*ops->create_mapping)(domain, gpa, hpa, len)); - else - return (len); /* XXX */ + return ((*ops->create_mapping)(domain, gpa, hpa, len, res_len)); + return (EOPNOTSUPP); } -static __inline uint64_t -IOMMU_REMOVE_MAPPING(void *domain, vm_paddr_t gpa, uint64_t len) +static __inline int +IOMMU_CREATE_MAPPING_BULK(void *domain, vm_paddr_t gpa, struct vm_page **ma, + size_t len) { if (ops != NULL && iommu_avail) - return ((*ops->remove_mapping)(domain, gpa, len)); - else - return (len); /* XXX */ + return ((*ops->create_mapping_bulk)(domain, gpa, ma, len)); + return (EOPNOTSUPP); } -static __inline void -IOMMU_ADD_DEVICE(void *domain, uint16_t rid) +static __inline 
int +IOMMU_REMOVE_MAPPING(void *domain, vm_paddr_t gpa, uint64_t len, + uint64_t *res_len) { if (ops != NULL && iommu_avail) - (*ops->add_device)(domain, rid); + return ((*ops->remove_mapping)(domain, gpa, len, res_len)); + return (EOPNOTSUPP); } -static __inline void -IOMMU_REMOVE_DEVICE(void *domain, uint16_t rid) +static __inline int +IOMMU_ADD_DEVICE(void *domain, device_t dev, uint16_t rid) { if (ops != NULL && iommu_avail) - (*ops->remove_device)(domain, rid); + return ((*ops->add_device)(domain, dev, rid)); + return (EOPNOTSUPP); } -static __inline void +static __inline int +IOMMU_REMOVE_DEVICE(void *domain, device_t dev, uint16_t rid) +{ + + if (ops != NULL && iommu_avail) + return ((*ops->remove_device)(domain, dev, rid)); + return (0); /* To allow ppt_attach() to succeed. */ +} + +static __inline int IOMMU_INVALIDATE_TLB(void *domain) { if (ops != NULL && iommu_avail) - (*ops->invalidate_tlb)(domain); + return ((*ops->invalidate_tlb)(domain)); + return (0); } static __inline void @@ -157,19 +170,28 @@ (*ops->disable)(); } +static __inline bool +IOMMU_GET_SWCAPS(enum iommu_swcaps cap) +{ + + if (ops != NULL && ops->get_swcaps != NULL && iommu_avail) + return (*ops->get_swcaps)(cap); + return (false); +} + static void iommu_pci_add(void *arg, device_t dev) { /* Add new devices to the host domain. */ - iommu_add_device(host_domain, pci_get_rid(dev)); + iommu_add_device(host_domain, dev, pci_get_rid(dev)); } static void iommu_pci_delete(void *arg, device_t dev) { - iommu_remove_device(host_domain, pci_get_rid(dev)); + iommu_remove_device(host_domain, dev, pci_get_rid(dev)); } static void @@ -183,16 +205,19 @@ if (!iommu_enable) return; - if (vmm_is_intel()) - ops = &iommu_ops_intel; - else if (vmm_is_svm()) + if (vmm_is_intel()) { + /* XXXKIB ops = &iommu_ops_intel; */ + ops = &iommu_ops_dmar; + } else if (vmm_is_svm()) ops = &iommu_ops_amd; else ops = NULL; error = IOMMU_INIT(); - if (error) + if (error) { + printf("iommu_init: error %d\n", error); return; + } iommu_avail = 1; @@ -200,7 +225,7 @@ * Create a domain for the devices owned by the host */ maxaddr = vmm_mem_maxaddr(); - host_domain = IOMMU_CREATE_DOMAIN(maxaddr); + host_domain = IOMMU_CREATE_DOMAIN(maxaddr, true); if (host_domain == NULL) { printf("iommu_init: unable to create a host domain"); IOMMU_CLEANUP(); @@ -235,8 +260,12 @@ * Everything else belongs to the host * domain. 
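* Adding them to the host domain keeps their DMA working through the * host domain's 1:1 mappings once the IOMMU is enabled.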
*/ - iommu_add_device(host_domain, + error = iommu_add_device(host_domain, dev, pci_get_rid(dev)); + if (error != 0) { + /* XXXKIB cleanup */ + return; + } } } } @@ -274,7 +303,7 @@ while (iommu_initted == 1) cpu_spinwait(); } - return (IOMMU_CREATE_DOMAIN(maxaddr)); + return (IOMMU_CREATE_DOMAIN(maxaddr, false)); } void @@ -284,33 +313,43 @@ IOMMU_DESTROY_DOMAIN(dom); } -void +int iommu_create_mapping(void *dom, vm_paddr_t gpa, vm_paddr_t hpa, size_t len) { uint64_t mapped, remaining; + int error; + + for (remaining = len; remaining > 0; gpa += mapped, hpa += mapped, + remaining -= mapped) { + error = IOMMU_CREATE_MAPPING(dom, gpa, hpa, remaining, + &mapped); + if (error != 0) + return (error); + } + return (0); +} - remaining = len; +int +iommu_create_mapping_bulk(void *dom, vm_paddr_t gpa, struct vm_page **ma, + size_t len) +{ - while (remaining > 0) { - mapped = IOMMU_CREATE_MAPPING(dom, gpa, hpa, remaining); - gpa += mapped; - hpa += mapped; - remaining -= mapped; - } + return (IOMMU_CREATE_MAPPING_BULK(dom, gpa, ma, len)); } -void +int iommu_remove_mapping(void *dom, vm_paddr_t gpa, size_t len) { uint64_t unmapped, remaining; + int error; - remaining = len; - - while (remaining > 0) { - unmapped = IOMMU_REMOVE_MAPPING(dom, gpa, remaining); - gpa += unmapped; - remaining -= unmapped; + for (remaining = len; remaining > 0; gpa += unmapped, + remaining -= unmapped) { + error = IOMMU_REMOVE_MAPPING(dom, gpa, remaining, &unmapped); + if (error != 0) + return (error); } + return (0); } void * @@ -320,23 +359,30 @@ return (host_domain); } -void -iommu_add_device(void *dom, uint16_t rid) +int +iommu_add_device(void *dom, device_t dev, uint16_t rid) { - IOMMU_ADD_DEVICE(dom, rid); + return (IOMMU_ADD_DEVICE(dom, dev, rid)); } -void -iommu_remove_device(void *dom, uint16_t rid) +int +iommu_remove_device(void *dom, device_t dev, uint16_t rid) { - IOMMU_REMOVE_DEVICE(dom, rid); + return (IOMMU_REMOVE_DEVICE(dom, dev, rid)); } -void +int iommu_invalidate_tlb(void *domain) { - IOMMU_INVALIDATE_TLB(domain); + return (IOMMU_INVALIDATE_TLB(domain)); +} + +bool +iommu_get_swcaps(enum iommu_swcaps cap) +{ + + return (IOMMU_GET_SWCAPS(cap)); } diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -156,10 +156,13 @@ ppt_attach(device_t dev) { struct pptdev *ppt; + int error; ppt = device_get_softc(dev); - iommu_remove_device(iommu_host_domain(), pci_get_rid(dev)); + error = iommu_remove_device(iommu_host_domain(), dev, pci_get_rid(dev)); + if (error != 0) + return (error); num_pptdevs++; TAILQ_INSERT_TAIL(&pptdev_list, ppt, next); ppt->dev = dev; @@ -174,15 +177,18 @@ ppt_detach(device_t dev) { struct pptdev *ppt; + int error; ppt = device_get_softc(dev); if (ppt->vm != NULL) return (EBUSY); + error = iommu_add_device(iommu_host_domain(), dev, pci_get_rid(dev)); + if (error != 0) + return (error); num_pptdevs--; TAILQ_REMOVE(&pptdev_list, ppt, next); pci_disable_busmaster(dev); - iommu_add_device(iommu_host_domain(), pci_get_rid(dev)); return (0); } @@ -394,8 +400,10 @@ ppt_pci_reset(ppt->dev); pci_restore_state(ppt->dev); - ppt->vm = vm; - iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); - return (0); + error = iommu_add_device(vm_iommu_domain(vm), ppt->dev, + pci_get_rid(ppt->dev)); + if (error == 0) + ppt->vm = vm; + return (error); } int @@ -414,9 +423,10 @@ ppt_unmap_all_mmio(vm, ppt); ppt_teardown_msi(ppt); ppt_teardown_msix(ppt); - iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); + error = 
iommu_remove_device(vm_iommu_domain(vm), ppt->dev, + pci_get_rid(ppt->dev)); ppt->vm = NULL; - return (0); + return (error); } int diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -904,15 +904,25 @@ return (maxaddr); } -static void +_Static_assert((1 << VM_LEVEL_0_ORDER) * sizeof(vm_page_t) == PAGE_SIZE, "XXX"); + +static int vm_iommu_modify(struct vm *vm, bool map) { - int i, sz; vm_paddr_t gpa, hpa; struct mem_map *mm; void *vp, *cookie, *host_domain; + vm_page_t *ma; + int error, i, n, sz; - sz = PAGE_SIZE; + if (iommu_get_swcaps(IOMMU_CAP_BULK)) { + sz = ptoa(1 << VM_LEVEL_0_ORDER); + ma = (void *)kmem_malloc((1 << VM_LEVEL_0_ORDER) * + sizeof(vm_page_t), M_WAITOK | M_ZERO); + } else { + sz = PAGE_SIZE; + ma = NULL; + } host_domain = iommu_host_domain(); for (i = 0; i < VM_MAX_MEMMAPS; i++) { @@ -936,23 +946,63 @@ mm->gpa, mm->len, mm->flags)); } - gpa = mm->gpa; - while (gpa < mm->gpa + mm->len) { - vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE, - &cookie); - KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", - vm_name(vm), gpa)); - - vm_gpa_release(cookie); - - hpa = DMAP_TO_PHYS((uintptr_t)vp); + if (iommu_get_swcaps(IOMMU_CAP_BULK)) { if (map) { - iommu_create_mapping(vm->iommu, gpa, hpa, sz); + for (gpa = mm->gpa; gpa < mm->gpa + mm->len;) { + n = vm_fault_quick_hold_pages( + &vm->vmspace->vm_map, gpa, + MIN(mm->gpa + mm->len - gpa, sz), + VM_PROT_READ | VM_PROT_WRITE, ma, + 1 << VM_LEVEL_0_ORDER); + if (n == -1) { + error = EFAULT; + goto ret; + } + error = iommu_create_mapping_bulk( + vm->iommu, gpa, ma, ptoa(n)); + vm_page_unhold_pages(ma, n); + if (error != 0) + goto ret; + gpa += ptoa(n); + } } else { - iommu_remove_mapping(vm->iommu, gpa, sz); + error = iommu_remove_mapping(vm->iommu, + mm->gpa, mm->len); + if (error != 0) + goto ret; + } + } else { + gpa = mm->gpa; + while (gpa < mm->gpa + mm->len) { + vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, + VM_PROT_WRITE, &cookie); + KASSERT(vp != NULL, + ("vm(%s) could not map gpa %#lx", + vm_name(vm), gpa)); + vm_gpa_release(cookie); + + hpa = DMAP_TO_PHYS((uintptr_t)vp); + if (map) { + error = iommu_create_mapping(vm->iommu, + gpa, hpa, sz); + if (error != 0) + goto ret; + error = iommu_remove_mapping( + host_domain, hpa, sz); + if (error != 0) + goto ret; + } else { + error = iommu_remove_mapping(vm->iommu, + gpa, sz); + if (error != 0) + goto ret; + error = iommu_create_mapping( + host_domain, hpa, hpa, sz); + if (error != 0) + goto ret; + } + gpa += PAGE_SIZE; } - - gpa += PAGE_SIZE; } } @@ -961,9 +1010,15 @@ * Invalidate the cached translations associated with the domain * from which pages were removed. 
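* A map operation removes the pages from the host domain, while an * unmap operation removes them from the VM's domain.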
*/ if (map) - iommu_invalidate_tlb(host_domain); + error = iommu_invalidate_tlb(host_domain); else - iommu_invalidate_tlb(vm->iommu); + error = iommu_invalidate_tlb(vm->iommu); +ret: + if (ma != NULL) { + kmem_free((vm_offset_t)ma, (1 << VM_LEVEL_0_ORDER) * + sizeof(vm_page_t)); + } + return (error); } #define vm_iommu_unmap(vm) vm_iommu_modify((vm), false) @@ -979,9 +1034,9 @@ return (error); if (ppt_assigned_devices(vm) == 0) - vm_iommu_unmap(vm); + error = vm_iommu_unmap(vm); - return (0); + return (error); } int @@ -998,10 +1053,17 @@ vm->iommu = iommu_create_domain(maxaddr); if (vm->iommu == NULL) return (ENXIO); - vm_iommu_map(vm); +#if 0 + error = vm_iommu_map(vm); + if (error != 0) + return (error); +#endif } error = ppt_assign_device(vm, bus, slot, func); + if (error != 0) + return (error); + error = vm_iommu_map(vm); return (error); } diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h --- a/sys/dev/iommu/iommu.h +++ b/sys/dev/iommu/iommu.h @@ -140,6 +140,8 @@ page table */ #define IOMMU_DOMAIN_RMRR 0x0020 /* Domain contains RMRR entry, cannot be turned off */ +#define DMAR_DOMAIN_VMM 0x0040 /* Used by VMM */ +#define DMAR_DOMAIN_BUSDMA 0x0100 /* Used for busdma */ #define IOMMU_LOCK(unit) mtx_lock(&(unit)->lock) #define IOMMU_UNLOCK(unit) mtx_unlock(&(unit)->lock) diff --git a/sys/dev/iommu/iommu_gas.h b/sys/dev/iommu/iommu_gas.h --- a/sys/dev/iommu/iommu_gas.h +++ b/sys/dev/iommu/iommu_gas.h @@ -37,6 +37,8 @@ #define IOMMU_MF_CANWAIT 0x0001 #define IOMMU_MF_CANSPLIT 0x0002 #define IOMMU_MF_RMRR 0x0004 +#define IOMMU_MF_CANTRIM 0x0008 +#define IOMMU_MF_VMM 0x0010 #define IOMMU_PGF_WAITOK 0x0001 #define IOMMU_PGF_ZERO 0x0002 @@ -49,6 +51,7 @@ dmamap_link */ #define IOMMU_MAP_ENTRY_MAP 0x0004 /* Busdma created, linked by dmamap_link */ +#define IOMMU_MAP_ENTRY_VMM 0x0008 /* VMM created */ #define IOMMU_MAP_ENTRY_UNMAPPED 0x0010 /* No backing pages */ #define IOMMU_MAP_ENTRY_REMOVING 0x0020 /* In process of removal by iommu_gas_remove() */ diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -535,7 +535,9 @@ KASSERT(found, ("found RMRR dup %p start %jx end %jx", domain, (uintmax_t)entry->start, (uintmax_t)entry->end)); if ((flags & IOMMU_MF_RMRR) != 0) - entry->flags = IOMMU_MAP_ENTRY_RMRR; + entry->flags |= IOMMU_MAP_ENTRY_RMRR; + if ((flags & IOMMU_MF_VMM) != 0) + entry->flags |= IOMMU_MAP_ENTRY_VMM; #ifdef INVARIANTS struct iommu_map_entry *ip, *in; @@ -562,9 +564,10 @@ struct iommu_domain *domain; domain = entry->domain; - KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR | - IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP, - ("permanent entry %p %p", domain, entry)); + KASSERT((entry->flags & IOMMU_MAP_ENTRY_RMRR) == 0, + ("removing RMRR entry dom %p e %p (%#jx, %#jx) fl %#x", domain, + entry, + (uintmax_t)entry->start, (uintmax_t)entry->end, entry->flags)); IOMMU_DOMAIN_LOCK(domain); iommu_gas_rb_remove(domain, entry); @@ -777,7 +780,7 @@ iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma) { - iommu_gaddr_t start; + struct iommu_map_entry *r1, *r2; int error; KASSERT(entry->domain == domain, @@ -785,33 +788,47 @@ entry, entry->domain)); KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain, entry, entry->flags)); - KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0, + KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANTRIM | + IOMMU_MF_CANSPLIT | IOMMU_MF_RMRR | IOMMU_MF_VMM)) == 0, 
("invalid flags 0x%x", flags)); - start = entry->start; + if ((flags & IOMMU_MF_VMM) != 0) { + r1 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK); + r2 = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK); + } IOMMU_DOMAIN_LOCK(domain); + if ((flags & IOMMU_MF_VMM) != 0) { + iommu_gas_remove_locked(domain, entry->start, entry->end, + &r1, &r2); + } error = iommu_gas_alloc_region(domain, entry, flags); if (error != 0) { IOMMU_DOMAIN_UNLOCK(domain); - return (error); + goto done; } entry->flags |= eflags; IOMMU_DOMAIN_UNLOCK(domain); if (entry->end == entry->start) - return (0); + goto done; error = domain->ops->map(domain, entry->start, - entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start), - eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0)); + entry->end - entry->start, ma, eflags, + (flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0); if (error == ENOMEM) { iommu_domain_unload_entry(entry, false, (flags & IOMMU_MF_CANWAIT) != 0); - return (error); + goto done; } KASSERT(error == 0, ("unexpected error %d from domain_map_buf", error)); - - return (0); +done: + if ((flags & IOMMU_MF_VMM) != 0) { + if (r1 != NULL) + iommu_gas_free_entry(r1); + if (r2 != NULL) + iommu_gas_free_entry(r2); + } + return (error); } static int diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile --- a/sys/modules/vmm/Makefile +++ b/sys/modules/vmm/Makefile @@ -42,6 +42,7 @@ # intel-specific files .PATH: ${SRCTOP}/sys/amd64/vmm/intel SRCS+= ept.c \ + dmar_iommu.c \ vmcs.c \ vmx_msr.c \ vmx_support.S \ diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c --- a/sys/vm/device_pager.c +++ b/sys/vm/device_pager.c @@ -370,7 +370,7 @@ } /* If "paddr" is a real page, perform a sanity check on "memattr". */ - if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL && + if ((m_paddr = vm_phys_paddr_to_vm_page(paddr, NULL)) != NULL && (memattr1 = pmap_page_get_memattr(m_paddr)) != memattr) { /* * For the /dev/mem d_mmap routine to return the diff --git a/sys/vm/sg_pager.c b/sys/vm/sg_pager.c --- a/sys/vm/sg_pager.c +++ b/sys/vm/sg_pager.c @@ -181,7 +181,7 @@ KASSERT(paddr != 1, ("invalid SG page index")); /* If "paddr" is a real page, perform a sanity check on "memattr". */ - if ((m_paddr = vm_phys_paddr_to_vm_page(paddr)) != NULL && + if ((m_paddr = vm_phys_paddr_to_vm_page(paddr, NULL)) != NULL && pmap_page_get_memattr(m_paddr) != memattr) { memattr = pmap_page_get_memattr(m_paddr); printf( diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -509,8 +509,11 @@ * PHYS_TO_VM_PAGE() returns the vm_page_t object that represents a memory * page to which the given physical address belongs. The correct vm_page_t * object is returned for addresses that are not page-aligned. + * vm_page_phys_to_vm_page() is same as PHYS_TO_VM_PAGE() but also can + * return the count of pages after m in the same physical segment. 
*/ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); +vm_page_t vm_page_phys_to_vm_page(vm_paddr_t pa, u_long *cnt_after); /* * Page allocation parameters for vm_page for the functions diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -329,7 +329,7 @@ vm_page_t m; int ret; - m = vm_phys_paddr_to_vm_page(pa); + m = vm_phys_paddr_to_vm_page(pa, NULL); if (m == NULL) return (true); /* page does not exist, no failure */ @@ -566,7 +566,7 @@ vm_offset_t mapped; int witness_size; #endif -#if defined(__i386__) && defined(VM_PHYSSEG_DENSE) +#if (defined(__i386__) || defined(__amd64__)) && defined(VM_PHYSSEG_DENSE) long ii; #endif @@ -755,7 +755,11 @@ * Initialize the page structures and add every available page to the * physical memory allocator's free lists. */ -#if defined(__i386__) && defined(VM_PHYSSEG_DENSE) +#if (defined(__i386__) || defined(__amd64__)) && defined(VM_PHYSSEG_DENSE) + /* + * i386 needs this for copyout(9) calling vm_fault_quick_hold_pages(). + * amd64 requires it for DMAR busdma and the bhyve IOMMU. + */ for (ii = 0; ii < vm_page_array_size; ii++) { m = &vm_page_array[ii]; vm_page_init_page(m, (first_page + ii) << PAGE_SHIFT, 0); @@ -1219,13 +1223,20 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa) +{ + + return (vm_page_phys_to_vm_page(pa, NULL)); +} + +vm_page_t +vm_page_phys_to_vm_page(vm_paddr_t pa, u_long *cnt_after) { vm_page_t m; #ifdef VM_PHYSSEG_SPARSE - m = vm_phys_paddr_to_vm_page(pa); + m = vm_phys_paddr_to_vm_page(pa, cnt_after); if (m == NULL) - m = vm_phys_fictitious_to_vm_page(pa); + m = vm_phys_fictitious_to_vm_page(pa, cnt_after); return (m); #elif defined(VM_PHYSSEG_DENSE) long pi; @@ -1233,9 +1244,11 @@ pi = atop(pa); if (pi >= first_page && (pi - first_page) < vm_page_array_size) { m = &vm_page_array[pi - first_page]; + if (cnt_after != NULL) + *cnt_after = vm_page_array_size - (pi - first_page) - 1; return (m); } - return (vm_phys_fictitious_to_vm_page(pa)); + return (vm_phys_fictitious_to_vm_page(pa, cnt_after)); #else #error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined." #endif } diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h --- a/sys/vm/vm_phys.h +++ b/sys/vm/vm_phys.h @@ -70,11 +70,11 @@ int vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, vm_memattr_t memattr); void vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end); -vm_page_t vm_phys_fictitious_to_vm_page(vm_paddr_t pa); +vm_page_t vm_phys_fictitious_to_vm_page(vm_paddr_t pa, u_long *cnt_after); void vm_phys_free_contig(vm_page_t m, u_long npages); void vm_phys_free_pages(vm_page_t m, int order); void vm_phys_init(void); -vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa); +vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa, u_long *cnt_after); void vm_phys_register_domains(int ndomains, struct mem_affinity *affinity, int *locality); vm_page_t vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c @@ -895,21 +895,24 @@ * Find the vm_page corresponding to the given physical address. 
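+ * If cnt_after is not NULL, the number of pages that follow the found + * page in its physical segment is also reported.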
*/ vm_page_t -vm_phys_paddr_to_vm_page(vm_paddr_t pa) +vm_phys_paddr_to_vm_page(vm_paddr_t pa, u_long *cnt_after) { struct vm_phys_seg *seg; int segind; for (segind = 0; segind < vm_phys_nsegs; segind++) { seg = &vm_phys_segs[segind]; - if (pa >= seg->start && pa < seg->end) + if (pa >= seg->start && pa < seg->end) { + if (cnt_after != NULL) + *cnt_after = atop(seg->end - pa) - 1; return (&seg->first_page[atop(pa - seg->start)]); + } } return (NULL); } vm_page_t -vm_phys_fictitious_to_vm_page(vm_paddr_t pa) +vm_phys_fictitious_to_vm_page(vm_paddr_t pa, u_long *cnt_after) { struct vm_phys_fictitious_seg tmp, *seg; vm_page_t m; @@ -927,6 +930,8 @@ m = &seg->first_page[atop(pa - seg->start)]; KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m)); + if (cnt_after != NULL) + *cnt_after = atop(seg->end - pa) - 1; return (m); } @@ -1705,7 +1710,7 @@ vm_page_t m; int i; - if ((m = vm_phys_paddr_to_vm_page(pa)) != NULL) + if ((m = vm_phys_paddr_to_vm_page(pa, NULL)) != NULL) return ((m->flags & PG_NODUMP) == 0); for (i = 0; dump_avail[i] != 0 || dump_avail[i + 1] != 0; i += 2) { diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c --- a/sys/x86/iommu/intel_ctx.c +++ b/sys/x86/iommu/intel_ctx.c @@ -283,7 +283,7 @@ } error1 = iommu_gas_map_region(DOM2IODOM(domain), entry, IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE, - IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma); + IOMMU_MF_CANWAIT | IOMMU_MF_CANTRIM | IOMMU_MF_RMRR, ma); /* * Non-failed RMRR entries are owned by context rb * tree. Get rid of the failed entry, but do not stop @@ -515,11 +515,11 @@ } static struct dmar_ctx * -dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid, - int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, - bool id_mapped, bool rmrr_init) +dmar_get_ctx_for_dev1(struct dmar_unit *dmar, struct dmar_domain *domain, + device_t dev, uint16_t rid, int dev_domain, int dev_busno, + const void *dev_path, int dev_path_len, bool id_mapped, bool rmrr_init) { - struct dmar_domain *domain, *domain1; + struct dmar_domain *domain1; struct dmar_ctx *ctx, *ctx1; struct iommu_unit *unit __diagused; dmar_ctx_entry_t *ctxp; @@ -537,9 +537,10 @@ func = PCI_RID2FUNC(rid); } enable = false; + domain1 = NULL; + DMAR_ASSERT_LOCKED(dmar); TD_PREP_PINNED_ASSERT; unit = DMAR2IOMMU(dmar); - DMAR_LOCK(dmar); KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0), ("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus, slot, func)); @@ -551,23 +552,27 @@ * higher chance to succeed if the sleep is allowed. 
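+ * When the caller passes in an existing VMM domain, the domain + * allocation is skipped and only a context entry is created for it.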
*/ DMAR_UNLOCK(dmar); - dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid)); - domain1 = dmar_domain_alloc(dmar, id_mapped); - if (domain1 == NULL) { - TD_PINNED_ASSERT; - return (NULL); - } - if (!id_mapped) { - error = domain_init_rmrr(domain1, dev, bus, - slot, func, dev_domain, dev_busno, dev_path, - dev_path_len); - if (error == 0 && dev != NULL) - error = dmar_reserve_pci_regions(domain1, dev); - if (error != 0) { - dmar_domain_destroy(domain1); + if (domain == NULL) { + dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid)); + domain1 = dmar_domain_alloc(dmar, id_mapped); + if (domain1 == NULL) { TD_PINNED_ASSERT; return (NULL); } + if (!id_mapped) { + error = domain_init_rmrr(domain1, dev, bus, + slot, func, dev_domain, dev_busno, dev_path, + dev_path_len); + if (error == 0 && dev != NULL) { + error = dmar_reserve_pci_regions( + domain1, dev); + } + if (error != 0) { + dmar_domain_destroy(domain1); + TD_PINNED_ASSERT; + return (NULL); + } + } } - ctx1 = dmar_ctx_alloc(domain1, rid); + ctx1 = dmar_ctx_alloc(domain != NULL ? domain : domain1, rid); ctxp = dmar_map_ctx_entry(ctx1, &sf); @@ -579,7 +584,15 @@ */ ctx = dmar_find_ctx_locked(dmar, rid); if (ctx == NULL) { + if (LIST_EMPTY(&dmar->domains)) { + MPASS(domain == NULL); + enable = true; + } + if (domain == NULL) { + domain = domain1; + domain1 = NULL; + LIST_INSERT_HEAD(&dmar->domains, domain, link); + } - domain = domain1; ctx = ctx1; dmar_ctx_link(ctx); ctx->context.tag->owner = dev; @@ -590,9 +603,6 @@ * DMAR unit. Enable the translation after * everything is set up. */ - if (LIST_EMPTY(&dmar->domains)) - enable = true; - LIST_INSERT_HEAD(&dmar->domains, domain, link); ctx_id_entry_init(ctx, ctxp, false, bus); if (dev != NULL) { device_printf(dev, @@ -604,14 +614,17 @@ } dmar_unmap_pgtbl(sf); } else { - dmar_unmap_pgtbl(sf); - dmar_domain_destroy(domain1); /* Nothing needs to be done to destroy ctx1. 
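* It was never linked into a domain, so freeing it is enough.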
*/ free(ctx1, M_DMAR_CTX); domain = CTX2DOM(ctx); ctx->refs++; /* tag referenced us */ } + if (domain1 != NULL) { + dmar_unmap_pgtbl(sf); + dmar_domain_destroy(domain1); + } } else { + MPASS(domain == NULL); domain = CTX2DOM(ctx); if (ctx->context.tag->owner == NULL) ctx->context.tag->owner = dev; @@ -649,14 +662,35 @@ return (NULL); } } - DMAR_UNLOCK(dmar); TD_PINNED_ASSERT; return (ctx); } +#if 0 +/* + * XXXKIB need to figure out devpath from rid + */ struct dmar_ctx * dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init) +{ + struct dmar_ctx *ctx; + + DMAR_LOCK(dmar); + ctx = dmar_get_ctx_for_dev1(dmar, NULL, dev, rid, id_mapped, + rmrr_init); + if (ctx != NULL) { + MPASS((ctx->domain->flags & DMAR_DOMAIN_VMM) == 0); + ctx->domain->flags |= DMAR_DOMAIN_BUSDMA; + } + DMAR_UNLOCK(dmar); + return (ctx); +} +#endif + +struct dmar_ctx * +dmar_get_ctx_for_dev(struct dmar_unit *dmar, struct dmar_domain *domain, + device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init) { int dev_domain, dev_path_len, dev_busno; @@ -664,8 +698,8 @@ dev_path_len = dmar_dev_depth(dev); ACPI_DMAR_PCI_PATH dev_path[dev_path_len]; dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len); - return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno, - dev_path, dev_path_len, id_mapped, rmrr_init)); + return (dmar_get_ctx_for_dev1(dmar, domain, dev, rid, dev_domain, + dev_busno, dev_path, dev_path_len, id_mapped, rmrr_init)); } struct dmar_ctx * @@ -675,37 +709,42 @@ bool id_mapped, bool rmrr_init) { - return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno, - dev_path, dev_path_len, id_mapped, rmrr_init)); + return (dmar_get_ctx_for_dev1(dmar, NULL, NULL, rid, dev_domain, + dev_busno, dev_path, dev_path_len, id_mapped, rmrr_init)); } -int +static struct dmar_domain * dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx) { struct dmar_unit *dmar; struct dmar_domain *old_domain; dmar_ctx_entry_t *ctxp; struct sf_buf *sf; - int error; dmar = domain->dmar; + DMAR_ASSERT_LOCKED(dmar); old_domain = CTX2DOM(ctx); - if (domain == old_domain) - return (0); + if (domain == old_domain) + return (domain); KASSERT(old_domain->iodom.iommu == domain->iodom.iommu, ("domain %p %u moving between dmars %u %u", domain, - domain->domain, old_domain->iodom.iommu->unit, - domain->iodom.iommu->unit)); + domain->domain, old_domain->dmar->iommu.unit, + domain->dmar->iommu.unit)); + + if ((old_domain->iodom.flags & IOMMU_DOMAIN_RMRR) != 0) + return (old_domain); + TD_PREP_PINNED_ASSERT; ctxp = dmar_map_ctx_entry(ctx, &sf); - DMAR_LOCK(dmar); dmar_ctx_unlink(ctx); ctx->context.domain = &domain->iodom; dmar_ctx_link(ctx); ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100); dmar_unmap_pgtbl(sf); - error = dmar_flush_for_ctx_entry(dmar, true); + (void)dmar_flush_for_ctx_entry(dmar, true); /* If the flush failed, rolling back would not work either. */ printf("dmar%d rid %x domain %d->%d %s-mapped\n", dmar->iommu.unit, ctx->context.rid, old_domain->domain, @@ -713,7 +752,58 @@ "id" : "re"); dmar_unref_domain_locked(dmar, old_domain); TD_PINNED_ASSERT; - return (error); + return (domain); +} + +/* + * Create a VMM domain for the given device. Keep the device on a + * private domain if it needs RMRR. Otherwise coalesce VMM domains to + * reduce the number of maintained page tables. If this is the first + * domain on this dmar for this VM (domain == NULL), reuse an already + * created busdma domain if possible. 
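+ * A domain is not allowed to serve busdma and VMM at the same time; + * when a busdma domain is taken over for VMM use, its pending busdma + * unload requests are drained first.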
+ */ +struct dmar_domain * +dmar_vmm_domain_add_dev(struct iommu_unit *iommu, struct dmar_domain *domain, + device_t dev, uint16_t rid) +{ + struct dmar_unit *dmar; + struct dmar_domain *rdomain; + struct dmar_ctx *ctx; + bool drain; + + dmar = (struct dmar_unit *)iommu; + MPASS(domain == NULL || domain->dmar == dmar); + rdomain = NULL; + drain = false; + DMAR_LOCK(dmar); + ctx = dmar_find_ctx_locked(dmar, rid); + if (ctx != NULL) { + rdomain = domain != NULL ? dmar_move_ctx_to_domain(domain, + ctx) : (struct dmar_domain *)ctx->context.domain; + } else { + ctx = dmar_get_ctx_for_dev(dmar, domain, dev, rid, false, + true); + if (ctx != NULL) { + rdomain = (struct dmar_domain *)ctx->context.domain; + MPASS(domain == NULL || rdomain == domain); + } + } + if (rdomain != NULL) { + MPASS((rdomain->iodom.flags & (DMAR_DOMAIN_BUSDMA | + DMAR_DOMAIN_VMM)) != (DMAR_DOMAIN_BUSDMA | + DMAR_DOMAIN_VMM)); + if ((rdomain->iodom.flags & DMAR_DOMAIN_BUSDMA) != 0) { + rdomain->iodom.flags &= ~DMAR_DOMAIN_BUSDMA; + drain = true; + } + rdomain->iodom.flags |= DMAR_DOMAIN_VMM; + } + DMAR_UNLOCK(dmar); + if (drain) { + taskqueue_drain(dmar->iommu.delayed_taskqueue, + &rdomain->iodom.unload_task); + } + return (rdomain); } static void @@ -829,9 +919,6 @@ dmar_free_ctx_locked(dmar, ctx); } -/* - * Returns with the domain locked. - */ struct dmar_ctx * dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid) { @@ -955,7 +1042,7 @@ dmar = IOMMU2DMAR(iommu); - ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init); + ret = dmar_get_ctx_for_dev(dmar, NULL, dev, rid, id_mapped, rmrr_init); return (CTX2IOCTX(ret)); } diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h --- a/sys/x86/iommu/intel_dmar.h +++ b/sys/x86/iommu/intel_dmar.h @@ -67,6 +67,7 @@ u_int refs; /* (u) Refs, including ctx */ struct dmar_unit *dmar; /* (c) */ LIST_ENTRY(dmar_domain) link; /* (u) Member in the dmar list */ + LIST_ENTRY(dmar_domain) vmm_dom_link; /* Member in external vmm dom */ LIST_HEAD(, dmar_ctx) contexts; /* (u) */ vm_object_t pgtbl_obj; /* (c) Page table pages */ u_int batch_no; @@ -295,16 +296,18 @@ int dmar_dev_depth(device_t child); void dmar_dev_path(device_t child, int *busno, void *path1, int depth); -struct dmar_ctx *dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, - uint16_t rid, bool id_mapped, bool rmrr_init); +struct dmar_ctx *dmar_get_ctx_for_dev(struct dmar_unit *dmar, + struct dmar_domain *domain, device_t dev, uint16_t rid, bool id_mapped, + bool rmrr_init); struct dmar_ctx *dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid, int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, bool id_mapped, bool rmrr_init); -int dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx); void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx); void dmar_free_ctx(struct dmar_ctx *ctx); struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid); void dmar_domain_free_entry(struct iommu_map_entry *entry, bool free); +struct dmar_domain *dmar_vmm_domain_add_dev(struct iommu_unit *dmar, + struct dmar_domain *domain, device_t dev, uint16_t rid); void dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain, int dev_busno, const void *dev_path, int dev_path_len, @@ -317,6 +320,8 @@ int dmar_init_irt(struct dmar_unit *unit); void dmar_fini_irt(struct dmar_unit *unit); +int dmar_is_running(void); + extern iommu_haddr_t dmar_high; extern int haw; extern int dmar_tbl_pagecnt; diff --git a/sys/x86/iommu/intel_drv.c 
b/sys/x86/iommu/intel_drv.c --- a/sys/x86/iommu/intel_drv.c +++ b/sys/x86/iommu/intel_drv.c @@ -85,6 +85,7 @@ static device_t *dmar_devs; static int dmar_devcnt; +static bool dmar_running = false; typedef int (*dmar_iter_t)(ACPI_DMAR_HEADER *, void *); @@ -555,6 +556,7 @@ DMAR_UNLOCK(unit); #endif + dmar_running = true; return (0); } @@ -1052,6 +1054,13 @@ } +int +dmar_is_running(void) +{ + + return (dmar_running ? 0 : ENXIO); +} + /* * Pre-create all contexts for the DMAR which have RMRR entries. */ diff --git a/sys/x86/iommu/intel_idpgtbl.c b/sys/x86/iommu/intel_idpgtbl.c --- a/sys/x86/iommu/intel_idpgtbl.c +++ b/sys/x86/iommu/intel_idpgtbl.c @@ -690,7 +690,6 @@ int error; domain = IODOM2DOM(iodom); - DMAR_DOMAIN_PGLOCK(domain); error = domain_unmap_buf_locked(domain, base, size, flags); DMAR_DOMAIN_PGUNLOCK(domain); diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c --- a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -519,7 +519,7 @@ struct pci_devinst *pi; struct pci_bar_io bar; enum pcibar_type bartype; - uint64_t base, size; + uint64_t base, old_base, size; pi = sc->psc_pi; @@ -556,8 +556,14 @@ "base %#lx or size %#lx not page aligned\n", sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, i, base, size); - return (-1); } + if ((base & PAGE_MASK) != 0) { + old_base = base; + base = trunc_page(base); + size += old_base - base; + } + if ((size & PAGE_MASK) != 0) + size = round_page(size); } /* Cache information about the "real" BAR */