diff --git a/sys/arm64/iommu/iommu.c b/sys/arm64/iommu/iommu.c
index 331553a93623..d876d443ffa0 100644
--- a/sys/arm64/iommu/iommu.c
+++ b/sys/arm64/iommu/iommu.c
@@ -1,523 +1,528 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 Ruslan Bukin
*
* This software was developed by SRI International and the University of
* Cambridge Computer Laboratory (Department of Computer Science and
* Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the
* DARPA SSITH research programme.
*
* Portions of this work was supported by Innovate UK project 105694,
* "Digital Security by Design (DSbD) Technology Platform Prototype".
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_platform.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef FDT
#include
#include
#include
#endif
#include "iommu.h"
#include "iommu_if.h"
static MALLOC_DEFINE(M_IOMMU, "IOMMU", "IOMMU framework");
#define IOMMU_LIST_LOCK() sx_xlock(&iommu_sx)
#define IOMMU_LIST_UNLOCK() sx_xunlock(&iommu_sx)
#define IOMMU_LIST_ASSERT_LOCKED() sx_assert(&iommu_sx, SA_XLOCKED)
#define dprintf(fmt, ...)
static struct sx iommu_sx;
struct iommu_entry {
struct iommu_unit *iommu;
LIST_ENTRY(iommu_entry) next;
};
static LIST_HEAD(, iommu_entry) iommu_list = LIST_HEAD_INITIALIZER(iommu_list);
static int
iommu_domain_unmap_buf(struct iommu_domain *iodom, iommu_gaddr_t base,
iommu_gaddr_t size, int flags)
{
struct iommu_unit *iommu;
int error;
iommu = iodom->iommu;
error = IOMMU_UNMAP(iommu->dev, iodom, base, size);
return (error);
}
static int
iommu_domain_map_buf(struct iommu_domain *iodom, iommu_gaddr_t base,
iommu_gaddr_t size, vm_page_t *ma, uint64_t eflags, int flags)
{
struct iommu_unit *iommu;
vm_prot_t prot;
vm_offset_t va;
int error;
dprintf("%s: base %lx, size %lx\n", __func__, base, size);
prot = 0;
if (eflags & IOMMU_MAP_ENTRY_READ)
prot |= VM_PROT_READ;
if (eflags & IOMMU_MAP_ENTRY_WRITE)
prot |= VM_PROT_WRITE;
va = base;
iommu = iodom->iommu;
error = IOMMU_MAP(iommu->dev, iodom, va, ma, size, prot);
return (error);
}
static const struct iommu_domain_map_ops domain_map_ops = {
.map = iommu_domain_map_buf,
.unmap = iommu_domain_unmap_buf,
};
static struct iommu_domain *
iommu_domain_alloc(struct iommu_unit *iommu)
{
struct iommu_domain *iodom;
iodom = IOMMU_DOMAIN_ALLOC(iommu->dev, iommu);
if (iodom == NULL)
return (NULL);
KASSERT(iodom->end != 0, ("domain end is not set"));
iommu_domain_init(iommu, iodom, &domain_map_ops);
iodom->iommu = iommu;
iommu_gas_init_domain(iodom);
return (iodom);
}
static int
iommu_domain_free(struct iommu_domain *iodom)
{
struct iommu_unit *iommu;
iommu = iodom->iommu;
IOMMU_LOCK(iommu);
if ((iodom->flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
IOMMU_DOMAIN_LOCK(iodom);
iommu_gas_fini_domain(iodom);
IOMMU_DOMAIN_UNLOCK(iodom);
}
iommu_domain_fini(iodom);
IOMMU_DOMAIN_FREE(iommu->dev, iodom);
IOMMU_UNLOCK(iommu);
return (0);
}
static void
iommu_tag_init(struct iommu_domain *iodom, struct bus_dma_tag_iommu *t)
{
bus_addr_t maxaddr;
maxaddr = MIN(iodom->end, BUS_SPACE_MAXADDR);
t->common.ref_count = 0;
t->common.impl = &bus_dma_iommu_impl;
t->common.alignment = 1;
t->common.boundary = 0;
t->common.lowaddr = maxaddr;
t->common.highaddr = maxaddr;
t->common.maxsize = maxaddr;
t->common.nsegments = BUS_SPACE_UNRESTRICTED;
t->common.maxsegsz = maxaddr;
}
static struct iommu_ctx *
iommu_ctx_alloc(device_t requester, struct iommu_domain *iodom, bool disabled)
{
struct iommu_unit *iommu;
struct iommu_ctx *ioctx;
iommu = iodom->iommu;
ioctx = IOMMU_CTX_ALLOC(iommu->dev, iodom, requester, disabled);
if (ioctx == NULL)
return (NULL);
ioctx->domain = iodom;
return (ioctx);
}
static int
iommu_ctx_init(device_t requester, struct iommu_ctx *ioctx)
{
struct bus_dma_tag_iommu *tag;
struct iommu_domain *iodom;
struct iommu_unit *iommu;
int error;
iodom = ioctx->domain;
iommu = iodom->iommu;
error = IOMMU_CTX_INIT(iommu->dev, ioctx);
if (error)
return (error);
tag = ioctx->tag = malloc(sizeof(struct bus_dma_tag_iommu),
M_IOMMU, M_WAITOK | M_ZERO);
tag->owner = requester;
tag->ctx = ioctx;
tag->ctx->domain = iodom;
iommu_tag_init(iodom, tag);
return (error);
}
static struct iommu_unit *
iommu_lookup(device_t dev)
{
struct iommu_entry *entry;
struct iommu_unit *iommu;
IOMMU_LIST_LOCK();
LIST_FOREACH(entry, &iommu_list, next) {
iommu = entry->iommu;
if (iommu->dev == dev) {
IOMMU_LIST_UNLOCK();
return (iommu);
}
}
IOMMU_LIST_UNLOCK();
return (NULL);
}
#ifdef FDT
struct iommu_ctx *
iommu_get_ctx_ofw(device_t dev, int channel)
{
struct iommu_domain *iodom;
struct iommu_unit *iommu;
struct iommu_ctx *ioctx;
phandle_t node, parent;
device_t iommu_dev;
pcell_t *cells;
int niommus;
int ncells;
int error;
node = ofw_bus_get_node(dev);
if (node <= 0) {
device_printf(dev,
"%s called on not ofw based device.\n", __func__);
return (NULL);
}
error = ofw_bus_parse_xref_list_get_length(node,
"iommus", "#iommu-cells", &niommus);
if (error) {
device_printf(dev, "%s can't get iommu list.\n", __func__);
return (NULL);
}
if (niommus == 0) {
device_printf(dev, "%s iommu list is empty.\n", __func__);
return (NULL);
}
error = ofw_bus_parse_xref_list_alloc(node, "iommus", "#iommu-cells",
channel, &parent, &ncells, &cells);
if (error != 0) {
device_printf(dev, "%s can't get iommu device xref.\n",
__func__);
return (NULL);
}
iommu_dev = OF_device_from_xref(parent);
if (iommu_dev == NULL) {
device_printf(dev, "%s can't get iommu device.\n", __func__);
return (NULL);
}
iommu = iommu_lookup(iommu_dev);
if (iommu == NULL) {
device_printf(dev, "%s can't lookup iommu.\n", __func__);
return (NULL);
}
/*
* In our current configuration we have a domain per ctx,
* so allocate a domain first.
*/
iodom = iommu_domain_alloc(iommu);
if (iodom == NULL) {
device_printf(dev, "%s can't allocate domain.\n", __func__);
return (NULL);
}
ioctx = iommu_ctx_alloc(dev, iodom, false);
if (ioctx == NULL) {
iommu_domain_free(iodom);
return (NULL);
}
ioctx->domain = iodom;
error = IOMMU_OFW_MD_DATA(iommu->dev, ioctx, cells, ncells);
if (error) {
device_printf(dev, "%s can't set MD data\n", __func__);
return (NULL);
}
error = iommu_ctx_init(dev, ioctx);
if (error) {
IOMMU_CTX_FREE(iommu->dev, ioctx);
iommu_domain_free(iodom);
return (NULL);
}
return (ioctx);
}
#endif
struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t requester,
uint16_t rid, bool disabled, bool rmrr)
{
struct iommu_domain *iodom;
struct iommu_ctx *ioctx;
int error;
IOMMU_LOCK(iommu);
ioctx = IOMMU_CTX_LOOKUP(iommu->dev, requester);
if (ioctx) {
IOMMU_UNLOCK(iommu);
return (ioctx);
}
IOMMU_UNLOCK(iommu);
/*
* In our current configuration we have a domain per ctx.
* So allocate a domain first.
*/
iodom = iommu_domain_alloc(iommu);
if (iodom == NULL)
return (NULL);
ioctx = iommu_ctx_alloc(requester, iodom, disabled);
if (ioctx == NULL) {
iommu_domain_free(iodom);
return (NULL);
}
error = iommu_ctx_init(requester, ioctx);
if (error) {
IOMMU_CTX_FREE(iommu->dev, ioctx);
iommu_domain_free(iodom);
return (NULL);
}
return (ioctx);
}
void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *ioctx)
{
struct bus_dma_tag_iommu *tag;
IOMMU_ASSERT_LOCKED(iommu);
tag = ioctx->tag;
IOMMU_CTX_FREE(iommu->dev, ioctx);
free(tag, M_IOMMU);
}
void
iommu_free_ctx(struct iommu_ctx *ioctx)
{
struct iommu_unit *iommu;
struct iommu_domain *iodom;
int error;
iodom = ioctx->domain;
iommu = iodom->iommu;
IOMMU_LOCK(iommu);
iommu_free_ctx_locked(iommu, ioctx);
IOMMU_UNLOCK(iommu);
/* Since we have a domain per ctx, remove the domain too. */
error = iommu_domain_free(iodom);
if (error)
device_printf(iommu->dev, "Could not free a domain\n");
}
static void
iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
iommu_gas_free_space(entry);
if (free)
iommu_gas_free_entry(entry);
else
entry->flags = 0;
}
void
iommu_domain_unload(struct iommu_domain *iodom,
struct iommu_map_entries_tailq *entries, bool cansleep)
{
struct iommu_map_entry *entry, *entry1;
int error __diagused;
TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
("not mapped entry %p %p", iodom, entry));
error = iodom->ops->unmap(iodom, entry->start, entry->end -
entry->start, cansleep ? IOMMU_PGF_WAITOK : 0);
KASSERT(error == 0, ("unmap %p error %d", iodom, error));
TAILQ_REMOVE(entries, entry, dmamap_link);
iommu_domain_free_entry(entry, true);
}
if (TAILQ_EMPTY(entries))
return;
panic("entries map is not empty");
}
int
iommu_register(struct iommu_unit *iommu)
{
struct iommu_entry *entry;
mtx_init(&iommu->lock, "IOMMU", NULL, MTX_DEF);
entry = malloc(sizeof(struct iommu_entry), M_IOMMU, M_WAITOK | M_ZERO);
entry->iommu = iommu;
IOMMU_LIST_LOCK();
LIST_INSERT_HEAD(&iommu_list, entry, next);
IOMMU_LIST_UNLOCK();
iommu_init_busdma(iommu);
return (0);
}
int
iommu_unregister(struct iommu_unit *iommu)
{
struct iommu_entry *entry, *tmp;
IOMMU_LIST_LOCK();
LIST_FOREACH_SAFE(entry, &iommu_list, next, tmp) {
if (entry->iommu == iommu) {
LIST_REMOVE(entry, next);
free(entry, M_IOMMU);
}
}
IOMMU_LIST_UNLOCK();
iommu_fini_busdma(iommu);
mtx_destroy(&iommu->lock);
return (0);
}
struct iommu_unit *
iommu_find(device_t dev, bool verbose)
{
struct iommu_entry *entry;
struct iommu_unit *iommu;
int error;
IOMMU_LIST_LOCK();
LIST_FOREACH(entry, &iommu_list, next) {
iommu = entry->iommu;
error = IOMMU_FIND(iommu->dev, dev);
if (error == 0) {
IOMMU_LIST_UNLOCK();
return (entry->iommu);
}
}
IOMMU_LIST_UNLOCK();
return (NULL);
}
+void
+iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
+{
+}
+
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep __unused)
{
dprintf("%s\n", __func__);
iommu_domain_free_entry(entry, free);
}
static void
iommu_init(void)
{
sx_init(&iommu_sx, "IOMMU list");
}
SYSINIT(iommu, SI_SUB_DRIVERS, SI_ORDER_FIRST, iommu_init, NULL);
diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 565067488435..0d86ff0d4468 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -1,1103 +1,1099 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/*
* busdma_iommu.c, the implementation of the busdma(9) interface using
* IOMMU units from Intel VT-d.
*/
static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
char str[128], *env;
int default_bounce;
bool ret;
static const char bounce_str[] = "bounce";
static const char iommu_str[] = "iommu";
static const char dmar_str[] = "dmar"; /* compatibility */
default_bounce = 0;
env = kern_getenv("hw.busdma.default");
if (env != NULL) {
if (strcmp(env, bounce_str) == 0)
default_bounce = 1;
else if (strcmp(env, iommu_str) == 0 ||
strcmp(env, dmar_str) == 0)
default_bounce = 0;
freeenv(env);
}
snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
domain, bus, slot, func);
env = kern_getenv(str);
if (env == NULL)
return (default_bounce != 0);
if (strcmp(env, bounce_str) == 0)
ret = true;
else if (strcmp(env, iommu_str) == 0 ||
strcmp(env, dmar_str) == 0)
ret = false;
else
ret = default_bounce != 0;
freeenv(env);
return (ret);
}
/*
* Given original device, find the requester ID that will be seen by
* the IOMMU unit and used for page table lookup. PCI bridges may take
* ownership of transactions from downstream devices, so it may not be
* the same as the BSF of the target device. In those cases, all
* devices downstream of the bridge must share a single mapping
* domain, and must collectively be assigned to use either IOMMU or
* bounce mapping.
*/
device_t
iommu_get_requester(device_t dev, uint16_t *rid)
{
devclass_t pci_class;
device_t l, pci, pcib, pcip, pcibp, requester;
int cap_offset;
uint16_t pcie_flags;
bool bridge_is_pcie;
pci_class = devclass_find("pci");
l = requester = dev;
*rid = pci_get_rid(dev);
/*
* Walk the bridge hierarchy from the target device to the
* host port to find the translating bridge nearest the IOMMU
* unit.
*/
for (;;) {
pci = device_get_parent(l);
KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
"for %s", device_get_name(dev), device_get_name(l)));
KASSERT(device_get_devclass(pci) == pci_class,
("iommu_get_requester(%s): non-pci parent %s for %s",
device_get_name(dev), device_get_name(pci),
device_get_name(l)));
pcib = device_get_parent(pci);
KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
"for %s", device_get_name(dev), device_get_name(pci)));
/*
* The parent of our "bridge" isn't another PCI bus,
* so pcib isn't a PCI->PCI bridge but rather a host
* port, and the requester ID won't be translated
* further.
*/
pcip = device_get_parent(pcib);
if (device_get_devclass(pcip) != pci_class)
break;
pcibp = device_get_parent(pcip);
if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
/*
* Do not stop the loop even if the target
* device is PCIe, because it is possible (but
* unlikely) to have a PCI->PCIe bridge
* somewhere in the hierarchy.
*/
l = pcib;
} else {
/*
* Device is not PCIe, it cannot be seen as a
* requester by IOMMU unit. Check whether the
* bridge is PCIe.
*/
bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
&cap_offset) == 0;
requester = pcib;
/*
* Check for a buggy PCIe/PCI bridge that
* doesn't report the express capability. If
* the bridge above it is express but isn't a
* PCI bridge, then we know pcib is actually a
* PCIe/PCI bridge.
*/
if (!bridge_is_pcie && pci_find_cap(pcibp,
PCIY_EXPRESS, &cap_offset) == 0) {
pcie_flags = pci_read_config(pcibp,
cap_offset + PCIER_FLAGS, 2);
if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
PCIEM_TYPE_PCI_BRIDGE)
bridge_is_pcie = true;
}
if (bridge_is_pcie) {
/*
* The current device is not PCIe, but
* the bridge above it is. This is a
* PCIe->PCI bridge. Assume that the
* requester ID will be the secondary
* bus number with slot and function
* set to zero.
*
* XXX: Doesn't handle the case where
* the bridge is PCIe->PCI-X, and the
* bridge will only take ownership of
* requests in some cases. We should
* provide context entries with the
* same page tables for taken and
* non-taken transactions.
*/
*rid = PCI_RID(pci_get_bus(l), 0, 0);
l = pcibp;
} else {
/*
* Neither the device nor the bridge
* above it are PCIe. This is a
* conventional PCI->PCI bridge, which
* will use the bridge's BSF as the
* requester ID.
*/
*rid = pci_get_rid(pcib);
l = pcib;
}
}
}
return (requester);
}
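
For the PCIe->PCI bridge case above, the requester ID is synthesized from the bridge's secondary bus with slot and function forced to zero. A small standalone illustration of the packing follows; the bus[15:8]/slot[7:3]/function[2:0] split is the standard PCIe requester-ID layout, and EXAMPLE_RID is a hypothetical stand-in for the kernel's PCI_RID() macro used above.

#include <stdint.h>
#include <stdio.h>

/* Standard PCIe requester-ID layout: bus[15:8], slot[7:3], function[2:0]. */
#define	EXAMPLE_RID(bus, slot, func) \
	((uint16_t)(((bus) << 8) | ((slot) << 3) | (func)))

int
main(void)
{
	/* A PCIe device at 3:4.1 is seen by the IOMMU under its own RID. */
	printf("own RID:          0x%04x\n", EXAMPLE_RID(3, 4, 1));
	/*
	 * The same device behind a PCIe->PCI bridge whose secondary bus is 3
	 * is seen under the bridge-owned RID with slot/function zeroed,
	 * mirroring *rid = PCI_RID(pci_get_bus(l), 0, 0) above.
	 */
	printf("bridge-owned RID: 0x%04x\n", EXAMPLE_RID(3, 0, 0));
	return (0);
}
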
struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
device_t requester;
struct iommu_ctx *ctx;
bool disabled;
uint16_t rid;
requester = iommu_get_requester(dev, &rid);
/*
* If the user requested the IOMMU disabled for the device, we
* cannot disable the IOMMU unit, due to possibility of other
* devices on the same IOMMU unit still requiring translation.
* Instead provide the identity mapping for the device
* context.
*/
disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
pci_get_bus(requester), pci_get_slot(requester),
pci_get_function(requester));
ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
if (ctx == NULL)
return (NULL);
if (disabled) {
/*
* Keep the first reference on context, release the
* later refs.
*/
IOMMU_LOCK(unit);
if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
ctx->flags |= IOMMU_CTX_DISABLED;
IOMMU_UNLOCK(unit);
} else {
iommu_free_ctx_locked(unit, ctx);
}
ctx = NULL;
}
return (ctx);
}
struct iommu_ctx *
iommu_get_dev_ctx(device_t dev)
{
struct iommu_unit *unit;
unit = iommu_find(dev, bootverbose);
/* Not in scope of any IOMMU ? */
if (unit == NULL)
return (NULL);
if (!unit->dma_enabled)
return (NULL);
-#if defined(__amd64__) || defined(__i386__)
- dmar_quirks_pre_use(unit);
- dmar_instantiate_rmrr_ctxs(unit);
-#endif
-
+ iommu_unit_pre_instantiate_ctx(unit);
return (iommu_instantiate_ctx(unit, dev, false));
}
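
The new MI hook called above replaces the amd64/i386-only block removed from iommu_get_dev_ctx(). On arm64 the hook is the empty stub added in the earlier hunk; the matching x86/DMAR implementation is not visible in this excerpt, but it is presumably a thin wrapper around the two calls the removed #ifdef block used to make. A hedged sketch of that counterpart, assuming it takes the same struct iommu_unit * the removed calls operated on:

void
iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
{
	/* The two calls dropped from iommu_get_dev_ctx() are assumed here. */
	dmar_quirks_pre_use(unit);
	dmar_instantiate_rmrr_ctxs(unit);
}
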
bus_dma_tag_t
iommu_get_dma_tag(device_t dev, device_t child)
{
struct iommu_ctx *ctx;
bus_dma_tag_t res;
ctx = iommu_get_dev_ctx(child);
if (ctx == NULL)
return (NULL);
res = (bus_dma_tag_t)ctx->tag;
return (res);
}
bool
bus_dma_iommu_set_buswide(device_t dev)
{
struct iommu_unit *unit;
device_t parent;
u_int busno, slot, func;
parent = device_get_parent(dev);
if (device_get_devclass(parent) != devclass_find("pci"))
return (false);
unit = iommu_find(dev, bootverbose);
if (unit == NULL)
return (false);
busno = pci_get_bus(dev);
slot = pci_get_slot(dev);
func = pci_get_function(dev);
if (slot != 0 || func != 0) {
if (bootverbose) {
device_printf(dev,
"iommu%d pci%d:%d:%d requested buswide busdma\n",
unit->unit, busno, slot, func);
}
return (false);
}
iommu_set_buswide_ctx(unit, busno);
return (true);
}
void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{
MPASS(busno <= PCI_BUSMAX);
IOMMU_LOCK(unit);
unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
1 << (busno % (NBBY * sizeof(uint32_t)));
IOMMU_UNLOCK(unit);
}
bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{
MPASS(busno <= PCI_BUSMAX);
return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
(1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}
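
The two helpers above treat buswide_ctxs as a flat bitmap with one bit per bus number, 32 buses per uint32_t word. A minimal userland sketch of the same index/mask arithmetic; only the NBBY-style math mirrors the code above, while CHAR_BIT stands in for the kernel's NBBY, EX_BUSMAX for PCI_BUSMAX, and all names are illustrative.

#include <limits.h>	/* CHAR_BIT stands in for the kernel's NBBY */
#include <stdbool.h>
#include <stdint.h>

#define	EX_BUSMAX	255
static uint32_t ex_buswide[(EX_BUSMAX + 1) / CHAR_BIT / sizeof(uint32_t)];

static void
ex_set_buswide(unsigned int busno)
{
	/* Word index: busno / 32; bit within that word: busno % 32. */
	ex_buswide[busno / CHAR_BIT / sizeof(uint32_t)] |=
	    1U << (busno % (CHAR_BIT * sizeof(uint32_t)));
}

static bool
ex_is_buswide(unsigned int busno)
{
	return ((ex_buswide[busno / CHAR_BIT / sizeof(uint32_t)] &
	    (1U << (busno % (CHAR_BIT * sizeof(uint32_t))))) != 0);
}

int
main(void)
{
	ex_set_buswide(0x25);		/* bus 37: word 1, bit 5 */
	return (ex_is_buswide(0x25) ? 0 : 1);
}
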
static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");
static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
struct bus_dmamap_iommu *map);
static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
void *lockfuncarg, bus_dma_tag_t *dmat)
{
struct bus_dma_tag_iommu *newtag, *oldtag;
int error;
*dmat = NULL;
error = common_bus_dma_tag_create(parent != NULL ?
&((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
boundary, lowaddr, highaddr, filter, filterarg, maxsize,
nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
sizeof(struct bus_dma_tag_iommu), (void **)&newtag);
if (error != 0)
goto out;
oldtag = (struct bus_dma_tag_iommu *)parent;
newtag->common.impl = &bus_dma_iommu_impl;
newtag->ctx = oldtag->ctx;
newtag->owner = oldtag->owner;
*dmat = (bus_dma_tag_t)newtag;
out:
CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
__func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
error);
return (error);
}
static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{
return (0);
}
static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
struct bus_dma_tag_iommu *dmat, *parent;
struct bus_dma_tag_iommu *dmat_copy __unused;
int error;
error = 0;
dmat_copy = dmat = (struct bus_dma_tag_iommu *)dmat1;
if (dmat != NULL) {
if (dmat->map_count != 0) {
error = EBUSY;
goto out;
}
while (dmat != NULL) {
parent = (struct bus_dma_tag_iommu *)dmat->common.parent;
if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
1) {
if (dmat == dmat->ctx->tag)
iommu_free_ctx(dmat->ctx);
free(dmat->segments, M_IOMMU_DMAMAP);
free(dmat, M_DEVBUF);
dmat = parent;
} else
dmat = NULL;
}
}
out:
CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
return (error);
}
static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{
return (false);
}
static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
tag = (struct bus_dma_tag_iommu *)dmat;
map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
if (map == NULL) {
*mapp = NULL;
return (ENOMEM);
}
if (tag->segments == NULL) {
tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
tag->common.nsegments, M_IOMMU_DMAMAP,
DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
if (tag->segments == NULL) {
free(map, M_IOMMU_DMAMAP);
*mapp = NULL;
return (ENOMEM);
}
}
IOMMU_DMAMAP_INIT(map);
TAILQ_INIT(&map->map_entries);
map->tag = tag;
map->locked = true;
map->cansleep = false;
tag->map_count++;
*mapp = (bus_dmamap_t)map;
return (0);
}
static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
if (map != NULL) {
IOMMU_DMAMAP_LOCK(map);
if (!TAILQ_EMPTY(&map->map_entries)) {
IOMMU_DMAMAP_UNLOCK(map);
return (EBUSY);
}
IOMMU_DMAMAP_DESTROY(map);
free(map, M_IOMMU_DMAMAP);
}
tag->map_count--;
return (0);
}
static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
bus_dmamap_t *mapp)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
int error, mflags;
vm_memattr_t attr;
error = iommu_bus_dmamap_create(dmat, flags, mapp);
if (error != 0)
return (error);
mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
VM_MEMATTR_DEFAULT;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)*mapp;
if (tag->common.maxsize < PAGE_SIZE &&
tag->common.alignment <= tag->common.maxsize &&
attr == VM_MEMATTR_DEFAULT) {
*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
DOMAINSET_PREF(tag->common.domain), mflags);
map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
} else {
*vaddr = kmem_alloc_attr_domainset(
DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
mflags, 0ul, BUS_SPACE_MAXADDR, attr);
map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
}
if (*vaddr == NULL) {
iommu_bus_dmamap_destroy(dmat, *mapp);
*mapp = NULL;
return (ENOMEM);
}
return (0);
}
static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
free(vaddr, M_DEVBUF);
map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
} else {
KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
("iommu_bus_dmamem_free for non alloced map %p", map));
kmem_free(vaddr, tag->common.maxsize);
map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
}
iommu_bus_dmamap_destroy(dmat, map1);
}
static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
int flags, bus_dma_segment_t *segs, int *segp,
struct iommu_map_entries_tailq *entries)
{
struct iommu_ctx *ctx;
struct iommu_domain *domain;
struct iommu_map_entry *entry;
bus_size_t buflen1;
int error, e_flags, idx, gas_flags, seg;
KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
if (segs == NULL)
segs = tag->segments;
ctx = tag->ctx;
domain = ctx->domain;
e_flags = IOMMU_MAP_ENTRY_READ |
((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
seg = *segp;
error = 0;
idx = 0;
while (buflen > 0) {
seg++;
if (seg >= tag->common.nsegments) {
error = EFBIG;
break;
}
buflen1 = buflen > tag->common.maxsegsz ?
tag->common.maxsegsz : buflen;
/*
* (Too) optimistically allow split if there is more
* than one segment left.
*/
gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
if (seg + 1 < tag->common.nsegments)
gas_flags |= IOMMU_MF_CANSPLIT;
error = iommu_gas_map(domain, &tag->common, buflen1,
offset, e_flags, gas_flags, ma + idx, &entry);
if (error != 0)
break;
/* Update buflen1 in case buffer split. */
if (buflen1 > entry->end - entry->start - offset)
buflen1 = entry->end - entry->start - offset;
KASSERT(vm_addr_align_ok(entry->start + offset,
tag->common.alignment),
("alignment failed: ctx %p start 0x%jx offset %x "
"align 0x%jx", ctx, (uintmax_t)entry->start, offset,
(uintmax_t)tag->common.alignment));
KASSERT(entry->end <= tag->common.lowaddr ||
entry->start >= tag->common.highaddr,
("entry placement failed: ctx %p start 0x%jx end 0x%jx "
"lowaddr 0x%jx highaddr 0x%jx", ctx,
(uintmax_t)entry->start, (uintmax_t)entry->end,
(uintmax_t)tag->common.lowaddr,
(uintmax_t)tag->common.highaddr));
KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
tag->common.boundary),
("boundary failed: ctx %p start 0x%jx end 0x%jx "
"boundary 0x%jx", ctx, (uintmax_t)entry->start,
(uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
KASSERT(buflen1 <= tag->common.maxsegsz,
("segment too large: ctx %p start 0x%jx end 0x%jx "
"buflen1 0x%jx maxsegsz 0x%jx", ctx,
(uintmax_t)entry->start, (uintmax_t)entry->end,
(uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));
KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
TAILQ_INSERT_TAIL(entries, entry, dmamap_link);
segs[seg].ds_addr = entry->start + offset;
segs[seg].ds_len = buflen1;
idx += OFF_TO_IDX(offset + buflen1);
offset += buflen1;
offset &= IOMMU_PAGE_MASK;
buflen -= buflen1;
}
if (error == 0)
*segp = seg;
return (error);
}
static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
int flags, bus_dma_segment_t *segs, int *segp)
{
struct iommu_ctx *ctx;
struct iommu_domain *domain;
struct iommu_map_entries_tailq entries;
int error;
ctx = tag->ctx;
domain = ctx->domain;
atomic_add_long(&ctx->loads, 1);
TAILQ_INIT(&entries);
error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
buflen, flags, segs, segp, &entries);
if (error == 0) {
IOMMU_DMAMAP_LOCK(map);
TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
IOMMU_DMAMAP_UNLOCK(map);
} else if (!TAILQ_EMPTY(&entries)) {
/*
* The busdma interface does not allow us to report
* partial buffer load, so unfortunately we have to
* revert all work done.
*/
IOMMU_DOMAIN_LOCK(domain);
TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
IOMMU_DOMAIN_UNLOCK(domain);
taskqueue_enqueue(domain->iommu->delayed_taskqueue,
&domain->unload_task);
}
if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
!map->cansleep)
error = EINPROGRESS;
if (error == EINPROGRESS)
iommu_bus_schedule_dmamap(domain->iommu, map);
return (error);
}
static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
bus_dma_segment_t *segs, int *segp)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
flags, segs, segp));
}
static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
int *segp)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
vm_page_t *ma, fma;
vm_paddr_t pstart, pend, paddr;
int error, i, ma_cnt, mflags, offset;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
pstart = trunc_page(buf);
pend = round_page(buf + buflen);
offset = buf & PAGE_MASK;
ma_cnt = OFF_TO_IDX(pend - pstart);
mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
if (ma == NULL)
return (ENOMEM);
fma = NULL;
for (i = 0; i < ma_cnt; i++) {
paddr = pstart + ptoa(i);
ma[i] = PHYS_TO_VM_PAGE(paddr);
if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
/*
* If PHYS_TO_VM_PAGE() returned NULL or the
* vm_page was not initialized we'll use a
* fake page.
*/
if (fma == NULL) {
fma = malloc(sizeof(struct vm_page) * ma_cnt,
M_DEVBUF, M_ZERO | mflags);
if (fma == NULL) {
free(ma, M_DEVBUF);
return (ENOMEM);
}
}
vm_page_initfake(&fma[i], pstart + ptoa(i),
VM_MEMATTR_DEFAULT);
ma[i] = &fma[i];
}
}
error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
flags, segs, segp);
free(fma, M_DEVBUF);
free(ma, M_DEVBUF);
return (error);
}
static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
int *segp)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
vm_page_t *ma, fma;
vm_paddr_t pstart, pend, paddr;
int error, i, ma_cnt, mflags, offset;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
pstart = trunc_page((vm_offset_t)buf);
pend = round_page((vm_offset_t)buf + buflen);
offset = (vm_offset_t)buf & PAGE_MASK;
ma_cnt = OFF_TO_IDX(pend - pstart);
mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
if (ma == NULL)
return (ENOMEM);
fma = NULL;
for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
if (pmap == kernel_pmap)
paddr = pmap_kextract(pstart);
else
paddr = pmap_extract(pmap, pstart);
ma[i] = PHYS_TO_VM_PAGE(paddr);
if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
/*
* If PHYS_TO_VM_PAGE() returned NULL or the
* vm_page was not initialized we'll use a
* fake page.
*/
if (fma == NULL) {
fma = malloc(sizeof(struct vm_page) * ma_cnt,
M_DEVBUF, M_ZERO | mflags);
if (fma == NULL) {
free(ma, M_DEVBUF);
return (ENOMEM);
}
}
vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
ma[i] = &fma[i];
}
}
error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
flags, segs, segp);
free(ma, M_DEVBUF);
free(fma, M_DEVBUF);
return (error);
}
static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
struct bus_dmamap_iommu *map;
if (map1 == NULL)
return;
map = (struct bus_dmamap_iommu *)map1;
map->mem = *mem;
map->tag = (struct bus_dma_tag_iommu *)dmat;
map->callback = callback;
map->callback_arg = callback_arg;
}
static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
bus_dma_segment_t *segs, int nsegs, int error)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
if (!map->locked) {
KASSERT(map->cansleep,
("map not locked and not sleepable context %p", map));
/*
* We are called from the delayed context. Relock the
* driver.
*/
(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
map->locked = true;
}
if (segs == NULL)
segs = tag->segments;
return (segs);
}
/*
* The limitations of the busdma KPI force the iommu to perform the actual
* unload, consisting of the unmapping of the map entries page tables,
* from the delayed context on i386, since page table page mapping
* might require a sleep to be successful. The unfortunate
* consequence is that the DMA requests can be served some time after
* the bus_dmamap_unload() call returned.
*
* On amd64, we assume that sf allocation cannot fail.
*/
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
struct iommu_ctx *ctx;
struct iommu_domain *domain;
struct iommu_map_entries_tailq entries;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
ctx = tag->ctx;
domain = ctx->domain;
atomic_add_long(&ctx->unloads, 1);
TAILQ_INIT(&entries);
IOMMU_DMAMAP_LOCK(map);
TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
IOMMU_DMAMAP_UNLOCK(map);
#if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
IOMMU_DOMAIN_LOCK(domain);
TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
IOMMU_DOMAIN_UNLOCK(domain);
taskqueue_enqueue(domain->iommu->delayed_taskqueue,
&domain->unload_task);
#else
THREAD_NO_SLEEPING();
iommu_domain_unload(domain, &entries, false);
THREAD_SLEEPING_OK();
KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}
static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1,
bus_dmasync_op_t op)
{
struct bus_dmamap_iommu *map __unused;
map = (struct bus_dmamap_iommu *)map1;
kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
}
#ifdef KMSAN
static void
iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem)
{
struct bus_dmamap_iommu *map;
map = (struct bus_dmamap_iommu *)map1;
if (map == NULL)
return;
memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc));
}
#endif
struct bus_dma_impl bus_dma_iommu_impl = {
.tag_create = iommu_bus_dma_tag_create,
.tag_destroy = iommu_bus_dma_tag_destroy,
.tag_set_domain = iommu_bus_dma_tag_set_domain,
.id_mapped = iommu_bus_dma_id_mapped,
.map_create = iommu_bus_dmamap_create,
.map_destroy = iommu_bus_dmamap_destroy,
.mem_alloc = iommu_bus_dmamem_alloc,
.mem_free = iommu_bus_dmamem_free,
.load_phys = iommu_bus_dmamap_load_phys,
.load_buffer = iommu_bus_dmamap_load_buffer,
.load_ma = iommu_bus_dmamap_load_ma,
.map_waitok = iommu_bus_dmamap_waitok,
.map_complete = iommu_bus_dmamap_complete,
.map_unload = iommu_bus_dmamap_unload,
.map_sync = iommu_bus_dmamap_sync,
#ifdef KMSAN
.load_kmsan = iommu_bus_dmamap_load_kmsan,
#endif
};
static void
iommu_bus_task_dmamap(void *arg, int pending)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
struct iommu_unit *unit;
unit = arg;
IOMMU_LOCK(unit);
while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
IOMMU_UNLOCK(unit);
tag = map->tag;
map->cansleep = true;
map->locked = false;
bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
&map->mem, map->callback, map->callback_arg,
BUS_DMA_WAITOK);
map->cansleep = false;
if (map->locked) {
(tag->common.lockfunc)(tag->common.lockfuncarg,
BUS_DMA_UNLOCK);
} else
map->locked = true;
map->cansleep = false;
IOMMU_LOCK(unit);
}
IOMMU_UNLOCK(unit);
}
static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{
map->locked = false;
IOMMU_LOCK(unit);
TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
IOMMU_UNLOCK(unit);
taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}
int
iommu_init_busdma(struct iommu_unit *unit)
{
int error;
unit->dma_enabled = 1;
error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
if (error == 0) /* compatibility */
TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
TAILQ_INIT(&unit->delayed_maps);
TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
taskqueue_thread_enqueue, &unit->delayed_taskqueue);
taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
"iommu%d busdma taskq", unit->unit);
return (0);
}
void
iommu_fini_busdma(struct iommu_unit *unit)
{
if (unit->delayed_taskqueue == NULL)
return;
taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
taskqueue_free(unit->delayed_taskqueue);
unit->delayed_taskqueue = NULL;
}
int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
vm_paddr_t start, vm_size_t length, int flags)
{
struct bus_dma_tag_common *tc;
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
struct iommu_ctx *ctx;
struct iommu_domain *domain;
struct iommu_map_entry *entry;
vm_page_t *ma;
vm_size_t i;
int error;
bool waitok;
MPASS((start & PAGE_MASK) == 0);
MPASS((length & PAGE_MASK) == 0);
MPASS(length > 0);
MPASS(start + length >= start);
MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);
tc = (struct bus_dma_tag_common *)dmat;
if (tc->impl != &bus_dma_iommu_impl)
return (0);
tag = (struct bus_dma_tag_iommu *)dmat;
ctx = tag->ctx;
domain = ctx->domain;
map = (struct bus_dmamap_iommu *)map1;
waitok = (flags & BUS_DMA_NOWAIT) != 0;
entry = iommu_gas_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK);
if (entry == NULL)
return (ENOMEM);
entry->start = start;
entry->end = start + length;
ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
M_WAITOK : M_NOWAIT);
if (ma == NULL) {
iommu_gas_free_entry(entry);
return (ENOMEM);
}
for (i = 0; i < atop(length); i++) {
ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
VM_MEMATTR_DEFAULT);
}
error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE) |
IOMMU_MAP_ENTRY_MAP, waitok ? IOMMU_MF_CANWAIT : 0, ma);
if (error == 0) {
IOMMU_DMAMAP_LOCK(map);
TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
IOMMU_DMAMAP_UNLOCK(map);
} else {
iommu_gas_free_entry(entry);
}
for (i = 0; i < atop(length); i++)
vm_page_putfake(ma[i]);
free(ma, M_TEMP);
return (error);
}
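
bus_dma_iommu_load_ident() above pre-installs an identity (bus address == physical address) range into a map, subject to the MPASS checks: page-aligned start and length, non-zero length, and only BUS_DMA_NOWAIT/BUS_DMA_NOWRITE in flags. A hedged usage sketch from a hypothetical driver; the tag and map are assumed to have been created against an IOMMU-backed parent tag, and the address and size are made up for illustration.

/*
 * Hypothetical caller: give the device read-only identity access to a
 * 2 MB, page-aligned region at a fixed physical address.
 */
static int
example_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map)
{
	vm_paddr_t start = 0x80000000ul;	/* page-aligned physical base */
	vm_size_t length = 2 * 1024 * 1024;	/* page-aligned length */

	return (bus_dma_iommu_load_ident(dmat, map, start, length,
	    BUS_DMA_NOWAIT | BUS_DMA_NOWRITE));
}
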
static void
iommu_domain_unload_task(void *arg, int pending)
{
struct iommu_domain *domain;
struct iommu_map_entries_tailq entries;
domain = arg;
TAILQ_INIT(&entries);
for (;;) {
IOMMU_DOMAIN_LOCK(domain);
TAILQ_SWAP(&domain->unload_entries, &entries,
iommu_map_entry, dmamap_link);
IOMMU_DOMAIN_UNLOCK(domain);
if (TAILQ_EMPTY(&entries))
break;
iommu_domain_unload(domain, &entries, true);
}
}
void
iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
const struct iommu_domain_map_ops *ops)
{
domain->ops = ops;
domain->iommu = unit;
TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
RB_INIT(&domain->rb_root);
TAILQ_INIT(&domain->unload_entries);
mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
}
void
iommu_domain_fini(struct iommu_domain *domain)
{
mtx_destroy(&domain->lock);
}
diff --git a/sys/dev/iommu/iommu.h b/sys/dev/iommu/iommu.h
index 157f4c62423f..9845b09e8732 100644
--- a/sys/dev/iommu/iommu.h
+++ b/sys/dev/iommu/iommu.h
@@ -1,202 +1,203 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _DEV_IOMMU_IOMMU_H_
#define _DEV_IOMMU_IOMMU_H_
#include
struct bus_dma_tag_common;
struct iommu_map_entry;
TAILQ_HEAD(iommu_map_entries_tailq, iommu_map_entry);
RB_HEAD(iommu_gas_entries_tree, iommu_map_entry);
RB_PROTOTYPE(iommu_gas_entries_tree, iommu_map_entry, rb_entry,
iommu_gas_cmp_entries);
struct iommu_qi_genseq {
u_int gen;
uint32_t seq;
};
struct iommu_map_entry {
iommu_gaddr_t start;
iommu_gaddr_t end;
iommu_gaddr_t first; /* Least start in subtree */
iommu_gaddr_t last; /* Greatest end in subtree */
iommu_gaddr_t free_down; /* Max free space below the
current R/B tree node */
u_int flags;
union {
TAILQ_ENTRY(iommu_map_entry) dmamap_link; /* DMA map entries */
struct iommu_map_entry *tlb_flush_next;
};
RB_ENTRY(iommu_map_entry) rb_entry; /* Links for domain entries */
struct iommu_domain *domain;
struct iommu_qi_genseq gseq;
};
struct iommu_unit {
struct mtx lock;
device_t dev;
int unit;
int dma_enabled;
/* Busdma delayed map load */
struct task dmamap_load_task;
TAILQ_HEAD(, bus_dmamap_iommu) delayed_maps;
struct taskqueue *delayed_taskqueue;
/*
* Bitmap of buses for which context must ignore slot:func,
* duplicating the page table pointer into all context table
* entries. This is a client-controlled quirk to support some
* NTBs.
*/
uint32_t buswide_ctxs[(PCI_BUSMAX + 1) / NBBY / sizeof(uint32_t)];
};
struct iommu_domain_map_ops {
int (*map)(struct iommu_domain *domain, iommu_gaddr_t base,
iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags);
int (*unmap)(struct iommu_domain *domain, iommu_gaddr_t base,
iommu_gaddr_t size, int flags);
};
/*
* Locking annotations:
* (u) - Protected by iommu unit lock
* (d) - Protected by domain lock
* (c) - Immutable after initialization
*/
struct iommu_domain {
struct iommu_unit *iommu; /* (c) */
const struct iommu_domain_map_ops *ops;
struct mtx lock; /* (c) */
struct task unload_task; /* (c) */
u_int entries_cnt; /* (d) */
struct iommu_map_entries_tailq unload_entries; /* (d) Entries to
unload */
struct iommu_gas_entries_tree rb_root; /* (d) */
struct iommu_map_entry *start_gap; /* (d) */
iommu_gaddr_t end; /* (c) Highest address + 1 in
the guest AS */
struct iommu_map_entry *first_place, *last_place; /* (d) */
struct iommu_map_entry *msi_entry; /* (d) Arch-specific */
iommu_gaddr_t msi_base; /* (d) Arch-specific */
vm_paddr_t msi_phys; /* (d) Arch-specific */
u_int flags; /* (u) */
};
struct iommu_ctx {
struct iommu_domain *domain; /* (c) */
struct bus_dma_tag_iommu *tag; /* (c) Root tag */
u_long loads; /* atomic updates, for stat only */
u_long unloads; /* same */
u_int flags; /* (u) */
uint16_t rid; /* (c) pci RID */
};
/* struct iommu_ctx flags */
#define IOMMU_CTX_FAULTED 0x0001 /* Fault was reported,
last_fault_rec is valid */
#define IOMMU_CTX_DISABLED 0x0002 /* Device is disabled, the
ephemeral reference is kept
to prevent context destruction */
#define IOMMU_DOMAIN_GAS_INITED 0x0001
#define IOMMU_DOMAIN_PGTBL_INITED 0x0002
#define IOMMU_DOMAIN_IDMAP 0x0010 /* Domain uses identity
page table */
#define IOMMU_DOMAIN_RMRR 0x0020 /* Domain contains RMRR entry,
cannot be turned off */
#define IOMMU_LOCK(unit) mtx_lock(&(unit)->lock)
#define IOMMU_UNLOCK(unit) mtx_unlock(&(unit)->lock)
#define IOMMU_ASSERT_LOCKED(unit) mtx_assert(&(unit)->lock, MA_OWNED)
#define IOMMU_DOMAIN_LOCK(dom) mtx_lock(&(dom)->lock)
#define IOMMU_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->lock)
#define IOMMU_DOMAIN_ASSERT_LOCKED(dom) mtx_assert(&(dom)->lock, MA_OWNED)
void iommu_free_ctx(struct iommu_ctx *ctx);
void iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *ctx);
struct iommu_ctx *iommu_get_ctx(struct iommu_unit *, device_t dev,
uint16_t rid, bool id_mapped, bool rmrr_init);
struct iommu_unit *iommu_find(device_t dev, bool verbose);
void iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep);
void iommu_domain_unload(struct iommu_domain *domain,
struct iommu_map_entries_tailq *entries, bool cansleep);
+void iommu_unit_pre_instantiate_ctx(struct iommu_unit *iommu);
struct iommu_ctx *iommu_instantiate_ctx(struct iommu_unit *iommu,
device_t dev, bool rmrr);
device_t iommu_get_requester(device_t dev, uint16_t *rid);
int iommu_init_busdma(struct iommu_unit *unit);
void iommu_fini_busdma(struct iommu_unit *unit);
void iommu_gas_init_domain(struct iommu_domain *domain);
void iommu_gas_fini_domain(struct iommu_domain *domain);
struct iommu_map_entry *iommu_gas_alloc_entry(struct iommu_domain *domain,
u_int flags);
void iommu_gas_free_entry(struct iommu_map_entry *entry);
void iommu_gas_free_space(struct iommu_map_entry *entry);
void iommu_gas_remove(struct iommu_domain *domain, iommu_gaddr_t start,
iommu_gaddr_t size);
int iommu_gas_map(struct iommu_domain *domain,
const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset,
u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res);
void iommu_gas_free_region(struct iommu_map_entry *entry);
int iommu_gas_map_region(struct iommu_domain *domain,
struct iommu_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma);
int iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start,
iommu_gaddr_t end, struct iommu_map_entry **entry0);
int iommu_gas_reserve_region_extend(struct iommu_domain *domain,
iommu_gaddr_t start, iommu_gaddr_t end);
void iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno);
bool iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno);
void iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
const struct iommu_domain_map_ops *ops);
void iommu_domain_fini(struct iommu_domain *domain);
bool bus_dma_iommu_set_buswide(device_t dev);
int bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map,
vm_paddr_t start, vm_size_t length, int flags);
bus_dma_tag_t iommu_get_dma_tag(device_t dev, device_t child);
struct iommu_ctx *iommu_get_dev_ctx(device_t dev);
struct iommu_domain *iommu_get_ctx_domain(struct iommu_ctx *ctx);
SYSCTL_DECL(_hw_iommu);
#endif /* !_DEV_IOMMU_IOMMU_H_ */
diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c
index 2439ef9e4ef5..05fb49538add 100644
--- a/sys/x86/iommu/intel_drv.c
+++ b/sys/x86/iommu/intel_drv.c
@@ -1,1335 +1,1343 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013-2015 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_acpi.h"
#if defined(__amd64__)
#define DEV_APIC
#else
#include "opt_apic.h"
#endif
#include "opt_ddb.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef DEV_APIC
#include "pcib_if.h"
#include
#include
#include
#endif
#define DMAR_FAULT_IRQ_RID 0
#define DMAR_QI_IRQ_RID 1
#define DMAR_REG_RID 2
static device_t *dmar_devs;
static int dmar_devcnt;
typedef int (*dmar_iter_t)(ACPI_DMAR_HEADER *, void *);
static void
dmar_iterate_tbl(dmar_iter_t iter, void *arg)
{
ACPI_TABLE_DMAR *dmartbl;
ACPI_DMAR_HEADER *dmarh;
char *ptr, *ptrend;
ACPI_STATUS status;
status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl);
if (ACPI_FAILURE(status))
return;
ptr = (char *)dmartbl + sizeof(*dmartbl);
ptrend = (char *)dmartbl + dmartbl->Header.Length;
for (;;) {
if (ptr >= ptrend)
break;
dmarh = (ACPI_DMAR_HEADER *)ptr;
if (dmarh->Length <= 0) {
printf("dmar_identify: corrupted DMAR table, l %d\n",
dmarh->Length);
break;
}
ptr += dmarh->Length;
if (!iter(dmarh, arg))
break;
}
AcpiPutTable((ACPI_TABLE_HEADER *)dmartbl);
}
struct find_iter_args {
int i;
ACPI_DMAR_HARDWARE_UNIT *res;
};
static int
dmar_find_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
{
struct find_iter_args *fia;
if (dmarh->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
return (1);
fia = arg;
if (fia->i == 0) {
fia->res = (ACPI_DMAR_HARDWARE_UNIT *)dmarh;
return (0);
}
fia->i--;
return (1);
}
static ACPI_DMAR_HARDWARE_UNIT *
dmar_find_by_index(int idx)
{
struct find_iter_args fia;
fia.i = idx;
fia.res = NULL;
dmar_iterate_tbl(dmar_find_iter, &fia);
return (fia.res);
}
static int
dmar_count_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
{
if (dmarh->Type == ACPI_DMAR_TYPE_HARDWARE_UNIT)
dmar_devcnt++;
return (1);
}
int dmar_rmrr_enable = 1;
static int dmar_enable = 0;
static void
dmar_identify(driver_t *driver, device_t parent)
{
ACPI_TABLE_DMAR *dmartbl;
ACPI_DMAR_HARDWARE_UNIT *dmarh;
ACPI_STATUS status;
int i, error, haw;
if (acpi_disabled("dmar"))
return;
TUNABLE_INT_FETCH("hw.dmar.enable", &dmar_enable);
if (!dmar_enable)
return;
TUNABLE_INT_FETCH("hw.dmar.rmrr_enable", &dmar_rmrr_enable);
status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl);
if (ACPI_FAILURE(status))
return;
haw = dmartbl->Width + 1;
if ((1ULL << (haw + 1)) > BUS_SPACE_MAXADDR)
iommu_high = BUS_SPACE_MAXADDR;
else
iommu_high = 1ULL << (haw + 1);
if (bootverbose) {
printf("DMAR HAW=%d flags=<%b>\n", dmartbl->Width,
(unsigned)dmartbl->Flags,
"\020\001INTR_REMAP\002X2APIC_OPT_OUT");
}
AcpiPutTable((ACPI_TABLE_HEADER *)dmartbl);
dmar_iterate_tbl(dmar_count_iter, NULL);
if (dmar_devcnt == 0)
return;
dmar_devs = malloc(sizeof(device_t) * dmar_devcnt, M_DEVBUF,
M_WAITOK | M_ZERO);
for (i = 0; i < dmar_devcnt; i++) {
dmarh = dmar_find_by_index(i);
if (dmarh == NULL) {
printf("dmar_identify: cannot find HWUNIT %d\n", i);
continue;
}
dmar_devs[i] = BUS_ADD_CHILD(parent, 1, "dmar", i);
if (dmar_devs[i] == NULL) {
printf("dmar_identify: cannot create instance %d\n", i);
continue;
}
error = bus_set_resource(dmar_devs[i], SYS_RES_MEMORY,
DMAR_REG_RID, dmarh->Address, PAGE_SIZE);
if (error != 0) {
printf(
"dmar%d: unable to alloc register window at 0x%08jx: error %d\n",
i, (uintmax_t)dmarh->Address, error);
device_delete_child(parent, dmar_devs[i]);
dmar_devs[i] = NULL;
}
}
}
static int
dmar_probe(device_t dev)
{
if (acpi_get_handle(dev) != NULL)
return (ENXIO);
device_set_desc(dev, "DMA remap");
return (BUS_PROBE_NOWILDCARD);
}
static void
dmar_release_resources(device_t dev, struct dmar_unit *unit)
{
int i;
iommu_fini_busdma(&unit->iommu);
dmar_fini_irt(unit);
dmar_fini_qi(unit);
dmar_fini_fault_log(unit);
for (i = 0; i < DMAR_INTR_TOTAL; i++)
iommu_release_intr(DMAR2IOMMU(unit), i);
if (unit->regs != NULL) {
bus_deactivate_resource(dev, SYS_RES_MEMORY, unit->reg_rid,
unit->regs);
bus_release_resource(dev, SYS_RES_MEMORY, unit->reg_rid,
unit->regs);
unit->regs = NULL;
}
if (unit->domids != NULL) {
delete_unrhdr(unit->domids);
unit->domids = NULL;
}
if (unit->ctx_obj != NULL) {
vm_object_deallocate(unit->ctx_obj);
unit->ctx_obj = NULL;
}
}
#ifdef DEV_APIC
static int
dmar_remap_intr(device_t dev, device_t child, u_int irq)
{
struct dmar_unit *unit;
struct iommu_msi_data *dmd;
uint64_t msi_addr;
uint32_t msi_data;
int i, error;
unit = device_get_softc(dev);
for (i = 0; i < DMAR_INTR_TOTAL; i++) {
dmd = &unit->x86c.intrs[i];
if (irq == dmd->irq) {
error = PCIB_MAP_MSI(device_get_parent(
device_get_parent(dev)),
dev, irq, &msi_addr, &msi_data);
if (error != 0)
return (error);
DMAR_LOCK(unit);
dmd->msi_data = msi_data;
dmd->msi_addr = msi_addr;
(dmd->disable_intr)(DMAR2IOMMU(unit));
dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data);
dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr);
dmar_write4(unit, dmd->msi_uaddr_reg,
dmd->msi_addr >> 32);
(dmd->enable_intr)(DMAR2IOMMU(unit));
DMAR_UNLOCK(unit);
return (0);
}
}
return (ENOENT);
}
#endif
static void
dmar_print_caps(device_t dev, struct dmar_unit *unit,
ACPI_DMAR_HARDWARE_UNIT *dmaru)
{
uint32_t caphi, ecaphi;
device_printf(dev, "regs@0x%08jx, ver=%d.%d, seg=%d, flags=<%b>\n",
(uintmax_t)dmaru->Address, DMAR_MAJOR_VER(unit->hw_ver),
DMAR_MINOR_VER(unit->hw_ver), dmaru->Segment,
dmaru->Flags, "\020\001INCLUDE_ALL_PCI");
caphi = unit->hw_cap >> 32;
device_printf(dev, "cap=%b,", (u_int)unit->hw_cap,
"\020\004AFL\005WBF\006PLMR\007PHMR\010CM\027ZLR\030ISOCH");
printf("%b, ", caphi, "\020\010PSI\027DWD\030DRD\031FL1GP\034PSI");
printf("ndoms=%d, sagaw=%d, mgaw=%d, fro=%d, nfr=%d, superp=%d",
DMAR_CAP_ND(unit->hw_cap), DMAR_CAP_SAGAW(unit->hw_cap),
DMAR_CAP_MGAW(unit->hw_cap), DMAR_CAP_FRO(unit->hw_cap),
DMAR_CAP_NFR(unit->hw_cap), DMAR_CAP_SPS(unit->hw_cap));
if ((unit->hw_cap & DMAR_CAP_PSI) != 0)
printf(", mamv=%d", DMAR_CAP_MAMV(unit->hw_cap));
printf("\n");
ecaphi = unit->hw_ecap >> 32;
device_printf(dev, "ecap=%b,", (u_int)unit->hw_ecap,
"\020\001C\002QI\003DI\004IR\005EIM\007PT\010SC\031ECS\032MTS"
"\033NEST\034DIS\035PASID\036PRS\037ERS\040SRS");
printf("%b, ", ecaphi, "\020\002NWFS\003EAFS");
printf("mhmw=%d, iro=%d\n", DMAR_ECAP_MHMV(unit->hw_ecap),
DMAR_ECAP_IRO(unit->hw_ecap));
}
static int
dmar_attach(device_t dev)
{
struct dmar_unit *unit;
ACPI_DMAR_HARDWARE_UNIT *dmaru;
struct iommu_msi_data *dmd;
uint64_t timeout;
int disable_pmr;
int i, error;
unit = device_get_softc(dev);
unit->iommu.unit = device_get_unit(dev);
unit->iommu.dev = dev;
dmaru = dmar_find_by_index(unit->iommu.unit);
if (dmaru == NULL)
return (EINVAL);
unit->segment = dmaru->Segment;
unit->base = dmaru->Address;
unit->reg_rid = DMAR_REG_RID;
unit->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
&unit->reg_rid, RF_ACTIVE);
if (unit->regs == NULL) {
device_printf(dev, "cannot allocate register window\n");
dmar_devs[unit->iommu.unit] = NULL;
return (ENOMEM);
}
unit->hw_ver = dmar_read4(unit, DMAR_VER_REG);
unit->hw_cap = dmar_read8(unit, DMAR_CAP_REG);
unit->hw_ecap = dmar_read8(unit, DMAR_ECAP_REG);
if (bootverbose)
dmar_print_caps(dev, unit, dmaru);
dmar_quirks_post_ident(unit);
timeout = dmar_get_timeout();
TUNABLE_UINT64_FETCH("hw.iommu.dmar.timeout", &timeout);
dmar_update_timeout(timeout);
for (i = 0; i < DMAR_INTR_TOTAL; i++)
unit->x86c.intrs[i].irq = -1;
dmd = &unit->x86c.intrs[DMAR_INTR_FAULT];
dmd->name = "fault";
dmd->irq_rid = DMAR_FAULT_IRQ_RID;
dmd->handler = dmar_fault_intr;
dmd->msi_data_reg = DMAR_FEDATA_REG;
dmd->msi_addr_reg = DMAR_FEADDR_REG;
dmd->msi_uaddr_reg = DMAR_FEUADDR_REG;
dmd->enable_intr = dmar_enable_fault_intr;
dmd->disable_intr = dmar_disable_fault_intr;
error = iommu_alloc_irq(DMAR2IOMMU(unit), DMAR_INTR_FAULT);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data);
dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr);
dmar_write4(unit, dmd->msi_uaddr_reg, dmd->msi_addr >> 32);
if (DMAR_HAS_QI(unit)) {
dmd = &unit->x86c.intrs[DMAR_INTR_QI];
dmd->name = "qi";
dmd->irq_rid = DMAR_QI_IRQ_RID;
dmd->handler = dmar_qi_intr;
dmd->msi_data_reg = DMAR_IEDATA_REG;
dmd->msi_addr_reg = DMAR_IEADDR_REG;
dmd->msi_uaddr_reg = DMAR_IEUADDR_REG;
dmd->enable_intr = dmar_enable_qi_intr;
dmd->disable_intr = dmar_disable_qi_intr;
error = iommu_alloc_irq(DMAR2IOMMU(unit), DMAR_INTR_QI);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data);
dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr);
dmar_write4(unit, dmd->msi_uaddr_reg, dmd->msi_addr >> 32);
}
mtx_init(&unit->iommu.lock, "dmarhw", NULL, MTX_DEF);
unit->domids = new_unrhdr(0, dmar_nd2mask(DMAR_CAP_ND(unit->hw_cap)),
&unit->iommu.lock);
LIST_INIT(&unit->domains);
/*
* 9.2 "Context Entry":
* When Caching Mode (CM) field is reported as Set, the
* domain-id value of zero is architecturally reserved.
* Software must not use domain-id value of zero
* when CM is Set.
*/
if ((unit->hw_cap & DMAR_CAP_CM) != 0)
alloc_unr_specific(unit->domids, 0);
unit->ctx_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(1 +
DMAR_CTX_CNT), 0, 0, NULL);
/*
* Allocate and load the root entry table pointer. Enable the
* address translation after the required invalidations are
* done.
*/
iommu_pgalloc(unit->ctx_obj, 0, IOMMU_PGF_WAITOK | IOMMU_PGF_ZERO);
DMAR_LOCK(unit);
error = dmar_load_root_entry_ptr(unit);
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
error = dmar_inv_ctx_glob(unit);
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
if ((unit->hw_ecap & DMAR_ECAP_DI) != 0) {
error = dmar_inv_iotlb_glob(unit);
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
}
DMAR_UNLOCK(unit);
error = dmar_init_fault_log(unit);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
error = dmar_init_qi(unit);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
error = dmar_init_irt(unit);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
disable_pmr = 0;
TUNABLE_INT_FETCH("hw.dmar.pmr.disable", &disable_pmr);
if (disable_pmr) {
error = dmar_disable_protected_regions(unit);
if (error != 0)
device_printf(dev,
"Failed to disable protected regions\n");
}
error = iommu_init_busdma(&unit->iommu);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
#ifdef NOTYET
DMAR_LOCK(unit);
error = dmar_enable_translation(unit);
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
DMAR_UNLOCK(unit);
#endif
return (0);
}
static int
dmar_detach(device_t dev)
{
return (EBUSY);
}
static int
dmar_suspend(device_t dev)
{
return (0);
}
static int
dmar_resume(device_t dev)
{
/* XXXKIB */
return (0);
}
static device_method_t dmar_methods[] = {
DEVMETHOD(device_identify, dmar_identify),
DEVMETHOD(device_probe, dmar_probe),
DEVMETHOD(device_attach, dmar_attach),
DEVMETHOD(device_detach, dmar_detach),
DEVMETHOD(device_suspend, dmar_suspend),
DEVMETHOD(device_resume, dmar_resume),
#ifdef DEV_APIC
DEVMETHOD(bus_remap_intr, dmar_remap_intr),
#endif
DEVMETHOD_END
};
static driver_t dmar_driver = {
"dmar",
dmar_methods,
sizeof(struct dmar_unit),
};
DRIVER_MODULE(dmar, acpi, dmar_driver, 0, 0);
MODULE_DEPEND(dmar, acpi, 1, 1, 1);
static void
dmar_print_path(int busno, int depth, const ACPI_DMAR_PCI_PATH *path)
{
int i;
printf("[%d, ", busno);
for (i = 0; i < depth; i++) {
if (i != 0)
printf(", ");
printf("(%d, %d)", path[i].Device, path[i].Function);
}
printf("]");
}
int
dmar_dev_depth(device_t child)
{
devclass_t pci_class;
device_t bus, pcib;
int depth;
pci_class = devclass_find("pci");
for (depth = 1; ; depth++) {
bus = device_get_parent(child);
pcib = device_get_parent(bus);
if (device_get_devclass(device_get_parent(pcib)) !=
pci_class)
return (depth);
child = pcib;
}
}
void
dmar_dev_path(device_t child, int *busno, void *path1, int depth)
{
devclass_t pci_class;
device_t bus, pcib;
ACPI_DMAR_PCI_PATH *path;
pci_class = devclass_find("pci");
path = path1;
for (depth--; depth != -1; depth--) {
path[depth].Device = pci_get_slot(child);
path[depth].Function = pci_get_function(child);
bus = device_get_parent(child);
pcib = device_get_parent(bus);
if (device_get_devclass(device_get_parent(pcib)) !=
pci_class) {
/* reached a host bridge */
*busno = pcib_get_bus(bus);
return;
}
child = pcib;
}
panic("wrong depth");
}
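/*
 * Illustration (hypothetical topology, not taken from any table): a
 * device at pci0:2:0:0 sitting behind a root port at pci0:0:28:0 has
 * dmar_dev_depth() == 2, and dmar_dev_path() returns busno 0 with
 * path [(28, 0), (0, 0)], i.e. (device, function) pairs ordered from
 * the host bridge down to the device, which matches the ACPI DMAR
 * device scope path encoding.
 */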
static int
dmar_match_pathes(int busno1, const ACPI_DMAR_PCI_PATH *path1, int depth1,
int busno2, const ACPI_DMAR_PCI_PATH *path2, int depth2,
enum AcpiDmarScopeType scope_type)
{
int i, depth;
if (busno1 != busno2)
return (0);
if (scope_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && depth1 != depth2)
return (0);
depth = depth1;
if (depth2 < depth)
depth = depth2;
for (i = 0; i < depth; i++) {
if (path1[i].Device != path2[i].Device ||
path1[i].Function != path2[i].Function)
return (0);
}
return (1);
}
static int
dmar_match_devscope(ACPI_DMAR_DEVICE_SCOPE *devscope, int dev_busno,
const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len)
{
ACPI_DMAR_PCI_PATH *path;
int path_len;
if (devscope->Length < sizeof(*devscope)) {
printf("dmar_match_devscope: corrupted DMAR table, dl %d\n",
devscope->Length);
return (-1);
}
if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
return (0);
path_len = devscope->Length - sizeof(*devscope);
if (path_len % 2 != 0) {
printf("dmar_match_devscope: corrupted DMAR table, dl %d\n",
devscope->Length);
return (-1);
}
path_len /= 2;
path = (ACPI_DMAR_PCI_PATH *)(devscope + 1);
if (path_len == 0) {
printf("dmar_match_devscope: corrupted DMAR table, dl %d\n",
devscope->Length);
return (-1);
}
return (dmar_match_pathes(devscope->Bus, path, path_len, dev_busno,
dev_path, dev_path_len, devscope->EntryType));
}
static bool
dmar_match_by_path(struct dmar_unit *unit, int dev_domain, int dev_busno,
const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len, const char **banner)
{
ACPI_DMAR_HARDWARE_UNIT *dmarh;
ACPI_DMAR_DEVICE_SCOPE *devscope;
char *ptr, *ptrend;
int match;
dmarh = dmar_find_by_index(unit->iommu.unit);
if (dmarh == NULL)
return (false);
if (dmarh->Segment != dev_domain)
return (false);
if ((dmarh->Flags & ACPI_DMAR_INCLUDE_ALL) != 0) {
if (banner != NULL)
*banner = "INCLUDE_ALL";
return (true);
}
ptr = (char *)dmarh + sizeof(*dmarh);
ptrend = (char *)dmarh + dmarh->Header.Length;
while (ptr < ptrend) {
devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr;
ptr += devscope->Length;
match = dmar_match_devscope(devscope, dev_busno, dev_path,
dev_path_len);
if (match == -1)
return (false);
if (match == 1) {
if (banner != NULL)
*banner = "specific match";
return (true);
}
}
return (false);
}
static struct dmar_unit *
dmar_find_by_scope(int dev_domain, int dev_busno,
const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len)
{
struct dmar_unit *unit;
int i;
for (i = 0; i < dmar_devcnt; i++) {
if (dmar_devs[i] == NULL)
continue;
unit = device_get_softc(dmar_devs[i]);
if (dmar_match_by_path(unit, dev_domain, dev_busno, dev_path,
dev_path_len, NULL))
return (unit);
}
return (NULL);
}
struct dmar_unit *
dmar_find(device_t dev, bool verbose)
{
struct dmar_unit *unit;
const char *banner;
int i, dev_domain, dev_busno, dev_path_len;
/*
* This function can only handle PCI(e) devices.
*/
if (device_get_devclass(device_get_parent(dev)) !=
devclass_find("pci"))
return (NULL);
dev_domain = pci_get_domain(dev);
dev_path_len = dmar_dev_depth(dev);
ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
banner = "";
for (i = 0; i < dmar_devcnt; i++) {
if (dmar_devs[i] == NULL)
continue;
unit = device_get_softc(dmar_devs[i]);
if (dmar_match_by_path(unit, dev_domain, dev_busno,
dev_path, dev_path_len, &banner))
break;
}
if (i == dmar_devcnt)
return (NULL);
if (verbose) {
device_printf(dev, "pci%d:%d:%d:%d matched dmar%d by %s",
dev_domain, pci_get_bus(dev), pci_get_slot(dev),
pci_get_function(dev), unit->iommu.unit, banner);
printf(" scope path ");
dmar_print_path(dev_busno, dev_path_len, dev_path);
printf("\n");
}
return (unit);
}
static struct dmar_unit *
dmar_find_nonpci(u_int id, u_int entry_type, uint16_t *rid)
{
device_t dmar_dev;
struct dmar_unit *unit;
ACPI_DMAR_HARDWARE_UNIT *dmarh;
ACPI_DMAR_DEVICE_SCOPE *devscope;
ACPI_DMAR_PCI_PATH *path;
char *ptr, *ptrend;
#ifdef DEV_APIC
int error;
#endif
int i;
for (i = 0; i < dmar_devcnt; i++) {
dmar_dev = dmar_devs[i];
if (dmar_dev == NULL)
continue;
unit = (struct dmar_unit *)device_get_softc(dmar_dev);
dmarh = dmar_find_by_index(i);
if (dmarh == NULL)
continue;
ptr = (char *)dmarh + sizeof(*dmarh);
ptrend = (char *)dmarh + dmarh->Header.Length;
for (;;) {
if (ptr >= ptrend)
break;
devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr;
ptr += devscope->Length;
if (devscope->EntryType != entry_type)
continue;
if (devscope->EnumerationId != id)
continue;
#ifdef DEV_APIC
if (entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
error = ioapic_get_rid(id, rid);
/*
* If our IOAPIC has PCI bindings then
* use the PCI device rid.
*/
if (error == 0)
return (unit);
}
#endif
if (devscope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE)
== 2) {
if (rid != NULL) {
path = (ACPI_DMAR_PCI_PATH *)
(devscope + 1);
*rid = PCI_RID(devscope->Bus,
path->Device, path->Function);
}
return (unit);
}
printf(
"dmar_find_nonpci: id %d type %d path length != 2\n",
id, entry_type);
break;
}
}
return (NULL);
}
struct dmar_unit *
dmar_find_hpet(device_t dev, uint16_t *rid)
{
return (dmar_find_nonpci(hpet_get_uid(dev), ACPI_DMAR_SCOPE_TYPE_HPET,
rid));
}
struct dmar_unit *
dmar_find_ioapic(u_int apic_id, uint16_t *rid)
{
return (dmar_find_nonpci(apic_id, ACPI_DMAR_SCOPE_TYPE_IOAPIC, rid));
}
struct rmrr_iter_args {
struct dmar_domain *domain;
int dev_domain;
int dev_busno;
const ACPI_DMAR_PCI_PATH *dev_path;
int dev_path_len;
struct iommu_map_entries_tailq *rmrr_entries;
};
static int
dmar_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
{
struct rmrr_iter_args *ria;
ACPI_DMAR_RESERVED_MEMORY *resmem;
ACPI_DMAR_DEVICE_SCOPE *devscope;
struct iommu_map_entry *entry;
char *ptr, *ptrend;
int match;
if (!dmar_rmrr_enable)
return (1);
if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY)
return (1);
ria = arg;
resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh;
if (resmem->Segment != ria->dev_domain)
return (1);
ptr = (char *)resmem + sizeof(*resmem);
ptrend = (char *)resmem + resmem->Header.Length;
for (;;) {
if (ptr >= ptrend)
break;
devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr;
ptr += devscope->Length;
match = dmar_match_devscope(devscope, ria->dev_busno,
ria->dev_path, ria->dev_path_len);
if (match == 1) {
entry = iommu_gas_alloc_entry(DOM2IODOM(ria->domain),
IOMMU_PGF_WAITOK);
entry->start = resmem->BaseAddress;
/* The RMRR entry end address is inclusive. */
entry->end = resmem->EndAddress;
TAILQ_INSERT_TAIL(ria->rmrr_entries, entry,
dmamap_link);
}
}
return (1);
}
void
dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain, int dev_busno,
const void *dev_path, int dev_path_len,
struct iommu_map_entries_tailq *rmrr_entries)
{
struct rmrr_iter_args ria;
ria.domain = domain;
ria.dev_domain = dev_domain;
ria.dev_busno = dev_busno;
ria.dev_path = (const ACPI_DMAR_PCI_PATH *)dev_path;
ria.dev_path_len = dev_path_len;
ria.rmrr_entries = rmrr_entries;
dmar_iterate_tbl(dmar_rmrr_iter, &ria);
}
struct inst_rmrr_iter_args {
struct dmar_unit *dmar;
};
static device_t
dmar_path_dev(int segment, int path_len, int busno,
const ACPI_DMAR_PCI_PATH *path, uint16_t *rid)
{
device_t dev;
int i;
dev = NULL;
for (i = 0; i < path_len; i++) {
dev = pci_find_dbsf(segment, busno, path->Device,
path->Function);
if (i != path_len - 1) {
busno = pci_cfgregread(segment, busno, path->Device,
path->Function, PCIR_SECBUS_1, 1);
path++;
}
}
*rid = PCI_RID(busno, path->Device, path->Function);
return (dev);
}
static int
dmar_inst_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
{
const ACPI_DMAR_RESERVED_MEMORY *resmem;
const ACPI_DMAR_DEVICE_SCOPE *devscope;
struct inst_rmrr_iter_args *iria;
const char *ptr, *ptrend;
device_t dev;
struct dmar_unit *unit;
int dev_path_len;
uint16_t rid;
iria = arg;
if (!dmar_rmrr_enable)
return (1);
if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY)
return (1);
resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh;
if (resmem->Segment != iria->dmar->segment)
return (1);
ptr = (const char *)resmem + sizeof(*resmem);
ptrend = (const char *)resmem + resmem->Header.Length;
for (;;) {
if (ptr >= ptrend)
break;
devscope = (const ACPI_DMAR_DEVICE_SCOPE *)ptr;
ptr += devscope->Length;
/* XXXKIB bridge */
if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT)
continue;
rid = 0;
dev_path_len = (devscope->Length -
sizeof(ACPI_DMAR_DEVICE_SCOPE)) / 2;
dev = dmar_path_dev(resmem->Segment, dev_path_len,
devscope->Bus,
(const ACPI_DMAR_PCI_PATH *)(devscope + 1), &rid);
if (dev == NULL) {
if (bootverbose) {
printf("dmar%d no dev found for RMRR "
"[%#jx, %#jx] rid %#x scope path ",
iria->dmar->iommu.unit,
(uintmax_t)resmem->BaseAddress,
(uintmax_t)resmem->EndAddress,
rid);
dmar_print_path(devscope->Bus, dev_path_len,
(const ACPI_DMAR_PCI_PATH *)(devscope + 1));
printf("\n");
}
unit = dmar_find_by_scope(resmem->Segment,
devscope->Bus,
(const ACPI_DMAR_PCI_PATH *)(devscope + 1),
dev_path_len);
if (iria->dmar != unit)
continue;
dmar_get_ctx_for_devpath(iria->dmar, rid,
resmem->Segment, devscope->Bus,
(const ACPI_DMAR_PCI_PATH *)(devscope + 1),
dev_path_len, false, true);
} else {
unit = dmar_find(dev, false);
if (iria->dmar != unit)
continue;
iommu_instantiate_ctx(&(iria)->dmar->iommu,
dev, true);
}
}
return (1);
}
/*
* Pre-create all contexts for the DMAR which have RMRR entries.
*/
int
dmar_instantiate_rmrr_ctxs(struct iommu_unit *unit)
{
struct dmar_unit *dmar;
struct inst_rmrr_iter_args iria;
int error;
dmar = IOMMU2DMAR(unit);
if (!dmar_barrier_enter(dmar, DMAR_BARRIER_RMRR))
return (0);
error = 0;
iria.dmar = dmar;
dmar_iterate_tbl(dmar_inst_rmrr_iter, &iria);
DMAR_LOCK(dmar);
if (!LIST_EMPTY(&dmar->domains)) {
KASSERT((dmar->hw_gcmd & DMAR_GCMD_TE) == 0,
("dmar%d: RMRR not handled but translation is already enabled",
dmar->iommu.unit));
error = dmar_disable_protected_regions(dmar);
if (error != 0)
printf("dmar%d: Failed to disable protected regions\n",
dmar->iommu.unit);
error = dmar_enable_translation(dmar);
if (bootverbose) {
if (error == 0) {
printf("dmar%d: enabled translation\n",
dmar->iommu.unit);
} else {
printf("dmar%d: enabling translation failed, "
"error %d\n", dmar->iommu.unit, error);
}
}
}
dmar_barrier_exit(dmar, DMAR_BARRIER_RMRR);
return (error);
}
#ifdef DDB
#include
#include
static void
dmar_print_domain_entry(const struct iommu_map_entry *entry)
{
struct iommu_map_entry *l, *r;
db_printf(
" start %jx end %jx first %jx last %jx free_down %jx flags %x ",
entry->start, entry->end, entry->first, entry->last,
entry->free_down, entry->flags);
db_printf("left ");
l = RB_LEFT(entry, rb_entry);
if (l == NULL)
db_printf("NULL ");
else
db_printf("%jx ", l->start);
db_printf("right ");
r = RB_RIGHT(entry, rb_entry);
if (r == NULL)
db_printf("NULL");
else
db_printf("%jx", r->start);
db_printf("\n");
}
static void
dmar_print_ctx(struct dmar_ctx *ctx)
{
db_printf(
" @%p pci%d:%d:%d refs %d flags %x loads %lu unloads %lu\n",
ctx, pci_get_bus(ctx->context.tag->owner),
pci_get_slot(ctx->context.tag->owner),
pci_get_function(ctx->context.tag->owner), ctx->refs,
ctx->context.flags, ctx->context.loads, ctx->context.unloads);
}
static void
dmar_print_domain(struct dmar_domain *domain, bool show_mappings)
{
struct iommu_domain *iodom;
struct iommu_map_entry *entry;
struct dmar_ctx *ctx;
iodom = DOM2IODOM(domain);
db_printf(
" @%p dom %d mgaw %d agaw %d pglvl %d end %jx refs %d\n"
" ctx_cnt %d flags %x pgobj %p map_ents %u\n",
domain, domain->domain, domain->mgaw, domain->agaw, domain->pglvl,
(uintmax_t)domain->iodom.end, domain->refs, domain->ctx_cnt,
domain->iodom.flags, domain->pgtbl_obj, domain->iodom.entries_cnt);
if (!LIST_EMPTY(&domain->contexts)) {
db_printf(" Contexts:\n");
LIST_FOREACH(ctx, &domain->contexts, link)
dmar_print_ctx(ctx);
}
if (!show_mappings)
return;
db_printf(" mapped:\n");
RB_FOREACH(entry, iommu_gas_entries_tree, &iodom->rb_root) {
dmar_print_domain_entry(entry);
if (db_pager_quit)
break;
}
if (db_pager_quit)
return;
db_printf(" unloading:\n");
TAILQ_FOREACH(entry, &domain->iodom.unload_entries, dmamap_link) {
dmar_print_domain_entry(entry);
if (db_pager_quit)
break;
}
}
DB_SHOW_COMMAND_FLAGS(dmar_domain, db_dmar_print_domain, CS_OWN)
{
struct dmar_unit *unit;
struct dmar_domain *domain;
struct dmar_ctx *ctx;
bool show_mappings, valid;
int pci_domain, bus, device, function, i, t;
db_expr_t radix;
valid = false;
radix = db_radix;
db_radix = 10;
t = db_read_token();
if (t == tSLASH) {
t = db_read_token();
if (t != tIDENT) {
db_printf("Bad modifier\n");
db_radix = radix;
db_skip_to_eol();
return;
}
show_mappings = strchr(db_tok_string, 'm') != NULL;
t = db_read_token();
} else {
show_mappings = false;
}
if (t == tNUMBER) {
pci_domain = db_tok_number;
t = db_read_token();
if (t == tNUMBER) {
bus = db_tok_number;
t = db_read_token();
if (t == tNUMBER) {
device = db_tok_number;
t = db_read_token();
if (t == tNUMBER) {
function = db_tok_number;
valid = true;
}
}
}
}
db_radix = radix;
db_skip_to_eol();
if (!valid) {
db_printf("usage: show dmar_domain [/m] "
" \n");
return;
}
for (i = 0; i < dmar_devcnt; i++) {
unit = device_get_softc(dmar_devs[i]);
LIST_FOREACH(domain, &unit->domains, link) {
LIST_FOREACH(ctx, &domain->contexts, link) {
if (pci_domain == unit->segment &&
bus == pci_get_bus(ctx->context.tag->owner) &&
device ==
pci_get_slot(ctx->context.tag->owner) &&
function ==
pci_get_function(ctx->context.tag->owner)) {
dmar_print_domain(domain,
show_mappings);
goto out;
}
}
}
}
out:;
}
static void
dmar_print_one(int idx, bool show_domains, bool show_mappings)
{
struct dmar_unit *unit;
struct dmar_domain *domain;
int i, frir;
unit = device_get_softc(dmar_devs[idx]);
db_printf("dmar%d at %p, root at 0x%jx, ver 0x%x\n", unit->iommu.unit,
unit, dmar_read8(unit, DMAR_RTADDR_REG),
dmar_read4(unit, DMAR_VER_REG));
db_printf("cap 0x%jx ecap 0x%jx gsts 0x%x fsts 0x%x fectl 0x%x\n",
(uintmax_t)dmar_read8(unit, DMAR_CAP_REG),
(uintmax_t)dmar_read8(unit, DMAR_ECAP_REG),
dmar_read4(unit, DMAR_GSTS_REG),
dmar_read4(unit, DMAR_FSTS_REG),
dmar_read4(unit, DMAR_FECTL_REG));
if (unit->ir_enabled) {
db_printf("ir is enabled; IRT @%p phys 0x%jx maxcnt %d\n",
unit->irt, (uintmax_t)unit->irt_phys, unit->irte_cnt);
}
db_printf("fed 0x%x fea 0x%x feua 0x%x\n",
dmar_read4(unit, DMAR_FEDATA_REG),
dmar_read4(unit, DMAR_FEADDR_REG),
dmar_read4(unit, DMAR_FEUADDR_REG));
db_printf("primary fault log:\n");
for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) {
frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16;
db_printf(" %d at 0x%x: %jx %jx\n", i, frir,
(uintmax_t)dmar_read8(unit, frir),
(uintmax_t)dmar_read8(unit, frir + 8));
}
if (DMAR_HAS_QI(unit)) {
db_printf("ied 0x%x iea 0x%x ieua 0x%x\n",
dmar_read4(unit, DMAR_IEDATA_REG),
dmar_read4(unit, DMAR_IEADDR_REG),
dmar_read4(unit, DMAR_IEUADDR_REG));
if (unit->qi_enabled) {
db_printf("qi is enabled: queue @0x%jx (IQA 0x%jx) "
"size 0x%jx\n"
" head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n"
" hw compl 0x%jx@%p/phys@%jx next seq 0x%x gen 0x%x\n",
(uintmax_t)unit->x86c.inv_queue,
(uintmax_t)dmar_read8(unit, DMAR_IQA_REG),
(uintmax_t)unit->x86c.inv_queue_size,
dmar_read4(unit, DMAR_IQH_REG),
dmar_read4(unit, DMAR_IQT_REG),
unit->x86c.inv_queue_avail,
dmar_read4(unit, DMAR_ICS_REG),
dmar_read4(unit, DMAR_IECTL_REG),
(uintmax_t)unit->x86c.inv_waitd_seq_hw,
&unit->x86c.inv_waitd_seq_hw,
(uintmax_t)unit->x86c.inv_waitd_seq_hw_phys,
unit->x86c.inv_waitd_seq,
unit->x86c.inv_waitd_gen);
} else {
db_printf("qi is disabled\n");
}
}
if (show_domains) {
db_printf("domains:\n");
LIST_FOREACH(domain, &unit->domains, link) {
dmar_print_domain(domain, show_mappings);
if (db_pager_quit)
break;
}
}
}
DB_SHOW_COMMAND(dmar, db_dmar_print)
{
bool show_domains, show_mappings;
show_domains = strchr(modif, 'd') != NULL;
show_mappings = strchr(modif, 'm') != NULL;
if (!have_addr) {
db_printf("usage: show dmar [/d] [/m] index\n");
return;
}
dmar_print_one((int)addr, show_domains, show_mappings);
}
DB_SHOW_ALL_COMMAND(dmars, db_show_all_dmars)
{
int i;
bool show_domains, show_mappings;
show_domains = strchr(modif, 'd') != NULL;
show_mappings = strchr(modif, 'm') != NULL;
for (i = 0; i < dmar_devcnt; i++) {
dmar_print_one(i, show_domains, show_mappings);
if (db_pager_quit)
break;
}
}
#endif
static struct iommu_unit *
dmar_find_method(device_t dev, bool verbose)
{
struct dmar_unit *dmar;
dmar = dmar_find(dev, verbose);
return (&dmar->iommu);
}
static struct x86_unit_common *
dmar_get_x86_common(struct iommu_unit *unit)
{
struct dmar_unit *dmar;
dmar = IOMMU2DMAR(unit);
return (&dmar->x86c);
}
+static void
+dmar_unit_pre_instantiate_ctx(struct iommu_unit *unit)
+{
+ dmar_quirks_pre_use(unit);
+ dmar_instantiate_rmrr_ctxs(unit);
+}
+
static struct x86_iommu dmar_x86_iommu = {
.get_x86_common = dmar_get_x86_common,
+ .unit_pre_instantiate_ctx = dmar_unit_pre_instantiate_ctx,
.domain_unload_entry = dmar_domain_unload_entry,
.domain_unload = dmar_domain_unload,
.get_ctx = dmar_get_ctx,
.free_ctx_locked = dmar_free_ctx_locked_method,
.free_ctx = dmar_free_ctx_method,
.find = dmar_find_method,
.alloc_msi_intr = dmar_alloc_msi_intr,
.map_msi_intr = dmar_map_msi_intr,
.unmap_msi_intr = dmar_unmap_msi_intr,
.map_ioapic_intr = dmar_map_ioapic_intr,
.unmap_ioapic_intr = dmar_unmap_ioapic_intr,
};
static void
x86_iommu_set_intel(void *arg __unused)
{
if (cpu_vendor_id == CPU_VENDOR_INTEL)
set_x86_iommu(&dmar_x86_iommu);
}
SYSINIT(x86_iommu, SI_SUB_TUNABLES, SI_ORDER_ANY, x86_iommu_set_intel, NULL);
diff --git a/sys/x86/iommu/iommu_utils.c b/sys/x86/iommu/iommu_utils.c
index 5d22c6584ab7..2c647fd21c67 100644
--- a/sys/x86/iommu/iommu_utils.c
+++ b/sys/x86/iommu/iommu_utils.c
@@ -1,697 +1,703 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013, 2014, 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_acpi.h"
#if defined(__amd64__)
#define DEV_APIC
#else
#include "opt_apic.h"
#endif
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef DEV_APIC
#include "pcib_if.h"
#include
#include
#include
#endif
vm_page_t
iommu_pgalloc(vm_object_t obj, vm_pindex_t idx, int flags)
{
vm_page_t m;
int zeroed, aflags;
zeroed = (flags & IOMMU_PGF_ZERO) != 0 ? VM_ALLOC_ZERO : 0;
aflags = zeroed | VM_ALLOC_NOBUSY | VM_ALLOC_SYSTEM | VM_ALLOC_NODUMP |
((flags & IOMMU_PGF_WAITOK) != 0 ? VM_ALLOC_WAITFAIL :
VM_ALLOC_NOWAIT);
for (;;) {
if ((flags & IOMMU_PGF_OBJL) == 0)
VM_OBJECT_WLOCK(obj);
m = vm_page_lookup(obj, idx);
if ((flags & IOMMU_PGF_NOALLOC) != 0 || m != NULL) {
if ((flags & IOMMU_PGF_OBJL) == 0)
VM_OBJECT_WUNLOCK(obj);
break;
}
m = vm_page_alloc_contig(obj, idx, aflags, 1, 0,
iommu_high, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
if ((flags & IOMMU_PGF_OBJL) == 0)
VM_OBJECT_WUNLOCK(obj);
if (m != NULL) {
if (zeroed && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
atomic_add_int(&iommu_tbl_pagecnt, 1);
break;
}
if ((flags & IOMMU_PGF_WAITOK) == 0)
break;
}
return (m);
}
void
iommu_pgfree(vm_object_t obj, vm_pindex_t idx, int flags)
{
vm_page_t m;
if ((flags & IOMMU_PGF_OBJL) == 0)
VM_OBJECT_WLOCK(obj);
m = vm_page_grab(obj, idx, VM_ALLOC_NOCREAT);
if (m != NULL) {
vm_page_free(m);
atomic_subtract_int(&iommu_tbl_pagecnt, 1);
}
if ((flags & IOMMU_PGF_OBJL) == 0)
VM_OBJECT_WUNLOCK(obj);
}
void *
iommu_map_pgtbl(vm_object_t obj, vm_pindex_t idx, int flags,
struct sf_buf **sf)
{
vm_page_t m;
bool allocated;
if ((flags & IOMMU_PGF_OBJL) == 0)
VM_OBJECT_WLOCK(obj);
m = vm_page_lookup(obj, idx);
if (m == NULL && (flags & IOMMU_PGF_ALLOC) != 0) {
m = iommu_pgalloc(obj, idx, flags | IOMMU_PGF_OBJL);
allocated = true;
} else
allocated = false;
if (m == NULL) {
if ((flags & IOMMU_PGF_OBJL) == 0)
VM_OBJECT_WUNLOCK(obj);
return (NULL);
}
/* Sleepable allocations cannot fail. */
if ((flags & IOMMU_PGF_WAITOK) != 0)
VM_OBJECT_WUNLOCK(obj);
sched_pin();
*sf = sf_buf_alloc(m, SFB_CPUPRIVATE | ((flags & IOMMU_PGF_WAITOK)
== 0 ? SFB_NOWAIT : 0));
if (*sf == NULL) {
sched_unpin();
if (allocated) {
VM_OBJECT_ASSERT_WLOCKED(obj);
iommu_pgfree(obj, m->pindex, flags | IOMMU_PGF_OBJL);
}
if ((flags & IOMMU_PGF_OBJL) == 0)
VM_OBJECT_WUNLOCK(obj);
return (NULL);
}
if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) ==
(IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL))
VM_OBJECT_WLOCK(obj);
else if ((flags & (IOMMU_PGF_WAITOK | IOMMU_PGF_OBJL)) == 0)
VM_OBJECT_WUNLOCK(obj);
return ((void *)sf_buf_kva(*sf));
}
void
iommu_unmap_pgtbl(struct sf_buf *sf)
{
sf_buf_free(sf);
sched_unpin();
}
iommu_haddr_t iommu_high;
int iommu_tbl_pagecnt;
SYSCTL_NODE(_hw_iommu, OID_AUTO, dmar, CTLFLAG_RD | CTLFLAG_MPSAFE,
NULL, "");
SYSCTL_INT(_hw_iommu, OID_AUTO, tbl_pagecnt, CTLFLAG_RD,
&iommu_tbl_pagecnt, 0,
"Count of pages used for IOMMU pagetables");
int iommu_qi_batch_coalesce = 100;
SYSCTL_INT(_hw_iommu, OID_AUTO, batch_coalesce, CTLFLAG_RWTUN,
&iommu_qi_batch_coalesce, 0,
"Number of qi batches between interrupt");
static struct x86_iommu *x86_iommu;
void
set_x86_iommu(struct x86_iommu *x)
{
MPASS(x86_iommu == NULL);
x86_iommu = x;
}
struct x86_iommu *
get_x86_iommu(void)
{
return (x86_iommu);
}
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep)
{
x86_iommu->domain_unload_entry(entry, free, cansleep);
}
void
iommu_domain_unload(struct iommu_domain *iodom,
struct iommu_map_entries_tailq *entries, bool cansleep)
{
x86_iommu->domain_unload(iodom, entries, cansleep);
}
struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
bool id_mapped, bool rmrr_init)
{
return (x86_iommu->get_ctx(iommu, dev, rid, id_mapped, rmrr_init));
}
void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *context)
{
x86_iommu->free_ctx_locked(iommu, context);
}
void
iommu_free_ctx(struct iommu_ctx *context)
{
x86_iommu->free_ctx(context);
}
struct iommu_unit *
iommu_find(device_t dev, bool verbose)
{
return (x86_iommu->find(dev, verbose));
}
int
iommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count)
{
return (x86_iommu->alloc_msi_intr(src, cookies, count));
}
int
iommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
uint64_t *addr, uint32_t *data)
{
return (x86_iommu->map_msi_intr(src, cpu, vector, cookie,
addr, data));
}
int
iommu_unmap_msi_intr(device_t src, u_int cookie)
{
return (x86_iommu->unmap_msi_intr(src, cookie));
}
int
iommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo)
{
return (x86_iommu->map_ioapic_intr(ioapic_id, cpu, vector, edge,
activehi, irq, cookie, hi, lo));
}
int
iommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie)
{
return (x86_iommu->unmap_ioapic_intr(ioapic_id, cookie));
}
+void
+iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
+{
+ x86_iommu->unit_pre_instantiate_ctx(unit);
+}
+
#define IOMMU2X86C(iommu) (x86_iommu->get_x86_common(iommu))
static bool
iommu_qi_seq_processed(struct iommu_unit *unit,
const struct iommu_qi_genseq *pseq)
{
struct x86_unit_common *x86c;
u_int gen;
x86c = IOMMU2X86C(unit);
gen = x86c->inv_waitd_gen;
return (pseq->gen < gen ||
(pseq->gen == gen && pseq->seq <= x86c->inv_waitd_seq_hw));
}
void
iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *pseq,
bool emit_wait)
{
struct x86_unit_common *x86c;
struct iommu_qi_genseq gsec;
uint32_t seq;
KASSERT(pseq != NULL, ("wait descriptor with no place for seq"));
IOMMU_ASSERT_LOCKED(unit);
x86c = IOMMU2X86C(unit);
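/*
 * Sequence number wrap-around: emit a wait descriptor for the last
 * sequence number of the current generation, spin until the hardware
 * has caught up with everything queued so far, then start the next
 * generation at sequence number 1.
 */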
if (x86c->inv_waitd_seq == 0xffffffff) {
gsec.gen = x86c->inv_waitd_gen;
gsec.seq = x86c->inv_waitd_seq;
x86_iommu->qi_ensure(unit, 1);
x86_iommu->qi_emit_wait_descr(unit, gsec.seq, false,
true, false);
x86_iommu->qi_advance_tail(unit);
while (!iommu_qi_seq_processed(unit, &gsec))
cpu_spinwait();
x86c->inv_waitd_gen++;
x86c->inv_waitd_seq = 1;
}
seq = x86c->inv_waitd_seq++;
pseq->gen = x86c->inv_waitd_gen;
pseq->seq = seq;
if (emit_wait) {
x86_iommu->qi_ensure(unit, 1);
x86_iommu->qi_emit_wait_descr(unit, seq, true, true, false);
}
}
/*
* To avoid missed wakeups, callers must increment the unit's waiters count
* before advancing the tail past the wait descriptor.
*/
void
iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct iommu_qi_genseq *
gseq, bool nowait)
{
struct x86_unit_common *x86c;
IOMMU_ASSERT_LOCKED(unit);
x86c = IOMMU2X86C(unit);
KASSERT(x86c->inv_seq_waiters > 0, ("%s: no waiters", __func__));
while (!iommu_qi_seq_processed(unit, gseq)) {
if (cold || nowait) {
cpu_spinwait();
} else {
msleep(&x86c->inv_seq_waiters, &unit->lock, 0,
"dmarse", hz);
}
}
x86c->inv_seq_waiters--;
}
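/*
 * Sketch of the caller protocol described above; iommu_qi_invalidate_sync()
 * below is a concrete instance:
 *
 *	IOMMU_LOCK(unit);
 *	x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);
 *	IOMMU2X86C(unit)->inv_seq_waiters++;	(before advancing the tail)
 *	x86_iommu->qi_advance_tail(unit);
 *	iommu_qi_wait_for_seq(unit, &gseq, false);
 *	IOMMU_UNLOCK(unit);
 */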
/*
* The caller must not be using the entry's dmamap_link field.
*/
void
iommu_qi_invalidate_locked(struct iommu_domain *domain,
struct iommu_map_entry *entry, bool emit_wait)
{
struct iommu_unit *unit;
struct x86_unit_common *x86c;
unit = domain->iommu;
x86c = IOMMU2X86C(unit);
IOMMU_ASSERT_LOCKED(unit);
x86_iommu->qi_invalidate_emit(domain, entry->start, entry->end -
entry->start, &entry->gseq, emit_wait);
/*
* To avoid a data race in dmar_qi_task(), the entry's gseq must be
* initialized before the entry is added to the TLB flush list, and the
* entry must be added to that list before the tail is advanced. More
* precisely, the tail must not be advanced past the wait descriptor
* that will generate the interrupt that schedules dmar_qi_task() for
* execution before the entry is added to the list. While an earlier
* call to dmar_qi_ensure() might have advanced the tail, it will not
* advance it past the wait descriptor.
*
* See the definition of struct dmar_unit for more information on
* synchronization.
*/
entry->tlb_flush_next = NULL;
atomic_store_rel_ptr((uintptr_t *)&x86c->tlb_flush_tail->
tlb_flush_next, (uintptr_t)entry);
x86c->tlb_flush_tail = entry;
x86_iommu->qi_advance_tail(unit);
}
void
iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
iommu_gaddr_t size, bool cansleep)
{
struct iommu_unit *unit;
struct iommu_qi_genseq gseq;
unit = domain->iommu;
IOMMU_LOCK(unit);
x86_iommu->qi_invalidate_emit(domain, base, size, &gseq, true);
/*
* To avoid a missed wakeup in iommu_qi_task(), the unit's
* waiters count must be incremented before the tail is
* advanced.
*/
IOMMU2X86C(unit)->inv_seq_waiters++;
x86_iommu->qi_advance_tail(unit);
iommu_qi_wait_for_seq(unit, &gseq, !cansleep);
IOMMU_UNLOCK(unit);
}
void
iommu_qi_drain_tlb_flush(struct iommu_unit *unit)
{
struct x86_unit_common *x86c;
struct iommu_map_entry *entry, *head;
x86c = IOMMU2X86C(unit);
for (head = x86c->tlb_flush_head;; head = entry) {
entry = (struct iommu_map_entry *)
atomic_load_acq_ptr((uintptr_t *)&head->tlb_flush_next);
if (entry == NULL ||
!iommu_qi_seq_processed(unit, &entry->gseq))
break;
x86c->tlb_flush_head = entry;
iommu_gas_free_entry(head);
if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
iommu_gas_free_region(entry);
else
iommu_gas_free_space(entry);
}
}
void
iommu_qi_common_init(struct iommu_unit *unit, task_fn_t qi_task)
{
struct x86_unit_common *x86c;
u_int qi_sz;
x86c = IOMMU2X86C(unit);
x86c->tlb_flush_head = x86c->tlb_flush_tail =
iommu_gas_alloc_entry(NULL, 0);
TASK_INIT(&x86c->qi_task, 0, qi_task, unit);
x86c->qi_taskqueue = taskqueue_create_fast("iommuqf", M_WAITOK,
taskqueue_thread_enqueue, &x86c->qi_taskqueue);
taskqueue_start_threads(&x86c->qi_taskqueue, 1, PI_AV,
"iommu%d qi taskq", unit->unit);
x86c->inv_waitd_gen = 0;
x86c->inv_waitd_seq = 1;
qi_sz = 3;
TUNABLE_INT_FETCH("hw.iommu.qi_size", &qi_sz);
if (qi_sz > x86c->qi_buf_maxsz)
qi_sz = x86c->qi_buf_maxsz;
x86c->inv_queue_size = (1ULL << qi_sz) * PAGE_SIZE;
/* Reserve one descriptor to prevent wraparound. */
x86c->inv_queue_avail = x86c->inv_queue_size -
x86c->qi_cmd_sz;
/*
* Invalidation queue reads by DMAR/AMD-Vi hardware are always
* coherent.
*/
x86c->inv_queue = kmem_alloc_contig(x86c->inv_queue_size,
M_WAITOK | M_ZERO, 0, iommu_high, PAGE_SIZE, 0,
VM_MEMATTR_DEFAULT);
x86c->inv_waitd_seq_hw_phys = pmap_kextract(
(vm_offset_t)&x86c->inv_waitd_seq_hw);
}
void
iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
struct iommu_unit *))
{
struct x86_unit_common *x86c;
struct iommu_qi_genseq gseq;
x86c = IOMMU2X86C(unit);
taskqueue_drain(x86c->qi_taskqueue, &x86c->qi_task);
taskqueue_free(x86c->qi_taskqueue);
x86c->qi_taskqueue = NULL;
IOMMU_LOCK(unit);
/* quiesce */
x86_iommu->qi_ensure(unit, 1);
iommu_qi_emit_wait_seq(unit, &gseq, true);
/* See iommu_qi_invalidate_locked(). */
x86c->inv_seq_waiters++;
x86_iommu->qi_advance_tail(unit);
iommu_qi_wait_for_seq(unit, &gseq, false);
/* Only after the quiesce, disable the queue. */
disable_qi(unit);
KASSERT(x86c->inv_seq_waiters == 0,
("iommu%d: waiters on disabled queue", unit->unit));
IOMMU_UNLOCK(unit);
kmem_free(x86c->inv_queue, x86c->inv_queue_size);
x86c->inv_queue = NULL;
x86c->inv_queue_size = 0;
}
int
iommu_alloc_irq(struct iommu_unit *unit, int idx)
{
device_t dev, pcib;
struct iommu_msi_data *dmd;
uint64_t msi_addr;
uint32_t msi_data;
int error;
MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);
dev = unit->dev;
dmd = &IOMMU2X86C(unit)->intrs[idx];
pcib = device_get_parent(device_get_parent(dev)); /* Really not pcib */
error = PCIB_ALLOC_MSIX(pcib, dev, &dmd->irq);
if (error != 0) {
device_printf(dev, "cannot allocate %s interrupt, %d\n",
dmd->name, error);
goto err1;
}
error = bus_set_resource(dev, SYS_RES_IRQ, dmd->irq_rid,
dmd->irq, 1);
if (error != 0) {
device_printf(dev, "cannot set %s interrupt resource, %d\n",
dmd->name, error);
goto err2;
}
dmd->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
&dmd->irq_rid, RF_ACTIVE);
if (dmd->irq_res == NULL) {
device_printf(dev,
"cannot allocate resource for %s interrupt\n", dmd->name);
error = ENXIO;
goto err3;
}
error = bus_setup_intr(dev, dmd->irq_res, INTR_TYPE_MISC,
dmd->handler, NULL, unit, &dmd->intr_handle);
if (error != 0) {
device_printf(dev, "cannot setup %s interrupt, %d\n",
dmd->name, error);
goto err4;
}
bus_describe_intr(dev, dmd->irq_res, dmd->intr_handle, "%s", dmd->name);
error = PCIB_MAP_MSI(pcib, dev, dmd->irq, &msi_addr, &msi_data);
if (error != 0) {
device_printf(dev, "cannot map %s interrupt, %d\n",
dmd->name, error);
goto err5;
}
dmd->msi_data = msi_data;
dmd->msi_addr = msi_addr;
return (0);
err5:
bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
err4:
bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
err3:
bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
err2:
PCIB_RELEASE_MSIX(pcib, dev, dmd->irq);
dmd->irq = -1;
err1:
return (error);
}
void
iommu_release_intr(struct iommu_unit *unit, int idx)
{
device_t dev;
struct iommu_msi_data *dmd;
MPASS(idx >= 0 && idx < IOMMU_MAX_MSI);
dmd = &IOMMU2X86C(unit)->intrs[idx];
if (dmd->handler == NULL || dmd->irq == -1)
return;
dev = unit->dev;
bus_teardown_intr(dev, dmd->irq_res, dmd->intr_handle);
bus_release_resource(dev, SYS_RES_IRQ, dmd->irq_rid, dmd->irq_res);
bus_delete_resource(dev, SYS_RES_IRQ, dmd->irq_rid);
PCIB_RELEASE_MSIX(device_get_parent(device_get_parent(dev)),
dev, dmd->irq);
dmd->irq = -1;
}
void
iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev)
{
bus_addr_t maxaddr;
maxaddr = MIN(ctx->domain->end, BUS_SPACE_MAXADDR);
ctx->tag->common.impl = &bus_dma_iommu_impl;
ctx->tag->common.boundary = 0;
ctx->tag->common.lowaddr = maxaddr;
ctx->tag->common.highaddr = maxaddr;
ctx->tag->common.maxsize = maxaddr;
ctx->tag->common.nsegments = BUS_SPACE_UNRESTRICTED;
ctx->tag->common.maxsegsz = maxaddr;
ctx->tag->ctx = ctx;
ctx->tag->owner = dev;
}
void
iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
if ((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0)
iommu_gas_free_region(entry);
else
iommu_gas_free_space(entry);
if (free)
iommu_gas_free_entry(entry);
else
entry->flags = 0;
}
/*
* Index of the pte for the guest address base in the page table at
* the level lvl.
*/
int
pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl)
{
base >>= IOMMU_PAGE_SHIFT + (pglvl - lvl - 1) *
IOMMU_NPTEPGSHIFT;
return (base & IOMMU_PTEMASK);
}
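/*
 * Worked example (assuming the 4 KB constants from x86_iommu.h,
 * IOMMU_PAGE_SHIFT == 12 and IOMMU_NPTEPGSHIFT == 9): for a 4-level
 * table (pglvl == 4),
 *	lvl 0 selects (base >> 39) & IOMMU_PTEMASK	(root table)
 *	lvl 3 selects (base >> 12) & IOMMU_PTEMASK	(leaf table)
 * so each deeper level consumes the next lower 9-bit slice of base.
 */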
/*
* Returns the page index of the page table page in the page table
* object, which maps the given address base at the page table level
* lvl.
*/
vm_pindex_t
pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl)
{
vm_pindex_t idx, pidx;
int i;
KASSERT(lvl >= 0 && lvl < pglvl,
("wrong lvl %d %d", pglvl, lvl));
for (pidx = idx = 0, i = 0; i < lvl; i++, pidx = idx) {
idx = pglvl_pgtbl_pte_off(pglvl, base, i) +
pidx * IOMMU_NPTEPG + 1;
}
return (idx);
}
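/*
 * Resulting layout of the page table object (illustrative): the root
 * page is always at pindex 0; the page referenced by root pte N is at
 * pindex N + 1; the page referenced by pte M of that page is at
 * pindex (N + 1) * IOMMU_NPTEPG + M + 1, and so on.  Every page table
 * page thus has a fixed pindex, independent of allocation order.
 */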
/*
* Calculate the total amount of page table pages needed to map the
* whole bus address space on the context with the selected agaw.
*/
vm_pindex_t
pglvl_max_pages(int pglvl)
{
vm_pindex_t res;
int i;
for (res = 0, i = pglvl; i > 0; i--) {
res *= IOMMU_NPTEPG;
res++;
}
return (res);
}
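/*
 * The loop above evaluates 1 + N + N^2 + ... + N^(pglvl - 1), with
 * N == IOMMU_NPTEPG, in Horner form.  For example, a 4-level table of
 * 512-entry pages needs at most 1 + 512 + 512^2 + 512^3 pages.
 */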
iommu_gaddr_t
pglvl_page_size(int total_pglvl, int lvl)
{
int rlvl;
static const iommu_gaddr_t pg_sz[] = {
(iommu_gaddr_t)IOMMU_PAGE_SIZE,
(iommu_gaddr_t)IOMMU_PAGE_SIZE << IOMMU_NPTEPGSHIFT,
(iommu_gaddr_t)IOMMU_PAGE_SIZE << (2 * IOMMU_NPTEPGSHIFT),
(iommu_gaddr_t)IOMMU_PAGE_SIZE << (3 * IOMMU_NPTEPGSHIFT),
(iommu_gaddr_t)IOMMU_PAGE_SIZE << (4 * IOMMU_NPTEPGSHIFT),
(iommu_gaddr_t)IOMMU_PAGE_SIZE << (5 * IOMMU_NPTEPGSHIFT),
(iommu_gaddr_t)IOMMU_PAGE_SIZE << (6 * IOMMU_NPTEPGSHIFT),
};
KASSERT(lvl >= 0 && lvl < total_pglvl,
("total %d lvl %d", total_pglvl, lvl));
rlvl = total_pglvl - lvl - 1;
KASSERT(rlvl < nitems(pg_sz), ("sizeof pg_sz lvl %d", lvl));
return (pg_sz[rlvl]);
}
diff --git a/sys/x86/iommu/x86_iommu.h b/sys/x86/iommu/x86_iommu.h
index 210ce82c8f6d..a1ed5c71c513 100644
--- a/sys/x86/iommu/x86_iommu.h
+++ b/sys/x86/iommu/x86_iommu.h
@@ -1,195 +1,196 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013-2015, 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef __X86_IOMMU_X86_IOMMU_H
#define __X86_IOMMU_X86_IOMMU_H
/* Intel and AMD both use 64-bit PTEs, so a single definition suffices. */
typedef struct iommu_pte {
uint64_t pte;
} iommu_pte_t;
#define IOMMU_PAGE_SIZE PAGE_SIZE
#define IOMMU_PAGE_MASK (IOMMU_PAGE_SIZE - 1)
#define IOMMU_PAGE_SHIFT PAGE_SHIFT
#define IOMMU_NPTEPG (IOMMU_PAGE_SIZE / sizeof(iommu_pte_t))
#define IOMMU_NPTEPGSHIFT 9
#define IOMMU_PTEMASK (IOMMU_NPTEPG - 1)
struct sf_buf;
struct vm_object;
struct vm_page *iommu_pgalloc(struct vm_object *obj, vm_pindex_t idx,
int flags);
void iommu_pgfree(struct vm_object *obj, vm_pindex_t idx, int flags);
void *iommu_map_pgtbl(struct vm_object *obj, vm_pindex_t idx, int flags,
struct sf_buf **sf);
void iommu_unmap_pgtbl(struct sf_buf *sf);
extern iommu_haddr_t iommu_high;
extern int iommu_tbl_pagecnt;
extern int iommu_qi_batch_coalesce;
SYSCTL_DECL(_hw_iommu);
struct x86_unit_common;
struct x86_iommu {
struct x86_unit_common *(*get_x86_common)(struct
iommu_unit *iommu);
+ void (*unit_pre_instantiate_ctx)(struct iommu_unit *iommu);
void (*qi_ensure)(struct iommu_unit *unit, int descr_count);
void (*qi_emit_wait_descr)(struct iommu_unit *unit, uint32_t seq,
bool, bool, bool);
void (*qi_advance_tail)(struct iommu_unit *unit);
void (*qi_invalidate_emit)(struct iommu_domain *idomain,
iommu_gaddr_t base, iommu_gaddr_t size, struct iommu_qi_genseq *
pseq, bool emit_wait);
void (*domain_unload_entry)(struct iommu_map_entry *entry, bool free,
bool cansleep);
void (*domain_unload)(struct iommu_domain *iodom,
struct iommu_map_entries_tailq *entries, bool cansleep);
struct iommu_ctx *(*get_ctx)(struct iommu_unit *iommu,
device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init);
void (*free_ctx_locked)(struct iommu_unit *iommu,
struct iommu_ctx *context);
void (*free_ctx)(struct iommu_ctx *context);
struct iommu_unit *(*find)(device_t dev, bool verbose);
int (*alloc_msi_intr)(device_t src, u_int *cookies, u_int count);
int (*map_msi_intr)(device_t src, u_int cpu, u_int vector,
u_int cookie, uint64_t *addr, uint32_t *data);
int (*unmap_msi_intr)(device_t src, u_int cookie);
int (*map_ioapic_intr)(u_int ioapic_id, u_int cpu, u_int vector,
bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
uint32_t *lo);
int (*unmap_ioapic_intr)(u_int ioapic_id, u_int *cookie);
};
void set_x86_iommu(struct x86_iommu *);
struct x86_iommu *get_x86_iommu(void);
struct iommu_msi_data {
int irq;
int irq_rid;
struct resource *irq_res;
void *intr_handle;
int (*handler)(void *);
int msi_data_reg;
int msi_addr_reg;
int msi_uaddr_reg;
uint64_t msi_addr;
uint32_t msi_data;
void (*enable_intr)(struct iommu_unit *);
void (*disable_intr)(struct iommu_unit *);
const char *name;
};
#define IOMMU_MAX_MSI 3
struct x86_unit_common {
uint32_t qi_buf_maxsz;
uint32_t qi_cmd_sz;
char *inv_queue;
vm_size_t inv_queue_size;
uint32_t inv_queue_avail;
uint32_t inv_queue_tail;
/*
* Hardware writes here on completion of wait descriptor
* processing.  Intel writes 4 bytes, while AMD writes 8.
* Because the write is little-endian and we use 4-byte
* sequence numbers, the difference does not matter to us.
*/
volatile uint64_t inv_waitd_seq_hw;
uint64_t inv_waitd_seq_hw_phys;
uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */
u_int inv_waitd_gen; /* seq number generation AKA seq overflows */
u_int inv_seq_waiters; /* count of waiters for seq */
u_int inv_queue_full; /* informational counter */
/*
* Delayed freeing of map entries queue processing:
*
* tlb_flush_head and tlb_flush_tail are used to implement a FIFO
* queue that supports concurrent dequeues and enqueues. However,
* there can only be a single dequeuer (accessing tlb_flush_head) and
* a single enqueuer (accessing tlb_flush_tail) at a time. Since the
* unit's qi_task is the only dequeuer, it can access tlb_flush_head
* without any locking. In contrast, there may be multiple enqueuers,
* so the enqueuers acquire the iommu unit lock to serialize their
* accesses to tlb_flush_tail.
*
* In this FIFO queue implementation, the key to enabling concurrent
* dequeues and enqueues is that the dequeuer never needs to access
* tlb_flush_tail and the enqueuer never needs to access
* tlb_flush_head. In particular, tlb_flush_head and tlb_flush_tail
* are never NULL, so neither a dequeuer nor an enqueuer ever needs to
* update both. Instead, tlb_flush_head always points to a "zombie"
* struct, which previously held the last dequeued item. Thus, the
* zombie's next field actually points to the struct holding the first
* item in the queue. When an item is dequeued, the current zombie is
* finally freed, and the struct that held the just dequeued item
* becomes the new zombie. When the queue is empty, tlb_flush_tail
* also points to the zombie.
*/
struct iommu_map_entry *tlb_flush_head;
struct iommu_map_entry *tlb_flush_tail;
struct task qi_task;
struct taskqueue *qi_taskqueue;
struct iommu_msi_data intrs[IOMMU_MAX_MSI];
};
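/*
 * Illustrative sketch of the tlb_flush queue operations described in
 * the comment above; see iommu_qi_invalidate_locked() and
 * iommu_qi_drain_tlb_flush() for the real enqueue and dequeue code:
 *
 *	enqueue(e):	e->tlb_flush_next = NULL;
 *			release-store tail->tlb_flush_next = e;
 *			tail = e;
 *	dequeue():	e = acquire-load head->tlb_flush_next;
 *			if (e == NULL) the queue is empty;
 *			else free the old zombie at head and make e,
 *			whose item was just consumed, the new head.
 */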
void iommu_domain_free_entry(struct iommu_map_entry *entry, bool free);
void iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *
pseq, bool emit_wait);
void iommu_qi_wait_for_seq(struct iommu_unit *unit, const struct
iommu_qi_genseq *gseq, bool nowait);
void iommu_qi_drain_tlb_flush(struct iommu_unit *unit);
void iommu_qi_invalidate_locked(struct iommu_domain *domain,
struct iommu_map_entry *entry, bool emit_wait);
void iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
iommu_gaddr_t size, bool cansleep);
void iommu_qi_common_init(struct iommu_unit *unit, task_fn_t taskfunc);
void iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
struct iommu_unit *));
int iommu_alloc_irq(struct iommu_unit *unit, int idx);
void iommu_release_intr(struct iommu_unit *unit, int idx);
void iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev);
int pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl);
vm_pindex_t pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl);
vm_pindex_t pglvl_max_pages(int pglvl);
iommu_gaddr_t pglvl_page_size(int total_pglvl, int lvl);
#endif