diff --git a/sys/arm64/iommu/iommu.c b/sys/arm64/iommu/iommu.c
index 0fe59c5d10b6..5db48b3bc276 100644
--- a/sys/arm64/iommu/iommu.c
+++ b/sys/arm64/iommu/iommu.c
@@ -1,525 +1,512 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 Ruslan Bukin
*
* This software was developed by SRI International and the University of
* Cambridge Computer Laboratory (Department of Computer Science and
* Technology) under DARPA contract HR0011-18-C-0016 ("ECATS"), as part of the
* DARPA SSITH research programme.
*
* Portions of this work was supported by Innovate UK project 105694,
* "Digital Security by Design (DSbD) Technology Platform Prototype".
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_platform.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef FDT
#include
#include
#include
#endif
#include "iommu.h"
#include "iommu_if.h"
static MALLOC_DEFINE(M_IOMMU, "IOMMU", "IOMMU framework");
#define IOMMU_LIST_LOCK() sx_xlock(&iommu_sx)
#define IOMMU_LIST_UNLOCK() sx_xunlock(&iommu_sx)
#define IOMMU_LIST_ASSERT_LOCKED() sx_assert(&iommu_sx, SA_XLOCKED)
#define dprintf(fmt, ...)
static struct sx iommu_sx;
struct iommu_entry {
struct iommu_unit *iommu;
LIST_ENTRY(iommu_entry) next;
};
static LIST_HEAD(, iommu_entry) iommu_list = LIST_HEAD_INITIALIZER(iommu_list);
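/*
 * Remove the translations covering the map entry's address range via
 * the unit's IOMMU_UNMAP method.
 */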
static int
iommu_domain_unmap_buf(struct iommu_domain *iodom,
struct iommu_map_entry *entry, int flags)
{
struct iommu_unit *iommu;
int error;
iommu = iodom->iommu;
error = IOMMU_UNMAP(iommu->dev, iodom, entry->start, entry->end -
entry->start);
return (error);
}
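/*
 * Install translations for the physical pages in 'ma' over the map
 * entry's address range, with protection derived from the entry flags,
 * via the unit's IOMMU_MAP method.
 */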
static int
iommu_domain_map_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry,
vm_page_t *ma, uint64_t eflags, int flags)
{
struct iommu_unit *iommu;
vm_prot_t prot;
vm_offset_t va;
int error;
dprintf("%s: base %lx, size %lx\n", __func__, base, size);
prot = 0;
if (eflags & IOMMU_MAP_ENTRY_READ)
prot |= VM_PROT_READ;
if (eflags & IOMMU_MAP_ENTRY_WRITE)
prot |= VM_PROT_WRITE;
va = entry->start;
iommu = iodom->iommu;
error = IOMMU_MAP(iommu->dev, iodom, va, ma, entry->end -
entry->start, prot);
return (error);
}
static const struct iommu_domain_map_ops domain_map_ops = {
.map = iommu_domain_map_buf,
.unmap = iommu_domain_unmap_buf,
};
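/*
 * Allocate a driver-specific domain via IOMMU_DOMAIN_ALLOC and set up
 * the generic domain state and its GAS (guest address space) allocator.
 */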
static struct iommu_domain *
iommu_domain_alloc(struct iommu_unit *iommu)
{
struct iommu_domain *iodom;
iodom = IOMMU_DOMAIN_ALLOC(iommu->dev, iommu);
if (iodom == NULL)
return (NULL);
KASSERT(iodom->end != 0, ("domain end is not set"));
iommu_domain_init(iommu, iodom, &domain_map_ops);
iodom->iommu = iommu;
iommu_gas_init_domain(iodom);
return (iodom);
}
static int
iommu_domain_free(struct iommu_domain *iodom)
{
struct iommu_unit *iommu;
iommu = iodom->iommu;
IOMMU_LOCK(iommu);
if ((iodom->flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
IOMMU_DOMAIN_LOCK(iodom);
iommu_gas_fini_domain(iodom);
IOMMU_DOMAIN_UNLOCK(iodom);
}
iommu_domain_fini(iodom);
IOMMU_DOMAIN_FREE(iommu->dev, iodom);
IOMMU_UNLOCK(iommu);
return (0);
}
static void
iommu_tag_init(struct iommu_domain *iodom, struct bus_dma_tag_iommu *t)
{
bus_addr_t maxaddr;
maxaddr = MIN(iodom->end, BUS_SPACE_MAXADDR);
t->common.impl = &bus_dma_iommu_impl;
t->common.alignment = 1;
t->common.boundary = 0;
t->common.lowaddr = maxaddr;
t->common.highaddr = maxaddr;
t->common.maxsize = maxaddr;
t->common.nsegments = BUS_SPACE_UNRESTRICTED;
t->common.maxsegsz = maxaddr;
}
static struct iommu_ctx *
iommu_ctx_alloc(device_t requester, struct iommu_domain *iodom, bool disabled)
{
struct iommu_unit *iommu;
struct iommu_ctx *ioctx;
iommu = iodom->iommu;
ioctx = IOMMU_CTX_ALLOC(iommu->dev, iodom, requester, disabled);
if (ioctx == NULL)
return (NULL);
ioctx->domain = iodom;
return (ioctx);
}
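/*
 * Finish context setup: call the driver's IOMMU_CTX_INIT method and
 * allocate and initialize the busdma tag used by the requester.
 */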
static int
iommu_ctx_init(device_t requester, struct iommu_ctx *ioctx)
{
struct bus_dma_tag_iommu *tag;
struct iommu_domain *iodom;
struct iommu_unit *iommu;
int error;
iodom = ioctx->domain;
iommu = iodom->iommu;
error = IOMMU_CTX_INIT(iommu->dev, ioctx);
if (error)
return (error);
tag = ioctx->tag = malloc(sizeof(struct bus_dma_tag_iommu),
M_IOMMU, M_WAITOK | M_ZERO);
tag->owner = requester;
tag->ctx = ioctx;
tag->ctx->domain = iodom;
iommu_tag_init(iodom, tag);
return (error);
}
static struct iommu_unit *
iommu_lookup(device_t dev)
{
struct iommu_entry *entry;
struct iommu_unit *iommu;
IOMMU_LIST_LOCK();
LIST_FOREACH(entry, &iommu_list, next) {
iommu = entry->iommu;
if (iommu->dev == dev) {
IOMMU_LIST_UNLOCK();
return (iommu);
}
}
IOMMU_LIST_UNLOCK();
return (NULL);
}
#ifdef FDT
struct iommu_ctx *
iommu_get_ctx_ofw(device_t dev, int channel)
{
struct iommu_domain *iodom;
struct iommu_unit *iommu;
struct iommu_ctx *ioctx;
phandle_t node, parent;
device_t iommu_dev;
pcell_t *cells;
int niommus;
int ncells;
int error;
node = ofw_bus_get_node(dev);
if (node <= 0) {
device_printf(dev,
"%s called on not ofw based device.\n", __func__);
return (NULL);
}
error = ofw_bus_parse_xref_list_get_length(node,
"iommus", "#iommu-cells", &niommus);
if (error) {
device_printf(dev, "%s can't get iommu list.\n", __func__);
return (NULL);
}
if (niommus == 0) {
device_printf(dev, "%s iommu list is empty.\n", __func__);
return (NULL);
}
error = ofw_bus_parse_xref_list_alloc(node, "iommus", "#iommu-cells",
channel, &parent, &ncells, &cells);
if (error != 0) {
device_printf(dev, "%s can't get iommu device xref.\n",
__func__);
return (NULL);
}
iommu_dev = OF_device_from_xref(parent);
if (iommu_dev == NULL) {
device_printf(dev, "%s can't get iommu device.\n", __func__);
return (NULL);
}
iommu = iommu_lookup(iommu_dev);
if (iommu == NULL) {
device_printf(dev, "%s can't lookup iommu.\n", __func__);
return (NULL);
}
/*
* In our current configuration there is one domain per ctx,
* so allocate a domain first.
*/
iodom = iommu_domain_alloc(iommu);
if (iodom == NULL) {
device_printf(dev, "%s can't allocate domain.\n", __func__);
return (NULL);
}
ioctx = iommu_ctx_alloc(dev, iodom, false);
if (ioctx == NULL) {
iommu_domain_free(iodom);
return (NULL);
}
ioctx->domain = iodom;
error = IOMMU_OFW_MD_DATA(iommu->dev, ioctx, cells, ncells);
if (error) {
device_printf(dev, "%s can't set MD data\n", __func__);
return (NULL);
}
error = iommu_ctx_init(dev, ioctx);
if (error) {
IOMMU_CTX_FREE(iommu->dev, ioctx);
iommu_domain_free(iodom);
return (NULL);
}
return (ioctx);
}
#endif
struct iommu_ctx *
iommu_get_ctx(struct iommu_unit *iommu, device_t requester,
uint16_t rid, bool disabled, bool rmrr)
{
struct iommu_domain *iodom;
struct iommu_ctx *ioctx;
int error;
IOMMU_LOCK(iommu);
ioctx = IOMMU_CTX_LOOKUP(iommu->dev, requester);
if (ioctx) {
IOMMU_UNLOCK(iommu);
return (ioctx);
}
IOMMU_UNLOCK(iommu);
/*
* In our current configuration there is one domain per ctx,
* so allocate a domain first.
*/
iodom = iommu_domain_alloc(iommu);
if (iodom == NULL)
return (NULL);
ioctx = iommu_ctx_alloc(requester, iodom, disabled);
if (ioctx == NULL) {
iommu_domain_free(iodom);
return (NULL);
}
error = iommu_ctx_init(requester, ioctx);
if (error) {
IOMMU_CTX_FREE(iommu->dev, ioctx);
iommu_domain_free(iodom);
return (NULL);
}
return (ioctx);
}
void
iommu_free_ctx_locked(struct iommu_unit *iommu, struct iommu_ctx *ioctx)
{
struct bus_dma_tag_iommu *tag;
+ struct iommu_domain *iodom;
+ int error;
IOMMU_ASSERT_LOCKED(iommu);
tag = ioctx->tag;
+ iodom = ioctx->domain;
IOMMU_CTX_FREE(iommu->dev, ioctx);
+ IOMMU_UNLOCK(iommu);
free(tag, M_IOMMU);
-}
-
-void
-iommu_free_ctx(struct iommu_ctx *ioctx)
-{
- struct iommu_unit *iommu;
- struct iommu_domain *iodom;
- int error;
-
- iodom = ioctx->domain;
- iommu = iodom->iommu;
-
- IOMMU_LOCK(iommu);
- iommu_free_ctx_locked(iommu, ioctx);
- IOMMU_UNLOCK(iommu);
/* Since there is one domain per ctx, remove the domain too. */
- error = iommu_domain_free(iodom);
+ error = iommu_domain_free(iodom);
if (error)
device_printf(iommu->dev, "Could not free a domain\n");
}
static void
iommu_domain_free_entry(struct iommu_map_entry *entry, bool free)
{
iommu_gas_free_space(entry);
if (free)
iommu_gas_free_entry(entry);
else
entry->flags = 0;
}
void
iommu_domain_unload(struct iommu_domain *iodom,
struct iommu_map_entries_tailq *entries, bool cansleep)
{
struct iommu_map_entry *entry, *entry1;
int error __diagused;
TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
("not mapped entry %p %p", iodom, entry));
error = iodom->ops->unmap(iodom, entry,
cansleep ? IOMMU_PGF_WAITOK : 0);
KASSERT(error == 0, ("unmap %p error %d", iodom, error));
TAILQ_REMOVE(entries, entry, dmamap_link);
iommu_domain_free_entry(entry, true);
}
if (TAILQ_EMPTY(entries))
return;
panic("entries map is not empty");
}
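/*
 * Register a newly attached IOMMU unit with the framework and set up
 * its busdma state.
 */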
int
iommu_register(struct iommu_unit *iommu)
{
struct iommu_entry *entry;
mtx_init(&iommu->lock, "IOMMU", NULL, MTX_DEF);
entry = malloc(sizeof(struct iommu_entry), M_IOMMU, M_WAITOK | M_ZERO);
entry->iommu = iommu;
IOMMU_LIST_LOCK();
LIST_INSERT_HEAD(&iommu_list, entry, next);
IOMMU_LIST_UNLOCK();
sysctl_ctx_init(&iommu->sysctl_ctx);
iommu_init_busdma(iommu);
return (0);
}
int
iommu_unregister(struct iommu_unit *iommu)
{
struct iommu_entry *entry, *tmp;
IOMMU_LIST_LOCK();
LIST_FOREACH_SAFE(entry, &iommu_list, next, tmp) {
if (entry->iommu == iommu) {
LIST_REMOVE(entry, next);
free(entry, M_IOMMU);
}
}
IOMMU_LIST_UNLOCK();
iommu_fini_busdma(iommu);
sysctl_ctx_free(&iommu->sysctl_ctx);
mtx_destroy(&iommu->lock);
return (0);
}
struct iommu_unit *
iommu_find(device_t dev, bool verbose)
{
struct iommu_entry *entry;
struct iommu_unit *iommu;
int error;
IOMMU_LIST_LOCK();
LIST_FOREACH(entry, &iommu_list, next) {
iommu = entry->iommu;
error = IOMMU_FIND(iommu->dev, dev);
if (error == 0) {
IOMMU_LIST_UNLOCK();
return (entry->iommu);
}
}
IOMMU_LIST_UNLOCK();
return (NULL);
}
void
iommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
{
}
void
iommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep __unused)
{
dprintf("%s\n", __func__);
iommu_domain_free_entry(entry, free);
}
static void
iommu_init(void)
{
sx_init(&iommu_sx, "IOMMU list");
}
SYSINIT(iommu, SI_SUB_DRIVERS, SI_ORDER_FIRST, iommu_init, NULL);
diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c
index 4d295ed7c6b2..6856b0551dde 100644
--- a/sys/dev/iommu/busdma_iommu.c
+++ b/sys/dev/iommu/busdma_iommu.c
@@ -1,1098 +1,1104 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/*
* busdma_iommu.c, the implementation of the busdma(9) interface using
* IOMMU units from Intel VT-d.
*/
static bool
iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func)
{
char str[128], *env;
int default_bounce;
bool ret;
static const char bounce_str[] = "bounce";
static const char iommu_str[] = "iommu";
static const char dmar_str[] = "dmar"; /* compatibility */
default_bounce = 0;
env = kern_getenv("hw.busdma.default");
if (env != NULL) {
if (strcmp(env, bounce_str) == 0)
default_bounce = 1;
else if (strcmp(env, iommu_str) == 0 ||
strcmp(env, dmar_str) == 0)
default_bounce = 0;
freeenv(env);
}
snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d",
domain, bus, slot, func);
env = kern_getenv(str);
if (env == NULL)
return (default_bounce != 0);
if (strcmp(env, bounce_str) == 0)
ret = true;
else if (strcmp(env, iommu_str) == 0 ||
strcmp(env, dmar_str) == 0)
ret = false;
else
ret = default_bounce != 0;
freeenv(env);
return (ret);
}
/*
* Given original device, find the requester ID that will be seen by
* the IOMMU unit and used for page table lookup. PCI bridges may take
* ownership of transactions from downstream devices, so it may not be
* the same as the BSF of the target device. In those cases, all
* devices downstream of the bridge must share a single mapping
* domain, and must collectively be assigned to use either IOMMU or
* bounce mapping.
*/
device_t
iommu_get_requester(device_t dev, uint16_t *rid)
{
devclass_t pci_class;
device_t l, pci, pcib, pcip, pcibp, requester;
int cap_offset;
uint16_t pcie_flags;
bool bridge_is_pcie;
pci_class = devclass_find("pci");
l = requester = dev;
pci = device_get_parent(dev);
if (pci == NULL || device_get_devclass(pci) != pci_class) {
*rid = 0; /* XXXKIB: Could be ACPI HID */
return (requester);
}
*rid = pci_get_rid(dev);
/*
* Walk the bridge hierarchy from the target device to the
* host port to find the translating bridge nearest the IOMMU
* unit.
*/
for (;;) {
pci = device_get_parent(l);
KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent "
"for %s", device_get_name(dev), device_get_name(l)));
KASSERT(device_get_devclass(pci) == pci_class,
("iommu_get_requester(%s): non-pci parent %s for %s",
device_get_name(dev), device_get_name(pci),
device_get_name(l)));
pcib = device_get_parent(pci);
KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge "
"for %s", device_get_name(dev), device_get_name(pci)));
/*
* The parent of our "bridge" isn't another PCI bus,
* so pcib isn't a PCI->PCI bridge but rather a host
* port, and the requester ID won't be translated
* further.
*/
pcip = device_get_parent(pcib);
if (device_get_devclass(pcip) != pci_class)
break;
pcibp = device_get_parent(pcip);
if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) {
/*
* Do not stop the loop even if the target
* device is PCIe, because it is possible (but
* unlikely) to have a PCI->PCIe bridge
* somewhere in the hierarchy.
*/
l = pcib;
} else {
/*
* The device is not PCIe, so it cannot be seen as a
* requester by the IOMMU unit. Check whether the
* bridge is PCIe.
*/
bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS,
&cap_offset) == 0;
requester = pcib;
/*
* Check for a buggy PCIe/PCI bridge that
* doesn't report the express capability. If
* the bridge above it is express but isn't a
* PCI bridge, then we know pcib is actually a
* PCIe/PCI bridge.
*/
if (!bridge_is_pcie && pci_find_cap(pcibp,
PCIY_EXPRESS, &cap_offset) == 0) {
pcie_flags = pci_read_config(pcibp,
cap_offset + PCIER_FLAGS, 2);
if ((pcie_flags & PCIEM_FLAGS_TYPE) !=
PCIEM_TYPE_PCI_BRIDGE)
bridge_is_pcie = true;
}
if (bridge_is_pcie) {
/*
* The current device is not PCIe, but
* the bridge above it is. This is a
* PCIe->PCI bridge. Assume that the
* requester ID will be the secondary
* bus number with slot and function
* set to zero.
*
* XXX: Doesn't handle the case where
* the bridge is PCIe->PCI-X, and the
* bridge will only take ownership of
* requests in some cases. We should
* provide context entries with the
* same page tables for taken and
* non-taken transactions.
*/
*rid = PCI_RID(pci_get_bus(l), 0, 0);
l = pcibp;
} else {
/*
* Neither the device nor the bridge
* above it are PCIe. This is a
* conventional PCI->PCI bridge, which
* will use the bridge's BSF as the
* requester ID.
*/
*rid = pci_get_rid(pcib);
l = pcib;
}
}
}
return (requester);
}
struct iommu_ctx *
iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr)
{
device_t requester;
struct iommu_ctx *ctx;
bool disabled;
uint16_t rid;
requester = iommu_get_requester(dev, &rid);
/*
* If the user requested the IOMMU disabled for the device, we
* cannot disable the IOMMU unit, due to possibility of other
* devices on the same IOMMU unit still requiring translation.
* Instead provide the identity mapping for the device
* context.
*/
disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester),
pci_get_bus(requester), pci_get_slot(requester),
pci_get_function(requester));
ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr);
if (ctx == NULL)
return (NULL);
if (disabled) {
/*
* Keep the first reference on context, release the
* later refs.
*/
IOMMU_LOCK(unit);
if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) {
ctx->flags |= IOMMU_CTX_DISABLED;
IOMMU_UNLOCK(unit);
} else {
iommu_free_ctx_locked(unit, ctx);
}
ctx = NULL;
}
return (ctx);
}
struct iommu_ctx *
iommu_get_dev_ctx(device_t dev)
{
struct iommu_unit *unit;
unit = iommu_find(dev, bootverbose);
/* Not in the scope of any IOMMU? */
if (unit == NULL)
return (NULL);
if (!unit->dma_enabled)
return (NULL);
iommu_unit_pre_instantiate_ctx(unit);
return (iommu_instantiate_ctx(unit, dev, false));
}
bus_dma_tag_t
iommu_get_dma_tag(device_t dev, device_t child)
{
struct iommu_ctx *ctx;
bus_dma_tag_t res;
ctx = iommu_get_dev_ctx(child);
if (ctx == NULL)
return (NULL);
res = (bus_dma_tag_t)ctx->tag;
return (res);
}
bool
bus_dma_iommu_set_buswide(device_t dev)
{
struct iommu_unit *unit;
device_t parent;
u_int busno, slot, func;
parent = device_get_parent(dev);
if (device_get_devclass(parent) != devclass_find("pci"))
return (false);
unit = iommu_find(dev, bootverbose);
if (unit == NULL)
return (false);
busno = pci_get_bus(dev);
slot = pci_get_slot(dev);
func = pci_get_function(dev);
if (slot != 0 || func != 0) {
if (bootverbose) {
device_printf(dev,
"iommu%d pci%d:%d:%d requested buswide busdma\n",
unit->unit, busno, slot, func);
}
return (false);
}
iommu_set_buswide_ctx(unit, busno);
return (true);
}
void
iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno)
{
MPASS(busno <= PCI_BUSMAX);
IOMMU_LOCK(unit);
unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |=
1 << (busno % (NBBY * sizeof(uint32_t)));
IOMMU_UNLOCK(unit);
}
bool
iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno)
{
MPASS(busno <= PCI_BUSMAX);
return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] &
(1U << (busno % (NBBY * sizeof(uint32_t))))) != 0);
}
static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map");
static void iommu_bus_schedule_dmamap(struct iommu_unit *unit,
struct bus_dmamap_iommu *map);
static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
bus_size_t maxsize, int nsegments, bus_size_t maxsegsz, int flags,
bus_dma_lock_t *lockfunc, void *lockfuncarg, bus_dma_tag_t *dmat)
{
struct bus_dma_tag_iommu *newtag, *oldtag;
int error;
*dmat = NULL;
error = common_bus_dma_tag_create(parent != NULL ?
&((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
boundary, lowaddr, highaddr, maxsize, nsegments, maxsegsz, flags,
lockfunc, lockfuncarg, sizeof(struct bus_dma_tag_iommu),
(void **)&newtag);
if (error != 0)
goto out;
oldtag = (struct bus_dma_tag_iommu *)parent;
newtag->common.impl = &bus_dma_iommu_impl;
newtag->ctx = oldtag->ctx;
newtag->owner = oldtag->owner;
*dmat = (bus_dma_tag_t)newtag;
out:
CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
__func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
error);
return (error);
}
static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{
return (0);
}
static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
struct bus_dma_tag_iommu *dmat;
+ struct iommu_unit *iommu;
+ struct iommu_ctx *ctx;
int error;
error = 0;
dmat = (struct bus_dma_tag_iommu *)dmat1;
if (dmat != NULL) {
if (dmat->map_count != 0) {
error = EBUSY;
goto out;
}
- if (dmat == dmat->ctx->tag)
- iommu_free_ctx(dmat->ctx);
+ ctx = dmat->ctx;
+ if (dmat == ctx->tag) {
+ iommu = ctx->domain->iommu;
+ IOMMU_LOCK(iommu);
+ iommu_free_ctx_locked(iommu, dmat->ctx);
+ }
free(dmat->segments, M_IOMMU_DMAMAP);
free(dmat, M_DEVBUF);
}
out:
CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat, error);
return (error);
}
static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{
return (false);
}
static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
tag = (struct bus_dma_tag_iommu *)dmat;
map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
if (map == NULL) {
*mapp = NULL;
return (ENOMEM);
}
if (tag->segments == NULL) {
tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
tag->common.nsegments, M_IOMMU_DMAMAP,
DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
if (tag->segments == NULL) {
free(map, M_IOMMU_DMAMAP);
*mapp = NULL;
return (ENOMEM);
}
}
IOMMU_DMAMAP_INIT(map);
TAILQ_INIT(&map->map_entries);
map->tag = tag;
map->locked = true;
map->cansleep = false;
tag->map_count++;
*mapp = (bus_dmamap_t)map;
return (0);
}
static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
if (map != NULL) {
IOMMU_DMAMAP_LOCK(map);
if (!TAILQ_EMPTY(&map->map_entries)) {
IOMMU_DMAMAP_UNLOCK(map);
return (EBUSY);
}
IOMMU_DMAMAP_DESTROY(map);
free(map, M_IOMMU_DMAMAP);
}
tag->map_count--;
return (0);
}
static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
bus_dmamap_t *mapp)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
int error, mflags;
vm_memattr_t attr;
error = iommu_bus_dmamap_create(dmat, flags, mapp);
if (error != 0)
return (error);
mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
VM_MEMATTR_DEFAULT;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)*mapp;
if (tag->common.maxsize < PAGE_SIZE &&
tag->common.alignment <= tag->common.maxsize &&
attr == VM_MEMATTR_DEFAULT) {
*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
DOMAINSET_PREF(tag->common.domain), mflags);
map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
} else {
*vaddr = kmem_alloc_attr_domainset(
DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
mflags, 0ul, BUS_SPACE_MAXADDR, attr);
map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
}
if (*vaddr == NULL) {
iommu_bus_dmamap_destroy(dmat, *mapp);
*mapp = NULL;
return (ENOMEM);
}
return (0);
}
static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
free(vaddr, M_DEVBUF);
map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
} else {
KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
("iommu_bus_dmamem_free for non alloced map %p", map));
kmem_free(vaddr, tag->common.maxsize);
map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
}
iommu_bus_dmamap_destroy(dmat, map1);
}
static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
int flags, bus_dma_segment_t *segs, int *segp,
struct iommu_map_entries_tailq *entries)
{
struct iommu_ctx *ctx;
struct iommu_domain *domain;
struct iommu_map_entry *entry;
bus_size_t buflen1;
int error, e_flags, idx, gas_flags, seg;
KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
if (segs == NULL)
segs = tag->segments;
ctx = tag->ctx;
domain = ctx->domain;
e_flags = IOMMU_MAP_ENTRY_READ |
((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
seg = *segp;
error = 0;
idx = 0;
while (buflen > 0) {
seg++;
if (seg >= tag->common.nsegments) {
error = EFBIG;
break;
}
buflen1 = buflen > tag->common.maxsegsz ?
tag->common.maxsegsz : buflen;
/*
* (Too) optimistically allow a split if there is more
* than one segment left.
*/
gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
if (seg + 1 < tag->common.nsegments)
gas_flags |= IOMMU_MF_CANSPLIT;
error = iommu_gas_map(domain, &tag->common, buflen1,
offset, e_flags, gas_flags, ma + idx, &entry);
if (error != 0)
break;
/* Update buflen1 in case buffer split. */
if (buflen1 > entry->end - entry->start - offset)
buflen1 = entry->end - entry->start - offset;
KASSERT(vm_addr_align_ok(entry->start + offset,
tag->common.alignment),
("alignment failed: ctx %p start 0x%jx offset %x "
"align 0x%jx", ctx, (uintmax_t)entry->start, offset,
(uintmax_t)tag->common.alignment));
KASSERT(entry->end <= tag->common.lowaddr ||
entry->start >= tag->common.highaddr,
("entry placement failed: ctx %p start 0x%jx end 0x%jx "
"lowaddr 0x%jx highaddr 0x%jx", ctx,
(uintmax_t)entry->start, (uintmax_t)entry->end,
(uintmax_t)tag->common.lowaddr,
(uintmax_t)tag->common.highaddr));
KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
tag->common.boundary),
("boundary failed: ctx %p start 0x%jx end 0x%jx "
"boundary 0x%jx", ctx, (uintmax_t)entry->start,
(uintmax_t)entry->end, (uintmax_t)tag->common.boundary));
KASSERT(buflen1 <= tag->common.maxsegsz,
("segment too large: ctx %p start 0x%jx end 0x%jx "
"buflen1 0x%jx maxsegsz 0x%jx", ctx,
(uintmax_t)entry->start, (uintmax_t)entry->end,
(uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));
KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
("entry %p missing IOMMU_MAP_ENTRY_MAP", entry));
TAILQ_INSERT_TAIL(entries, entry, dmamap_link);
segs[seg].ds_addr = entry->start + offset;
segs[seg].ds_len = buflen1;
idx += OFF_TO_IDX(offset + buflen1);
offset += buflen1;
offset &= IOMMU_PAGE_MASK;
buflen -= buflen1;
}
if (error == 0)
*segp = seg;
return (error);
}
static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
int flags, bus_dma_segment_t *segs, int *segp)
{
struct iommu_ctx *ctx;
struct iommu_domain *domain;
struct iommu_map_entries_tailq entries;
int error;
ctx = tag->ctx;
domain = ctx->domain;
atomic_add_long(&ctx->loads, 1);
TAILQ_INIT(&entries);
error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
buflen, flags, segs, segp, &entries);
if (error == 0) {
IOMMU_DMAMAP_LOCK(map);
TAILQ_CONCAT(&map->map_entries, &entries, dmamap_link);
IOMMU_DMAMAP_UNLOCK(map);
} else if (!TAILQ_EMPTY(&entries)) {
/*
* The busdma interface does not allow us to report
* partial buffer load, so unfortunately we have to
* revert all work done.
*/
IOMMU_DOMAIN_LOCK(domain);
TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
IOMMU_DOMAIN_UNLOCK(domain);
taskqueue_enqueue(domain->iommu->delayed_taskqueue,
&domain->unload_task);
}
if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
!map->cansleep)
error = EINPROGRESS;
if (error == EINPROGRESS)
iommu_bus_schedule_dmamap(domain->iommu, map);
return (error);
}
static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
bus_dma_segment_t *segs, int *segp)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
flags, segs, segp));
}
static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
int *segp)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
vm_page_t *ma, fma;
vm_paddr_t pstart, pend, paddr;
int error, i, ma_cnt, mflags, offset;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
pstart = trunc_page(buf);
pend = round_page(buf + buflen);
offset = buf & PAGE_MASK;
ma_cnt = OFF_TO_IDX(pend - pstart);
mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
if (ma == NULL)
return (ENOMEM);
fma = NULL;
for (i = 0; i < ma_cnt; i++) {
paddr = pstart + ptoa(i);
ma[i] = PHYS_TO_VM_PAGE(paddr);
if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
/*
* If PHYS_TO_VM_PAGE() returned NULL or the
* vm_page was not initialized we'll use a
* fake page.
*/
if (fma == NULL) {
fma = malloc(sizeof(struct vm_page) * ma_cnt,
M_DEVBUF, M_ZERO | mflags);
if (fma == NULL) {
free(ma, M_DEVBUF);
return (ENOMEM);
}
}
vm_page_initfake(&fma[i], pstart + ptoa(i),
VM_MEMATTR_DEFAULT);
ma[i] = &fma[i];
}
}
error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
flags, segs, segp);
free(fma, M_DEVBUF);
free(ma, M_DEVBUF);
return (error);
}
static int
iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf,
bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs,
int *segp)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
vm_page_t *ma, fma;
vm_paddr_t pstart, pend, paddr;
int error, i, ma_cnt, mflags, offset;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
pstart = trunc_page((vm_offset_t)buf);
pend = round_page((vm_offset_t)buf + buflen);
offset = (vm_offset_t)buf & PAGE_MASK;
ma_cnt = OFF_TO_IDX(pend - pstart);
mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
if (ma == NULL)
return (ENOMEM);
fma = NULL;
for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) {
if (pmap == kernel_pmap)
paddr = pmap_kextract(pstart);
else
paddr = pmap_extract(pmap, pstart);
ma[i] = PHYS_TO_VM_PAGE(paddr);
if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
/*
* If PHYS_TO_VM_PAGE() returned NULL or the
* vm_page was not initialized we'll use a
* fake page.
*/
if (fma == NULL) {
fma = malloc(sizeof(struct vm_page) * ma_cnt,
M_DEVBUF, M_ZERO | mflags);
if (fma == NULL) {
free(ma, M_DEVBUF);
return (ENOMEM);
}
}
vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT);
ma[i] = &fma[i];
}
}
error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen,
flags, segs, segp);
free(ma, M_DEVBUF);
free(fma, M_DEVBUF);
return (error);
}
static void
iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1,
struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg)
{
struct bus_dmamap_iommu *map;
if (map1 == NULL)
return;
map = (struct bus_dmamap_iommu *)map1;
map->mem = *mem;
map->tag = (struct bus_dma_tag_iommu *)dmat;
map->callback = callback;
map->callback_arg = callback_arg;
}
static bus_dma_segment_t *
iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1,
bus_dma_segment_t *segs, int nsegs, int error)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
if (!map->locked) {
KASSERT(map->cansleep,
("map not locked and not sleepable context %p", map));
/*
* We are called from the delayed context. Relock the
* driver.
*/
(tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK);
map->locked = true;
}
if (segs == NULL)
segs = tag->segments;
return (segs);
}
/*
* The limitations of the busdma KPI force the iommu to perform the
* actual unload, consisting of removing the map entries from the page
* tables, from the delayed context on i386, since mapping a page table
* page might require a sleep to succeed. The unfortunate consequence
* is that DMA requests can still be served for some time after the
* bus_dmamap_unload() call returns.
*
* On amd64, we assume that sf allocation cannot fail.
*/
static void
iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
struct iommu_ctx *ctx;
struct iommu_domain *domain;
struct iommu_map_entries_tailq entries;
tag = (struct bus_dma_tag_iommu *)dmat;
map = (struct bus_dmamap_iommu *)map1;
ctx = tag->ctx;
domain = ctx->domain;
atomic_add_long(&ctx->unloads, 1);
TAILQ_INIT(&entries);
IOMMU_DMAMAP_LOCK(map);
TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link);
IOMMU_DMAMAP_UNLOCK(map);
#if defined(IOMMU_DOMAIN_UNLOAD_SLEEP)
IOMMU_DOMAIN_LOCK(domain);
TAILQ_CONCAT(&domain->unload_entries, &entries, dmamap_link);
IOMMU_DOMAIN_UNLOCK(domain);
taskqueue_enqueue(domain->iommu->delayed_taskqueue,
&domain->unload_task);
#else
THREAD_NO_SLEEPING();
iommu_domain_unload(domain, &entries, false);
THREAD_SLEEPING_OK();
KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx));
#endif
}
static void
iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1,
bus_dmasync_op_t op)
{
struct bus_dmamap_iommu *map __unused;
map = (struct bus_dmamap_iommu *)map1;
kmsan_bus_dmamap_sync(&map->kmsan_mem, op);
}
#ifdef KMSAN
static void
iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem)
{
struct bus_dmamap_iommu *map;
map = (struct bus_dmamap_iommu *)map1;
if (map == NULL)
return;
memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc));
}
#endif
struct bus_dma_impl bus_dma_iommu_impl = {
.tag_create = iommu_bus_dma_tag_create,
.tag_destroy = iommu_bus_dma_tag_destroy,
.tag_set_domain = iommu_bus_dma_tag_set_domain,
.id_mapped = iommu_bus_dma_id_mapped,
.map_create = iommu_bus_dmamap_create,
.map_destroy = iommu_bus_dmamap_destroy,
.mem_alloc = iommu_bus_dmamem_alloc,
.mem_free = iommu_bus_dmamem_free,
.load_phys = iommu_bus_dmamap_load_phys,
.load_buffer = iommu_bus_dmamap_load_buffer,
.load_ma = iommu_bus_dmamap_load_ma,
.map_waitok = iommu_bus_dmamap_waitok,
.map_complete = iommu_bus_dmamap_complete,
.map_unload = iommu_bus_dmamap_unload,
.map_sync = iommu_bus_dmamap_sync,
#ifdef KMSAN
.load_kmsan = iommu_bus_dmamap_load_kmsan,
#endif
};
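/*
 * Taskqueue handler that replays delayed dmamap load requests from a
 * sleepable context.
 */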
static void
iommu_bus_task_dmamap(void *arg, int pending)
{
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
struct iommu_unit *unit;
unit = arg;
IOMMU_LOCK(unit);
while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) {
TAILQ_REMOVE(&unit->delayed_maps, map, delay_link);
IOMMU_UNLOCK(unit);
tag = map->tag;
map->cansleep = true;
map->locked = false;
bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map,
&map->mem, map->callback, map->callback_arg,
BUS_DMA_WAITOK);
map->cansleep = false;
if (map->locked) {
(tag->common.lockfunc)(tag->common.lockfuncarg,
BUS_DMA_UNLOCK);
} else
map->locked = true;
map->cansleep = false;
IOMMU_LOCK(unit);
}
IOMMU_UNLOCK(unit);
}
static void
iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map)
{
map->locked = false;
IOMMU_LOCK(unit);
TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link);
IOMMU_UNLOCK(unit);
taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task);
}
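/*
 * Initialize the per-unit busdma state: the "dma" enable tunable and
 * sysctl, the delayed map queue, and the taskqueue used for deferred
 * loads and unloads.
 */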
int
iommu_init_busdma(struct iommu_unit *unit)
{
int error;
unit->dma_enabled = 0;
error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled);
if (error == 0) /* compatibility */
TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled);
SYSCTL_ADD_INT(&unit->sysctl_ctx,
SYSCTL_CHILDREN(device_get_sysctl_tree(unit->dev)),
OID_AUTO, "dma", CTLFLAG_RD, &unit->dma_enabled, 0,
"DMA ops enabled");
TAILQ_INIT(&unit->delayed_maps);
TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit);
unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK,
taskqueue_thread_enqueue, &unit->delayed_taskqueue);
taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK,
"iommu%d busdma taskq", unit->unit);
return (0);
}
void
iommu_fini_busdma(struct iommu_unit *unit)
{
if (unit->delayed_taskqueue == NULL)
return;
taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task);
taskqueue_free(unit->delayed_taskqueue);
unit->delayed_taskqueue = NULL;
}
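/*
 * Install an identity mapping for the physical range
 * [start, start + length) into the domain backing the map.
 */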
int
bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t map1,
vm_paddr_t start, vm_size_t length, int flags)
{
struct bus_dma_tag_common *tc;
struct bus_dma_tag_iommu *tag;
struct bus_dmamap_iommu *map;
struct iommu_ctx *ctx;
struct iommu_domain *domain;
struct iommu_map_entry *entry;
vm_page_t *ma;
vm_size_t i;
int error;
bool waitok;
MPASS((start & PAGE_MASK) == 0);
MPASS((length & PAGE_MASK) == 0);
MPASS(length > 0);
MPASS(start + length >= start);
MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);
tc = (struct bus_dma_tag_common *)dmat;
if (tc->impl != &bus_dma_iommu_impl)
return (0);
tag = (struct bus_dma_tag_iommu *)dmat;
ctx = tag->ctx;
domain = ctx->domain;
map = (struct bus_dmamap_iommu *)map1;
waitok = (flags & BUS_DMA_NOWAIT) != 0;
entry = iommu_gas_alloc_entry(domain, waitok ? 0 : IOMMU_PGF_WAITOK);
if (entry == NULL)
return (ENOMEM);
entry->start = start;
entry->end = start + length;
ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
M_WAITOK : M_NOWAIT);
if (ma == NULL) {
iommu_gas_free_entry(entry);
return (ENOMEM);
}
for (i = 0; i < atop(length); i++) {
ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
VM_MEMATTR_DEFAULT);
}
error = iommu_gas_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE) |
IOMMU_MAP_ENTRY_MAP, waitok ? IOMMU_MF_CANWAIT : 0, ma);
if (error == 0) {
IOMMU_DMAMAP_LOCK(map);
TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
IOMMU_DMAMAP_UNLOCK(map);
} else {
iommu_gas_free_entry(entry);
}
for (i = 0; i < atop(length); i++)
vm_page_putfake(ma[i]);
free(ma, M_TEMP);
return (error);
}
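/*
 * Taskqueue handler that drains the domain's deferred unload queue,
 * removing the queued map entries from the page tables.
 */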
static void
iommu_domain_unload_task(void *arg, int pending)
{
struct iommu_domain *domain;
struct iommu_map_entries_tailq entries;
domain = arg;
TAILQ_INIT(&entries);
for (;;) {
IOMMU_DOMAIN_LOCK(domain);
TAILQ_SWAP(&domain->unload_entries, &entries,
iommu_map_entry, dmamap_link);
IOMMU_DOMAIN_UNLOCK(domain);
if (TAILQ_EMPTY(&entries))
break;
iommu_domain_unload(domain, &entries, true);
}
}
void
iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain,
const struct iommu_domain_map_ops *ops)
{
domain->ops = ops;
domain->iommu = unit;
TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain);
RB_INIT(&domain->rb_root);
TAILQ_INIT(&domain->unload_entries);
mtx_init(&domain->lock, "iodom", NULL, MTX_DEF);
}
void
iommu_domain_fini(struct iommu_domain *domain)
{
mtx_destroy(&domain->lock);
}
diff --git a/sys/x86/iommu/amd_ctx.c b/sys/x86/iommu/amd_ctx.c
index b3e85350a995..7e41ecf2dc04 100644
--- a/sys/x86/iommu/amd_ctx.c
+++ b/sys/x86/iommu/amd_ctx.c
@@ -1,639 +1,620 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
static MALLOC_DEFINE(M_AMDIOMMU_CTX, "amdiommu_ctx", "AMD IOMMU Context");
static MALLOC_DEFINE(M_AMDIOMMU_DOMAIN, "amdiommu_dom", "AMD IOMMU Domain");
static void amdiommu_unref_domain_locked(struct amdiommu_unit *unit,
struct amdiommu_domain *domain);
static struct amdiommu_dte *
amdiommu_get_dtep(struct amdiommu_ctx *ctx)
{
return (&CTX2AMD(ctx)->dev_tbl[ctx->context.rid]);
}
void
amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep)
{
struct amdiommu_domain *domain;
struct amdiommu_unit *unit;
domain = IODOM2DOM(entry->domain);
unit = DOM2AMD(domain);
/*
* If "free" is false, then the IOTLB invalidation must be performed
* synchronously. Otherwise, the caller might free the entry before
* dmar_qi_task() is finished processing it.
*/
if (free) {
AMDIOMMU_LOCK(unit);
iommu_qi_invalidate_locked(&domain->iodom, entry, true);
AMDIOMMU_UNLOCK(unit);
} else {
iommu_qi_invalidate_sync(&domain->iodom, entry->start,
entry->end - entry->start, cansleep);
iommu_domain_free_entry(entry, false);
}
}
static bool
amdiommu_domain_unload_emit_wait(struct amdiommu_domain *domain,
struct iommu_map_entry *entry)
{
return (true); /* XXXKIB */
}
void
amdiommu_domain_unload(struct iommu_domain *iodom,
struct iommu_map_entries_tailq *entries, bool cansleep)
{
struct amdiommu_domain *domain;
struct amdiommu_unit *unit;
struct iommu_map_entry *entry, *entry1;
int error __diagused;
domain = IODOM2DOM(iodom);
unit = DOM2AMD(domain);
TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
("not mapped entry %p %p", domain, entry));
error = iodom->ops->unmap(iodom, entry,
cansleep ? IOMMU_PGF_WAITOK : 0);
KASSERT(error == 0, ("unmap %p error %d", domain, error));
}
if (TAILQ_EMPTY(entries))
return;
AMDIOMMU_LOCK(unit);
while ((entry = TAILQ_FIRST(entries)) != NULL) {
TAILQ_REMOVE(entries, entry, dmamap_link);
iommu_qi_invalidate_locked(&domain->iodom, entry,
amdiommu_domain_unload_emit_wait(domain, entry));
}
AMDIOMMU_UNLOCK(unit);
}
static void
amdiommu_domain_destroy(struct amdiommu_domain *domain)
{
struct iommu_domain *iodom;
struct amdiommu_unit *unit;
iodom = DOM2IODOM(domain);
KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
("unfinished unloads %p", domain));
KASSERT(LIST_EMPTY(&iodom->contexts),
("destroying dom %p with contexts", domain));
KASSERT(domain->ctx_cnt == 0,
("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
KASSERT(domain->refs == 0,
("destroying dom %p with refs %d", domain, domain->refs));
if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
AMDIOMMU_DOMAIN_LOCK(domain);
iommu_gas_fini_domain(iodom);
AMDIOMMU_DOMAIN_UNLOCK(domain);
}
if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
if (domain->pgtbl_obj != NULL)
AMDIOMMU_DOMAIN_PGLOCK(domain);
amdiommu_domain_free_pgtbl(domain);
}
iommu_domain_fini(iodom);
unit = DOM2AMD(domain);
free_unr(unit->domids, domain->domain);
free(domain, M_AMDIOMMU_DOMAIN);
}
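/*
 * Return the exclusive upper bound of the address space that can be
 * translated with the given number of page table levels.
 */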
static iommu_gaddr_t
lvl2addr(int lvl)
{
int x;
x = IOMMU_PAGE_SHIFT + IOMMU_NPTEPGSHIFT * lvl;
/* Level 6 has only 8 bits for page table index */
if (x >= NBBY * sizeof(uint64_t))
return (-1ull);
return (1ull << x);
}
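/*
 * Choose the number of page table levels needed to cover the domain's
 * address space, limited by the unit's HATS capability.
 */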
static void
amdiommu_domain_init_pglvl(struct amdiommu_unit *unit,
struct amdiommu_domain *domain)
{
iommu_gaddr_t end;
int hats, i;
uint64_t efr_hats;
end = DOM2IODOM(domain)->end;
for (i = AMDIOMMU_PGTBL_MAXLVL; i > 1; i--) {
if (lvl2addr(i) >= end && lvl2addr(i - 1) < end)
break;
}
domain->pglvl = i;
efr_hats = unit->efr & AMDIOMMU_EFR_HATS_MASK;
switch (efr_hats) {
case AMDIOMMU_EFR_HATS_6LVL:
hats = 6;
break;
case AMDIOMMU_EFR_HATS_5LVL:
hats = 5;
break;
case AMDIOMMU_EFR_HATS_4LVL:
hats = 4;
break;
default:
printf("amdiommu%d: HATS %#jx (reserved) ignoring\n",
unit->iommu.unit, (uintmax_t)efr_hats);
return;
}
if (hats >= domain->pglvl)
return;
printf("amdiommu%d: domain %d HATS %d pglvl %d reducing to HATS\n",
unit->iommu.unit, domain->domain, hats, domain->pglvl);
domain->pglvl = hats;
domain->iodom.end = lvl2addr(hats);
}
static struct amdiommu_domain *
amdiommu_domain_alloc(struct amdiommu_unit *unit, bool id_mapped)
{
struct amdiommu_domain *domain;
struct iommu_domain *iodom;
int error, id;
id = alloc_unr(unit->domids);
if (id == -1)
return (NULL);
domain = malloc(sizeof(*domain), M_AMDIOMMU_DOMAIN, M_WAITOK | M_ZERO);
iodom = DOM2IODOM(domain);
domain->domain = id;
LIST_INIT(&iodom->contexts);
iommu_domain_init(AMD2IOMMU(unit), iodom, &amdiommu_domain_map_ops);
domain->unit = unit;
domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
amdiommu_domain_init_pglvl(unit, domain);
iommu_gas_init_domain(DOM2IODOM(domain));
if (id_mapped) {
domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
} else {
error = amdiommu_domain_alloc_pgtbl(domain);
if (error != 0)
goto fail;
/* Disable local apic region access */
error = iommu_gas_reserve_region(iodom, 0xfee00000,
0xfeefffff + 1, &iodom->msi_entry);
if (error != 0)
goto fail;
}
return (domain);
fail:
amdiommu_domain_destroy(domain);
return (NULL);
}
static struct amdiommu_ctx *
amdiommu_ctx_alloc(struct amdiommu_domain *domain, uint16_t rid)
{
struct amdiommu_ctx *ctx;
ctx = malloc(sizeof(*ctx), M_AMDIOMMU_CTX, M_WAITOK | M_ZERO);
ctx->context.domain = DOM2IODOM(domain);
ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
M_AMDIOMMU_CTX, M_WAITOK | M_ZERO);
ctx->context.rid = rid;
ctx->context.refs = 1;
return (ctx);
}
static void
amdiommu_ctx_link(struct amdiommu_ctx *ctx)
{
struct amdiommu_domain *domain;
domain = CTX2DOM(ctx);
IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
KASSERT(domain->refs >= domain->ctx_cnt,
("dom %p ref underflow %d %d", domain, domain->refs,
domain->ctx_cnt));
domain->refs++;
domain->ctx_cnt++;
LIST_INSERT_HEAD(&domain->iodom.contexts, &ctx->context, link);
}
static void
amdiommu_ctx_unlink(struct amdiommu_ctx *ctx)
{
struct amdiommu_domain *domain;
domain = CTX2DOM(ctx);
IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
KASSERT(domain->refs > 0,
("domain %p ctx dtr refs %d", domain, domain->refs));
KASSERT(domain->ctx_cnt >= domain->refs,
("domain %p ctx dtr refs %d ctx_cnt %d", domain,
domain->refs, domain->ctx_cnt));
domain->refs--;
domain->ctx_cnt--;
LIST_REMOVE(&ctx->context, link);
}
struct amdiommu_ctx *
amdiommu_find_ctx_locked(struct amdiommu_unit *unit, uint16_t rid)
{
struct amdiommu_domain *domain;
struct iommu_ctx *ctx;
AMDIOMMU_ASSERT_LOCKED(unit);
LIST_FOREACH(domain, &unit->domains, link) {
LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
if (ctx->rid == rid)
return (IOCTX2CTX(ctx));
}
}
return (NULL);
}
struct amdiommu_domain *
amdiommu_find_domain(struct amdiommu_unit *unit, uint16_t rid)
{
struct amdiommu_domain *domain;
struct iommu_ctx *ctx;
AMDIOMMU_LOCK(unit);
LIST_FOREACH(domain, &unit->domains, link) {
LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
if (ctx->rid == rid)
break;
}
}
AMDIOMMU_UNLOCK(unit);
return (domain);
}
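/*
 * Drop a reference on the context.  On the last reference, clear and
 * invalidate the DTE, unlink the context, and release its domain
 * reference.  The unit lock is dropped before returning.
 */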
static void
amdiommu_free_ctx_locked(struct amdiommu_unit *unit, struct amdiommu_ctx *ctx)
{
struct amdiommu_dte *dtep;
struct amdiommu_domain *domain;
AMDIOMMU_ASSERT_LOCKED(unit);
KASSERT(ctx->context.refs >= 1,
("amdiommu %p ctx %p refs %u", unit, ctx, ctx->context.refs));
/*
* If our reference is not last, only the dereference should
* be performed.
*/
if (ctx->context.refs > 1) {
ctx->context.refs--;
AMDIOMMU_UNLOCK(unit);
return;
}
KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
("lost ref on disabled ctx %p", ctx));
/*
* Otherwise, the device table entry must be cleared before
* the page table is destroyed.
*/
dtep = amdiommu_get_dtep(ctx);
dtep->v = 0;
atomic_thread_fence_rel();
memset(dtep, 0, sizeof(*dtep));
domain = CTX2DOM(ctx);
amdiommu_qi_invalidate_ctx_locked_nowait(ctx);
amdiommu_qi_invalidate_ir_locked_nowait(unit, ctx->context.rid);
amdiommu_qi_invalidate_all_pages_locked_nowait(domain);
amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx)));
if (unit->irte_enabled)
amdiommu_ctx_fini_irte(ctx);
amdiommu_ctx_unlink(ctx);
free(ctx->context.tag, M_AMDIOMMU_CTX);
free(ctx, M_AMDIOMMU_CTX);
amdiommu_unref_domain_locked(unit, domain);
}
-static void
-amdiommu_free_ctx(struct amdiommu_ctx *ctx)
-{
- struct amdiommu_unit *unit;
-
- unit = CTX2AMD(ctx);
- AMDIOMMU_LOCK(unit);
- amdiommu_free_ctx_locked(unit, ctx);
-}
-
static void
amdiommu_unref_domain_locked(struct amdiommu_unit *unit,
struct amdiommu_domain *domain)
{
AMDIOMMU_ASSERT_LOCKED(unit);
KASSERT(domain->refs >= 1,
("amdiommu%d domain %p refs %u", unit->iommu.unit, domain,
domain->refs));
KASSERT(domain->refs > domain->ctx_cnt,
("amdiommu%d domain %p refs %d ctx_cnt %d", unit->iommu.unit,
domain, domain->refs, domain->ctx_cnt));
if (domain->refs > 1) {
domain->refs--;
AMDIOMMU_UNLOCK(unit);
return;
}
LIST_REMOVE(domain, link);
AMDIOMMU_UNLOCK(unit);
taskqueue_drain(unit->iommu.delayed_taskqueue,
&domain->iodom.unload_task);
amdiommu_domain_destroy(domain);
}
static void
dte_entry_init_one(struct amdiommu_dte *dtep, struct amdiommu_ctx *ctx,
vm_page_t pgtblr, uint8_t dte, uint32_t edte)
{
struct amdiommu_domain *domain;
struct amdiommu_unit *unit;
domain = CTX2DOM(ctx);
unit = DOM2AMD(domain);
dtep->tv = 1;
/* dtep->had not used for now */
dtep->ir = 1;
dtep->iw = 1;
dtep->domainid = domain->domain;
dtep->pioctl = AMDIOMMU_DTE_PIOCTL_DIS;
/* fill device interrupt passing hints from IVHD. */
dtep->initpass = (dte & ACPI_IVHD_INIT_PASS) != 0;
dtep->eintpass = (dte & ACPI_IVHD_EINT_PASS) != 0;
dtep->nmipass = (dte & ACPI_IVHD_NMI_PASS) != 0;
dtep->sysmgt = (dte & ACPI_IVHD_SYSTEM_MGMT) >> 4;
dtep->lint0pass = (dte & ACPI_IVHD_LINT0_PASS) != 0;
dtep->lint1pass = (dte & ACPI_IVHD_LINT1_PASS) != 0;
if (unit->irte_enabled) {
dtep->iv = 1;
dtep->i = 0;
dtep->inttablen = ilog2(unit->irte_nentries);
dtep->intrroot = pmap_kextract(unit->irte_x2apic ?
(vm_offset_t)ctx->irtx2 :
(vm_offset_t)ctx->irtb) >> 6;
dtep->intctl = AMDIOMMU_DTE_INTCTL_MAP;
}
if ((DOM2IODOM(domain)->flags & IOMMU_DOMAIN_IDMAP) != 0) {
dtep->pgmode = AMDIOMMU_DTE_PGMODE_1T1;
} else {
MPASS(domain->pglvl > 0 && domain->pglvl <=
AMDIOMMU_PGTBL_MAXLVL);
dtep->pgmode = domain->pglvl;
dtep->ptroot = VM_PAGE_TO_PHYS(pgtblr) >> 12;
}
atomic_thread_fence_rel();
dtep->v = 1;
}
static void
dte_entry_init(struct amdiommu_ctx *ctx, bool move, uint8_t dte, uint32_t edte)
{
struct amdiommu_dte *dtep;
struct amdiommu_unit *unit;
struct amdiommu_domain *domain;
int i;
domain = CTX2DOM(ctx);
unit = DOM2AMD(domain);
dtep = amdiommu_get_dtep(ctx);
KASSERT(dtep->v == 0,
("amdiommu%d initializing valid dte @%p %#jx",
CTX2AMD(ctx)->iommu.unit, dtep, (uintmax_t)(*(uint64_t *)dtep)));
if (iommu_is_buswide_ctx(AMD2IOMMU(unit),
PCI_RID2BUS(ctx->context.rid))) {
MPASS(!move);
for (i = 0; i <= PCI_BUSMAX; i++) {
dte_entry_init_one(&dtep[i], ctx, domain->pgtblr,
dte, edte);
}
} else {
dte_entry_init_one(dtep, ctx, domain->pgtblr, dte, edte);
}
}
struct amdiommu_ctx *
amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit, device_t dev, uint16_t rid,
int dev_domain, bool id_mapped, bool rmrr_init, uint8_t dte, uint32_t edte)
{
struct amdiommu_domain *domain, *domain1;
struct amdiommu_ctx *ctx, *ctx1;
int bus, slot, func;
if (dev != NULL) {
bus = pci_get_bus(dev);
slot = pci_get_slot(dev);
func = pci_get_function(dev);
} else {
bus = PCI_RID2BUS(rid);
slot = PCI_RID2SLOT(rid);
func = PCI_RID2FUNC(rid);
}
AMDIOMMU_LOCK(unit);
KASSERT(!iommu_is_buswide_ctx(AMD2IOMMU(unit), bus) ||
(slot == 0 && func == 0),
("iommu%d pci%d:%d:%d get_ctx for buswide", AMD2IOMMU(unit)->unit,
bus, slot, func));
ctx = amdiommu_find_ctx_locked(unit, rid);
if (ctx == NULL) {
/*
* Perform the allocations which require sleep or have
* higher chance to succeed if the sleep is allowed.
*/
AMDIOMMU_UNLOCK(unit);
domain1 = amdiommu_domain_alloc(unit, id_mapped);
if (domain1 == NULL)
return (NULL);
if (!id_mapped) {
/*
* XXXKIB IVMD seems to be less significant
* and less used on AMD than RMRR on Intel.
* Not implemented for now.
*/
}
ctx1 = amdiommu_ctx_alloc(domain1, rid);
amdiommu_ctx_init_irte(ctx1);
AMDIOMMU_LOCK(unit);
/*
* Recheck the contexts, other thread might have
* already allocated needed one.
*/
ctx = amdiommu_find_ctx_locked(unit, rid);
if (ctx == NULL) {
domain = domain1;
ctx = ctx1;
amdiommu_ctx_link(ctx);
ctx->context.tag->owner = dev;
iommu_device_tag_init(CTX2IOCTX(ctx), dev);
LIST_INSERT_HEAD(&unit->domains, domain, link);
dte_entry_init(ctx, false, dte, edte);
amdiommu_qi_invalidate_ctx_locked(ctx);
if (dev != NULL) {
device_printf(dev,
"amdiommu%d pci%d:%d:%d:%d rid %x domain %d "
"%s-mapped\n",
AMD2IOMMU(unit)->unit, unit->unit_dom,
bus, slot, func, rid, domain->domain,
id_mapped ? "id" : "re");
}
} else {
amdiommu_domain_destroy(domain1);
/* Nothing needs to be done to destroy ctx1. */
free(ctx1, M_AMDIOMMU_CTX);
domain = CTX2DOM(ctx);
ctx->context.refs++; /* tag referenced us */
}
} else {
domain = CTX2DOM(ctx);
if (ctx->context.tag->owner == NULL)
ctx->context.tag->owner = dev;
ctx->context.refs++; /* tag referenced us */
}
AMDIOMMU_UNLOCK(unit);
return (ctx);
}
struct iommu_ctx *
amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
bool id_mapped, bool rmrr_init)
{
struct amdiommu_unit *unit;
struct amdiommu_ctx *ret;
int error;
uint32_t edte;
uint16_t rid1;
uint8_t dte;
error = amdiommu_find_unit(dev, &unit, &rid1, &dte, &edte,
bootverbose);
if (error != 0)
return (NULL);
if (AMD2IOMMU(unit) != iommu) /* XXX complain loudly */
return (NULL);
ret = amdiommu_get_ctx_for_dev(unit, dev, rid1, pci_get_domain(dev),
id_mapped, rmrr_init, dte, edte);
return (CTX2IOCTX(ret));
}
void
amdiommu_free_ctx_locked_method(struct iommu_unit *iommu,
struct iommu_ctx *context)
{
struct amdiommu_unit *unit;
struct amdiommu_ctx *ctx;
unit = IOMMU2AMD(iommu);
ctx = IOCTX2CTX(context);
amdiommu_free_ctx_locked(unit, ctx);
}
-
-void
-amdiommu_free_ctx_method(struct iommu_ctx *context)
-{
- struct amdiommu_ctx *ctx;
-
- ctx = IOCTX2CTX(context);
- amdiommu_free_ctx(ctx);
-}
diff --git a/sys/x86/iommu/amd_drv.c b/sys/x86/iommu/amd_drv.c
index 395cefc65caa..62315902fcd9 100644
--- a/sys/x86/iommu/amd_drv.c
+++ b/sys/x86/iommu/amd_drv.c
@@ -1,1205 +1,1204 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_acpi.h"
#include "opt_ddb.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "pcib_if.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
static int amdiommu_enable = 0;
/*
 * All enumerated AMD IOMMU units.
 * Access is unlocked; the list is not modified after early
 * single-threaded startup.
 */
static TAILQ_HEAD(, amdiommu_unit) amdiommu_units =
TAILQ_HEAD_INITIALIZER(amdiommu_units);
static u_int
ivrs_info_to_unit_id(UINT32 info)
{
return ((info & ACPI_IVHD_UNIT_ID_MASK) >> 8);
}
typedef bool (*amdiommu_itercc_t)(void *, void *);
typedef bool (*amdiommu_iter40_t)(ACPI_IVRS_HARDWARE2 *, void *);
typedef bool (*amdiommu_iter11_t)(ACPI_IVRS_HARDWARE2 *, void *);
typedef bool (*amdiommu_iter10_t)(ACPI_IVRS_HARDWARE1 *, void *);
static bool
amdiommu_ivrs_iterate_tbl_typed(amdiommu_itercc_t iter, void *arg,
int type, ACPI_TABLE_IVRS *ivrs_tbl)
{
char *ptr, *ptrend;
bool done;
done = false;
ptr = (char *)ivrs_tbl + sizeof(*ivrs_tbl);
ptrend = (char *)ivrs_tbl + ivrs_tbl->Header.Length;
for (;;) {
ACPI_IVRS_HEADER *ivrsh;
if (ptr >= ptrend)
break;
ivrsh = (ACPI_IVRS_HEADER *)ptr;
if (ivrsh->Length <= 0) {
printf("amdiommu_iterate_tbl: corrupted IVRS table, "
"length %d\n", ivrsh->Length);
break;
}
ptr += ivrsh->Length;
if (ivrsh->Type == type) {
done = iter((void *)ivrsh, arg);
if (done)
break;
}
}
return (done);
}
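/*
 * Stand-alone sketch (not part of the driver, hypothetical types) of the
 * variable-length subtable walk performed above: each IVHD subtable starts
 * with a header whose Length field tells how far to advance, and a zero
 * length is treated as table corruption so the loop cannot spin forever.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct ivrs_subtbl_hdr {
	uint8_t		type;
	uint8_t		flags;
	uint16_t	length;		/* total subtable size in bytes */
};

typedef bool (*subtbl_cb_t)(const struct ivrs_subtbl_hdr *, void *);

static bool
walk_subtables(const uint8_t *tbl, size_t tbl_len, uint8_t want_type,
    subtbl_cb_t cb, void *arg)
{
	const uint8_t *ptr = tbl, *end = tbl + tbl_len;

	while (ptr + sizeof(struct ivrs_subtbl_hdr) <= end) {
		const struct ivrs_subtbl_hdr *h =
		    (const struct ivrs_subtbl_hdr *)ptr;

		if (h->length == 0)
			break;		/* corrupted table, bail out */
		if (h->type == want_type && cb(h, arg))
			return (true);	/* callback ended the walk */
		ptr += h->length;	/* advance by the subtable length */
	}
	return (false);
}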
/*
 * Walk over the IVRS table, calling the callback iterators in priority
 * order: 0x40 subtables first, then 0x11, then 0x10.  The first
 * iterator that returns true ends the walk.
 * Returns true if any iterator returned true, false otherwise.
 */
static bool
amdiommu_ivrs_iterate_tbl(amdiommu_iter40_t iter40, amdiommu_iter11_t iter11,
amdiommu_iter10_t iter10, void *arg)
{
ACPI_TABLE_IVRS *ivrs_tbl;
ACPI_STATUS status;
bool done;
status = AcpiGetTable(ACPI_SIG_IVRS, 1,
(ACPI_TABLE_HEADER **)&ivrs_tbl);
if (ACPI_FAILURE(status))
return (false);
done = false;
if (iter40 != NULL)
done = amdiommu_ivrs_iterate_tbl_typed(
(amdiommu_itercc_t)iter40, arg,
ACPI_IVRS_TYPE_HARDWARE3, ivrs_tbl);
if (!done && iter11 != NULL)
done = amdiommu_ivrs_iterate_tbl_typed(
(amdiommu_itercc_t)iter11, arg, ACPI_IVRS_TYPE_HARDWARE2,
ivrs_tbl);
if (!done && iter10 != NULL)
done = amdiommu_ivrs_iterate_tbl_typed(
(amdiommu_itercc_t)iter10, arg, ACPI_IVRS_TYPE_HARDWARE1,
ivrs_tbl);
AcpiPutTable((ACPI_TABLE_HEADER *)ivrs_tbl);
return (done);
}
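/*
 * Stand-alone usage sketch (hypothetical names) of the fallback order
 * implemented above: the newest subtable type is tried first, the first
 * callback that reports a match ends the lookup, and older subtable
 * types are only consulted when the newer ones say nothing.
 */
#include <stdbool.h>
#include <stddef.h>

static bool
pick_first_match(bool (*try40)(void *), bool (*try11)(void *),
    bool (*try10)(void *), void *arg)
{
	if (try40 != NULL && try40(arg))
		return (true);
	if (try11 != NULL && try11(arg))
		return (true);
	return (try10 != NULL && try10(arg));
}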
struct ivhd_lookup_data {
struct amdiommu_unit *sc;
uint16_t devid;
};
static bool
ivrs_lookup_ivhd_0x40(ACPI_IVRS_HARDWARE2 *h2, void *arg)
{
struct ivhd_lookup_data *ildp;
KASSERT(h2->Header.Type == ACPI_IVRS_TYPE_HARDWARE2 ||
h2->Header.Type == ACPI_IVRS_TYPE_HARDWARE3,
("Misparsed IVHD, h2 type %#x", h2->Header.Type));
ildp = arg;
if (h2->Header.DeviceId != ildp->devid)
return (false);
ildp->sc->unit_dom = h2->PciSegmentGroup;
ildp->sc->iommu.unit = ivrs_info_to_unit_id(h2->Info);
ildp->sc->efr = h2->EfrRegisterImage;
return (true);
}
static bool
ivrs_lookup_ivhd_0x10(ACPI_IVRS_HARDWARE1 *h1, void *arg)
{
struct ivhd_lookup_data *ildp;
KASSERT(h1->Header.Type == ACPI_IVRS_TYPE_HARDWARE1,
("Misparsed IVHD, h1 type %#x", h1->Header.Type));
ildp = arg;
if (h1->Header.DeviceId != ildp->devid)
return (false);
ildp->sc->unit_dom = h1->PciSegmentGroup;
ildp->sc->iommu.unit = ivrs_info_to_unit_id(h1->Info);
return (true);
}
static u_int
amdiommu_devtbl_sz(struct amdiommu_unit *sc __unused)
{
return (sizeof(struct amdiommu_dte) * (1 << 16));
}
static void
amdiommu_free_dev_tbl(struct amdiommu_unit *sc)
{
u_int devtbl_sz;
devtbl_sz = amdiommu_devtbl_sz(sc);
pmap_qremove((vm_offset_t)sc->dev_tbl, atop(devtbl_sz));
kva_free((vm_offset_t)sc->dev_tbl, devtbl_sz);
sc->dev_tbl = NULL;
vm_object_deallocate(sc->devtbl_obj);
sc->devtbl_obj = NULL;
}
static int
amdiommu_create_dev_tbl(struct amdiommu_unit *sc)
{
vm_offset_t seg_vaddr;
u_int devtbl_sz, dom, i, reclaimno, segnum_log, segnum, seg_sz;
int error;
segnum_log = (sc->efr & AMDIOMMU_EFR_DEVTBLSEG_MASK) >>
AMDIOMMU_EFR_DEVTBLSEG_SHIFT;
segnum = 1 << segnum_log;
devtbl_sz = amdiommu_devtbl_sz(sc);
seg_sz = devtbl_sz / segnum;
sc->devtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, atop(devtbl_sz),
VM_PROT_ALL, 0, NULL);
if (bus_get_domain(sc->iommu.dev, &dom) == 0)
sc->devtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom);
sc->hw_ctrl &= ~AMDIOMMU_CTRL_DEVTABSEG_MASK;
sc->hw_ctrl |= (uint64_t)segnum_log << ilog2(AMDIOMMU_CTRL_DEVTABSEG_2);
sc->hw_ctrl |= AMDIOMMU_CTRL_COHERENT;
amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl);
seg_vaddr = kva_alloc(devtbl_sz);
if (seg_vaddr == 0)
return (ENOMEM);
sc->dev_tbl = (void *)seg_vaddr;
for (i = 0; i < segnum; i++) {
vm_page_t m;
uint64_t rval;
u_int reg;
for (reclaimno = 0; reclaimno < 3; reclaimno++) {
VM_OBJECT_WLOCK(sc->devtbl_obj);
m = vm_page_alloc_contig(sc->devtbl_obj,
i * atop(seg_sz),
VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY,
atop(seg_sz), 0, ~0ul, IOMMU_PAGE_SIZE, 0,
VM_MEMATTR_DEFAULT);
VM_OBJECT_WUNLOCK(sc->devtbl_obj);
if (m != NULL)
break;
error = vm_page_reclaim_contig(VM_ALLOC_NORMAL,
atop(seg_sz), 0, ~0ul, IOMMU_PAGE_SIZE, 0);
if (error != 0)
vm_wait(sc->devtbl_obj);
}
if (m == NULL) {
amdiommu_free_dev_tbl(sc);
return (ENOMEM);
}
rval = VM_PAGE_TO_PHYS(m) | (atop(seg_sz) - 1);
for (u_int j = 0; j < atop(seg_sz);
j++, seg_vaddr += PAGE_SIZE, m++) {
pmap_zero_page(m);
pmap_qenter(seg_vaddr, &m, 1);
}
reg = i == 0 ? AMDIOMMU_DEVTAB_BASE : AMDIOMMU_DEVTAB_S1_BASE +
i - 1;
amdiommu_write8(sc, reg, rval);
}
return (0);
}
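/*
 * Minimal sketch of the device-table sizing arithmetic used above, with
 * hypothetical constants: the table covers all 2^16 device IDs, may be
 * split into 2^segnum_log equal segments, and each segment base register
 * is programmed with the segment's physical address OR'ed with its length
 * in 4K pages minus one (mirroring the rval computation in
 * amdiommu_create_dev_tbl()).  The 32-byte DTE size is an assumption here;
 * the driver uses sizeof(struct amdiommu_dte).
 */
#include <stdint.h>

#define SKETCH_DTE_SIZE		32ULL	/* assumed bytes per DTE */
#define SKETCH_PAGE_SHIFT	12	/* 4K pages */

static uint64_t
devtbl_seg_reg_value(uint64_t seg_phys, unsigned segnum_log)
{
	uint64_t devtbl_sz, seg_sz, seg_pages;

	devtbl_sz = SKETCH_DTE_SIZE << 16;	/* whole table, in bytes */
	seg_sz = devtbl_sz >> segnum_log;	/* one segment, in bytes */
	seg_pages = seg_sz >> SKETCH_PAGE_SHIFT;
	return (seg_phys | (seg_pages - 1));	/* DEVTAB base register value */
}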
static int
amdiommu_cmd_event_intr(void *arg)
{
struct amdiommu_unit *unit;
uint64_t status;
unit = arg;
status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS);
if ((status & AMDIOMMU_CMDEVS_COMWAITINT) != 0) {
amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
AMDIOMMU_CMDEVS_COMWAITINT);
taskqueue_enqueue(unit->x86c.qi_taskqueue,
&unit->x86c.qi_task);
}
if ((status & (AMDIOMMU_CMDEVS_EVLOGINT |
AMDIOMMU_CMDEVS_EVOVRFLW)) != 0)
amdiommu_event_intr(unit, status);
return (FILTER_HANDLED);
}
static int
amdiommu_setup_intr(struct amdiommu_unit *sc)
{
int error, msi_count, msix_count;
msi_count = pci_msi_count(sc->iommu.dev);
msix_count = pci_msix_count(sc->iommu.dev);
if (msi_count == 0 && msix_count == 0) {
device_printf(sc->iommu.dev, "needs MSI-class intr\n");
return (ENXIO);
}
#if 0
	/*
	 * XXXKIB how is MSI-X supposed to be organized for a BAR-less
	 * function?  Practically, available hardware implements only
	 * one IOMMU unit per function and uses MSI.
	 */
if (msix_count > 0) {
sc->msix_table = bus_alloc_resource_any(sc->iommu.dev,
SYS_RES_MEMORY, &sc->msix_tab_rid, RF_ACTIVE);
if (sc->msix_table == NULL)
return (ENXIO);
if (sc->msix_pba_rid != sc->msix_tab_rid) {
/* Separate BAR for PBA */
sc->msix_pba = bus_alloc_resource_any(sc->iommu.dev,
SYS_RES_MEMORY,
&sc->msix_pba_rid, RF_ACTIVE);
if (sc->msix_pba == NULL) {
bus_release_resource(sc->iommu.dev,
SYS_RES_MEMORY, &sc->msix_tab_rid,
sc->msix_table);
return (ENXIO);
}
}
}
#endif
error = ENXIO;
if (msix_count > 0) {
error = pci_alloc_msix(sc->iommu.dev, &msix_count);
if (error == 0)
sc->numirqs = msix_count;
}
if (error != 0 && msi_count > 0) {
error = pci_alloc_msi(sc->iommu.dev, &msi_count);
if (error == 0)
sc->numirqs = msi_count;
}
if (error != 0) {
device_printf(sc->iommu.dev,
"Failed to allocate MSI/MSI-x (%d)\n", error);
return (ENXIO);
}
	/*
	 * XXXKIB The spec states that MISC0.MsiNum must be zero for an
	 * IOMMU using MSI interrupts.  But at least one BIOS programs
	 * '2' there, making the driver use the wrong rid and causing
	 * the command/event interrupt to be dropped as stray.  Work
	 * around it by force-assuming MsiNum is zero for MSI.
	 */
sc->irq_cmdev_rid = 1;
if (msix_count > 0) {
sc->irq_cmdev_rid += pci_read_config(sc->iommu.dev,
sc->seccap_reg + PCIR_AMDIOMMU_MISC0, 4) &
PCIM_AMDIOMMU_MISC0_MSINUM_MASK;
}
sc->irq_cmdev = bus_alloc_resource_any(sc->iommu.dev, SYS_RES_IRQ,
&sc->irq_cmdev_rid, RF_SHAREABLE | RF_ACTIVE);
if (sc->irq_cmdev == NULL) {
device_printf(sc->iommu.dev,
"unable to map CMD/EV interrupt\n");
return (ENXIO);
}
error = bus_setup_intr(sc->iommu.dev, sc->irq_cmdev,
INTR_TYPE_MISC, amdiommu_cmd_event_intr, NULL, sc,
&sc->irq_cmdev_cookie);
if (error != 0) {
device_printf(sc->iommu.dev,
"unable to setup interrupt (%d)\n", error);
return (ENXIO);
}
bus_describe_intr(sc->iommu.dev, sc->irq_cmdev, sc->irq_cmdev_cookie,
"cmdev");
if (x2apic_mode) {
AMDIOMMU_LOCK(sc);
sc->hw_ctrl |= AMDIOMMU_CTRL_GA_EN | AMDIOMMU_CTRL_XT_EN;
amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl);
// XXXKIB AMDIOMMU_CTRL_INTCAPXT_EN and program x2APIC_CTRL
AMDIOMMU_UNLOCK(sc);
}
return (0);
}
static int
amdiommu_probe(device_t dev)
{
int seccap_reg;
int error;
uint32_t cap_h, cap_type, cap_rev;
if (acpi_disabled("amdiommu"))
return (ENXIO);
TUNABLE_INT_FETCH("hw.amdiommu.enable", &amdiommu_enable);
if (!amdiommu_enable)
return (ENXIO);
if (pci_get_class(dev) != PCIC_BASEPERIPH ||
pci_get_subclass(dev) != PCIS_BASEPERIPH_IOMMU)
return (ENXIO);
error = pci_find_cap(dev, PCIY_SECDEV, &seccap_reg);
if (error != 0 || seccap_reg == 0)
return (ENXIO);
cap_h = pci_read_config(dev, seccap_reg + PCIR_AMDIOMMU_CAP_HEADER,
4);
cap_type = cap_h & PCIM_AMDIOMMU_CAP_TYPE_MASK;
cap_rev = cap_h & PCIM_AMDIOMMU_CAP_REV_MASK;
if (cap_type != PCIM_AMDIOMMU_CAP_TYPE_VAL &&
cap_rev != PCIM_AMDIOMMU_CAP_REV_VAL)
return (ENXIO);
device_set_desc(dev, "DMA remap");
return (BUS_PROBE_SPECIFIC);
}
static int
amdiommu_attach(device_t dev)
{
struct amdiommu_unit *sc;
struct ivhd_lookup_data ild;
int error;
uint32_t base_low, base_high;
bool res;
sc = device_get_softc(dev);
sc->iommu.dev = dev;
error = pci_find_cap(dev, PCIY_SECDEV, &sc->seccap_reg);
if (error != 0 || sc->seccap_reg == 0)
return (ENXIO);
base_low = pci_read_config(dev, sc->seccap_reg +
PCIR_AMDIOMMU_BASE_LOW, 4);
base_high = pci_read_config(dev, sc->seccap_reg +
PCIR_AMDIOMMU_BASE_HIGH, 4);
sc->mmio_base = (base_low & PCIM_AMDIOMMU_BASE_LOW_ADDRM) |
((uint64_t)base_high << 32);
sc->device_id = pci_get_rid(dev);
ild.sc = sc;
ild.devid = sc->device_id;
res = amdiommu_ivrs_iterate_tbl(ivrs_lookup_ivhd_0x40,
ivrs_lookup_ivhd_0x40, ivrs_lookup_ivhd_0x10, &ild);
if (!res) {
device_printf(dev, "Cannot find IVHD\n");
return (ENXIO);
}
mtx_init(&sc->iommu.lock, "amdihw", NULL, MTX_DEF);
sc->domids = new_unrhdr(0, 0xffff, &sc->iommu.lock);
LIST_INIT(&sc->domains);
sysctl_ctx_init(&sc->iommu.sysctl_ctx);
sc->mmio_sz = ((sc->efr & AMDIOMMU_EFR_PC_SUP) != 0 ? 512 : 16) *
1024;
sc->mmio_rid = AMDIOMMU_RID;
error = bus_set_resource(dev, SYS_RES_MEMORY, AMDIOMMU_RID,
sc->mmio_base, sc->mmio_sz);
if (error != 0) {
device_printf(dev,
"bus_set_resource %#jx-%#jx failed, error %d\n",
(uintmax_t)sc->mmio_base, (uintmax_t)sc->mmio_base +
sc->mmio_sz, error);
error = ENXIO;
goto errout1;
}
sc->mmio_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &sc->mmio_rid,
sc->mmio_base, sc->mmio_base + sc->mmio_sz - 1, sc->mmio_sz,
RF_ALLOCATED | RF_ACTIVE | RF_SHAREABLE);
if (sc->mmio_res == NULL) {
device_printf(dev,
"bus_alloc_resource %#jx-%#jx failed\n",
(uintmax_t)sc->mmio_base, (uintmax_t)sc->mmio_base +
sc->mmio_sz);
error = ENXIO;
goto errout2;
}
sc->hw_ctrl = amdiommu_read8(sc, AMDIOMMU_CTRL);
if (bootverbose)
device_printf(dev, "ctrl reg %#jx\n", (uintmax_t)sc->hw_ctrl);
if ((sc->hw_ctrl & AMDIOMMU_CTRL_EN) != 0) {
device_printf(dev, "CTRL_EN is set, bailing out\n");
error = EBUSY;
goto errout2;
}
iommu_high = BUS_SPACE_MAXADDR;
error = amdiommu_create_dev_tbl(sc);
if (error != 0)
goto errout3;
error = amdiommu_init_cmd(sc);
if (error != 0)
goto errout4;
error = amdiommu_init_event(sc);
if (error != 0)
goto errout5;
error = amdiommu_setup_intr(sc);
if (error != 0)
goto errout6;
error = iommu_init_busdma(AMD2IOMMU(sc));
if (error != 0)
goto errout7;
error = amdiommu_init_irt(sc);
if (error != 0)
goto errout8;
	/*
	 * Unlike DMAR, the AMD IOMMU does not process the command
	 * queue unless the IOMMU is enabled.  But since a non-present
	 * devtab entry makes the IOMMU ignore transactions from the
	 * corresponding initiator, DMA and interrupt remapping are
	 * de-facto disabled even with the unit enabled.
	 */
AMDIOMMU_LOCK(sc);
sc->hw_ctrl |= AMDIOMMU_CTRL_EN;
amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl);
if (bootverbose) {
printf("amdiommu%d: enabled translation\n",
AMD2IOMMU(sc)->unit);
}
AMDIOMMU_UNLOCK(sc);
TAILQ_INSERT_TAIL(&amdiommu_units, sc, unit_next);
return (0);
errout8:
iommu_fini_busdma(&sc->iommu);
errout7:
pci_release_msi(dev);
errout6:
amdiommu_fini_event(sc);
errout5:
amdiommu_fini_cmd(sc);
errout4:
amdiommu_free_dev_tbl(sc);
errout3:
bus_release_resource(dev, SYS_RES_MEMORY, sc->mmio_rid, sc->mmio_res);
errout2:
bus_delete_resource(dev, SYS_RES_MEMORY, sc->mmio_rid);
errout1:
sysctl_ctx_free(&sc->iommu.sysctl_ctx);
delete_unrhdr(sc->domids);
mtx_destroy(&sc->iommu.lock);
return (error);
}
static int
amdiommu_detach(device_t dev)
{
return (EBUSY);
}
static int
amdiommu_suspend(device_t dev)
{
/* XXXKIB */
return (0);
}
static int
amdiommu_resume(device_t dev)
{
/* XXXKIB */
return (0);
}
static device_method_t amdiommu_methods[] = {
DEVMETHOD(device_probe, amdiommu_probe),
DEVMETHOD(device_attach, amdiommu_attach),
DEVMETHOD(device_detach, amdiommu_detach),
DEVMETHOD(device_suspend, amdiommu_suspend),
DEVMETHOD(device_resume, amdiommu_resume),
DEVMETHOD_END
};
static driver_t amdiommu_driver = {
"amdiommu",
amdiommu_methods,
sizeof(struct amdiommu_unit),
};
EARLY_DRIVER_MODULE(amdiommu, pci, amdiommu_driver, 0, 0, BUS_PASS_SUPPORTDEV);
MODULE_DEPEND(amdiommu, pci, 1, 1, 1);
static struct amdiommu_unit *
amdiommu_unit_by_device_id(u_int pci_seg, u_int device_id)
{
struct amdiommu_unit *unit;
TAILQ_FOREACH(unit, &amdiommu_units, unit_next) {
if (unit->unit_dom == pci_seg && unit->device_id == device_id)
return (unit);
}
return (NULL);
}
struct ivhd_find_unit {
u_int domain;
uintptr_t rid;
int devno;
enum {
IFU_DEV_PCI,
IFU_DEV_IOAPIC,
IFU_DEV_HPET,
} type;
u_int device_id;
uint16_t rid_real;
uint8_t dte;
uint32_t edte;
};
static bool
amdiommu_find_unit_scan_ivrs(ACPI_IVRS_DE_HEADER *d, size_t tlen,
struct ivhd_find_unit *ifu)
{
char *db, *de;
size_t len;
for (de = (char *)d + tlen; (char *)d < de;
d = (ACPI_IVRS_DE_HEADER *)(db + len)) {
db = (char *)d;
if (d->Type == ACPI_IVRS_TYPE_PAD4) {
len = sizeof(ACPI_IVRS_DEVICE4);
} else if (d->Type == ACPI_IVRS_TYPE_ALL) {
ACPI_IVRS_DEVICE4 *d4;
d4 = (ACPI_IVRS_DEVICE4 *)db;
len = sizeof(*d4);
ifu->dte = d4->Header.DataSetting;
} else if (d->Type == ACPI_IVRS_TYPE_SELECT) {
ACPI_IVRS_DEVICE4 *d4;
d4 = (ACPI_IVRS_DEVICE4 *)db;
if (d4->Header.Id == ifu->rid) {
ifu->dte = d4->Header.DataSetting;
ifu->rid_real = ifu->rid;
return (true);
}
len = sizeof(*d4);
} else if (d->Type == ACPI_IVRS_TYPE_START) {
ACPI_IVRS_DEVICE4 *d4, *d4n;
d4 = (ACPI_IVRS_DEVICE4 *)db;
d4n = d4 + 1;
if (d4n->Header.Type != ACPI_IVRS_TYPE_END) {
printf("IVRS dev4 start not followed by END "
"(%#x)\n", d4n->Header.Type);
return (false);
}
if (d4->Header.Id <= ifu->rid &&
ifu->rid <= d4n->Header.Id) {
ifu->dte = d4->Header.DataSetting;
ifu->rid_real = ifu->rid;
return (true);
}
len = 2 * sizeof(*d4);
} else if (d->Type == ACPI_IVRS_TYPE_PAD8) {
len = sizeof(ACPI_IVRS_DEVICE8A);
} else if (d->Type == ACPI_IVRS_TYPE_ALIAS_SELECT) {
ACPI_IVRS_DEVICE8A *d8a;
d8a = (ACPI_IVRS_DEVICE8A *)db;
if (d8a->Header.Id == ifu->rid) {
ifu->dte = d8a->Header.DataSetting;
ifu->rid_real = d8a->UsedId;
return (true);
}
len = sizeof(*d8a);
} else if (d->Type == ACPI_IVRS_TYPE_ALIAS_START) {
ACPI_IVRS_DEVICE8A *d8a;
ACPI_IVRS_DEVICE4 *d4;
d8a = (ACPI_IVRS_DEVICE8A *)db;
d4 = (ACPI_IVRS_DEVICE4 *)(d8a + 1);
if (d4->Header.Type != ACPI_IVRS_TYPE_END) {
printf("IVRS alias start not followed by END "
"(%#x)\n", d4->Header.Type);
return (false);
}
if (d8a->Header.Id <= ifu->rid &&
ifu->rid <= d4->Header.Id) {
ifu->dte = d8a->Header.DataSetting;
ifu->rid_real = d8a->UsedId;
return (true);
}
len = sizeof(*d8a) + sizeof(*d4);
} else if (d->Type == ACPI_IVRS_TYPE_EXT_SELECT) {
ACPI_IVRS_DEVICE8B *d8b;
d8b = (ACPI_IVRS_DEVICE8B *)db;
if (d8b->Header.Id == ifu->rid) {
ifu->dte = d8b->Header.DataSetting;
ifu->rid_real = ifu->rid;
ifu->edte = d8b->ExtendedData;
return (true);
}
len = sizeof(*d8b);
} else if (d->Type == ACPI_IVRS_TYPE_EXT_START) {
ACPI_IVRS_DEVICE8B *d8b;
ACPI_IVRS_DEVICE4 *d4;
d8b = (ACPI_IVRS_DEVICE8B *)db;
d4 = (ACPI_IVRS_DEVICE4 *)(db + sizeof(*d8b));
if (d4->Header.Type != ACPI_IVRS_TYPE_END) {
printf("IVRS ext start not followed by END "
"(%#x)\n", d4->Header.Type);
return (false);
}
			if (d8b->Header.Id <= ifu->rid &&
			    ifu->rid <= d4->Header.Id) {
ifu->dte = d8b->Header.DataSetting;
ifu->rid_real = ifu->rid;
ifu->edte = d8b->ExtendedData;
return (true);
}
len = sizeof(*d8b) + sizeof(*d4);
} else if (d->Type == ACPI_IVRS_TYPE_SPECIAL) {
ACPI_IVRS_DEVICE8C *d8c;
d8c = (ACPI_IVRS_DEVICE8C *)db;
if (((ifu->type == IFU_DEV_IOAPIC &&
d8c->Variety == ACPI_IVHD_IOAPIC) ||
(ifu->type == IFU_DEV_HPET &&
d8c->Variety == ACPI_IVHD_HPET)) &&
ifu->devno == d8c->Handle) {
ifu->dte = d8c->Header.DataSetting;
ifu->rid_real = d8c->UsedId;
return (true);
}
len = sizeof(*d8c);
} else if (d->Type == ACPI_IVRS_TYPE_HID) {
ACPI_IVRS_DEVICE_HID *dh;
dh = (ACPI_IVRS_DEVICE_HID *)db;
len = sizeof(*dh) + dh->UidLength;
/* XXXKIB */
} else {
#if 0
printf("amdiommu: unknown IVRS device entry type %#x\n",
d->Type);
#endif
if (d->Type <= 63)
len = sizeof(ACPI_IVRS_DEVICE4);
else if (d->Type <= 127)
len = sizeof(ACPI_IVRS_DEVICE8A);
else {
printf("amdiommu: abort, cannot "
"advance iterator, item type %#x\n",
d->Type);
return (false);
}
}
}
return (false);
}
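/*
 * Stand-alone sketch (hypothetical names) of the matching rules applied
 * by the scan above: a SELECT entry matches exactly one rid, a START/END
 * pair matches an inclusive rid range, and ALIAS variants additionally
 * redirect the result to the UsedId under which the device's transactions
 * actually reach the IOMMU.
 */
#include <stdbool.h>
#include <stdint.h>

struct rid_match {
	uint16_t	rid_real;	/* requester id seen by the IOMMU */
	uint8_t		dte;		/* DTE data/settings byte */
};

/* SELECT is the degenerate range with start_id == end_id. */
static bool
match_range(uint16_t start_id, uint16_t end_id, uint8_t data,
    uint16_t rid, struct rid_match *out)
{
	if (rid < start_id || rid > end_id)	/* range is inclusive */
		return (false);
	out->rid_real = rid;
	out->dte = data;
	return (true);
}

/* ALIAS entries report UsedId instead of the matched rid. */
static bool
match_alias(uint16_t entry_id, uint16_t used_id, uint8_t data,
    uint16_t rid, struct rid_match *out)
{
	if (entry_id != rid)
		return (false);
	out->rid_real = used_id;
	out->dte = data;
	return (true);
}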
static bool
amdiommu_find_unit_scan_0x11(ACPI_IVRS_HARDWARE2 *ivrs, void *arg)
{
struct ivhd_find_unit *ifu = arg;
ACPI_IVRS_DE_HEADER *d;
bool res;
KASSERT(ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE2 ||
ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE3,
("Misparsed IVHD h2, ivrs type %#x", ivrs->Header.Type));
if (ifu->domain != ivrs->PciSegmentGroup)
return (false);
d = (ACPI_IVRS_DE_HEADER *)(ivrs + 1);
res = amdiommu_find_unit_scan_ivrs(d, ivrs->Header.Length, ifu);
if (res)
ifu->device_id = ivrs->Header.DeviceId;
return (res);
}
static bool
amdiommu_find_unit_scan_0x10(ACPI_IVRS_HARDWARE1 *ivrs, void *arg)
{
struct ivhd_find_unit *ifu = arg;
ACPI_IVRS_DE_HEADER *d;
bool res;
KASSERT(ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE1,
("Misparsed IVHD h1, ivrs type %#x", ivrs->Header.Type));
if (ifu->domain != ivrs->PciSegmentGroup)
return (false);
d = (ACPI_IVRS_DE_HEADER *)(ivrs + 1);
res = amdiommu_find_unit_scan_ivrs(d, ivrs->Header.Length, ifu);
if (res)
ifu->device_id = ivrs->Header.DeviceId;
return (res);
}
static void
amdiommu_dev_prop_dtr(device_t dev, const char *name, void *val, void *dtr_ctx)
{
free(val, M_DEVBUF);
}
static int *
amdiommu_dev_fetch_flagsp(struct amdiommu_unit *unit, device_t dev)
{
int *flagsp, error;
bus_topo_assert();
error = device_get_prop(dev, device_get_nameunit(unit->iommu.dev),
(void **)&flagsp);
if (error == ENOENT) {
flagsp = malloc(sizeof(int), M_DEVBUF, M_WAITOK | M_ZERO);
device_set_prop(dev, device_get_nameunit(unit->iommu.dev),
flagsp, amdiommu_dev_prop_dtr, unit);
}
return (flagsp);
}
static int
amdiommu_get_dev_prop_flags(struct amdiommu_unit *unit, device_t dev)
{
int *flagsp, flags;
bus_topo_lock();
flagsp = amdiommu_dev_fetch_flagsp(unit, dev);
flags = *flagsp;
bus_topo_unlock();
return (flags);
}
static void
amdiommu_set_dev_prop_flags(struct amdiommu_unit *unit, device_t dev,
int flag)
{
int *flagsp;
bus_topo_lock();
flagsp = amdiommu_dev_fetch_flagsp(unit, dev);
*flagsp |= flag;
bus_topo_unlock();
}
int
amdiommu_find_unit(device_t dev, struct amdiommu_unit **unitp, uint16_t *ridp,
uint8_t *dtep, uint32_t *edtep, bool verbose)
{
struct ivhd_find_unit ifu;
struct amdiommu_unit *unit;
int error, flags;
bool res;
if (device_get_devclass(device_get_parent(dev)) !=
devclass_find("pci"))
return (ENXIO);
bzero(&ifu, sizeof(ifu));
ifu.type = IFU_DEV_PCI;
error = pci_get_id(dev, PCI_ID_RID, &ifu.rid);
if (error != 0) {
if (verbose)
device_printf(dev,
"amdiommu cannot get rid, error %d\n", error);
return (ENXIO);
}
ifu.domain = pci_get_domain(dev);
res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11,
amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu);
if (!res) {
if (verbose)
device_printf(dev,
"(%#06x:%#06x) amdiommu cannot match rid in IVHD\n",
ifu.domain, (unsigned)ifu.rid);
return (ENXIO);
}
unit = amdiommu_unit_by_device_id(ifu.domain, ifu.device_id);
if (unit == NULL) {
if (verbose)
device_printf(dev,
"(%#06x:%#06x) amdiommu cannot find unit\n",
ifu.domain, (unsigned)ifu.rid);
return (ENXIO);
}
*unitp = unit;
iommu_device_set_iommu_prop(dev, unit->iommu.dev);
if (ridp != NULL)
*ridp = ifu.rid_real;
if (dtep != NULL)
*dtep = ifu.dte;
if (edtep != NULL)
*edtep = ifu.edte;
if (verbose) {
flags = amdiommu_get_dev_prop_flags(unit, dev);
if ((flags & AMDIOMMU_DEV_REPORTED) == 0) {
amdiommu_set_dev_prop_flags(unit, dev,
AMDIOMMU_DEV_REPORTED);
device_printf(dev, "amdiommu%d "
"initiator rid %#06x dte %#x edte %#x\n",
unit->iommu.unit, ifu.rid_real, ifu.dte, ifu.edte);
}
}
return (0);
}
int
amdiommu_find_unit_for_ioapic(int apic_id, struct amdiommu_unit **unitp,
uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose)
{
struct ivhd_find_unit ifu;
struct amdiommu_unit *unit;
device_t apic_dev;
bool res;
bzero(&ifu, sizeof(ifu));
ifu.type = IFU_DEV_IOAPIC;
ifu.devno = apic_id;
ifu.rid = -1;
res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11,
amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu);
if (!res) {
if (verbose)
printf("amdiommu cannot match ioapic no %d in IVHD\n",
apic_id);
return (ENXIO);
}
	unit = amdiommu_unit_by_device_id(0, ifu.device_id);
	if (unit == NULL) {
		if (verbose)
			printf("amdiommu cannot find unit by dev id %#x\n",
			    ifu.device_id);
		return (ENXIO);
	}
	apic_dev = ioapic_get_dev(apic_id);
	if (apic_dev != NULL)
		iommu_device_set_iommu_prop(apic_dev, unit->iommu.dev);
*unitp = unit;
if (ridp != NULL)
*ridp = ifu.rid_real;
if (dtep != NULL)
*dtep = ifu.dte;
if (edtep != NULL)
*edtep = ifu.edte;
if (verbose) {
printf("amdiommu%d IOAPIC %d "
"initiator rid %#06x dte %#x edte %#x\n",
unit->iommu.unit, apic_id, ifu.rid_real, ifu.dte,
ifu.edte);
}
return (0);
}
int
amdiommu_find_unit_for_hpet(device_t hpet, struct amdiommu_unit **unitp,
uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose)
{
struct ivhd_find_unit ifu;
struct amdiommu_unit *unit;
int hpet_no;
bool res;
hpet_no = hpet_get_uid(hpet);
bzero(&ifu, sizeof(ifu));
ifu.type = IFU_DEV_HPET;
ifu.devno = hpet_no;
ifu.rid = -1;
res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11,
amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu);
if (!res) {
printf("amdiommu cannot match hpet no %d in IVHD\n",
hpet_no);
return (ENXIO);
}
unit = amdiommu_unit_by_device_id(0, ifu.device_id);
if (unit == NULL) {
if (verbose)
printf("amdiommu cannot find unit id %d\n",
hpet_no);
return (ENXIO);
}
*unitp = unit;
iommu_device_set_iommu_prop(hpet, unit->iommu.dev);
if (ridp != NULL)
*ridp = ifu.rid_real;
if (dtep != NULL)
*dtep = ifu.dte;
if (edtep != NULL)
*edtep = ifu.edte;
if (verbose) {
printf("amdiommu%d HPET no %d "
"initiator rid %#06x dte %#x edte %#x\n",
unit->iommu.unit, hpet_no, ifu.rid_real, ifu.dte,
ifu.edte);
}
return (0);
}
static struct iommu_unit *
amdiommu_find_method(device_t dev, bool verbose)
{
struct amdiommu_unit *unit;
int error;
uint32_t edte;
uint16_t rid;
uint8_t dte;
error = amdiommu_find_unit(dev, &unit, &rid, &dte, &edte, verbose);
if (error != 0) {
if (verbose)
device_printf(dev,
"cannot find amdiommu unit, error %d\n",
error);
return (NULL);
}
return (&unit->iommu);
}
static struct x86_unit_common *
amdiommu_get_x86_common(struct iommu_unit *unit)
{
struct amdiommu_unit *iommu;
iommu = IOMMU2AMD(unit);
return (&iommu->x86c);
}
static void
amdiommu_unit_pre_instantiate_ctx(struct iommu_unit *unit)
{
}
static struct x86_iommu amd_x86_iommu = {
.get_x86_common = amdiommu_get_x86_common,
.unit_pre_instantiate_ctx = amdiommu_unit_pre_instantiate_ctx,
.find = amdiommu_find_method,
.domain_unload_entry = amdiommu_domain_unload_entry,
.domain_unload = amdiommu_domain_unload,
.get_ctx = amdiommu_get_ctx,
.free_ctx_locked = amdiommu_free_ctx_locked_method,
- .free_ctx = amdiommu_free_ctx_method,
.alloc_msi_intr = amdiommu_alloc_msi_intr,
.map_msi_intr = amdiommu_map_msi_intr,
.unmap_msi_intr = amdiommu_unmap_msi_intr,
.map_ioapic_intr = amdiommu_map_ioapic_intr,
.unmap_ioapic_intr = amdiommu_unmap_ioapic_intr,
};
static void
x86_iommu_set_amd(void *arg __unused)
{
if (cpu_vendor_id == CPU_VENDOR_AMD)
set_x86_iommu(&amd_x86_iommu);
}
SYSINIT(x86_iommu, SI_SUB_TUNABLES, SI_ORDER_ANY, x86_iommu_set_amd, NULL);
#ifdef DDB
#include
#include
static void
amdiommu_print_domain(struct amdiommu_domain *domain, bool show_mappings)
{
struct iommu_domain *iodom;
iodom = DOM2IODOM(domain);
#if 0
db_printf(
" @%p dom %d mgaw %d agaw %d pglvl %d end %jx refs %d\n"
" ctx_cnt %d flags %x pgobj %p map_ents %u\n",
domain, domain->domain, domain->mgaw, domain->agaw, domain->pglvl,
(uintmax_t)domain->iodom.end, domain->refs, domain->ctx_cnt,
domain->iodom.flags, domain->pgtbl_obj, domain->iodom.entries_cnt);
#endif
iommu_db_domain_print_contexts(iodom);
if (show_mappings)
iommu_db_domain_print_mappings(iodom);
}
static void
amdiommu_print_one(struct amdiommu_unit *unit, bool show_domains,
bool show_mappings, bool show_cmdq)
{
struct amdiommu_domain *domain;
struct amdiommu_cmd_generic *cp;
u_int cmd_head, cmd_tail, ci;
cmd_head = amdiommu_read4(unit, AMDIOMMU_CMDBUF_HEAD);
cmd_tail = amdiommu_read4(unit, AMDIOMMU_CMDBUF_TAIL);
db_printf("amdiommu%d at %p, mmio at %#jx/sz %#jx\n",
unit->iommu.unit, unit, (uintmax_t)unit->mmio_base,
(uintmax_t)unit->mmio_sz);
db_printf(" hw ctrl %#018jx cmdevst %#018jx\n",
(uintmax_t)amdiommu_read8(unit, AMDIOMMU_CTRL),
(uintmax_t)amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS));
db_printf(" devtbl at %p\n", unit->dev_tbl);
db_printf(" hwseq at %p phys %#jx val %#jx\n",
&unit->x86c.inv_waitd_seq_hw,
pmap_kextract((vm_offset_t)&unit->x86c.inv_waitd_seq_hw),
unit->x86c.inv_waitd_seq_hw);
db_printf(" invq at %p base %#jx hw head/tail %#x/%#x\n",
unit->x86c.inv_queue,
(uintmax_t)amdiommu_read8(unit, AMDIOMMU_CMDBUF_BASE),
cmd_head, cmd_tail);
if (show_cmdq) {
db_printf(" cmd q:\n");
for (ci = cmd_head; ci != cmd_tail;) {
cp = (struct amdiommu_cmd_generic *)(unit->
x86c.inv_queue + ci);
db_printf(
" idx %#x op %#x %#010x %#010x %#010x %#010x\n",
ci >> AMDIOMMU_CMD_SZ_SHIFT, cp->op,
cp->w0, cp->ww1, cp->w2, cp->w3);
ci += AMDIOMMU_CMD_SZ;
if (ci == unit->x86c.inv_queue_size)
ci = 0;
}
}
if (show_domains) {
db_printf(" domains:\n");
LIST_FOREACH(domain, &unit->domains, link) {
amdiommu_print_domain(domain, show_mappings);
if (db_pager_quit)
break;
}
}
}
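/*
 * Sketch of the command-ring traversal in the dump above: head and tail
 * are byte offsets into the ring, each slot has a fixed size, and the
 * index wraps to zero at the end of the buffer.  The 16-byte slot size is
 * an assumption standing in for AMDIOMMU_CMD_SZ.
 */
#include <stdint.h>

#define SKETCH_CMD_SZ	16U	/* assumed bytes per command slot */

static uint32_t
cmd_ring_next(uint32_t idx, uint32_t ring_size)
{
	idx += SKETCH_CMD_SZ;
	if (idx == ring_size)
		idx = 0;
	return (idx);
}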
DB_SHOW_COMMAND(amdiommu, db_amdiommu_print)
{
struct amdiommu_unit *unit;
bool show_domains, show_mappings, show_cmdq;
show_domains = strchr(modif, 'd') != NULL;
show_mappings = strchr(modif, 'm') != NULL;
show_cmdq = strchr(modif, 'q') != NULL;
if (!have_addr) {
db_printf("usage: show amdiommu [/d] [/m] [/q] index\n");
return;
}
if ((vm_offset_t)addr < 0x10000)
unit = amdiommu_unit_by_device_id(0, (u_int)addr);
else
unit = (struct amdiommu_unit *)addr;
amdiommu_print_one(unit, show_domains, show_mappings, show_cmdq);
}
DB_SHOW_ALL_COMMAND(amdiommus, db_show_all_amdiommus)
{
struct amdiommu_unit *unit;
bool show_domains, show_mappings, show_cmdq;
show_domains = strchr(modif, 'd') != NULL;
show_mappings = strchr(modif, 'm') != NULL;
show_cmdq = strchr(modif, 'q') != NULL;
TAILQ_FOREACH(unit, &amdiommu_units, unit_next) {
amdiommu_print_one(unit, show_domains, show_mappings,
show_cmdq);
if (db_pager_quit)
break;
}
}
#endif
diff --git a/sys/x86/iommu/amd_iommu.h b/sys/x86/iommu/amd_iommu.h
index 2b0db4f8fe6d..64dd13db9745 100644
--- a/sys/x86/iommu/amd_iommu.h
+++ b/sys/x86/iommu/amd_iommu.h
@@ -1,243 +1,242 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef __X86_IOMMU_AMD_IOMMU_H
#define __X86_IOMMU_AMD_IOMMU_H
#include
#define AMDIOMMU_DEV_REPORTED 0x00000001
struct amdiommu_unit;
struct amdiommu_domain {
struct iommu_domain iodom;
int domain; /* (c) DID, written in context entry */
struct amdiommu_unit *unit; /* (c) */
u_int ctx_cnt; /* (u) Number of contexts owned */
u_int refs; /* (u) Refs, including ctx */
LIST_ENTRY(amdiommu_domain) link;/* (u) Member in the iommu list */
vm_object_t pgtbl_obj; /* (c) Page table pages */
vm_page_t pgtblr; /* (c) Page table root page */
u_int pglvl; /* (c) Page table levels */
};
struct amdiommu_ctx {
struct iommu_ctx context;
struct amdiommu_irte_basic_novapic *irtb;
struct amdiommu_irte_basic_vapic_x2 *irtx2;
vmem_t *irtids;
};
struct amdiommu_unit {
struct iommu_unit iommu;
struct x86_unit_common x86c;
u_int unit_dom; /* Served PCI domain, from IVRS */
u_int device_id; /* basically PCI RID */
u_int unit_id; /* Hypertransport Unit ID, deprecated */
TAILQ_ENTRY(amdiommu_unit) unit_next;
int seccap_reg;
uint64_t efr;
vm_paddr_t mmio_base;
vm_size_t mmio_sz;
struct resource *mmio_res;
int mmio_rid;
uint64_t hw_ctrl;
u_int numirqs;
struct resource *msix_table;
int msix_table_rid;
int irq_cmdev_rid;
struct resource *irq_cmdev;
void *irq_cmdev_cookie;
struct amdiommu_dte *dev_tbl;
vm_object_t devtbl_obj;
LIST_HEAD(, amdiommu_domain) domains;
struct unrhdr *domids;
struct mtx event_lock;
struct amdiommu_event_generic *event_log;
u_int event_log_size;
u_int event_log_head;
u_int event_log_tail;
struct task event_task;
struct taskqueue *event_taskqueue;
struct amdiommu_event_generic event_copy_log[16];
u_int event_copy_head;
u_int event_copy_tail;
int irte_enabled; /* int for sysctl type */
bool irte_x2apic;
u_int irte_nentries;
};
#define AMD2IOMMU(unit) (&((unit)->iommu))
#define IOMMU2AMD(unit) \
__containerof((unit), struct amdiommu_unit, iommu)
#define AMDIOMMU_LOCK(unit) mtx_lock(&AMD2IOMMU(unit)->lock)
#define AMDIOMMU_UNLOCK(unit) mtx_unlock(&AMD2IOMMU(unit)->lock)
#define AMDIOMMU_ASSERT_LOCKED(unit) mtx_assert(&AMD2IOMMU(unit)->lock, \
MA_OWNED)
#define AMDIOMMU_EVENT_LOCK(unit) mtx_lock_spin(&(unit)->event_lock)
#define AMDIOMMU_EVENT_UNLOCK(unit) mtx_unlock_spin(&(unit)->event_lock)
#define AMDIOMMU_EVENT_ASSERT_LOCKED(unit) \
mtx_assert(&(unit)->event_lock, MA_OWNED)
#define DOM2IODOM(domain) (&((domain)->iodom))
#define IODOM2DOM(domain) \
__containerof((domain), struct amdiommu_domain, iodom)
#define CTX2IOCTX(ctx) (&((ctx)->context))
#define IOCTX2CTX(ctx) \
__containerof((ctx), struct amdiommu_ctx, context)
#define CTX2DOM(ctx) IODOM2DOM((ctx)->context.domain)
#define CTX2AMD(ctx) (CTX2DOM(ctx)->unit)
#define DOM2AMD(domain) ((domain)->unit)
#define AMDIOMMU_DOMAIN_LOCK(dom) mtx_lock(&(dom)->iodom.lock)
#define AMDIOMMU_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->iodom.lock)
#define AMDIOMMU_DOMAIN_ASSERT_LOCKED(dom) \
mtx_assert(&(dom)->iodom.lock, MA_OWNED)
#define AMDIOMMU_DOMAIN_PGLOCK(dom) VM_OBJECT_WLOCK((dom)->pgtbl_obj)
#define AMDIOMMU_DOMAIN_PGTRYLOCK(dom) VM_OBJECT_TRYWLOCK((dom)->pgtbl_obj)
#define AMDIOMMU_DOMAIN_PGUNLOCK(dom) VM_OBJECT_WUNLOCK((dom)->pgtbl_obj)
#define AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(dom) \
VM_OBJECT_ASSERT_WLOCKED((dom)->pgtbl_obj)
#define AMDIOMMU_RID 1001
static inline uint32_t
amdiommu_read4(const struct amdiommu_unit *unit, int reg)
{
return (bus_read_4(unit->mmio_res, reg));
}
static inline uint64_t
amdiommu_read8(const struct amdiommu_unit *unit, int reg)
{
#ifdef __i386__
uint32_t high, low;
low = bus_read_4(unit->mmio_res, reg);
high = bus_read_4(unit->mmio_res, reg + 4);
return (low | ((uint64_t)high << 32));
#else
return (bus_read_8(unit->mmio_res, reg));
#endif
}
static inline void
amdiommu_write4(const struct amdiommu_unit *unit, int reg, uint32_t val)
{
bus_write_4(unit->mmio_res, reg, val);
}
static inline void
amdiommu_write8(const struct amdiommu_unit *unit, int reg, uint64_t val)
{
#ifdef __i386__
uint32_t high, low;
low = val;
high = val >> 32;
bus_write_4(unit->mmio_res, reg, low);
bus_write_4(unit->mmio_res, reg + 4, high);
#else
bus_write_8(unit->mmio_res, reg, val);
#endif
}
int amdiommu_find_unit(device_t dev, struct amdiommu_unit **unitp,
uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
int amdiommu_find_unit_for_ioapic(int apic_id, struct amdiommu_unit **unitp,
uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
int amdiommu_find_unit_for_hpet(device_t hpet, struct amdiommu_unit **unitp,
uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose);
int amdiommu_init_cmd(struct amdiommu_unit *unit);
void amdiommu_fini_cmd(struct amdiommu_unit *unit);
void amdiommu_event_intr(struct amdiommu_unit *unit, uint64_t status);
int amdiommu_init_event(struct amdiommu_unit *unit);
void amdiommu_fini_event(struct amdiommu_unit *unit);
int amdiommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count);
int amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector,
u_int cookie, uint64_t *addr, uint32_t *data);
int amdiommu_unmap_msi_intr(device_t src, u_int cookie);
int amdiommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector,
bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi,
uint32_t *lo);
int amdiommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie);
int amdiommu_init_irt(struct amdiommu_unit *unit);
void amdiommu_fini_irt(struct amdiommu_unit *unit);
int amdiommu_ctx_init_irte(struct amdiommu_ctx *ctx);
void amdiommu_ctx_fini_irte(struct amdiommu_ctx *ctx);
void amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep);
void amdiommu_domain_unload(struct iommu_domain *iodom,
struct iommu_map_entries_tailq *entries, bool cansleep);
struct amdiommu_ctx *amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit,
device_t dev, uint16_t rid, int dev_domain, bool id_mapped,
bool rmrr_init, uint8_t dte, uint32_t edte);
struct iommu_ctx *amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev,
uint16_t rid, bool id_mapped, bool rmrr_init);
struct amdiommu_ctx *amdiommu_find_ctx_locked(struct amdiommu_unit *unit,
uint16_t rid);
void amdiommu_free_ctx_locked_method(struct iommu_unit *iommu,
struct iommu_ctx *context);
-void amdiommu_free_ctx_method(struct iommu_ctx *context);
struct amdiommu_domain *amdiommu_find_domain(struct amdiommu_unit *unit,
uint16_t rid);
void amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx);
void amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx);
void amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit,
uint16_t devid);
void amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit,
uint16_t devid);
void amdiommu_qi_invalidate_all_pages_locked_nowait(
struct amdiommu_domain *domain);
void amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu);
int amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain);
void amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain);
extern const struct iommu_domain_map_ops amdiommu_domain_map_ops;
#endif
diff --git a/sys/x86/iommu/intel_ctx.c b/sys/x86/iommu/intel_ctx.c
index c2371d4d9c4f..2bc5f8a9d361 100644
--- a/sys/x86/iommu/intel_ctx.c
+++ b/sys/x86/iommu/intel_ctx.c
@@ -1,952 +1,933 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
static MALLOC_DEFINE(M_DMAR_CTX, "dmar_ctx", "Intel DMAR Context");
static MALLOC_DEFINE(M_DMAR_DOMAIN, "dmar_dom", "Intel DMAR Domain");
static void dmar_unref_domain_locked(struct dmar_unit *dmar,
struct dmar_domain *domain);
static void dmar_domain_destroy(struct dmar_domain *domain);
static void dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx);
static void dmar_free_ctx(struct dmar_ctx *ctx);
static void
dmar_ensure_ctx_page(struct dmar_unit *dmar, int bus)
{
struct sf_buf *sf;
dmar_root_entry_t *re;
vm_page_t ctxm;
/*
* Allocated context page must be linked.
*/
ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_NOALLOC);
if (ctxm != NULL)
return;
	/*
	 * The page is not present; allocate and link it.  Note that
	 * another thread might execute this sequence in parallel.
	 * This should be safe, because the context entries written by
	 * both threads are equal.
	 */
TD_PREP_PINNED_ASSERT;
ctxm = iommu_pgalloc(dmar->ctx_obj, 1 + bus, IOMMU_PGF_ZERO |
IOMMU_PGF_WAITOK);
re = iommu_map_pgtbl(dmar->ctx_obj, 0, IOMMU_PGF_NOALLOC, &sf);
re += bus;
dmar_pte_store(&re->r1, DMAR_ROOT_R1_P | (DMAR_ROOT_R1_CTP_MASK &
VM_PAGE_TO_PHYS(ctxm)));
dmar_flush_root_to_ram(dmar, re);
iommu_unmap_pgtbl(sf);
TD_PINNED_ASSERT;
}
static dmar_ctx_entry_t *
dmar_map_ctx_entry(struct dmar_ctx *ctx, struct sf_buf **sfp)
{
struct dmar_unit *dmar;
dmar_ctx_entry_t *ctxp;
dmar = CTX2DMAR(ctx);
ctxp = iommu_map_pgtbl(dmar->ctx_obj, 1 + PCI_RID2BUS(ctx->context.rid),
IOMMU_PGF_NOALLOC | IOMMU_PGF_WAITOK, sfp);
ctxp += ctx->context.rid & 0xff;
return (ctxp);
}
static void
ctx_id_entry_init_one(dmar_ctx_entry_t *ctxp, struct dmar_domain *domain,
vm_page_t ctx_root)
{
	/*
	 * For an update due to a move, the store is not atomic.  It is
	 * possible that the DMAR reads the upper doubleword while the
	 * lower doubleword is not yet updated.  The domain id is stored
	 * in the upper doubleword, the table pointer in the lower.
	 *
	 * There is no good solution; for the same reason it is wrong
	 * to clear the P bit in the ctx entry for an update.
	 */
dmar_pte_store1(&ctxp->ctx2, DMAR_CTX2_DID(domain->domain) |
domain->awlvl);
if (ctx_root == NULL) {
dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_PASS | DMAR_CTX1_P);
} else {
dmar_pte_store1(&ctxp->ctx1, DMAR_CTX1_T_UNTR |
(DMAR_CTX1_ASR_MASK & VM_PAGE_TO_PHYS(ctx_root)) |
DMAR_CTX1_P);
}
}
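/*
 * Illustrative sketch (hypothetical types, not the driver's dmar_pte_store
 * helpers) of the store ordering used in ctx_id_entry_init_one(): the
 * upper doubleword carrying the domain id is written before the lower
 * doubleword carrying the Present bit, so that, at least from the CPU's
 * side, a previously non-present entry is not published half-initialized.
 * As the comment above notes, this does not make the 128-bit update atomic
 * for the "move" case, where the entry is already present.
 */
#include <stdint.h>

struct ctx_entry_sketch {
	volatile uint64_t lo;	/* P bit, translation type, table pointer */
	volatile uint64_t hi;	/* domain id, address width */
};

static void
ctx_entry_publish(struct ctx_entry_sketch *e, uint64_t hi_val,
    uint64_t lo_val)
{
	e->hi = hi_val;		/* domain id and address width first */
	e->lo = lo_val;		/* Present bit goes in last */
}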
static void
ctx_id_entry_init(struct dmar_ctx *ctx, dmar_ctx_entry_t *ctxp, bool move,
int busno)
{
struct dmar_unit *unit;
struct dmar_domain *domain;
vm_page_t ctx_root;
int i;
domain = CTX2DOM(ctx);
unit = DOM2DMAR(domain);
KASSERT(move || (ctxp->ctx1 == 0 && ctxp->ctx2 == 0),
("dmar%d: initialized ctx entry %d:%d:%d 0x%jx 0x%jx",
unit->iommu.unit, busno, pci_get_slot(ctx->context.tag->owner),
pci_get_function(ctx->context.tag->owner),
ctxp->ctx1, ctxp->ctx2));
if ((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 &&
(unit->hw_ecap & DMAR_ECAP_PT) != 0) {
KASSERT(domain->pgtbl_obj == NULL,
("ctx %p non-null pgtbl_obj", ctx));
ctx_root = NULL;
} else {
ctx_root = iommu_pgalloc(domain->pgtbl_obj, 0,
IOMMU_PGF_NOALLOC);
}
if (iommu_is_buswide_ctx(DMAR2IOMMU(unit), busno)) {
MPASS(!move);
for (i = 0; i <= PCI_BUSMAX; i++) {
ctx_id_entry_init_one(&ctxp[i], domain, ctx_root);
}
} else {
ctx_id_entry_init_one(ctxp, domain, ctx_root);
}
dmar_flush_ctx_to_ram(unit, ctxp);
}
static int
dmar_flush_for_ctx_entry(struct dmar_unit *dmar, bool force)
{
int error;
/*
* If dmar declares Caching Mode as Set, follow 11.5 "Caching
* Mode Consideration" and do the (global) invalidation of the
* negative TLB entries.
*/
if ((dmar->hw_cap & DMAR_CAP_CM) == 0 && !force)
return (0);
if (dmar->qi_enabled) {
dmar_qi_invalidate_ctx_glob_locked(dmar);
if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force)
dmar_qi_invalidate_iotlb_glob_locked(dmar);
return (0);
}
error = dmar_inv_ctx_glob(dmar);
if (error == 0 && ((dmar->hw_ecap & DMAR_ECAP_DI) != 0 || force))
error = dmar_inv_iotlb_glob(dmar);
return (error);
}
static int
domain_init_rmrr(struct dmar_domain *domain, device_t dev, int bus,
int slot, int func, int dev_domain, int dev_busno,
const void *dev_path, int dev_path_len)
{
struct iommu_map_entries_tailq rmrr_entries;
struct iommu_map_entry *entry, *entry1;
vm_page_t *ma;
iommu_gaddr_t start, end;
vm_pindex_t size, i;
int error, error1;
if (!dmar_rmrr_enable)
return (0);
error = 0;
TAILQ_INIT(&rmrr_entries);
dmar_dev_parse_rmrr(domain, dev_domain, dev_busno, dev_path,
dev_path_len, &rmrr_entries);
TAILQ_FOREACH_SAFE(entry, &rmrr_entries, dmamap_link, entry1) {
		/*
		 * The VT-d specification requires that the start of an
		 * RMRR entry is 4k-aligned.  Buggy BIOSes put
		 * anything into the start and end fields.  Truncate
		 * and round as necessary.
		 *
		 * We also allow overlapping RMRR entries, see
		 * iommu_gas_alloc_region().
		 */
start = entry->start;
end = entry->end;
if (bootverbose)
printf("dmar%d ctx pci%d:%d:%d RMRR [%#jx, %#jx]\n",
domain->iodom.iommu->unit, bus, slot, func,
(uintmax_t)start, (uintmax_t)end);
entry->start = trunc_page(start);
entry->end = round_page(end);
if (entry->start == entry->end) {
/* Workaround for some AMI (?) BIOSes */
if (bootverbose) {
if (dev != NULL)
device_printf(dev, "");
printf("pci%d:%d:%d ", bus, slot, func);
printf("BIOS bug: dmar%d RMRR "
"region (%jx, %jx) corrected\n",
domain->iodom.iommu->unit, start, end);
}
entry->end += IOMMU_PAGE_SIZE * 0x20;
}
size = OFF_TO_IDX(entry->end - entry->start);
ma = malloc(sizeof(vm_page_t) * size, M_TEMP, M_WAITOK);
for (i = 0; i < size; i++) {
ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
VM_MEMATTR_DEFAULT);
}
error1 = iommu_gas_map_region(DOM2IODOM(domain), entry,
IOMMU_MAP_ENTRY_READ | IOMMU_MAP_ENTRY_WRITE,
IOMMU_MF_CANWAIT | IOMMU_MF_RMRR, ma);
		/*
		 * Successfully mapped RMRR entries are owned by the
		 * context rb tree.  Get rid of a failed entry, but do
		 * not stop the loop.  The rest of the parsed RMRR
		 * entries are loaded and removed on context
		 * destruction.
		 */
if (error1 == 0 && entry->end != entry->start) {
IOMMU_LOCK(domain->iodom.iommu);
domain->refs++; /* XXXKIB prevent free */
domain->iodom.flags |= IOMMU_DOMAIN_RMRR;
IOMMU_UNLOCK(domain->iodom.iommu);
} else {
if (error1 != 0) {
if (dev != NULL)
device_printf(dev, "");
printf("pci%d:%d:%d ", bus, slot, func);
printf(
"dmar%d failed to map RMRR region (%jx, %jx) %d\n",
domain->iodom.iommu->unit, start, end,
error1);
error = error1;
}
TAILQ_REMOVE(&rmrr_entries, entry, dmamap_link);
iommu_gas_free_entry(entry);
}
for (i = 0; i < size; i++)
vm_page_putfake(ma[i]);
free(ma, M_TEMP);
}
return (error);
}
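/*
 * Small sketch of the page alignment applied to the RMRR bounds above,
 * written with plain arithmetic instead of the kernel's trunc_page() and
 * round_page() macros (4K page size assumed).
 */
#include <stdint.h>

#define SKETCH_PG_SIZE	4096ULL
#define SKETCH_PG_MASK	(SKETCH_PG_SIZE - 1)

static void
align_rmrr_bounds(uint64_t *start, uint64_t *end)
{
	*start &= ~SKETCH_PG_MASK;				/* trunc_page() */
	*end = (*end + SKETCH_PG_MASK) & ~SKETCH_PG_MASK;	/* round_page() */
}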
/*
* PCI memory address space is shared between memory-mapped devices (MMIO) and
* host memory (which may be remapped by an IOMMU). Device accesses to an
* address within a memory aperture in a PCIe root port will be treated as
* peer-to-peer and not forwarded to an IOMMU. To avoid this, reserve the
* address space of the root port's memory apertures in the address space used
* by the IOMMU for remapping.
*/
static int
dmar_reserve_pci_regions(struct dmar_domain *domain, device_t dev)
{
struct iommu_domain *iodom;
device_t root;
uint32_t val;
uint64_t base, limit;
int error;
iodom = DOM2IODOM(domain);
root = pci_find_pcie_root_port(dev);
if (root == NULL)
return (0);
/* Disable downstream memory */
base = PCI_PPBMEMBASE(0, pci_read_config(root, PCIR_MEMBASE_1, 2));
limit = PCI_PPBMEMLIMIT(0, pci_read_config(root, PCIR_MEMLIMIT_1, 2));
error = iommu_gas_reserve_region_extend(iodom, base, limit + 1);
if (bootverbose || error != 0)
device_printf(dev, "DMAR reserve [%#jx-%#jx] (error %d)\n",
base, limit + 1, error);
if (error != 0)
return (error);
/* Disable downstream prefetchable memory */
val = pci_read_config(root, PCIR_PMBASEL_1, 2);
if (val != 0 || pci_read_config(root, PCIR_PMLIMITL_1, 2) != 0) {
if ((val & PCIM_BRPM_MASK) == PCIM_BRPM_64) {
base = PCI_PPBMEMBASE(
pci_read_config(root, PCIR_PMBASEH_1, 4),
val);
limit = PCI_PPBMEMLIMIT(
pci_read_config(root, PCIR_PMLIMITH_1, 4),
pci_read_config(root, PCIR_PMLIMITL_1, 2));
} else {
base = PCI_PPBMEMBASE(0, val);
limit = PCI_PPBMEMLIMIT(0,
pci_read_config(root, PCIR_PMLIMITL_1, 2));
}
error = iommu_gas_reserve_region_extend(iodom, base,
limit + 1);
if (bootverbose || error != 0)
device_printf(dev, "DMAR reserve [%#jx-%#jx] "
"(error %d)\n", base, limit + 1, error);
if (error != 0)
return (error);
}
return (error);
}
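/*
 * Sketch of how a root port's memory window registers decode into the
 * [base, limit] range reserved above.  Per the PCI-to-PCI bridge
 * specification, bits 15:4 of the 16-bit base/limit registers hold
 * address bits 31:20, and the optional high registers supply bits 63:32
 * of the prefetchable window; the driver expresses this through the
 * PCI_PPBMEMBASE()/PCI_PPBMEMLIMIT() macros, written out here in plain
 * arithmetic.
 */
#include <stdint.h>

static uint64_t
ppb_mem_base(uint32_t high, uint16_t low)
{
	return (((uint64_t)high << 32) |
	    (((uint64_t)(low & 0xfff0U)) << 16));
}

static uint64_t
ppb_mem_limit(uint32_t high, uint16_t low)
{
	return (((uint64_t)high << 32) |
	    (((uint64_t)(low & 0xfff0U)) << 16) | 0xfffffULL);
}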
static struct dmar_domain *
dmar_domain_alloc(struct dmar_unit *dmar, bool id_mapped)
{
struct iommu_domain *iodom;
struct iommu_unit *unit;
struct dmar_domain *domain;
int error, id, mgaw;
id = alloc_unr(dmar->domids);
if (id == -1)
return (NULL);
domain = malloc(sizeof(*domain), M_DMAR_DOMAIN, M_WAITOK | M_ZERO);
iodom = DOM2IODOM(domain);
unit = DMAR2IOMMU(dmar);
domain->domain = id;
LIST_INIT(&iodom->contexts);
iommu_domain_init(unit, iodom, &dmar_domain_map_ops);
domain->dmar = dmar;
	/*
	 * For now, use the maximal usable physical address of the
	 * installed memory to calculate the mgaw for an id_mapped
	 * domain.  It is useful for the identity mapping, and less so
	 * for the virtualized bus address space.
	 */
domain->iodom.end = id_mapped ? ptoa(Maxmem) : BUS_SPACE_MAXADDR;
mgaw = dmar_maxaddr2mgaw(dmar, domain->iodom.end, !id_mapped);
error = domain_set_agaw(domain, mgaw);
if (error != 0)
goto fail;
if (!id_mapped)
/* Use all supported address space for remapping. */
domain->iodom.end = 1ULL << (domain->agaw - 1);
iommu_gas_init_domain(DOM2IODOM(domain));
if (id_mapped) {
if ((dmar->hw_ecap & DMAR_ECAP_PT) == 0) {
domain->pgtbl_obj = dmar_get_idmap_pgtbl(domain,
domain->iodom.end);
}
domain->iodom.flags |= IOMMU_DOMAIN_IDMAP;
} else {
error = dmar_domain_alloc_pgtbl(domain);
if (error != 0)
goto fail;
/* Disable local apic region access */
error = iommu_gas_reserve_region(iodom, 0xfee00000,
0xfeefffff + 1, &iodom->msi_entry);
if (error != 0)
goto fail;
}
return (domain);
fail:
dmar_domain_destroy(domain);
return (NULL);
}
static struct dmar_ctx *
dmar_ctx_alloc(struct dmar_domain *domain, uint16_t rid)
{
struct dmar_ctx *ctx;
ctx = malloc(sizeof(*ctx), M_DMAR_CTX, M_WAITOK | M_ZERO);
ctx->context.domain = DOM2IODOM(domain);
ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu),
M_DMAR_CTX, M_WAITOK | M_ZERO);
ctx->context.rid = rid;
ctx->context.refs = 1;
return (ctx);
}
static void
dmar_ctx_link(struct dmar_ctx *ctx)
{
struct dmar_domain *domain;
domain = CTX2DOM(ctx);
IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
KASSERT(domain->refs >= domain->ctx_cnt,
("dom %p ref underflow %d %d", domain, domain->refs,
domain->ctx_cnt));
domain->refs++;
domain->ctx_cnt++;
LIST_INSERT_HEAD(&domain->iodom.contexts, &ctx->context, link);
}
static void
dmar_ctx_unlink(struct dmar_ctx *ctx)
{
struct dmar_domain *domain;
domain = CTX2DOM(ctx);
IOMMU_ASSERT_LOCKED(domain->iodom.iommu);
KASSERT(domain->refs > 0,
("domain %p ctx dtr refs %d", domain, domain->refs));
KASSERT(domain->ctx_cnt >= domain->refs,
("domain %p ctx dtr refs %d ctx_cnt %d", domain,
domain->refs, domain->ctx_cnt));
domain->refs--;
domain->ctx_cnt--;
LIST_REMOVE(&ctx->context, link);
}
static void
dmar_domain_destroy(struct dmar_domain *domain)
{
struct iommu_domain *iodom;
struct dmar_unit *dmar;
iodom = DOM2IODOM(domain);
KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries),
("unfinished unloads %p", domain));
KASSERT(LIST_EMPTY(&iodom->contexts),
("destroying dom %p with contexts", domain));
KASSERT(domain->ctx_cnt == 0,
("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt));
KASSERT(domain->refs == 0,
("destroying dom %p with refs %d", domain, domain->refs));
if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) {
DMAR_DOMAIN_LOCK(domain);
iommu_gas_fini_domain(iodom);
DMAR_DOMAIN_UNLOCK(domain);
}
if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) {
if (domain->pgtbl_obj != NULL)
DMAR_DOMAIN_PGLOCK(domain);
dmar_domain_free_pgtbl(domain);
}
iommu_domain_fini(iodom);
dmar = DOM2DMAR(domain);
free_unr(dmar->domids, domain->domain);
free(domain, M_DMAR_DOMAIN);
}
static struct dmar_ctx *
dmar_get_ctx_for_dev1(struct dmar_unit *dmar, device_t dev, uint16_t rid,
int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
bool id_mapped, bool rmrr_init)
{
struct dmar_domain *domain, *domain1;
struct dmar_ctx *ctx, *ctx1;
struct iommu_unit *unit __diagused;
dmar_ctx_entry_t *ctxp;
struct sf_buf *sf;
int bus, slot, func, error;
bool enable;
if (dev != NULL) {
bus = pci_get_bus(dev);
slot = pci_get_slot(dev);
func = pci_get_function(dev);
} else {
bus = PCI_RID2BUS(rid);
slot = PCI_RID2SLOT(rid);
func = PCI_RID2FUNC(rid);
}
enable = false;
TD_PREP_PINNED_ASSERT;
unit = DMAR2IOMMU(dmar);
DMAR_LOCK(dmar);
KASSERT(!iommu_is_buswide_ctx(unit, bus) || (slot == 0 && func == 0),
("iommu%d pci%d:%d:%d get_ctx for buswide", dmar->iommu.unit, bus,
slot, func));
ctx = dmar_find_ctx_locked(dmar, rid);
error = 0;
if (ctx == NULL) {
/*
* Perform the allocations which require sleep or have
* higher chance to succeed if the sleep is allowed.
*/
DMAR_UNLOCK(dmar);
dmar_ensure_ctx_page(dmar, PCI_RID2BUS(rid));
domain1 = dmar_domain_alloc(dmar, id_mapped);
if (domain1 == NULL) {
TD_PINNED_ASSERT;
return (NULL);
}
if (!id_mapped) {
error = domain_init_rmrr(domain1, dev, bus,
slot, func, dev_domain, dev_busno, dev_path,
dev_path_len);
if (error == 0 && dev != NULL)
error = dmar_reserve_pci_regions(domain1, dev);
if (error != 0) {
dmar_domain_destroy(domain1);
TD_PINNED_ASSERT;
return (NULL);
}
}
ctx1 = dmar_ctx_alloc(domain1, rid);
ctxp = dmar_map_ctx_entry(ctx1, &sf);
DMAR_LOCK(dmar);
		/*
		 * Recheck the contexts; another thread might have
		 * already allocated the needed one.
		 */
ctx = dmar_find_ctx_locked(dmar, rid);
if (ctx == NULL) {
domain = domain1;
ctx = ctx1;
dmar_ctx_link(ctx);
ctx->context.tag->owner = dev;
iommu_device_tag_init(CTX2IOCTX(ctx), dev);
/*
* This is the first activated context for the
* DMAR unit. Enable the translation after
* everything is set up.
*/
if (LIST_EMPTY(&dmar->domains))
enable = true;
LIST_INSERT_HEAD(&dmar->domains, domain, link);
ctx_id_entry_init(ctx, ctxp, false, bus);
if (dev != NULL) {
device_printf(dev,
"dmar%d pci%d:%d:%d:%d rid %x domain %d mgaw %d "
"agaw %d %s-mapped\n",
dmar->iommu.unit, dmar->segment, bus, slot,
func, rid, domain->domain, domain->mgaw,
domain->agaw, id_mapped ? "id" : "re");
}
iommu_unmap_pgtbl(sf);
} else {
iommu_unmap_pgtbl(sf);
dmar_domain_destroy(domain1);
/* Nothing needs to be done to destroy ctx1. */
free(ctx1, M_DMAR_CTX);
domain = CTX2DOM(ctx);
ctx->context.refs++; /* tag referenced us */
}
} else {
domain = CTX2DOM(ctx);
if (ctx->context.tag->owner == NULL)
ctx->context.tag->owner = dev;
ctx->context.refs++; /* tag referenced us */
}
error = dmar_flush_for_ctx_entry(dmar, enable);
if (error != 0) {
dmar_free_ctx_locked(dmar, ctx);
TD_PINNED_ASSERT;
return (NULL);
}
	/*
	 * The dmar lock was potentially dropped between the check for
	 * the empty context list and now.  Recheck the state of
	 * GCMD_TE to avoid issuing an unneeded command.
	 */
if (enable && !rmrr_init && (dmar->hw_gcmd & DMAR_GCMD_TE) == 0) {
error = dmar_disable_protected_regions(dmar);
if (error != 0)
printf("dmar%d: Failed to disable protected regions\n",
dmar->iommu.unit);
error = dmar_enable_translation(dmar);
if (error == 0) {
if (bootverbose) {
printf("dmar%d: enabled translation\n",
dmar->iommu.unit);
}
} else {
printf("dmar%d: enabling translation failed, "
"error %d\n", dmar->iommu.unit, error);
dmar_free_ctx_locked(dmar, ctx);
TD_PINNED_ASSERT;
return (NULL);
}
}
DMAR_UNLOCK(dmar);
TD_PINNED_ASSERT;
return (ctx);
}
struct dmar_ctx *
dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev, uint16_t rid,
bool id_mapped, bool rmrr_init)
{
int dev_domain, dev_path_len, dev_busno;
dev_domain = pci_get_domain(dev);
dev_path_len = dmar_dev_depth(dev);
ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
return (dmar_get_ctx_for_dev1(dmar, dev, rid, dev_domain, dev_busno,
dev_path, dev_path_len, id_mapped, rmrr_init));
}
struct dmar_ctx *
dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
int dev_domain, int dev_busno,
const void *dev_path, int dev_path_len,
bool id_mapped, bool rmrr_init)
{
return (dmar_get_ctx_for_dev1(dmar, NULL, rid, dev_domain, dev_busno,
dev_path, dev_path_len, id_mapped, rmrr_init));
}
int
dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx)
{
struct dmar_unit *dmar;
struct dmar_domain *old_domain;
dmar_ctx_entry_t *ctxp;
struct sf_buf *sf;
int error;
dmar = domain->dmar;
old_domain = CTX2DOM(ctx);
if (domain == old_domain)
return (0);
KASSERT(old_domain->iodom.iommu == domain->iodom.iommu,
("domain %p %u moving between dmars %u %u", domain,
domain->domain, old_domain->iodom.iommu->unit,
domain->iodom.iommu->unit));
TD_PREP_PINNED_ASSERT;
ctxp = dmar_map_ctx_entry(ctx, &sf);
DMAR_LOCK(dmar);
dmar_ctx_unlink(ctx);
ctx->context.domain = &domain->iodom;
dmar_ctx_link(ctx);
ctx_id_entry_init(ctx, ctxp, true, PCI_BUSMAX + 100);
iommu_unmap_pgtbl(sf);
error = dmar_flush_for_ctx_entry(dmar, true);
	/* If the flush failed, rolling back would not work either. */
printf("dmar%d rid %x domain %d->%d %s-mapped\n",
dmar->iommu.unit, ctx->context.rid, old_domain->domain,
domain->domain, (domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0 ?
"id" : "re");
dmar_unref_domain_locked(dmar, old_domain);
TD_PINNED_ASSERT;
return (error);
}
static void
dmar_unref_domain_locked(struct dmar_unit *dmar, struct dmar_domain *domain)
{
DMAR_ASSERT_LOCKED(dmar);
KASSERT(domain->refs >= 1,
("dmar %d domain %p refs %u", dmar->iommu.unit, domain,
domain->refs));
KASSERT(domain->refs > domain->ctx_cnt,
("dmar %d domain %p refs %d ctx_cnt %d", dmar->iommu.unit, domain,
domain->refs, domain->ctx_cnt));
if (domain->refs > 1) {
domain->refs--;
DMAR_UNLOCK(dmar);
return;
}
KASSERT((domain->iodom.flags & IOMMU_DOMAIN_RMRR) == 0,
("lost ref on RMRR domain %p", domain));
LIST_REMOVE(domain, link);
DMAR_UNLOCK(dmar);
taskqueue_drain(dmar->iommu.delayed_taskqueue,
&domain->iodom.unload_task);
dmar_domain_destroy(domain);
}
static void
dmar_free_ctx_locked(struct dmar_unit *dmar, struct dmar_ctx *ctx)
{
struct sf_buf *sf;
dmar_ctx_entry_t *ctxp;
struct dmar_domain *domain;
DMAR_ASSERT_LOCKED(dmar);
KASSERT(ctx->context.refs >= 1,
("dmar %p ctx %p refs %u", dmar, ctx, ctx->context.refs));
/*
* If our reference is not the last one, only the dereference
* should be performed.
*/
if (ctx->context.refs > 1) {
ctx->context.refs--;
DMAR_UNLOCK(dmar);
return;
}
KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
("lost ref on disabled ctx %p", ctx));
/*
* Otherwise, the context entry must be cleared before the
* page table is destroyed.  The mapping of the context
* entries page could require sleep, so unlock the dmar.
*/
DMAR_UNLOCK(dmar);
TD_PREP_PINNED_ASSERT;
ctxp = dmar_map_ctx_entry(ctx, &sf);
DMAR_LOCK(dmar);
KASSERT(ctx->context.refs >= 1,
("dmar %p ctx %p refs %u", dmar, ctx, ctx->context.refs));
/*
* Another thread might have referenced the context, in which
* case, again, only the dereference should be performed.
*/
if (ctx->context.refs > 1) {
ctx->context.refs--;
DMAR_UNLOCK(dmar);
iommu_unmap_pgtbl(sf);
TD_PINNED_ASSERT;
return;
}
KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0,
("lost ref on disabled ctx %p", ctx));
/*
* Clear the context pointer and flush the caches.
* XXXKIB: cannot do this if any RMRR entries are still present.
*/
dmar_pte_clear(&ctxp->ctx1);
ctxp->ctx2 = 0;
dmar_flush_ctx_to_ram(dmar, ctxp);
dmar_inv_ctx_glob(dmar);
if ((dmar->hw_ecap & DMAR_ECAP_DI) != 0) {
if (dmar->qi_enabled)
dmar_qi_invalidate_iotlb_glob_locked(dmar);
else
dmar_inv_iotlb_glob(dmar);
}
iommu_unmap_pgtbl(sf);
domain = CTX2DOM(ctx);
dmar_ctx_unlink(ctx);
free(ctx->context.tag, M_DMAR_CTX);
free(ctx, M_DMAR_CTX);
dmar_unref_domain_locked(dmar, domain);
TD_PINNED_ASSERT;
}
-static void
-dmar_free_ctx(struct dmar_ctx *ctx)
-{
- struct dmar_unit *dmar;
-
- dmar = CTX2DMAR(ctx);
- DMAR_LOCK(dmar);
- dmar_free_ctx_locked(dmar, ctx);
-}
-
/*
* Returns with the domain locked.
*/
struct dmar_ctx *
dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid)
{
struct dmar_domain *domain;
struct iommu_ctx *ctx;
DMAR_ASSERT_LOCKED(dmar);
LIST_FOREACH(domain, &dmar->domains, link) {
LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
if (ctx->rid == rid)
return (IOCTX2CTX(ctx));
}
}
return (NULL);
}
/*
* If the given value for "free" is true, then the caller must not be using
* the entry's dmamap_link field.
*/
void
dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep)
{
struct dmar_domain *domain;
struct dmar_unit *unit;
domain = IODOM2DOM(entry->domain);
unit = DOM2DMAR(domain);
/*
* If "free" is false, then the IOTLB invalidation must be performed
* synchronously. Otherwise, the caller might free the entry before
* dmar_qi_task() is finished processing it.
*/
if (unit->qi_enabled) {
if (free) {
DMAR_LOCK(unit);
iommu_qi_invalidate_locked(&domain->iodom, entry,
true);
DMAR_UNLOCK(unit);
} else {
iommu_qi_invalidate_sync(&domain->iodom, entry->start,
entry->end - entry->start, cansleep);
iommu_domain_free_entry(entry, false);
}
} else {
dmar_flush_iotlb_sync(domain, entry->start, entry->end -
entry->start);
iommu_domain_free_entry(entry, free);
}
}
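/*
* Decide whether the queued invalidation for this entry should emit a wait
* descriptor: always for the last entry of the batch, and otherwise only
* once every iommu_qi_batch_coalesce entries, so that wait descriptors are
* coalesced across the batch.
*/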
static bool
dmar_domain_unload_emit_wait(struct dmar_domain *domain,
struct iommu_map_entry *entry)
{
if (TAILQ_NEXT(entry, dmamap_link) == NULL)
return (true);
return (domain->batch_no++ % iommu_qi_batch_coalesce == 0);
}
void
dmar_domain_unload(struct iommu_domain *iodom,
struct iommu_map_entries_tailq *entries, bool cansleep)
{
struct dmar_domain *domain;
struct dmar_unit *unit;
struct iommu_map_entry *entry, *entry1;
int error __diagused;
domain = IODOM2DOM(iodom);
unit = DOM2DMAR(domain);
TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) {
KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0,
("not mapped entry %p %p", domain, entry));
error = iodom->ops->unmap(iodom, entry,
cansleep ? IOMMU_PGF_WAITOK : 0);
KASSERT(error == 0, ("unmap %p error %d", domain, error));
if (!unit->qi_enabled) {
dmar_flush_iotlb_sync(domain, entry->start,
entry->end - entry->start);
TAILQ_REMOVE(entries, entry, dmamap_link);
iommu_domain_free_entry(entry, true);
}
}
if (TAILQ_EMPTY(entries))
return;
KASSERT(unit->qi_enabled, ("loaded entry left"));
DMAR_LOCK(unit);
while ((entry = TAILQ_FIRST(entries)) != NULL) {
TAILQ_REMOVE(entries, entry, dmamap_link);
iommu_qi_invalidate_locked(&domain->iodom, entry,
dmar_domain_unload_emit_wait(domain, entry));
}
DMAR_UNLOCK(unit);
}
struct iommu_ctx *
dmar_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid,
bool id_mapped, bool rmrr_init)
{
struct dmar_unit *dmar;
struct dmar_ctx *ret;
dmar = IOMMU2DMAR(iommu);
ret = dmar_get_ctx_for_dev(dmar, dev, rid, id_mapped, rmrr_init);
return (CTX2IOCTX(ret));
}
void
dmar_free_ctx_locked_method(struct iommu_unit *iommu,
struct iommu_ctx *context)
{
struct dmar_unit *dmar;
struct dmar_ctx *ctx;
dmar = IOMMU2DMAR(iommu);
ctx = IOCTX2CTX(context);
dmar_free_ctx_locked(dmar, ctx);
}
-
-void
-dmar_free_ctx_method(struct iommu_ctx *context)
-{
- struct dmar_ctx *ctx;
-
- ctx = IOCTX2CTX(context);
- dmar_free_ctx(ctx);
-}
diff --git a/sys/x86/iommu/intel_dmar.h b/sys/x86/iommu/intel_dmar.h
index 1a9b5041975c..57a66aae69b2 100644
--- a/sys/x86/iommu/intel_dmar.h
+++ b/sys/x86/iommu/intel_dmar.h
@@ -1,427 +1,426 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013-2015 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef __X86_IOMMU_INTEL_DMAR_H
#define __X86_IOMMU_INTEL_DMAR_H
#include
struct dmar_unit;
/*
* Locking annotations:
* (u) - Protected by iommu unit lock
* (d) - Protected by domain lock
* (c) - Immutable after initialization
*/
/*
* The domain abstraction.  Most non-constant members of the domain
* are protected by the owning dmar unit lock, not by the domain lock.
* Most importantly, the dmar lock protects the list of contexts.
*
* The domain lock protects the domain's address map and the list of
* delayed unload entries.
*
* Page table pages and their content are protected by the lock of the
* vm object pgtbl_obj, which contains the page table pages.
*/
struct dmar_domain {
struct iommu_domain iodom;
int domain; /* (c) DID, written in context entry */
int mgaw; /* (c) Real max address width */
int agaw; /* (c) Adjusted guest address width */
int pglvl; /* (c) The pagelevel */
int awlvl; /* (c) The pagelevel as the bitmask,
to set in context entry */
u_int ctx_cnt; /* (u) Number of contexts owned */
u_int refs; /* (u) Refs, including ctx */
struct dmar_unit *dmar; /* (c) */
LIST_ENTRY(dmar_domain) link; /* (u) Member in the dmar list */
vm_object_t pgtbl_obj; /* (c) Page table pages */
u_int batch_no;
};
struct dmar_ctx {
struct iommu_ctx context;
uint64_t last_fault_rec[2]; /* Last fault reported */
};
#define DMAR_DOMAIN_PGLOCK(dom) VM_OBJECT_WLOCK((dom)->pgtbl_obj)
#define DMAR_DOMAIN_PGTRYLOCK(dom) VM_OBJECT_TRYWLOCK((dom)->pgtbl_obj)
#define DMAR_DOMAIN_PGUNLOCK(dom) VM_OBJECT_WUNLOCK((dom)->pgtbl_obj)
#define DMAR_DOMAIN_ASSERT_PGLOCKED(dom) \
VM_OBJECT_ASSERT_WLOCKED((dom)->pgtbl_obj)
#define DMAR_DOMAIN_LOCK(dom) mtx_lock(&(dom)->iodom.lock)
#define DMAR_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->iodom.lock)
#define DMAR_DOMAIN_ASSERT_LOCKED(dom) mtx_assert(&(dom)->iodom.lock, MA_OWNED)
#define DMAR2IOMMU(dmar) (&((dmar)->iommu))
#define IOMMU2DMAR(dmar) \
__containerof((dmar), struct dmar_unit, iommu)
#define DOM2IODOM(domain) (&((domain)->iodom))
#define IODOM2DOM(domain) \
__containerof((domain), struct dmar_domain, iodom)
#define CTX2IOCTX(ctx) (&((ctx)->context))
#define IOCTX2CTX(ctx) \
__containerof((ctx), struct dmar_ctx, context)
#define CTX2DOM(ctx) IODOM2DOM((ctx)->context.domain)
#define CTX2DMAR(ctx) (CTX2DOM(ctx)->dmar)
#define DOM2DMAR(domain) ((domain)->dmar)
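/*
* Editorial note, not part of the original change: the conversion macros
* above compose, so the unit that owns a context can be reached either
* step by step or with the CTX2DMAR() shorthand, e.g.:
*
*	struct dmar_domain *dom = CTX2DOM(ctx);
*	struct dmar_unit *unit = DOM2DMAR(dom);	(same result as CTX2DMAR(ctx))
*/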
#define DMAR_INTR_FAULT 0
#define DMAR_INTR_QI 1
#define DMAR_INTR_TOTAL 2
struct dmar_unit {
struct iommu_unit iommu;
struct x86_unit_common x86c;
uint16_t segment;
uint64_t base;
int memdomain;
/* Resources */
int reg_rid;
struct resource *regs;
/* Hardware registers cache */
uint32_t hw_ver;
uint64_t hw_cap;
uint64_t hw_ecap;
uint32_t hw_gcmd;
/* Data for being a dmar */
LIST_HEAD(, dmar_domain) domains;
struct unrhdr *domids;
vm_object_t ctx_obj;
u_int barrier_flags;
/* Fault handler data */
struct mtx fault_lock;
uint64_t *fault_log;
int fault_log_head;
int fault_log_tail;
int fault_log_size;
struct task fault_task;
struct taskqueue *fault_taskqueue;
/* QI */
int qi_enabled;
/* IR */
int ir_enabled;
vm_paddr_t irt_phys;
dmar_irte_t *irt;
u_int irte_cnt;
vmem_t *irtids;
};
#define DMAR_LOCK(dmar) mtx_lock(&DMAR2IOMMU(dmar)->lock)
#define DMAR_UNLOCK(dmar) mtx_unlock(&DMAR2IOMMU(dmar)->lock)
#define DMAR_ASSERT_LOCKED(dmar) mtx_assert(&DMAR2IOMMU(dmar)->lock, MA_OWNED)
#define DMAR_FAULT_LOCK(dmar) mtx_lock_spin(&(dmar)->fault_lock)
#define DMAR_FAULT_UNLOCK(dmar) mtx_unlock_spin(&(dmar)->fault_lock)
#define DMAR_FAULT_ASSERT_LOCKED(dmar) mtx_assert(&(dmar)->fault_lock, MA_OWNED)
#define DMAR_IS_COHERENT(dmar) (((dmar)->hw_ecap & DMAR_ECAP_C) != 0)
#define DMAR_HAS_QI(dmar) (((dmar)->hw_ecap & DMAR_ECAP_QI) != 0)
#define DMAR_X2APIC(dmar) \
(x2apic_mode && ((dmar)->hw_ecap & DMAR_ECAP_EIM) != 0)
/* Barrier ids */
#define DMAR_BARRIER_RMRR 0
#define DMAR_BARRIER_USEQ 1
SYSCTL_DECL(_hw_iommu_dmar);
struct dmar_unit *dmar_find(device_t dev, bool verbose);
struct dmar_unit *dmar_find_hpet(device_t dev, uint16_t *rid);
struct dmar_unit *dmar_find_ioapic(u_int apic_id, uint16_t *rid);
u_int dmar_nd2mask(u_int nd);
bool dmar_pglvl_supported(struct dmar_unit *unit, int pglvl);
int domain_set_agaw(struct dmar_domain *domain, int mgaw);
int dmar_maxaddr2mgaw(struct dmar_unit *unit, iommu_gaddr_t maxaddr,
bool allow_less);
int domain_is_sp_lvl(struct dmar_domain *domain, int lvl);
iommu_gaddr_t domain_page_size(struct dmar_domain *domain, int lvl);
int calc_am(struct dmar_unit *unit, iommu_gaddr_t base, iommu_gaddr_t size,
iommu_gaddr_t *isizep);
int dmar_load_root_entry_ptr(struct dmar_unit *unit);
int dmar_inv_ctx_glob(struct dmar_unit *unit);
int dmar_inv_iotlb_glob(struct dmar_unit *unit);
int dmar_flush_write_bufs(struct dmar_unit *unit);
void dmar_flush_pte_to_ram(struct dmar_unit *unit, iommu_pte_t *dst);
void dmar_flush_ctx_to_ram(struct dmar_unit *unit, dmar_ctx_entry_t *dst);
void dmar_flush_root_to_ram(struct dmar_unit *unit, dmar_root_entry_t *dst);
int dmar_disable_protected_regions(struct dmar_unit *unit);
int dmar_enable_translation(struct dmar_unit *unit);
int dmar_disable_translation(struct dmar_unit *unit);
int dmar_load_irt_ptr(struct dmar_unit *unit);
int dmar_enable_ir(struct dmar_unit *unit);
int dmar_disable_ir(struct dmar_unit *unit);
bool dmar_barrier_enter(struct dmar_unit *dmar, u_int barrier_id);
void dmar_barrier_exit(struct dmar_unit *dmar, u_int barrier_id);
uint64_t dmar_get_timeout(void);
void dmar_update_timeout(uint64_t newval);
int dmar_fault_intr(void *arg);
void dmar_enable_fault_intr(struct iommu_unit *unit);
void dmar_disable_fault_intr(struct iommu_unit *unit);
int dmar_init_fault_log(struct dmar_unit *unit);
void dmar_fini_fault_log(struct dmar_unit *unit);
int dmar_qi_intr(void *arg);
void dmar_enable_qi_intr(struct iommu_unit *unit);
void dmar_disable_qi_intr(struct iommu_unit *unit);
int dmar_init_qi(struct dmar_unit *unit);
void dmar_fini_qi(struct dmar_unit *unit);
void dmar_qi_invalidate_locked(struct dmar_domain *domain,
struct iommu_map_entry *entry, bool emit_wait);
void dmar_qi_invalidate_sync(struct dmar_domain *domain, iommu_gaddr_t start,
iommu_gaddr_t size, bool cansleep);
void dmar_qi_invalidate_ctx_glob_locked(struct dmar_unit *unit);
void dmar_qi_invalidate_iotlb_glob_locked(struct dmar_unit *unit);
void dmar_qi_invalidate_iec_glob(struct dmar_unit *unit);
void dmar_qi_invalidate_iec(struct dmar_unit *unit, u_int start, u_int cnt);
vm_object_t dmar_get_idmap_pgtbl(struct dmar_domain *domain,
iommu_gaddr_t maxaddr);
void dmar_put_idmap_pgtbl(vm_object_t obj);
void dmar_flush_iotlb_sync(struct dmar_domain *domain, iommu_gaddr_t base,
iommu_gaddr_t size);
int dmar_domain_alloc_pgtbl(struct dmar_domain *domain);
void dmar_domain_free_pgtbl(struct dmar_domain *domain);
extern const struct iommu_domain_map_ops dmar_domain_map_ops;
int dmar_dev_depth(device_t child);
void dmar_dev_path(device_t child, int *busno, void *path1, int depth);
struct dmar_ctx *dmar_get_ctx_for_dev(struct dmar_unit *dmar, device_t dev,
uint16_t rid, bool id_mapped, bool rmrr_init);
struct dmar_ctx *dmar_get_ctx_for_devpath(struct dmar_unit *dmar, uint16_t rid,
int dev_domain, int dev_busno, const void *dev_path, int dev_path_len,
bool id_mapped, bool rmrr_init);
int dmar_move_ctx_to_domain(struct dmar_domain *domain, struct dmar_ctx *ctx);
void dmar_free_ctx_locked_method(struct iommu_unit *dmar,
struct iommu_ctx *ctx);
-void dmar_free_ctx_method(struct iommu_ctx *ctx);
struct dmar_ctx *dmar_find_ctx_locked(struct dmar_unit *dmar, uint16_t rid);
struct iommu_ctx *dmar_get_ctx(struct iommu_unit *iommu, device_t dev,
uint16_t rid, bool id_mapped, bool rmrr_init);
void dmar_domain_unload_entry(struct iommu_map_entry *entry, bool free,
bool cansleep);
void dmar_domain_unload(struct iommu_domain *iodom,
struct iommu_map_entries_tailq *entries, bool cansleep);
void dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain,
int dev_busno, const void *dev_path, int dev_path_len,
struct iommu_map_entries_tailq *rmrr_entries);
int dmar_instantiate_rmrr_ctxs(struct iommu_unit *dmar);
void dmar_quirks_post_ident(struct dmar_unit *dmar);
void dmar_quirks_pre_use(struct iommu_unit *dmar);
int dmar_init_irt(struct dmar_unit *unit);
void dmar_fini_irt(struct dmar_unit *unit);
int dmar_alloc_msi_intr(device_t src, u_int *cookies, u_int count);
int dmar_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie,
uint64_t *addr, uint32_t *data);
int dmar_unmap_msi_intr(device_t src, u_int cookie);
int dmar_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge,
bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo);
int dmar_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie);
extern int haw;
extern int dmar_rmrr_enable;
static inline uint32_t
dmar_read4(const struct dmar_unit *unit, int reg)
{
return (bus_read_4(unit->regs, reg));
}
static inline uint64_t
dmar_read8(const struct dmar_unit *unit, int reg)
{
#ifdef __i386__
uint32_t high, low;
low = bus_read_4(unit->regs, reg);
high = bus_read_4(unit->regs, reg + 4);
return (low | ((uint64_t)high << 32));
#else
return (bus_read_8(unit->regs, reg));
#endif
}
static inline void
dmar_write4(const struct dmar_unit *unit, int reg, uint32_t val)
{
KASSERT(reg != DMAR_GCMD_REG || (val & DMAR_GCMD_TE) ==
(unit->hw_gcmd & DMAR_GCMD_TE),
("dmar%d clearing TE 0x%08x 0x%08x", unit->iommu.unit,
unit->hw_gcmd, val));
bus_write_4(unit->regs, reg, val);
}
static inline void
dmar_write8(const struct dmar_unit *unit, int reg, uint64_t val)
{
KASSERT(reg != DMAR_GCMD_REG, ("8byte GCMD write"));
#ifdef __i386__
uint32_t high, low;
low = val;
high = val >> 32;
bus_write_4(unit->regs, reg, low);
bus_write_4(unit->regs, reg + 4, high);
#else
bus_write_8(unit->regs, reg, val);
#endif
}
/*
* dmar_pte_store and dmar_pte_clear ensure that on i386, 32bit writes
* are issued in the correct order. For store, the lower word,
* containing the P or R and W bits, is set only after the high word
* is written. For clear, the P bit is cleared first, then the high
* word is cleared.
*
* dmar_pte_update updates the pte.  For amd64, the update is atomic.
* For i386, it first disables the entry by clearing the word
* containing the P bit, and then defers to dmar_pte_store.  A locked
* cmpxchg8b is probably available on any machine having DMAR support,
* but the interrupt translation table may be mapped uncached.
*/
static inline void
dmar_pte_store1(volatile uint64_t *dst, uint64_t val)
{
#ifdef __i386__
volatile uint32_t *p;
uint32_t hi, lo;
hi = val >> 32;
lo = val;
p = (volatile uint32_t *)dst;
*(p + 1) = hi;
*p = lo;
#else
*dst = val;
#endif
}
static inline void
dmar_pte_store(volatile uint64_t *dst, uint64_t val)
{
KASSERT(*dst == 0, ("used pte %p oldval %jx newval %jx",
dst, (uintmax_t)*dst, (uintmax_t)val));
dmar_pte_store1(dst, val);
}
static inline void
dmar_pte_update(volatile uint64_t *dst, uint64_t val)
{
#ifdef __i386__
volatile uint32_t *p;
p = (volatile uint32_t *)dst;
*p = 0;
#endif
dmar_pte_store1(dst, val);
}
static inline void
dmar_pte_clear(volatile uint64_t *dst)
{
#ifdef __i386__
volatile uint32_t *p;
p = (volatile uint32_t *)dst;
*p = 0;
*(p + 1) = 0;
#else
*dst = 0;
#endif
}
extern struct timespec dmar_hw_timeout;
#define DMAR_WAIT_UNTIL(cond) \
{ \
struct timespec last, curr; \
bool forever; \
\
if (dmar_hw_timeout.tv_sec == 0 && \
dmar_hw_timeout.tv_nsec == 0) { \
forever = true; \
} else { \
forever = false; \
nanouptime(&curr); \
timespecadd(&curr, &dmar_hw_timeout, &last); \
} \
for (;;) { \
if (cond) { \
error = 0; \
break; \
} \
nanouptime(&curr); \
if (!forever && timespeccmp(&last, &curr, <)) { \
error = ETIMEDOUT; \
break; \
} \
cpu_spinwait(); \
} \
}
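/*
* Hypothetical usage sketch (editorial addition): the caller provides a
* local "int error" and a condition that polls hardware state; the macro
* busy-waits, honoring the tunable dmar_hw_timeout, and leaves 0 or
* ETIMEDOUT in "error".  Waiting for a status bit could look like:
*
*	int error;
*
*	DMAR_WAIT_UNTIL(((dmar_read4(unit, DMAR_GSTS_REG) &
*	    DMAR_GSTS_TES) != 0));
*	if (error == ETIMEDOUT)
*		printf("dmar%d: wait timed out\n", unit->iommu.unit);
*/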
#ifdef INVARIANTS
#define TD_PREP_PINNED_ASSERT \
int old_td_pinned; \
old_td_pinned = curthread->td_pinned
#define TD_PINNED_ASSERT \
KASSERT(curthread->td_pinned == old_td_pinned, \
("pin count leak: %d %d %s:%d", curthread->td_pinned, \
old_td_pinned, __FILE__, __LINE__))
#else
#define TD_PREP_PINNED_ASSERT
#define TD_PINNED_ASSERT
#endif
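/*
* Illustrative pattern (editorial addition) for the pinned-thread asserts
* above: bracket a region that may temporarily pin curthread, so that a
* leaked pin count is caught under INVARIANTS and the checks compile away
* otherwise.
*
*	TD_PREP_PINNED_ASSERT;
*	... code that may sched_pin() / sched_unpin() ...
*	TD_PINNED_ASSERT;
*/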
#endif
diff --git a/sys/x86/iommu/intel_drv.c b/sys/x86/iommu/intel_drv.c
index 22d04029f9ae..b21c8460e830 100644
--- a/sys/x86/iommu/intel_drv.c
+++ b/sys/x86/iommu/intel_drv.c
@@ -1,1336 +1,1335 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013-2015 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_acpi.h"
#if defined(__amd64__)
#define DEV_APIC
#else
#include "opt_apic.h"
#endif
#include "opt_ddb.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#ifdef DEV_APIC
#include "pcib_if.h"
#include
#include
#include
#endif
#define DMAR_FAULT_IRQ_RID 0
#define DMAR_QI_IRQ_RID 1
#define DMAR_REG_RID 2
static device_t *dmar_devs;
static int dmar_devcnt;
typedef int (*dmar_iter_t)(ACPI_DMAR_HEADER *, void *);
static void
dmar_iterate_tbl(dmar_iter_t iter, void *arg)
{
ACPI_TABLE_DMAR *dmartbl;
ACPI_DMAR_HEADER *dmarh;
char *ptr, *ptrend;
ACPI_STATUS status;
status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl);
if (ACPI_FAILURE(status))
return;
ptr = (char *)dmartbl + sizeof(*dmartbl);
ptrend = (char *)dmartbl + dmartbl->Header.Length;
for (;;) {
if (ptr >= ptrend)
break;
dmarh = (ACPI_DMAR_HEADER *)ptr;
if (dmarh->Length <= 0) {
printf("dmar_identify: corrupted DMAR table, l %d\n",
dmarh->Length);
break;
}
ptr += dmarh->Length;
if (!iter(dmarh, arg))
break;
}
AcpiPutTable((ACPI_TABLE_HEADER *)dmartbl);
}
struct find_iter_args {
int i;
ACPI_DMAR_HARDWARE_UNIT *res;
};
static int
dmar_find_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
{
struct find_iter_args *fia;
if (dmarh->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
return (1);
fia = arg;
if (fia->i == 0) {
fia->res = (ACPI_DMAR_HARDWARE_UNIT *)dmarh;
return (0);
}
fia->i--;
return (1);
}
static ACPI_DMAR_HARDWARE_UNIT *
dmar_find_by_index(int idx)
{
struct find_iter_args fia;
fia.i = idx;
fia.res = NULL;
dmar_iterate_tbl(dmar_find_iter, &fia);
return (fia.res);
}
static int
dmar_count_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
{
if (dmarh->Type == ACPI_DMAR_TYPE_HARDWARE_UNIT)
dmar_devcnt++;
return (1);
}
int dmar_rmrr_enable = 1;
static int dmar_enable = 0;
static void
dmar_identify(driver_t *driver, device_t parent)
{
ACPI_TABLE_DMAR *dmartbl;
ACPI_DMAR_HARDWARE_UNIT *dmarh;
ACPI_STATUS status;
int i, error;
if (acpi_disabled("dmar"))
return;
TUNABLE_INT_FETCH("hw.dmar.enable", &dmar_enable);
if (!dmar_enable)
return;
TUNABLE_INT_FETCH("hw.dmar.rmrr_enable", &dmar_rmrr_enable);
status = AcpiGetTable(ACPI_SIG_DMAR, 1, (ACPI_TABLE_HEADER **)&dmartbl);
if (ACPI_FAILURE(status))
return;
haw = dmartbl->Width + 1;
if ((1ULL << (haw + 1)) > BUS_SPACE_MAXADDR)
iommu_high = BUS_SPACE_MAXADDR;
else
iommu_high = 1ULL << (haw + 1);
if (bootverbose) {
printf("DMAR HAW=%d flags=<%b>\n", dmartbl->Width,
(unsigned)dmartbl->Flags,
"\020\001INTR_REMAP\002X2APIC_OPT_OUT");
}
AcpiPutTable((ACPI_TABLE_HEADER *)dmartbl);
dmar_iterate_tbl(dmar_count_iter, NULL);
if (dmar_devcnt == 0)
return;
dmar_devs = malloc(sizeof(device_t) * dmar_devcnt, M_DEVBUF,
M_WAITOK | M_ZERO);
for (i = 0; i < dmar_devcnt; i++) {
dmarh = dmar_find_by_index(i);
if (dmarh == NULL) {
printf("dmar_identify: cannot find HWUNIT %d\n", i);
continue;
}
dmar_devs[i] = BUS_ADD_CHILD(parent, 1, "dmar", i);
if (dmar_devs[i] == NULL) {
printf("dmar_identify: cannot create instance %d\n", i);
continue;
}
error = bus_set_resource(dmar_devs[i], SYS_RES_MEMORY,
DMAR_REG_RID, dmarh->Address, PAGE_SIZE);
if (error != 0) {
printf(
"dmar%d: unable to alloc register window at 0x%08jx: error %d\n",
i, (uintmax_t)dmarh->Address, error);
device_delete_child(parent, dmar_devs[i]);
dmar_devs[i] = NULL;
}
}
}
static int
dmar_probe(device_t dev)
{
if (acpi_get_handle(dev) != NULL)
return (ENXIO);
device_set_desc(dev, "DMA remap");
return (BUS_PROBE_NOWILDCARD);
}
static void
dmar_release_resources(device_t dev, struct dmar_unit *unit)
{
int i;
iommu_fini_busdma(&unit->iommu);
dmar_fini_irt(unit);
dmar_fini_qi(unit);
dmar_fini_fault_log(unit);
for (i = 0; i < DMAR_INTR_TOTAL; i++)
iommu_release_intr(DMAR2IOMMU(unit), i);
if (unit->regs != NULL) {
bus_deactivate_resource(dev, SYS_RES_MEMORY, unit->reg_rid,
unit->regs);
bus_release_resource(dev, SYS_RES_MEMORY, unit->reg_rid,
unit->regs);
unit->regs = NULL;
}
if (unit->domids != NULL) {
delete_unrhdr(unit->domids);
unit->domids = NULL;
}
if (unit->ctx_obj != NULL) {
vm_object_deallocate(unit->ctx_obj);
unit->ctx_obj = NULL;
}
sysctl_ctx_free(&unit->iommu.sysctl_ctx);
}
#ifdef DEV_APIC
static int
dmar_remap_intr(device_t dev, device_t child, u_int irq)
{
struct dmar_unit *unit;
struct iommu_msi_data *dmd;
uint64_t msi_addr;
uint32_t msi_data;
int i, error;
unit = device_get_softc(dev);
for (i = 0; i < DMAR_INTR_TOTAL; i++) {
dmd = &unit->x86c.intrs[i];
if (irq == dmd->irq) {
error = PCIB_MAP_MSI(device_get_parent(
device_get_parent(dev)),
dev, irq, &msi_addr, &msi_data);
if (error != 0)
return (error);
DMAR_LOCK(unit);
dmd->msi_data = msi_data;
dmd->msi_addr = msi_addr;
(dmd->disable_intr)(DMAR2IOMMU(unit));
dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data);
dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr);
dmar_write4(unit, dmd->msi_uaddr_reg,
dmd->msi_addr >> 32);
(dmd->enable_intr)(DMAR2IOMMU(unit));
DMAR_UNLOCK(unit);
return (0);
}
}
return (ENOENT);
}
#endif
static void
dmar_print_caps(device_t dev, struct dmar_unit *unit,
ACPI_DMAR_HARDWARE_UNIT *dmaru)
{
uint32_t caphi, ecaphi;
device_printf(dev, "regs@0x%08jx, ver=%d.%d, seg=%d, flags=<%b>\n",
(uintmax_t)dmaru->Address, DMAR_MAJOR_VER(unit->hw_ver),
DMAR_MINOR_VER(unit->hw_ver), dmaru->Segment,
dmaru->Flags, "\020\001INCLUDE_ALL_PCI");
caphi = unit->hw_cap >> 32;
device_printf(dev, "cap=%b,", (u_int)unit->hw_cap,
"\020\004AFL\005WBF\006PLMR\007PHMR\010CM\027ZLR\030ISOCH");
printf("%b, ", caphi, "\020\010PSI\027DWD\030DRD\031FL1GP\034PSI");
printf("ndoms=%d, sagaw=%d, mgaw=%d, fro=%d, nfr=%d, superp=%d",
DMAR_CAP_ND(unit->hw_cap), DMAR_CAP_SAGAW(unit->hw_cap),
DMAR_CAP_MGAW(unit->hw_cap), DMAR_CAP_FRO(unit->hw_cap),
DMAR_CAP_NFR(unit->hw_cap), DMAR_CAP_SPS(unit->hw_cap));
if ((unit->hw_cap & DMAR_CAP_PSI) != 0)
printf(", mamv=%d", DMAR_CAP_MAMV(unit->hw_cap));
printf("\n");
ecaphi = unit->hw_ecap >> 32;
device_printf(dev, "ecap=%b,", (u_int)unit->hw_ecap,
"\020\001C\002QI\003DI\004IR\005EIM\007PT\010SC\031ECS\032MTS"
"\033NEST\034DIS\035PASID\036PRS\037ERS\040SRS");
printf("%b, ", ecaphi, "\020\002NWFS\003EAFS");
printf("mhmw=%d, iro=%d\n", DMAR_ECAP_MHMV(unit->hw_ecap),
DMAR_ECAP_IRO(unit->hw_ecap));
}
/* Remapping Hardware Static Affinity Structure lookup */
struct rhsa_iter_arg {
uint64_t base;
u_int proxim_dom;
};
static int
dmar_rhsa_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
{
struct rhsa_iter_arg *ria;
ACPI_DMAR_RHSA *adr;
if (dmarh->Type == ACPI_DMAR_TYPE_HARDWARE_AFFINITY) {
ria = arg;
adr = (ACPI_DMAR_RHSA *)dmarh;
if (adr->BaseAddress == ria->base)
ria->proxim_dom = adr->ProximityDomain;
}
return (1);
}
static int
dmar_attach(device_t dev)
{
struct dmar_unit *unit;
ACPI_DMAR_HARDWARE_UNIT *dmaru;
struct iommu_msi_data *dmd;
struct rhsa_iter_arg ria;
uint64_t timeout;
int disable_pmr;
int i, error;
unit = device_get_softc(dev);
unit->iommu.unit = device_get_unit(dev);
unit->iommu.dev = dev;
sysctl_ctx_init(&unit->iommu.sysctl_ctx);
dmaru = dmar_find_by_index(unit->iommu.unit);
if (dmaru == NULL)
return (EINVAL);
unit->segment = dmaru->Segment;
unit->base = dmaru->Address;
unit->reg_rid = DMAR_REG_RID;
unit->regs = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
&unit->reg_rid, RF_ACTIVE);
if (unit->regs == NULL) {
device_printf(dev, "cannot allocate register window\n");
dmar_devs[unit->iommu.unit] = NULL;
return (ENOMEM);
}
unit->hw_ver = dmar_read4(unit, DMAR_VER_REG);
unit->hw_cap = dmar_read8(unit, DMAR_CAP_REG);
unit->hw_ecap = dmar_read8(unit, DMAR_ECAP_REG);
if (bootverbose)
dmar_print_caps(dev, unit, dmaru);
dmar_quirks_post_ident(unit);
unit->memdomain = -1;
ria.base = unit->base;
ria.proxim_dom = -1;
dmar_iterate_tbl(dmar_rhsa_iter, &ria);
if (ria.proxim_dom != -1)
unit->memdomain = acpi_map_pxm_to_vm_domainid(ria.proxim_dom);
timeout = dmar_get_timeout();
TUNABLE_UINT64_FETCH("hw.iommu.dmar.timeout", &timeout);
dmar_update_timeout(timeout);
for (i = 0; i < DMAR_INTR_TOTAL; i++)
unit->x86c.intrs[i].irq = -1;
dmd = &unit->x86c.intrs[DMAR_INTR_FAULT];
dmd->name = "fault";
dmd->irq_rid = DMAR_FAULT_IRQ_RID;
dmd->handler = dmar_fault_intr;
dmd->msi_data_reg = DMAR_FEDATA_REG;
dmd->msi_addr_reg = DMAR_FEADDR_REG;
dmd->msi_uaddr_reg = DMAR_FEUADDR_REG;
dmd->enable_intr = dmar_enable_fault_intr;
dmd->disable_intr = dmar_disable_fault_intr;
error = iommu_alloc_irq(DMAR2IOMMU(unit), DMAR_INTR_FAULT);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data);
dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr);
dmar_write4(unit, dmd->msi_uaddr_reg, dmd->msi_addr >> 32);
if (DMAR_HAS_QI(unit)) {
dmd = &unit->x86c.intrs[DMAR_INTR_QI];
dmd->name = "qi";
dmd->irq_rid = DMAR_QI_IRQ_RID;
dmd->handler = dmar_qi_intr;
dmd->msi_data_reg = DMAR_IEDATA_REG;
dmd->msi_addr_reg = DMAR_IEADDR_REG;
dmd->msi_uaddr_reg = DMAR_IEUADDR_REG;
dmd->enable_intr = dmar_enable_qi_intr;
dmd->disable_intr = dmar_disable_qi_intr;
error = iommu_alloc_irq(DMAR2IOMMU(unit), DMAR_INTR_QI);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
dmar_write4(unit, dmd->msi_data_reg, dmd->msi_data);
dmar_write4(unit, dmd->msi_addr_reg, dmd->msi_addr);
dmar_write4(unit, dmd->msi_uaddr_reg, dmd->msi_addr >> 32);
}
mtx_init(&unit->iommu.lock, "dmarhw", NULL, MTX_DEF);
unit->domids = new_unrhdr(0, dmar_nd2mask(DMAR_CAP_ND(unit->hw_cap)),
&unit->iommu.lock);
LIST_INIT(&unit->domains);
/*
* 9.2 "Context Entry":
* When Caching Mode (CM) field is reported as Set, the
* domain-id value of zero is architecturally reserved.
* Software must not use domain-id value of zero
* when CM is Set.
*/
if ((unit->hw_cap & DMAR_CAP_CM) != 0)
alloc_unr_specific(unit->domids, 0);
unit->ctx_obj = vm_pager_allocate(OBJT_PHYS, NULL, IDX_TO_OFF(1 +
DMAR_CTX_CNT), 0, 0, NULL);
if (unit->memdomain != -1) {
unit->ctx_obj->domain.dr_policy = DOMAINSET_PREF(
unit->memdomain);
}
/*
* Allocate and load the root entry table pointer. Enable the
* address translation after the required invalidations are
* done.
*/
iommu_pgalloc(unit->ctx_obj, 0, IOMMU_PGF_WAITOK | IOMMU_PGF_ZERO);
DMAR_LOCK(unit);
error = dmar_load_root_entry_ptr(unit);
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
error = dmar_inv_ctx_glob(unit);
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
if ((unit->hw_ecap & DMAR_ECAP_DI) != 0) {
error = dmar_inv_iotlb_glob(unit);
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
}
DMAR_UNLOCK(unit);
error = dmar_init_fault_log(unit);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
error = dmar_init_qi(unit);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
error = dmar_init_irt(unit);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
disable_pmr = 0;
TUNABLE_INT_FETCH("hw.dmar.pmr.disable", &disable_pmr);
if (disable_pmr) {
error = dmar_disable_protected_regions(unit);
if (error != 0)
device_printf(dev,
"Failed to disable protected regions\n");
}
error = iommu_init_busdma(&unit->iommu);
if (error != 0) {
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
#ifdef NOTYET
DMAR_LOCK(unit);
error = dmar_enable_translation(unit);
if (error != 0) {
DMAR_UNLOCK(unit);
dmar_release_resources(dev, unit);
dmar_devs[unit->iommu.unit] = NULL;
return (error);
}
DMAR_UNLOCK(unit);
#endif
return (0);
}
static int
dmar_detach(device_t dev)
{
return (EBUSY);
}
static int
dmar_suspend(device_t dev)
{
return (0);
}
static int
dmar_resume(device_t dev)
{
/* XXXKIB */
return (0);
}
static device_method_t dmar_methods[] = {
DEVMETHOD(device_identify, dmar_identify),
DEVMETHOD(device_probe, dmar_probe),
DEVMETHOD(device_attach, dmar_attach),
DEVMETHOD(device_detach, dmar_detach),
DEVMETHOD(device_suspend, dmar_suspend),
DEVMETHOD(device_resume, dmar_resume),
#ifdef DEV_APIC
DEVMETHOD(bus_remap_intr, dmar_remap_intr),
#endif
DEVMETHOD_END
};
static driver_t dmar_driver = {
"dmar",
dmar_methods,
sizeof(struct dmar_unit),
};
DRIVER_MODULE(dmar, acpi, dmar_driver, 0, 0);
MODULE_DEPEND(dmar, acpi, 1, 1, 1);
static void
dmar_print_path(int busno, int depth, const ACPI_DMAR_PCI_PATH *path)
{
int i;
printf("[%d, ", busno);
for (i = 0; i < depth; i++) {
if (i != 0)
printf(", ");
printf("(%d, %d)", path[i].Device, path[i].Function);
}
printf("]");
}
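/*
* Compute the number of {device, function} hops between the host bridge
* and "child", i.e. the length of the ACPI DMAR PCI path describing the
* device.
*/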
int
dmar_dev_depth(device_t child)
{
devclass_t pci_class;
device_t bus, pcib;
int depth;
pci_class = devclass_find("pci");
for (depth = 1; ; depth++) {
bus = device_get_parent(child);
pcib = device_get_parent(bus);
if (device_get_devclass(device_get_parent(pcib)) !=
pci_class)
return (depth);
child = pcib;
}
}
void
dmar_dev_path(device_t child, int *busno, void *path1, int depth)
{
devclass_t pci_class;
device_t bus, pcib;
ACPI_DMAR_PCI_PATH *path;
pci_class = devclass_find("pci");
path = path1;
for (depth--; depth != -1; depth--) {
path[depth].Device = pci_get_slot(child);
path[depth].Function = pci_get_function(child);
bus = device_get_parent(child);
pcib = device_get_parent(bus);
if (device_get_devclass(device_get_parent(pcib)) !=
pci_class) {
/* reached a host bridge */
*busno = pcib_get_bus(bus);
return;
}
child = pcib;
}
panic("wrong depth");
}
static int
dmar_match_pathes(int busno1, const ACPI_DMAR_PCI_PATH *path1, int depth1,
int busno2, const ACPI_DMAR_PCI_PATH *path2, int depth2,
enum AcpiDmarScopeType scope_type)
{
int i, depth;
if (busno1 != busno2)
return (0);
if (scope_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && depth1 != depth2)
return (0);
depth = depth1;
if (depth2 < depth)
depth = depth2;
for (i = 0; i < depth; i++) {
if (path1[i].Device != path2[i].Device ||
path1[i].Function != path2[i].Function)
return (0);
}
return (1);
}
static int
dmar_match_devscope(ACPI_DMAR_DEVICE_SCOPE *devscope, int dev_busno,
const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len)
{
ACPI_DMAR_PCI_PATH *path;
int path_len;
if (devscope->Length < sizeof(*devscope)) {
printf("dmar_match_devscope: corrupted DMAR table, dl %d\n",
devscope->Length);
return (-1);
}
if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
return (0);
path_len = devscope->Length - sizeof(*devscope);
if (path_len % 2 != 0) {
printf("dmar_match_devscope: corrupted DMAR table, dl %d\n",
devscope->Length);
return (-1);
}
path_len /= 2;
path = (ACPI_DMAR_PCI_PATH *)(devscope + 1);
if (path_len == 0) {
printf("dmar_match_devscope: corrupted DMAR table, dl %d\n",
devscope->Length);
return (-1);
}
return (dmar_match_pathes(devscope->Bus, path, path_len, dev_busno,
dev_path, dev_path_len, devscope->EntryType));
}
static bool
dmar_match_by_path(struct dmar_unit *unit, int dev_domain, int dev_busno,
const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len, const char **banner)
{
ACPI_DMAR_HARDWARE_UNIT *dmarh;
ACPI_DMAR_DEVICE_SCOPE *devscope;
char *ptr, *ptrend;
int match;
dmarh = dmar_find_by_index(unit->iommu.unit);
if (dmarh == NULL)
return (false);
if (dmarh->Segment != dev_domain)
return (false);
if ((dmarh->Flags & ACPI_DMAR_INCLUDE_ALL) != 0) {
if (banner != NULL)
*banner = "INCLUDE_ALL";
return (true);
}
ptr = (char *)dmarh + sizeof(*dmarh);
ptrend = (char *)dmarh + dmarh->Header.Length;
while (ptr < ptrend) {
devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr;
ptr += devscope->Length;
match = dmar_match_devscope(devscope, dev_busno, dev_path,
dev_path_len);
if (match == -1)
return (false);
if (match == 1) {
if (banner != NULL)
*banner = "specific match";
return (true);
}
}
return (false);
}
static struct dmar_unit *
dmar_find_by_scope(int dev_domain, int dev_busno,
const ACPI_DMAR_PCI_PATH *dev_path, int dev_path_len)
{
struct dmar_unit *unit;
int i;
for (i = 0; i < dmar_devcnt; i++) {
if (dmar_devs[i] == NULL)
continue;
unit = device_get_softc(dmar_devs[i]);
if (dmar_match_by_path(unit, dev_domain, dev_busno, dev_path,
dev_path_len, NULL))
return (unit);
}
return (NULL);
}
struct dmar_unit *
dmar_find(device_t dev, bool verbose)
{
struct dmar_unit *unit;
const char *banner;
int i, dev_domain, dev_busno, dev_path_len;
/*
* This function can only handle PCI(e) devices.
*/
if (device_get_devclass(device_get_parent(dev)) !=
devclass_find("pci"))
return (NULL);
dev_domain = pci_get_domain(dev);
dev_path_len = dmar_dev_depth(dev);
ACPI_DMAR_PCI_PATH dev_path[dev_path_len];
dmar_dev_path(dev, &dev_busno, dev_path, dev_path_len);
banner = "";
for (i = 0; i < dmar_devcnt; i++) {
if (dmar_devs[i] == NULL)
continue;
unit = device_get_softc(dmar_devs[i]);
if (dmar_match_by_path(unit, dev_domain, dev_busno,
dev_path, dev_path_len, &banner))
break;
}
if (i == dmar_devcnt)
return (NULL);
if (verbose) {
device_printf(dev, "pci%d:%d:%d:%d matched dmar%d by %s",
dev_domain, pci_get_bus(dev), pci_get_slot(dev),
pci_get_function(dev), unit->iommu.unit, banner);
printf(" scope path ");
dmar_print_path(dev_busno, dev_path_len, dev_path);
printf("\n");
}
iommu_device_set_iommu_prop(dev, unit->iommu.dev);
return (unit);
}
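/*
* Find the dmar unit whose ACPI device scope lists the non-PCI device
* (IOAPIC or HPET) with the given enumeration id and entry type, and
* report the requester ID to use for it in *rid.
*/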
static struct dmar_unit *
dmar_find_nonpci(u_int id, u_int entry_type, uint16_t *rid)
{
device_t dmar_dev;
struct dmar_unit *unit;
ACPI_DMAR_HARDWARE_UNIT *dmarh;
ACPI_DMAR_DEVICE_SCOPE *devscope;
ACPI_DMAR_PCI_PATH *path;
char *ptr, *ptrend;
#ifdef DEV_APIC
int error;
#endif
int i;
for (i = 0; i < dmar_devcnt; i++) {
dmar_dev = dmar_devs[i];
if (dmar_dev == NULL)
continue;
unit = (struct dmar_unit *)device_get_softc(dmar_dev);
dmarh = dmar_find_by_index(i);
if (dmarh == NULL)
continue;
ptr = (char *)dmarh + sizeof(*dmarh);
ptrend = (char *)dmarh + dmarh->Header.Length;
for (;;) {
if (ptr >= ptrend)
break;
devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr;
ptr += devscope->Length;
if (devscope->EntryType != entry_type)
continue;
if (devscope->EnumerationId != id)
continue;
#ifdef DEV_APIC
if (entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
error = ioapic_get_rid(id, rid);
/*
* If our IOAPIC has PCI bindings then
* use the PCI device rid.
*/
if (error == 0)
return (unit);
}
#endif
if (devscope->Length - sizeof(ACPI_DMAR_DEVICE_SCOPE)
== 2) {
if (rid != NULL) {
path = (ACPI_DMAR_PCI_PATH *)
(devscope + 1);
*rid = PCI_RID(devscope->Bus,
path->Device, path->Function);
}
return (unit);
}
printf(
"dmar_find_nonpci: id %d type %d path length != 2\n",
id, entry_type);
break;
}
}
return (NULL);
}
struct dmar_unit *
dmar_find_hpet(device_t dev, uint16_t *rid)
{
struct dmar_unit *unit;
unit = dmar_find_nonpci(hpet_get_uid(dev), ACPI_DMAR_SCOPE_TYPE_HPET,
rid);
if (unit != NULL)
iommu_device_set_iommu_prop(dev, unit->iommu.dev);
return (unit);
}
struct dmar_unit *
dmar_find_ioapic(u_int apic_id, uint16_t *rid)
{
struct dmar_unit *unit;
device_t apic_dev;
unit = dmar_find_nonpci(apic_id, ACPI_DMAR_SCOPE_TYPE_IOAPIC, rid);
if (unit != NULL) {
apic_dev = ioapic_get_dev(apic_id);
if (apic_dev != NULL)
iommu_device_set_iommu_prop(apic_dev, unit->iommu.dev);
}
return (unit);
}
struct rmrr_iter_args {
struct dmar_domain *domain;
int dev_domain;
int dev_busno;
const ACPI_DMAR_PCI_PATH *dev_path;
int dev_path_len;
struct iommu_map_entries_tailq *rmrr_entries;
};
static int
dmar_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
{
struct rmrr_iter_args *ria;
ACPI_DMAR_RESERVED_MEMORY *resmem;
ACPI_DMAR_DEVICE_SCOPE *devscope;
struct iommu_map_entry *entry;
char *ptr, *ptrend;
int match;
if (!dmar_rmrr_enable)
return (1);
if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY)
return (1);
ria = arg;
resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh;
if (resmem->Segment != ria->dev_domain)
return (1);
ptr = (char *)resmem + sizeof(*resmem);
ptrend = (char *)resmem + resmem->Header.Length;
for (;;) {
if (ptr >= ptrend)
break;
devscope = (ACPI_DMAR_DEVICE_SCOPE *)ptr;
ptr += devscope->Length;
match = dmar_match_devscope(devscope, ria->dev_busno,
ria->dev_path, ria->dev_path_len);
if (match == 1) {
entry = iommu_gas_alloc_entry(DOM2IODOM(ria->domain),
IOMMU_PGF_WAITOK);
entry->start = resmem->BaseAddress;
/* The RMRR entry end address is inclusive. */
entry->end = resmem->EndAddress;
TAILQ_INSERT_TAIL(ria->rmrr_entries, entry,
dmamap_link);
}
}
return (1);
}
void
dmar_dev_parse_rmrr(struct dmar_domain *domain, int dev_domain, int dev_busno,
const void *dev_path, int dev_path_len,
struct iommu_map_entries_tailq *rmrr_entries)
{
struct rmrr_iter_args ria;
ria.domain = domain;
ria.dev_domain = dev_domain;
ria.dev_busno = dev_busno;
ria.dev_path = (const ACPI_DMAR_PCI_PATH *)dev_path;
ria.dev_path_len = dev_path_len;
ria.rmrr_entries = rmrr_entries;
dmar_iterate_tbl(dmar_rmrr_iter, &ria);
}
struct inst_rmrr_iter_args {
struct dmar_unit *dmar;
};
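/*
* Walk an ACPI DMAR device-scope path starting at "busno", following the
* secondary buses of intermediate bridges, and return the PCI device at
* the end of the path (or NULL), together with its requester ID in *rid.
*/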
static device_t
dmar_path_dev(int segment, int path_len, int busno,
const ACPI_DMAR_PCI_PATH *path, uint16_t *rid)
{
device_t dev;
int i;
dev = NULL;
for (i = 0; i < path_len; i++) {
dev = pci_find_dbsf(segment, busno, path->Device,
path->Function);
if (i != path_len - 1) {
busno = pci_cfgregread(segment, busno, path->Device,
path->Function, PCIR_SECBUS_1, 1);
path++;
}
}
*rid = PCI_RID(busno, path->Device, path->Function);
return (dev);
}
static int
dmar_inst_rmrr_iter(ACPI_DMAR_HEADER *dmarh, void *arg)
{
const ACPI_DMAR_RESERVED_MEMORY *resmem;
const ACPI_DMAR_DEVICE_SCOPE *devscope;
struct inst_rmrr_iter_args *iria;
const char *ptr, *ptrend;
device_t dev;
struct dmar_unit *unit;
int dev_path_len;
uint16_t rid;
iria = arg;
if (!dmar_rmrr_enable)
return (1);
if (dmarh->Type != ACPI_DMAR_TYPE_RESERVED_MEMORY)
return (1);
resmem = (ACPI_DMAR_RESERVED_MEMORY *)dmarh;
if (resmem->Segment != iria->dmar->segment)
return (1);
ptr = (const char *)resmem + sizeof(*resmem);
ptrend = (const char *)resmem + resmem->Header.Length;
for (;;) {
if (ptr >= ptrend)
break;
devscope = (const ACPI_DMAR_DEVICE_SCOPE *)ptr;
ptr += devscope->Length;
/* XXXKIB bridge */
if (devscope->EntryType != ACPI_DMAR_SCOPE_TYPE_ENDPOINT)
continue;
rid = 0;
dev_path_len = (devscope->Length -
sizeof(ACPI_DMAR_DEVICE_SCOPE)) / 2;
dev = dmar_path_dev(resmem->Segment, dev_path_len,
devscope->Bus,
(const ACPI_DMAR_PCI_PATH *)(devscope + 1), &rid);
if (dev == NULL) {
if (bootverbose) {
printf("dmar%d no dev found for RMRR "
"[%#jx, %#jx] rid %#x scope path ",
iria->dmar->iommu.unit,
(uintmax_t)resmem->BaseAddress,
(uintmax_t)resmem->EndAddress,
rid);
dmar_print_path(devscope->Bus, dev_path_len,
(const ACPI_DMAR_PCI_PATH *)(devscope + 1));
printf("\n");
}
unit = dmar_find_by_scope(resmem->Segment,
devscope->Bus,
(const ACPI_DMAR_PCI_PATH *)(devscope + 1),
dev_path_len);
if (iria->dmar != unit)
continue;
dmar_get_ctx_for_devpath(iria->dmar, rid,
resmem->Segment, devscope->Bus,
(const ACPI_DMAR_PCI_PATH *)(devscope + 1),
dev_path_len, false, true);
} else {
unit = dmar_find(dev, false);
if (iria->dmar != unit)
continue;
iommu_instantiate_ctx(&(iria)->dmar->iommu,
dev, true);
}
}
return (1);
}
/*
* Pre-create all contexts for the DMAR which have RMRR entries.
*/
int
dmar_instantiate_rmrr_ctxs(struct iommu_unit *unit)
{
struct dmar_unit *dmar;
struct inst_rmrr_iter_args iria;
int error;
dmar = IOMMU2DMAR(unit);
if (!dmar_barrier_enter(dmar, DMAR_BARRIER_RMRR))
return (0);
error = 0;
iria.dmar = dmar;
dmar_iterate_tbl(dmar_inst_rmrr_iter, &iria);
DMAR_LOCK(dmar);
if (!LIST_EMPTY(&dmar->domains)) {
KASSERT((dmar->hw_gcmd & DMAR_GCMD_TE) == 0,
("dmar%d: RMRR not handled but translation is already enabled",
dmar->iommu.unit));
error = dmar_disable_protected_regions(dmar);
if (error != 0)
printf("dmar%d: Failed to disable protected regions\n",
dmar->iommu.unit);
error = dmar_enable_translation(dmar);
if (bootverbose) {
if (error == 0) {
printf("dmar%d: enabled translation\n",
dmar->iommu.unit);
} else {
printf("dmar%d: enabling translation failed, "
"error %d\n", dmar->iommu.unit, error);
}
}
}
dmar_barrier_exit(dmar, DMAR_BARRIER_RMRR);
return (error);
}
#ifdef DDB
#include
#include
static void
dmar_print_domain(struct dmar_domain *domain, bool show_mappings)
{
struct iommu_domain *iodom;
iodom = DOM2IODOM(domain);
db_printf(
" @%p dom %d mgaw %d agaw %d pglvl %d end %jx refs %d\n"
" ctx_cnt %d flags %x pgobj %p map_ents %u\n",
domain, domain->domain, domain->mgaw, domain->agaw, domain->pglvl,
(uintmax_t)domain->iodom.end, domain->refs, domain->ctx_cnt,
domain->iodom.flags, domain->pgtbl_obj, domain->iodom.entries_cnt);
iommu_db_domain_print_contexts(iodom);
if (show_mappings)
iommu_db_domain_print_mappings(iodom);
}
DB_SHOW_COMMAND_FLAGS(dmar_domain, db_dmar_print_domain, CS_OWN)
{
struct dmar_unit *unit;
struct dmar_domain *domain;
struct iommu_ctx *ctx;
bool show_mappings, valid;
int pci_domain, bus, device, function, i, t;
db_expr_t radix;
valid = false;
radix = db_radix;
db_radix = 10;
t = db_read_token();
if (t == tSLASH) {
t = db_read_token();
if (t != tIDENT) {
db_printf("Bad modifier\n");
db_radix = radix;
db_skip_to_eol();
return;
}
show_mappings = strchr(db_tok_string, 'm') != NULL;
t = db_read_token();
} else {
show_mappings = false;
}
if (t == tNUMBER) {
pci_domain = db_tok_number;
t = db_read_token();
if (t == tNUMBER) {
bus = db_tok_number;
t = db_read_token();
if (t == tNUMBER) {
device = db_tok_number;
t = db_read_token();
if (t == tNUMBER) {
function = db_tok_number;
valid = true;
}
}
}
}
db_radix = radix;
db_skip_to_eol();
if (!valid) {
db_printf("usage: show dmar_domain [/m] "
" \n");
return;
}
for (i = 0; i < dmar_devcnt; i++) {
unit = device_get_softc(dmar_devs[i]);
LIST_FOREACH(domain, &unit->domains, link) {
LIST_FOREACH(ctx, &domain->iodom.contexts, link) {
if (pci_domain == unit->segment &&
bus == pci_get_bus(ctx->tag->owner) &&
device == pci_get_slot(ctx->tag->owner) &&
function == pci_get_function(ctx->tag->
owner)) {
dmar_print_domain(domain,
show_mappings);
goto out;
}
}
}
}
out:;
}
static void
dmar_print_one(int idx, bool show_domains, bool show_mappings)
{
struct dmar_unit *unit;
struct dmar_domain *domain;
int i, frir;
unit = device_get_softc(dmar_devs[idx]);
db_printf("dmar%d at %p, root at 0x%jx, ver 0x%x\n", unit->iommu.unit,
unit, dmar_read8(unit, DMAR_RTADDR_REG),
dmar_read4(unit, DMAR_VER_REG));
db_printf("cap 0x%jx ecap 0x%jx gsts 0x%x fsts 0x%x fectl 0x%x\n",
(uintmax_t)dmar_read8(unit, DMAR_CAP_REG),
(uintmax_t)dmar_read8(unit, DMAR_ECAP_REG),
dmar_read4(unit, DMAR_GSTS_REG),
dmar_read4(unit, DMAR_FSTS_REG),
dmar_read4(unit, DMAR_FECTL_REG));
if (unit->ir_enabled) {
db_printf("ir is enabled; IRT @%p phys 0x%jx maxcnt %d\n",
unit->irt, (uintmax_t)unit->irt_phys, unit->irte_cnt);
}
db_printf("fed 0x%x fea 0x%x feua 0x%x\n",
dmar_read4(unit, DMAR_FEDATA_REG),
dmar_read4(unit, DMAR_FEADDR_REG),
dmar_read4(unit, DMAR_FEUADDR_REG));
db_printf("primary fault log:\n");
for (i = 0; i < DMAR_CAP_NFR(unit->hw_cap); i++) {
frir = (DMAR_CAP_FRO(unit->hw_cap) + i) * 16;
db_printf(" %d at 0x%x: %jx %jx\n", i, frir,
(uintmax_t)dmar_read8(unit, frir),
(uintmax_t)dmar_read8(unit, frir + 8));
}
if (DMAR_HAS_QI(unit)) {
db_printf("ied 0x%x iea 0x%x ieua 0x%x\n",
dmar_read4(unit, DMAR_IEDATA_REG),
dmar_read4(unit, DMAR_IEADDR_REG),
dmar_read4(unit, DMAR_IEUADDR_REG));
if (unit->qi_enabled) {
db_printf("qi is enabled: queue @0x%jx (IQA 0x%jx) "
"size 0x%jx\n"
" head 0x%x tail 0x%x avail 0x%x status 0x%x ctrl 0x%x\n"
" hw compl 0x%jx@%p/phys@%jx next seq 0x%x gen 0x%x\n",
(uintmax_t)unit->x86c.inv_queue,
(uintmax_t)dmar_read8(unit, DMAR_IQA_REG),
(uintmax_t)unit->x86c.inv_queue_size,
dmar_read4(unit, DMAR_IQH_REG),
dmar_read4(unit, DMAR_IQT_REG),
unit->x86c.inv_queue_avail,
dmar_read4(unit, DMAR_ICS_REG),
dmar_read4(unit, DMAR_IECTL_REG),
(uintmax_t)unit->x86c.inv_waitd_seq_hw,
&unit->x86c.inv_waitd_seq_hw,
(uintmax_t)unit->x86c.inv_waitd_seq_hw_phys,
unit->x86c.inv_waitd_seq,
unit->x86c.inv_waitd_gen);
} else {
db_printf("qi is disabled\n");
}
}
if (show_domains) {
db_printf("domains:\n");
LIST_FOREACH(domain, &unit->domains, link) {
dmar_print_domain(domain, show_mappings);
if (db_pager_quit)
break;
}
}
}
DB_SHOW_COMMAND(dmar, db_dmar_print)
{
bool show_domains, show_mappings;
show_domains = strchr(modif, 'd') != NULL;
show_mappings = strchr(modif, 'm') != NULL;
if (!have_addr) {
db_printf("usage: show dmar [/d] [/m] index\n");
return;
}
dmar_print_one((int)addr, show_domains, show_mappings);
}
DB_SHOW_ALL_COMMAND(dmars, db_show_all_dmars)
{
int i;
bool show_domains, show_mappings;
show_domains = strchr(modif, 'd') != NULL;
show_mappings = strchr(modif, 'm') != NULL;
for (i = 0; i < dmar_devcnt; i++) {
dmar_print_one(i, show_domains, show_mappings);
if (db_pager_quit)
break;
}
}
#endif
static struct iommu_unit *
dmar_find_method(device_t dev, bool verbose)
{
struct dmar_unit *dmar;
dmar = dmar_find(dev, verbose);
return (&dmar->iommu);
}
static struct x86_unit_common *
dmar_get_x86_common(struct iommu_unit *unit)
{
struct dmar_unit *dmar;
dmar = IOMMU2DMAR(unit);
return (&dmar->x86c);
}
static void
dmar_unit_pre_instantiate_ctx(struct iommu_unit *unit)
{
dmar_quirks_pre_use(unit);
dmar_instantiate_rmrr_ctxs(unit);
}
static struct x86_iommu dmar_x86_iommu = {
.get_x86_common = dmar_get_x86_common,
.unit_pre_instantiate_ctx = dmar_unit_pre_instantiate_ctx,
.domain_unload_entry = dmar_domain_unload_entry,
.domain_unload = dmar_domain_unload,
.get_ctx = dmar_get_ctx,
.free_ctx_locked = dmar_free_ctx_locked_method,
- .free_ctx = dmar_free_ctx_method,
.find = dmar_find_method,
.alloc_msi_intr = dmar_alloc_msi_intr,
.map_msi_intr = dmar_map_msi_intr,
.unmap_msi_intr = dmar_unmap_msi_intr,
.map_ioapic_intr = dmar_map_ioapic_intr,
.unmap_ioapic_intr = dmar_unmap_ioapic_intr,
};
static void
x86_iommu_set_intel(void *arg __unused)
{
if (cpu_vendor_id == CPU_VENDOR_INTEL)
set_x86_iommu(&dmar_x86_iommu);
}
SYSINIT(x86_iommu, SI_SUB_TUNABLES, SI_ORDER_ANY, x86_iommu_set_intel, NULL);
diff --git a/sys/x86/iommu/iommu_utils.c b/sys/x86/iommu/iommu_utils.c
index 479e8637ee8e..2db02a98ca91 100644
--- a/sys/x86/iommu/iommu_utils.c
+++ b/sys/x86/iommu/iommu_utils.c
@@ -1,847 +1,841 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2013, 2014, 2024 The FreeBSD Foundation
*
* This software was developed by Konstantin Belousov
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "opt_acpi.h"
#if defined(__amd64__)
#define DEV_APIC
#else
#include "opt_apic.h"
#endif
#include "opt_ddb.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include