diff --git a/sys/conf/files.x86 b/sys/conf/files.x86 --- a/sys/conf/files.x86 +++ b/sys/conf/files.x86 @@ -344,6 +344,12 @@ x86/cpufreq/hwpstate_intel.c optional cpufreq x86/cpufreq/p4tcc.c optional cpufreq x86/cpufreq/powernow.c optional cpufreq +x86/iommu/amd_cmd.c optional acpi iommu pci +x86/iommu/amd_ctx.c optional acpi iommu pci +x86/iommu/amd_drv.c optional acpi iommu pci +x86/iommu/amd_event.c optional acpi iommu pci +x86/iommu/amd_idpgtbl.c optional acpi iommu pci +x86/iommu/amd_intrmap.c optional acpi iommu pci x86/iommu/intel_ctx.c optional acpi iommu pci x86/iommu/intel_drv.c optional acpi iommu pci x86/iommu/intel_fault.c optional acpi iommu pci diff --git a/sys/x86/iommu/amd_cmd.c b/sys/x86/iommu/amd_cmd.c new file mode 100644 --- /dev/null +++ b/sys/x86/iommu/amd_cmd.c @@ -0,0 +1,360 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include "opt_acpi.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void +amdiommu_enable_cmdbuf(struct amdiommu_unit *unit) +{ + AMDIOMMU_ASSERT_LOCKED(unit); + + unit->hw_ctrl |= AMDIOMMU_CTRL_CMDBUF_EN; + amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); +} + +static void +amdiommu_disable_cmdbuf(struct amdiommu_unit *unit) +{ + AMDIOMMU_ASSERT_LOCKED(unit); + + unit->hw_ctrl &= ~AMDIOMMU_CTRL_CMDBUF_EN; + amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); +} + + +static void +amdiommu_enable_qi_intr(struct iommu_unit *iommu) +{ + struct amdiommu_unit *unit; + + unit = IOMMU2AMD(iommu); + AMDIOMMU_ASSERT_LOCKED(unit); + unit->hw_ctrl |= AMDIOMMU_CTRL_COMWINT_EN; + amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); + amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS, + AMDIOMMU_CMDEVS_COMWAITINT); +} + +static void +amdiommu_disable_qi_intr(struct iommu_unit *iommu) +{ + struct amdiommu_unit *unit; + + unit = IOMMU2AMD(iommu); + AMDIOMMU_ASSERT_LOCKED(unit); + unit->hw_ctrl &= ~AMDIOMMU_CTRL_COMWINT_EN; + amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); +} + +static void +amdiommu_cmd_advance_tail(struct iommu_unit *iommu) +{ + struct amdiommu_unit *unit; + + unit = IOMMU2AMD(iommu); + AMDIOMMU_ASSERT_LOCKED(unit); + amdiommu_write8(unit, AMDIOMMU_CMDBUF_TAIL, unit->x86c.inv_queue_tail); +} + +static void +amdiommu_cmd_ensure(struct iommu_unit *iommu, int descr_count) +{ + struct amdiommu_unit *unit; + uint64_t head; + int bytes; + + unit = IOMMU2AMD(iommu); + AMDIOMMU_ASSERT_LOCKED(unit); + bytes = descr_count << AMDIOMMU_CMD_SZ_SHIFT; + for (;;) { + if (bytes <= unit->x86c.inv_queue_avail) + break; + /* refill */ + head = amdiommu_read8(unit, AMDIOMMU_CMDBUF_HEAD); + head &= AMDIOMMU_CMDPTR_MASK; + unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail - + AMDIOMMU_CMD_SZ; + if (head <= unit->x86c.inv_queue_tail) + unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size; + if (bytes <= unit->x86c.inv_queue_avail) + break; + + /* + * No space in the queue, do busy wait. Hardware must + * make a progress. But first advance the tail to + * inform the descriptor streamer about entries we + * might have already filled, otherwise they could + * clog the whole queue.. + * + * See dmar_qi_invalidate_locked() for a discussion + * about data race prevention. 
+ */ + amdiommu_cmd_advance_tail(iommu); + unit->x86c.inv_queue_full++; + cpu_spinwait(); + } + unit->x86c.inv_queue_avail -= bytes; +} + +static void +amdiommu_cmd_emit(struct amdiommu_unit *unit, const struct + amdiommu_cmd_generic *cmd) +{ + AMDIOMMU_ASSERT_LOCKED(unit); + + memcpy(unit->x86c.inv_queue + unit->x86c.inv_queue_tail, cmd, + sizeof(*cmd)); + unit->x86c.inv_queue_tail += AMDIOMMU_CMD_SZ; + KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size, + ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail, + (uintmax_t)unit->x86c.inv_queue_size)); + unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1; +} + +static void +amdiommu_cmd_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq, + bool intr, bool memw, bool fence) +{ + struct amdiommu_unit *unit; + struct amdiommu_cmd_completion_wait c; + + unit = IOMMU2AMD(iommu); + AMDIOMMU_ASSERT_LOCKED(unit); + + bzero(&c, sizeof(c)); + c.op = AMDIOMMU_CMD_COMPLETION_WAIT; + if (memw) { + uint32_t x; + + c.s = 1; + x = unit->x86c.inv_waitd_seq_hw_phys; + x >>= 3; + c.address0 = x; + x = unit->x86c.inv_waitd_seq_hw_phys >> 32; + c.address1 = x; + c.data0 = seq; + } + if (fence) + c.f = 1; + if (intr) + c.i = 1; + amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); +} + +static void +amdiommu_qi_invalidate_emit(struct iommu_domain *adomain, iommu_gaddr_t base, + iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait) +{ + struct amdiommu_domain *domain; + struct amdiommu_unit *unit; + struct amdiommu_cmd_invalidate_iommu_pages c; + u_int isize; + + domain = IODOM2DOM(adomain); + unit = domain->unit; + AMDIOMMU_ASSERT_LOCKED(unit); + bzero(&c, sizeof(c)); + c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES; + c.domainid = domain->domain; + isize = IOMMU_PAGE_SIZE; // XXXKIB + + for (; size > 0; base += isize, size -= isize) { + amdiommu_cmd_ensure(AMD2IOMMU(unit), 1); + c.s = 0; + c.pde = 1; + c.address = base >> IOMMU_PAGE_SHIFT; + amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); + } + iommu_qi_emit_wait_seq(AMD2IOMMU(unit), pseq, emit_wait); +} + +void +amdiommu_qi_invalidate_all_pages_locked_nowait(struct amdiommu_domain *domain) +{ + struct amdiommu_unit *unit; + struct amdiommu_cmd_invalidate_iommu_pages c; + + unit = domain->unit; + AMDIOMMU_ASSERT_LOCKED(unit); + bzero(&c, sizeof(c)); + c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES; + c.domainid = domain->domain; + + /* + * The magic specified in the note for INVALIDATE_IOMMU_PAGES + * description. 
+ */ + c.s = 1; + c.pde = 1; + c.address = 0x7ffffffffffff; + + amdiommu_cmd_ensure(AMD2IOMMU(unit), 1); + amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); +} + +void +amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu) +{ + struct iommu_qi_genseq gseq; + + amdiommu_cmd_ensure(iommu, 1); + iommu_qi_emit_wait_seq(iommu, &gseq, true); + IOMMU2AMD(iommu)->x86c.inv_seq_waiters++; + amdiommu_cmd_advance_tail(iommu); + iommu_qi_wait_for_seq(iommu, &gseq, true); +} + +void +amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx) +{ + struct amdiommu_cmd_invalidate_devtab_entry c; + + amdiommu_cmd_ensure(AMD2IOMMU(CTX2AMD(ctx)), 1); + bzero(&c, sizeof(c)); + c.op = AMDIOMMU_CMD_INVALIDATE_DEVTAB_ENTRY; + c.devid = ctx->context.rid; + amdiommu_cmd_emit(CTX2AMD(ctx), (struct amdiommu_cmd_generic *)&c); +} + + +void +amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx) +{ + amdiommu_qi_invalidate_ctx_locked_nowait(ctx); + amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx))); +} + +void +amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit, + uint16_t devid) +{ + struct amdiommu_cmd_invalidate_interrupt_table c; + + AMDIOMMU_ASSERT_LOCKED(unit); + + amdiommu_cmd_ensure(AMD2IOMMU(unit), 1); + bzero(&c, sizeof(c)); + c.op = AMDIOMMU_CMD_INVALIDATE_INTERRUPT_TABLE; + c.devid = devid; + amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); +} + +void +amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit, uint16_t devid) +{ + amdiommu_qi_invalidate_ir_locked_nowait(unit, devid); + amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(unit)); +} + +static void +amdiommu_qi_task(void *arg, int pending __unused) +{ + struct amdiommu_unit *unit; + + unit = IOMMU2AMD(arg); + iommu_qi_drain_tlb_flush(AMD2IOMMU(unit)); + + AMDIOMMU_LOCK(unit); + if (unit->x86c.inv_seq_waiters > 0) + wakeup(&unit->x86c.inv_seq_waiters); + AMDIOMMU_UNLOCK(unit); +} + +int +amdiommu_init_cmd(struct amdiommu_unit *unit) +{ + uint64_t qi_sz, rv; + + unit->x86c.qi_buf_maxsz = ilog2(AMDIOMMU_CMDBUF_MAX / PAGE_SIZE); + unit->x86c.qi_cmd_sz = AMDIOMMU_CMD_SZ; + iommu_qi_common_init(AMD2IOMMU(unit), amdiommu_qi_task); + get_x86_iommu()->qi_ensure = amdiommu_cmd_ensure; + get_x86_iommu()->qi_emit_wait_descr = amdiommu_cmd_emit_wait_descr; + get_x86_iommu()->qi_advance_tail = amdiommu_cmd_advance_tail; + get_x86_iommu()->qi_invalidate_emit = amdiommu_qi_invalidate_emit; + + rv = pmap_kextract((uintptr_t)unit->x86c.inv_queue); + + /* + * See the description of the ComLen encoding for Command + * buffer Base Address Register. + */ + qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE) + 8; + rv |= qi_sz << AMDIOMMU_CMDBUF_BASE_SZSHIFT; + + AMDIOMMU_LOCK(unit); + amdiommu_write8(unit, AMDIOMMU_CMDBUF_BASE, rv); + amdiommu_enable_cmdbuf(unit); + amdiommu_enable_qi_intr(AMD2IOMMU(unit)); + AMDIOMMU_UNLOCK(unit); + + return (0); +} + +static void +amdiommu_fini_cmd_helper(struct iommu_unit *iommu) +{ + amdiommu_disable_cmdbuf(IOMMU2AMD(iommu)); + amdiommu_disable_qi_intr(iommu); +} + +void +amdiommu_fini_cmd(struct amdiommu_unit *unit) +{ + iommu_qi_common_fini(AMD2IOMMU(unit), amdiommu_fini_cmd_helper); +} diff --git a/sys/x86/iommu/amd_ctx.c b/sys/x86/iommu/amd_ctx.c new file mode 100644 --- /dev/null +++ b/sys/x86/iommu/amd_ctx.c @@ -0,0 +1,628 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static MALLOC_DEFINE(M_AMDIOMMU_CTX, "amdiommu_ctx", "AMD IOMMU Context"); +static MALLOC_DEFINE(M_AMDIOMMU_DOMAIN, "amdiommu_dom", "AMD IOMMU Domain"); + +static void amdiommu_unref_domain_locked(struct amdiommu_unit *unit, + struct amdiommu_domain *domain); + +static struct amdiommu_dte * +amdiommu_get_dtep(struct amdiommu_ctx *ctx) +{ + return (&CTX2AMD(ctx)->dev_tbl[ctx->context.rid]); +} + +void +amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, + bool cansleep) +{ + struct amdiommu_domain *domain; + struct amdiommu_unit *unit; + + domain = IODOM2DOM(entry->domain); + unit = DOM2AMD(domain); + + /* + * If "free" is false, then the IOTLB invalidation must be performed + * synchronously. Otherwise, the caller might free the entry before + * dmar_qi_task() is finished processing it. + */ + if (free) { + AMDIOMMU_LOCK(unit); + iommu_qi_invalidate_locked(&domain->iodom, entry, true); + AMDIOMMU_UNLOCK(unit); + } else { + iommu_qi_invalidate_sync(&domain->iodom, entry->start, + entry->end - entry->start, cansleep); + iommu_domain_free_entry(entry, false); + } +} + +static bool +amdiommu_domain_unload_emit_wait(struct amdiommu_domain *domain, + struct iommu_map_entry *entry) +{ + return (true); //XXXKIB +} + +void +amdiommu_domain_unload(struct iommu_domain *iodom, + struct iommu_map_entries_tailq *entries, bool cansleep) +{ + struct amdiommu_domain *domain; + struct amdiommu_unit *unit; + struct iommu_map_entry *entry, *entry1; + int error __diagused; + + domain = IODOM2DOM(iodom); + unit = DOM2AMD(domain); + + TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) { + KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0, + ("not mapped entry %p %p", domain, entry)); + error = iodom->ops->unmap(iodom, entry, + cansleep ? 
IOMMU_PGF_WAITOK : 0); + KASSERT(error == 0, ("unmap %p error %d", domain, error)); + } + if (TAILQ_EMPTY(entries)) + return; + + AMDIOMMU_LOCK(unit); + while ((entry = TAILQ_FIRST(entries)) != NULL) { + TAILQ_REMOVE(entries, entry, dmamap_link); + iommu_qi_invalidate_locked(&domain->iodom, entry, + amdiommu_domain_unload_emit_wait(domain, entry)); + } + AMDIOMMU_UNLOCK(unit); +} + +static void +amdiommu_domain_destroy(struct amdiommu_domain *domain) +{ + struct iommu_domain *iodom; + struct amdiommu_unit *unit; + + iodom = DOM2IODOM(domain); + + KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries), + ("unfinished unloads %p", domain)); + KASSERT(LIST_EMPTY(&iodom->contexts), + ("destroying dom %p with contexts", domain)); + KASSERT(domain->ctx_cnt == 0, + ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt)); + KASSERT(domain->refs == 0, + ("destroying dom %p with refs %d", domain, domain->refs)); + + if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) { + AMDIOMMU_DOMAIN_LOCK(domain); + iommu_gas_fini_domain(iodom); + AMDIOMMU_DOMAIN_UNLOCK(domain); + } + if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) { + if (domain->pgtbl_obj != NULL) + AMDIOMMU_DOMAIN_PGLOCK(domain); + amdiommu_domain_free_pgtbl(domain); + } + iommu_domain_fini(iodom); + unit = DOM2AMD(domain); + free_unr(unit->domids, domain->domain); + free(domain, M_AMDIOMMU_DOMAIN); +} + +static iommu_gaddr_t +lvl2addr(int lvl) +{ + int x; + + x = IOMMU_PAGE_SHIFT + IOMMU_NPTEPGSHIFT * lvl; + /* Level 6 has only 8 bits for page table index */ + if (x >= NBBY * sizeof(uint64_t)) + return (-1ull); + return (1ull << x); +} + +static void +amdiommu_domain_init_pglvl(struct amdiommu_unit *unit, + struct amdiommu_domain *domain) +{ + iommu_gaddr_t end; + int hats, i; + uint64_t efr_hats; + + end = DOM2IODOM(domain)->end; + for (i = AMDIOMMU_PGTBL_MAXLVL; i > 1; i--) { + if (lvl2addr(i) >= end && lvl2addr(i - 1) < end) + break; + } + domain->pglvl = i; + + efr_hats = unit->efr & AMDIOMMU_EFR_HATS_MASK; + switch (efr_hats) { + case AMDIOMMU_EFR_HATS_6LVL: + hats = 6; + break; + case AMDIOMMU_EFR_HATS_5LVL: + hats = 5; + break; + case AMDIOMMU_EFR_HATS_4LVL: + hats = 4; + break; + default: + printf("amdiommu%d: HATS %#jx (reserved) ignoring\n", + unit->iommu.unit, (uintmax_t)efr_hats); + return; + } + if (hats >= domain->pglvl) + return; + + printf("amdiommu%d: domain %d HATS %d pglvl %d reducing to HATS\n", + unit->iommu.unit, domain->domain, hats, domain->pglvl); + domain->pglvl = hats; + domain->iodom.end = lvl2addr(hats); +} + +static struct amdiommu_domain * +amdiommu_domain_alloc(struct amdiommu_unit *unit, bool id_mapped) +{ + struct amdiommu_domain *domain; + struct iommu_domain *iodom; + int error, id; + + id = alloc_unr(unit->domids); + if (id == -1) + return (NULL); + domain = malloc(sizeof(*domain), M_AMDIOMMU_DOMAIN, M_WAITOK | M_ZERO); + iodom = DOM2IODOM(domain); + domain->domain = id; + LIST_INIT(&iodom->contexts); + iommu_domain_init(AMD2IOMMU(unit), iodom, &amdiommu_domain_map_ops); + + domain->unit = unit; + + domain->iodom.end = id_mapped ?
ptoa(Maxmem) : BUS_SPACE_MAXADDR; + amdiommu_domain_init_pglvl(unit, domain); + iommu_gas_init_domain(DOM2IODOM(domain)); + + if (id_mapped) { + domain->iodom.flags |= IOMMU_DOMAIN_IDMAP; + } else { + error = amdiommu_domain_alloc_pgtbl(domain); + if (error != 0) + goto fail; + /* Disable local apic region access */ + error = iommu_gas_reserve_region(iodom, 0xfee00000, + 0xfeefffff + 1, &iodom->msi_entry); + if (error != 0) + goto fail; + } + + return (domain); + +fail: + amdiommu_domain_destroy(domain); + return (NULL); +} + +static struct amdiommu_ctx * +amdiommu_ctx_alloc(struct amdiommu_domain *domain, uint16_t rid) +{ + struct amdiommu_ctx *ctx; + + ctx = malloc(sizeof(*ctx), M_AMDIOMMU_CTX, M_WAITOK | M_ZERO); + ctx->context.domain = DOM2IODOM(domain); + ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu), + M_AMDIOMMU_CTX, M_WAITOK | M_ZERO); + ctx->context.rid = rid; + ctx->context.refs = 1; + return (ctx); +} + +static void +amdiommu_ctx_link(struct amdiommu_ctx *ctx) +{ + struct amdiommu_domain *domain; + + domain = CTX2DOM(ctx); + IOMMU_ASSERT_LOCKED(domain->iodom.iommu); + KASSERT(domain->refs >= domain->ctx_cnt, + ("dom %p ref underflow %d %d", domain, domain->refs, + domain->ctx_cnt)); + domain->refs++; + domain->ctx_cnt++; + LIST_INSERT_HEAD(&domain->iodom.contexts, &ctx->context, link); +} + +static void +amdiommu_ctx_unlink(struct amdiommu_ctx *ctx) +{ + struct amdiommu_domain *domain; + + domain = CTX2DOM(ctx); + IOMMU_ASSERT_LOCKED(domain->iodom.iommu); + KASSERT(domain->refs > 0, + ("domain %p ctx dtr refs %d", domain, domain->refs)); + KASSERT(domain->ctx_cnt >= domain->refs, + ("domain %p ctx dtr refs %d ctx_cnt %d", domain, + domain->refs, domain->ctx_cnt)); + domain->refs--; + domain->ctx_cnt--; + LIST_REMOVE(&ctx->context, link); +} + +static struct amdiommu_ctx * +amdiommu_find_ctx_locked(struct amdiommu_unit *unit, uint16_t rid) +{ + struct amdiommu_domain *domain; + struct iommu_ctx *ctx; + + AMDIOMMU_ASSERT_LOCKED(unit); + + LIST_FOREACH(domain, &unit->domains, link) { + LIST_FOREACH(ctx, &domain->iodom.contexts, link) { + if (ctx->rid == rid) + return (IOCTX2CTX(ctx)); + } + } + return (NULL); +} + +struct amdiommu_domain * +amdiommu_find_domain(struct amdiommu_unit *unit, uint16_t rid) +{ + struct amdiommu_domain *domain; + struct iommu_ctx *ctx; + + AMDIOMMU_LOCK(unit); + LIST_FOREACH(domain, &unit->domains, link) { + LIST_FOREACH(ctx, &domain->iodom.contexts, link) { + if (ctx->rid == rid) + break; + } + } + AMDIOMMU_UNLOCK(unit); + return (domain); +} + +static void +amdiommu_free_ctx_locked(struct amdiommu_unit *unit, struct amdiommu_ctx *ctx) +{ + struct amdiommu_dte *dtep; + struct amdiommu_domain *domain; + + AMDIOMMU_ASSERT_LOCKED(unit); + KASSERT(ctx->context.refs >= 1, + ("amdiommu %p ctx %p refs %u", unit, ctx, ctx->context.refs)); + + /* + * If our reference is not last, only the dereference should + * be performed. + */ + if (ctx->context.refs > 1) { + ctx->context.refs--; + AMDIOMMU_UNLOCK(unit); + return; + } + + KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0, + ("lost ref on disabled ctx %p", ctx)); + + /* + * Otherwise, the device table entry must be cleared before + * the page table is destroyed. 
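+ * The V bit is cleared first and a release fence is issued before the
+ * rest of the DTE is zeroed; the invalidation commands below then
+ * flush anything the IOMMU may still have cached from the entry.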
+ */ + dtep = amdiommu_get_dtep(ctx); + dtep->v = 0; + atomic_thread_fence_rel(); + memset(dtep, 0, sizeof(*dtep)); + + domain = CTX2DOM(ctx); + amdiommu_qi_invalidate_ctx_locked_nowait(ctx); + amdiommu_qi_invalidate_ir_locked_nowait(unit, ctx->context.rid); + amdiommu_qi_invalidate_all_pages_locked_nowait(domain); + amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx))); + + if (unit->irte_enabled) + amdiommu_ctx_fini_irte(ctx); + + amdiommu_ctx_unlink(ctx); + free(ctx->context.tag, M_AMDIOMMU_CTX); + free(ctx, M_AMDIOMMU_CTX); + amdiommu_unref_domain_locked(unit, domain); +} + +static void +amdiommu_free_ctx(struct amdiommu_ctx *ctx) +{ + struct amdiommu_unit *unit; + + unit = CTX2AMD(ctx); + AMDIOMMU_LOCK(unit); + amdiommu_free_ctx_locked(unit, ctx); +} + +static void +amdiommu_unref_domain_locked(struct amdiommu_unit *unit, + struct amdiommu_domain *domain) +{ + AMDIOMMU_ASSERT_LOCKED(unit); + KASSERT(domain->refs >= 1, + ("amdiommu%d domain %p refs %u", unit->iommu.unit, domain, + domain->refs)); + KASSERT(domain->refs > domain->ctx_cnt, + ("amdiommu%d domain %p refs %d ctx_cnt %d", unit->iommu.unit, + domain, domain->refs, domain->ctx_cnt)); + + if (domain->refs > 1) { + domain->refs--; + AMDIOMMU_UNLOCK(unit); + return; + } + + LIST_REMOVE(domain, link); + AMDIOMMU_UNLOCK(unit); + + taskqueue_drain(unit->iommu.delayed_taskqueue, + &domain->iodom.unload_task); + amdiommu_domain_destroy(domain); +} + +static void +dte_entry_init_one(struct amdiommu_dte *dtep, struct amdiommu_ctx *ctx, + vm_page_t pgtblr) +{ + struct amdiommu_domain *domain; + struct amdiommu_unit *unit; + + domain = CTX2DOM(ctx); + unit = DOM2AMD(domain); + + dtep->tv = 1; + //dtep->had = XXX; + dtep->ir = 1; + dtep->iw = 1; + dtep->domainid = domain->domain; + dtep->pioctl = AMDIOMMU_DTE_PIOCTL_DIS; + + if (unit->irte_enabled) { + dtep->iv = 1; + dtep->i = 0; + dtep->inttablen = ilog2(unit->irte_nentries); + dtep->intrroot = pmap_kextract(unit->irte_x2apic ? 
+ (vm_offset_t)ctx->irtx2 : + (vm_offset_t)ctx->irtb) >> 6; + // XXXKIB fill device interrupt passing hints from IVHD + dtep->intctl = AMDIOMMU_DTE_INTCTL_MAP; + } + + if ((DOM2IODOM(domain)->flags & IOMMU_DOMAIN_IDMAP) != 0) { + dtep->pgmode = AMDIOMMU_DTE_PGMODE_1T1; + } else { + MPASS(domain->pglvl > 0 && domain->pglvl <= + AMDIOMMU_PGTBL_MAXLVL); + dtep->pgmode = domain->pglvl; + dtep->ptroot = VM_PAGE_TO_PHYS(pgtblr) >> 12; + } + + atomic_thread_fence_rel(); + dtep->v = 1; +} + +static void +dte_entry_init(struct amdiommu_ctx *ctx, bool move) +{ + struct amdiommu_dte *dtep; + struct amdiommu_unit *unit; + struct amdiommu_domain *domain; + int i; + + domain = CTX2DOM(ctx); + unit = DOM2AMD(domain); + + dtep = amdiommu_get_dtep(ctx); + KASSERT(dtep->v == 0, + ("amdiommu%d initializing valid dte @%p %#jx", + CTX2AMD(ctx)->iommu.unit, dtep, (uintmax_t)(*(uint64_t *)dtep))); + + if (iommu_is_buswide_ctx(AMD2IOMMU(unit), + PCI_RID2BUS(ctx->context.rid))) { + MPASS(!move); + for (i = 0; i <= PCI_BUSMAX; i++) { + dte_entry_init_one(&dtep[i], ctx, domain->pgtblr); + } + } else { + dte_entry_init_one(dtep, ctx, domain->pgtblr); + } +} + +static struct amdiommu_ctx * +amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit, device_t dev, uint16_t rid, + int dev_domain, bool id_mapped, bool rmrr_init) +{ + struct amdiommu_domain *domain, *domain1; + struct amdiommu_ctx *ctx, *ctx1; + int bus, slot, func; + + if (dev != NULL) { + bus = pci_get_bus(dev); + slot = pci_get_slot(dev); + func = pci_get_function(dev); + } else { + bus = PCI_RID2BUS(rid); + slot = PCI_RID2SLOT(rid); + func = PCI_RID2FUNC(rid); + } + AMDIOMMU_LOCK(unit); + KASSERT(!iommu_is_buswide_ctx(AMD2IOMMU(unit), bus) || + (slot == 0 && func == 0), + ("iommu%d pci%d:%d:%d get_ctx for buswide", AMD2IOMMU(unit)->unit, + bus, slot, func)); + ctx = amdiommu_find_ctx_locked(unit, rid); + if (ctx == NULL) { + /* + * Perform the allocations which require sleep or have + * higher chance to succeed if the sleep is allowed. + */ + AMDIOMMU_UNLOCK(unit); + domain1 = amdiommu_domain_alloc(unit, id_mapped); + if (domain1 == NULL) + return (NULL); + if (!id_mapped) { +#if 0 +// XXXKIB + error = domain_init_rmrr(domain1, dev, bus, + slot, func, dev_domain, dev_busno, dev_path, + dev_path_len); + if (error == 0 && dev != NULL) + error = dmar_reserve_pci_regions(domain1, dev); + if (error != 0) { + dmar_domain_destroy(domain1); + return (NULL); + } +#endif + } + ctx1 = amdiommu_ctx_alloc(domain1, rid); + amdiommu_ctx_init_irte(ctx1); + AMDIOMMU_LOCK(unit); + + /* + * Recheck the contexts, other thread might have + * already allocated needed one. + */ + ctx = amdiommu_find_ctx_locked(unit, rid); + if (ctx == NULL) { + domain = domain1; + ctx = ctx1; + amdiommu_ctx_link(ctx); + ctx->context.tag->owner = dev; + iommu_device_tag_init(CTX2IOCTX(ctx), dev); + + LIST_INSERT_HEAD(&unit->domains, domain, link); + dte_entry_init(ctx, false); + amdiommu_qi_invalidate_ctx_locked(ctx); + if (dev != NULL) { + device_printf(dev, + "amdiommu%d pci%d:%d:%d:%d rid %x domain %d " + "%s-mapped\n", + AMD2IOMMU(unit)->unit, unit->unit_dom, + bus, slot, func, rid, domain->domain, + id_mapped ? "id" : "re"); + } + } else { + amdiommu_domain_destroy(domain1); + /* Nothing needs to be done to destroy ctx1. 
*/ + free(ctx1, M_AMDIOMMU_CTX); + domain = CTX2DOM(ctx); + ctx->context.refs++; /* tag referenced us */ + } + } else { + domain = CTX2DOM(ctx); + if (ctx->context.tag->owner == NULL) + ctx->context.tag->owner = dev; + ctx->context.refs++; /* tag referenced us */ + } + AMDIOMMU_UNLOCK(unit); + + return (ctx); +} + +struct iommu_ctx * +amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid, + bool id_mapped, bool rmrr_init) +{ + struct amdiommu_unit *unit; + struct amdiommu_ctx *ret; + + unit = IOMMU2AMD(iommu); + ret = amdiommu_get_ctx_for_dev(unit, dev, rid, pci_get_domain(dev), + id_mapped, rmrr_init); + return (CTX2IOCTX(ret)); +} + +void +amdiommu_free_ctx_locked_method(struct iommu_unit *iommu, + struct iommu_ctx *context) +{ + struct amdiommu_unit *unit; + struct amdiommu_ctx *ctx; + + unit = IOMMU2AMD(iommu); + ctx = IOCTX2CTX(context); + amdiommu_free_ctx_locked(unit, ctx); +} + +void +amdiommu_free_ctx_method(struct iommu_ctx *context) +{ + struct amdiommu_ctx *ctx; + + ctx = IOCTX2CTX(context); + amdiommu_free_ctx(ctx); +} diff --git a/sys/x86/iommu/amd_drv.c b/sys/x86/iommu/amd_drv.c new file mode 100644 --- /dev/null +++ b/sys/x86/iommu/amd_drv.c @@ -0,0 +1,1202 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_acpi.h" +#include "opt_ddb.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pcib_if.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int amdiommu_enable = 0; + +/* + * All enumerated AMD IOMMU units. + * Access is unlocked, the list is not modified after early + * single-threaded startup. 
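+ * Units are only appended at attach time and are never removed
+ * (detach is refused with EBUSY).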
+ */ +static TAILQ_HEAD(, amdiommu_unit) amdiommu_units = + TAILQ_HEAD_INITIALIZER(amdiommu_units); + +static u_int +ivrs_info_to_unit_id(UINT32 info) +{ + return ((info & ACPI_IVHD_UNIT_ID_MASK) >> 8); +} + +typedef bool (*amdiommu_itercc_t)(void *, void *); +typedef bool (*amdiommu_iter40_t)(ACPI_IVRS_HARDWARE2 *, void *); +typedef bool (*amdiommu_iter11_t)(ACPI_IVRS_HARDWARE2 *, void *); +typedef bool (*amdiommu_iter10_t)(ACPI_IVRS_HARDWARE1 *, void *); + +static bool +amdiommu_ivrs_iterate_tbl_typed(amdiommu_itercc_t iter, void *arg, + int type, ACPI_TABLE_IVRS *ivrs_tbl) +{ + char *ptr, *ptrend; + bool done; + + done = false; + ptr = (char *)ivrs_tbl + sizeof(*ivrs_tbl); + ptrend = (char *)ivrs_tbl + ivrs_tbl->Header.Length; + for (;;) { + ACPI_IVRS_HEADER *ivrsh; + + if (ptr >= ptrend) + break; + ivrsh = (ACPI_IVRS_HEADER *)ptr; + if (ivrsh->Length <= 0) { + printf("amdiommu_iterate_tbl: corrupted IVRS table, " + "length %d\n", ivrsh->Length); + break; + } + ptr += ivrsh->Length; + if (ivrsh->Type == type) { + done = iter((void *)ivrsh, arg); + if (done) + break; + } + } + return (done); +} + +/* + * Walk over IVRS, calling callback iterators following priority: + * 0x40, then 0x11, then 0x10 subtable. First iterator returning true + * ends the walk. + * Returns true if any iterator returned true, otherwise false. + */ +static bool +amdiommu_ivrs_iterate_tbl(amdiommu_iter40_t iter40, amdiommu_iter11_t iter11, + amdiommu_iter10_t iter10, void *arg) +{ + ACPI_TABLE_IVRS *ivrs_tbl; + ACPI_STATUS status; + bool done; + + status = AcpiGetTable(ACPI_SIG_IVRS, 1, + (ACPI_TABLE_HEADER **)&ivrs_tbl); + if (ACPI_FAILURE(status)) + return (false); + done = false; + if (iter40 != NULL) + done = amdiommu_ivrs_iterate_tbl_typed( + (amdiommu_itercc_t)iter40, arg, + ACPI_IVRS_TYPE_HARDWARE3, ivrs_tbl); + if (!done && iter11 != NULL) + done = amdiommu_ivrs_iterate_tbl_typed( + (amdiommu_itercc_t)iter11, arg, ACPI_IVRS_TYPE_HARDWARE2, + ivrs_tbl); + if (!done && iter10 != NULL) + done = amdiommu_ivrs_iterate_tbl_typed( + (amdiommu_itercc_t)iter10, arg, ACPI_IVRS_TYPE_HARDWARE1, + ivrs_tbl); + AcpiPutTable((ACPI_TABLE_HEADER *)ivrs_tbl); + return (done); +} + +struct ivhd_lookup_data { + struct amdiommu_unit *sc; + uint16_t devid; +}; + +static bool +ivrs_lookup_ivhd_0x40(ACPI_IVRS_HARDWARE2 *h2, void *arg) +{ + struct ivhd_lookup_data *ildp; + + KASSERT(h2->Header.Type == ACPI_IVRS_TYPE_HARDWARE2 || + h2->Header.Type == ACPI_IVRS_TYPE_HARDWARE3, + ("Misparsed IVHD, h2 type %#x", h2->Header.Type)); + + ildp = arg; + if (h2->Header.DeviceId != ildp->devid) + return (false); + + ildp->sc->unit_dom = h2->PciSegmentGroup; + ildp->sc->iommu.unit = ivrs_info_to_unit_id(h2->Info); + ildp->sc->efr = h2->EfrRegisterImage; + return (true); +} + +static bool +ivrs_lookup_ivhd_0x10(ACPI_IVRS_HARDWARE1 *h1, void *arg) +{ + struct ivhd_lookup_data *ildp; + + KASSERT(h1->Header.Type == ACPI_IVRS_TYPE_HARDWARE1, + ("Misparsed IVHD, h1 type %#x", h1->Header.Type)); + + ildp = arg; + if (h1->Header.DeviceId != ildp->devid) + return (false); + + ildp->sc->unit_dom = h1->PciSegmentGroup; + ildp->sc->iommu.unit = ivrs_info_to_unit_id(h1->Info); + return (true); +} + +static u_int +amdiommu_devtbl_sz(struct amdiommu_unit *sc __unused) +{ + return (sizeof(struct amdiommu_dte) * (1 << 16)); +} + +static void +amdiommu_free_dev_tbl(struct amdiommu_unit *sc) +{ + u_int devtbl_sz; + + devtbl_sz = amdiommu_devtbl_sz(sc); + pmap_qremove((vm_offset_t)sc->dev_tbl, atop(devtbl_sz)); + kva_free((vm_offset_t)sc->dev_tbl, devtbl_sz); + 
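+ /* The KVA mapping is gone; drop the pointer and release the backing object. */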
sc->dev_tbl = NULL; + vm_object_deallocate(sc->devtbl_obj); + sc->devtbl_obj = NULL; +} + +static int +amdiommu_create_dev_tbl(struct amdiommu_unit *sc) +{ + vm_offset_t seg_vaddr; + u_int devtbl_sz, dom, i, reclaimno, segnum_log, segnum, seg_sz; + int error; + + segnum_log = (sc->efr & AMDIOMMU_EFR_DEVTBLSEG_MASK) >> + AMDIOMMU_EFR_DEVTBLSEG_SHIFT; + segnum = 1 << segnum_log; + + devtbl_sz = amdiommu_devtbl_sz(sc); + seg_sz = devtbl_sz / segnum; + sc->devtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, atop(devtbl_sz), + VM_PROT_ALL, 0, NULL); + if (bus_get_domain(sc->iommu.dev, &dom) == 0) + sc->devtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom); + + sc->hw_ctrl &= ~AMDIOMMU_CTRL_DEVTABSEG_MASK; + sc->hw_ctrl |= (uint64_t)segnum_log << ilog2(AMDIOMMU_CTRL_DEVTABSEG_2); + sc->hw_ctrl |= AMDIOMMU_CTRL_COHERENT; + amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl); + + seg_vaddr = kva_alloc(devtbl_sz); + if (seg_vaddr == 0) + return (ENOMEM); + sc->dev_tbl = (void *)seg_vaddr; + + for (i = 0; i < segnum; i++) { + vm_page_t m; + uint64_t rval; + u_int reg; + + for (reclaimno = 0; reclaimno < 3; reclaimno++) { + VM_OBJECT_WLOCK(sc->devtbl_obj); + m = vm_page_alloc_contig(sc->devtbl_obj, + i * atop(seg_sz), + VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY, + atop(seg_sz), 0, ~0ul, IOMMU_PAGE_SIZE, 0, + VM_MEMATTR_DEFAULT); + VM_OBJECT_WUNLOCK(sc->devtbl_obj); + if (m != NULL) + break; + error = vm_page_reclaim_contig(VM_ALLOC_NORMAL, + atop(seg_sz), 0, ~0ul, IOMMU_PAGE_SIZE, 0); + if (error != 0) + vm_wait(sc->devtbl_obj); + } + if (m == NULL) { + amdiommu_free_dev_tbl(sc); + return (ENOMEM); + } + + rval = VM_PAGE_TO_PHYS(m) | (atop(seg_sz) - 1); + for (u_int j = 0; j < atop(seg_sz); + j++, seg_vaddr += PAGE_SIZE, m++) { + pmap_zero_page(m); + pmap_qenter(seg_vaddr, &m, 1); + } + reg = i == 0 ? AMDIOMMU_DEVTAB_BASE : AMDIOMMU_DEVTAB_S1_BASE + + i - 1; + amdiommu_write8(sc, reg, rval); + } + + return (0); +} + +static int +amdiommu_cmd_event_intr(void *arg) +{ + struct amdiommu_unit *unit; + uint64_t status; + + unit = arg; + status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS); + if ((status & AMDIOMMU_CMDEVS_COMWAITINT) != 0) { + amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS, + AMDIOMMU_CMDEVS_COMWAITINT); + taskqueue_enqueue(unit->x86c.qi_taskqueue, + &unit->x86c.qi_task); + } + if ((status & (AMDIOMMU_CMDEVS_EVLOGINT | + AMDIOMMU_CMDEVS_EVOVRFLW)) != 0) + amdiommu_event_intr(unit, status); + return (FILTER_HANDLED); +} + +static int +amdiommu_setup_intr(struct amdiommu_unit *sc) +{ + int error, msi_count, msix_count; + + msi_count = pci_msi_count(sc->iommu.dev); + msix_count = pci_msix_count(sc->iommu.dev); + if (msi_count == 0 && msix_count == 0) { + device_printf(sc->iommu.dev, "needs MSI-class intr\n"); + return (ENXIO); + } + +#if 0 + /* + * XXXKIB how MSI-X is supposed to be organized for BAR-less + * function? Practically available hardware implements only + * one IOMMU unit per function, and uses MSI. 
+ */ + if (msix_count > 0) { + sc->msix_table = bus_alloc_resource_any(sc->iommu.dev, + SYS_RES_MEMORY, &sc->msix_tab_rid, RF_ACTIVE); + if (sc->msix_table == NULL) + return (ENXIO); + + if (sc->msix_pba_rid != sc->msix_tab_rid) { + /* Separate BAR for PBA */ + sc->msix_pba = bus_alloc_resource_any(sc->iommu.dev, + SYS_RES_MEMORY, + &sc->msix_pba_rid, RF_ACTIVE); + if (sc->msix_pba == NULL) { + bus_release_resource(sc->iommu.dev, + SYS_RES_MEMORY, &sc->msix_tab_rid, + sc->msix_table); + return (ENXIO); + } + } + } +#endif + + error = ENXIO; + if (msix_count > 0) { + error = pci_alloc_msix(sc->iommu.dev, &msix_count); + if (error == 0) + sc->numirqs = msix_count; + } + if (error != 0 && msi_count > 0) { + error = pci_alloc_msi(sc->iommu.dev, &msi_count); + if (error == 0) + sc->numirqs = msi_count; + } + if (error != 0) { + device_printf(sc->iommu.dev, + "Failed to allocate MSI/MSI-x (%d)\n", error); + return (ENXIO); + } + + /* + * XXXKIB Spec states that MISC0.MsiNum must be zero for IOMMU + * using MSI interrupts. But at least one BIOS programmed '2' + * there, making driver use wrong rid and causing + * command/event interrupt ignored as stray. Try to fix it + * with dirty force by assuming MsiNum is zero for MSI. + */ + sc->irq_cmdev_rid = 1; + if (msix_count > 0) { + sc->irq_cmdev_rid += pci_read_config(sc->iommu.dev, + sc->seccap_reg + PCIR_AMDIOMMU_MISC0, 4) & + PCIM_AMDIOMMU_MISC0_MSINUM_MASK; + } + + sc->irq_cmdev = bus_alloc_resource_any(sc->iommu.dev, SYS_RES_IRQ, + &sc->irq_cmdev_rid, RF_SHAREABLE | RF_ACTIVE); + if (sc->irq_cmdev == NULL) { + device_printf(sc->iommu.dev, + "unable to map CMD/EV interrupt\n"); + return (ENXIO); + } + error = bus_setup_intr(sc->iommu.dev, sc->irq_cmdev, + INTR_TYPE_MISC, amdiommu_cmd_event_intr, NULL, sc, + &sc->irq_cmdev_cookie); + if (error != 0) { + device_printf(sc->iommu.dev, + "unable to setup interrupt (%d)\n", error); + return (ENXIO); + } + bus_describe_intr(sc->iommu.dev, sc->irq_cmdev, sc->irq_cmdev_cookie, + "cmdev"); + + if (x2apic_mode) { + AMDIOMMU_LOCK(sc); + sc->hw_ctrl |= AMDIOMMU_CTRL_GA_EN | AMDIOMMU_CTRL_XT_EN; + amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl); + // XXXKIB AMDIOMMU_CTRL_INTCAPXT_EN and program x2APIC_CTRL + AMDIOMMU_UNLOCK(sc); + } + + return (0); +} + +static int +amdiommu_probe(device_t dev) +{ + int seccap_reg; + int error; + uint32_t cap_h, cap_type, cap_rev; + + if (acpi_disabled("amdiommu")) + return (ENXIO); + TUNABLE_INT_FETCH("hw.amdiommu.enable", &amdiommu_enable); + if (!amdiommu_enable) + return (ENXIO); + if (pci_get_class(dev) != PCIC_BASEPERIPH || + pci_get_subclass(dev) != PCIS_BASEPERIPH_IOMMU) + return (ENXIO); + + error = pci_find_cap(dev, PCIY_SECDEV, &seccap_reg); + if (error != 0 || seccap_reg == 0) + return (ENXIO); + + cap_h = pci_read_config(dev, seccap_reg + PCIR_AMDIOMMU_CAP_HEADER, + 4); + cap_type = cap_h & PCIM_AMDIOMMU_CAP_TYPE_MASK; + cap_rev = cap_h & PCIM_AMDIOMMU_CAP_REV_MASK; + if (cap_type != PCIM_AMDIOMMU_CAP_TYPE_VAL && + cap_rev != PCIM_AMDIOMMU_CAP_REV_VAL) + return (ENXIO); + + device_set_desc(dev, "DMA remap"); + return (BUS_PROBE_SPECIFIC); +} + +static int +amdiommu_attach(device_t dev) +{ + struct amdiommu_unit *sc; + struct ivhd_lookup_data ild; + int error; + uint32_t base_low, base_high; + bool res; + + sc = device_get_softc(dev); + sc->iommu.dev = dev; + + error = pci_find_cap(dev, PCIY_SECDEV, &sc->seccap_reg); + if (error != 0 || sc->seccap_reg == 0) + return (ENXIO); + + base_low = pci_read_config(dev, sc->seccap_reg + + PCIR_AMDIOMMU_BASE_LOW, 4); + 
base_high = pci_read_config(dev, sc->seccap_reg + + PCIR_AMDIOMMU_BASE_HIGH, 4); + sc->mmio_base = (base_low & PCIM_AMDIOMMU_BASE_LOW_ADDRM) | + ((uint64_t)base_high << 32); + + sc->device_id = pci_get_rid(dev); + ild.sc = sc; + ild.devid = sc->device_id; + res = amdiommu_ivrs_iterate_tbl(ivrs_lookup_ivhd_0x40, + ivrs_lookup_ivhd_0x40, ivrs_lookup_ivhd_0x10, &ild); + if (!res) { + device_printf(dev, "Cannot find IVHD\n"); + return (ENXIO); + } + + mtx_init(&sc->iommu.lock, "amdihw", NULL, MTX_DEF); + sc->domids = new_unrhdr(0, 0xffff, &sc->iommu.lock); + LIST_INIT(&sc->domains); + sysctl_ctx_init(&sc->iommu.sysctl_ctx); + + sc->mmio_sz = ((sc->efr & AMDIOMMU_EFR_PC_SUP) != 0 ? 512 : 16) * + 1024; + + sc->mmio_rid = AMDIOMMU_RID; + error = bus_set_resource(dev, SYS_RES_MEMORY, AMDIOMMU_RID, + sc->mmio_base, sc->mmio_sz); + if (error != 0) { + device_printf(dev, + "bus_set_resource %#jx-%#jx failed, error %d\n", + (uintmax_t)sc->mmio_base, (uintmax_t)sc->mmio_base + + sc->mmio_sz, error); + error = ENXIO; + goto errout1; + } + sc->mmio_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &sc->mmio_rid, + sc->mmio_base, sc->mmio_base + sc->mmio_sz - 1, sc->mmio_sz, + RF_ALLOCATED | RF_ACTIVE | RF_SHAREABLE); + if (sc->mmio_res == NULL) { + device_printf(dev, + "bus_alloc_resource %#jx-%#jx failed\n", + (uintmax_t)sc->mmio_base, (uintmax_t)sc->mmio_base + + sc->mmio_sz); + error = ENXIO; + goto errout2; + } + + sc->hw_ctrl = amdiommu_read8(sc, AMDIOMMU_CTRL); + if (bootverbose) + device_printf(dev, "ctrl reg %#jx\n", (uintmax_t)sc->hw_ctrl); + if ((sc->hw_ctrl & AMDIOMMU_CTRL_EN) != 0) { + device_printf(dev, "CTRL_EN is set, bailing out\n"); + error = EBUSY; + goto errout2; + } + + iommu_high = BUS_SPACE_MAXADDR; + + error = amdiommu_create_dev_tbl(sc); + if (error != 0) + goto errout3; + + error = amdiommu_init_cmd(sc); + if (error != 0) + goto errout4; + + error = amdiommu_init_event(sc); + if (error != 0) + goto errout5; + + error = amdiommu_setup_intr(sc); + if (error != 0) + goto errout6; + + error = iommu_init_busdma(AMD2IOMMU(sc)); + if (error != 0) + goto errout7; + + error = amdiommu_init_irt(sc); + if (error != 0) + goto errout8; + + /* + * Unlike DMAR, AMD IOMMU does not process command queue + * unless IOMMU is enabled. But since non-present devtab + * entry makes IOMMU ignore transactions from corresponding + * initiator, de-facto IOMMU operations are disabled for the + * DMA and intr remapping. 
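+ * Enabling the unit here therefore only starts command and event
+ * processing; remapping for a device takes effect once its DTE is
+ * made valid during context creation.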
+ */ + AMDIOMMU_LOCK(sc); + sc->hw_ctrl |= AMDIOMMU_CTRL_EN; + amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl); + if (bootverbose) { + printf("amdiommu%d: enabled translation\n", + AMD2IOMMU(sc)->unit); + } + AMDIOMMU_UNLOCK(sc); + + TAILQ_INSERT_TAIL(&amdiommu_units, sc, unit_next); + return (0); + +errout8: + iommu_fini_busdma(&sc->iommu); +errout7: + pci_release_msi(dev); +errout6: + amdiommu_fini_event(sc); +errout5: + amdiommu_fini_cmd(sc); +errout4: + amdiommu_free_dev_tbl(sc); +errout3: + bus_release_resource(dev, SYS_RES_MEMORY, sc->mmio_rid, sc->mmio_res); +errout2: + bus_delete_resource(dev, SYS_RES_MEMORY, sc->mmio_rid); +errout1: + sysctl_ctx_free(&sc->iommu.sysctl_ctx); + delete_unrhdr(sc->domids); + mtx_destroy(&sc->iommu.lock); + + return (error); +} + +static int +amdiommu_detach(device_t dev) +{ + return (EBUSY); +} + +static int +amdiommu_suspend(device_t dev) +{ + /* XXXKIB */ + return (0); +} + +static int +amdiommu_resume(device_t dev) +{ + /* XXXKIB */ + return (0); +} + +static device_method_t amdiommu_methods[] = { + DEVMETHOD(device_probe, amdiommu_probe), + DEVMETHOD(device_attach, amdiommu_attach), + DEVMETHOD(device_detach, amdiommu_detach), + DEVMETHOD(device_suspend, amdiommu_suspend), + DEVMETHOD(device_resume, amdiommu_resume), + DEVMETHOD_END +}; + +static driver_t amdiommu_driver = { + "amdiommu", + amdiommu_methods, + sizeof(struct amdiommu_unit), +}; + +EARLY_DRIVER_MODULE(amdiommu, pci, amdiommu_driver, 0, 0, BUS_PASS_SUPPORTDEV); +MODULE_DEPEND(amdiommu, pci, 1, 1, 1); + +static struct amdiommu_unit * +amdiommu_unit_by_device_id(u_int pci_seg, u_int device_id) +{ + struct amdiommu_unit *unit; + + TAILQ_FOREACH(unit, &amdiommu_units, unit_next) { + if (unit->unit_dom == pci_seg && unit->device_id == device_id) + return (unit); + } + return (NULL); +} + +struct ivhd_find_unit { + u_int domain; + uintptr_t rid; + int devno; + enum { + IFU_DEV_PCI, + IFU_DEV_IOAPIC, + IFU_DEV_HPET, + } type; + u_int device_id; + uint16_t rid_real; + uint8_t dte; + uint32_t edte; +}; + +static bool +amdiommu_find_unit_scan_ivrs(ACPI_IVRS_DE_HEADER *d, size_t tlen, + struct ivhd_find_unit *ifu) +{ + char *db, *de; + size_t len; + + for (de = (char *)d + tlen; (char *)d < de; + d = (ACPI_IVRS_DE_HEADER *)(db + len)) { + db = (char *)d; + if (d->Type == ACPI_IVRS_TYPE_PAD4) { + len = sizeof(ACPI_IVRS_DEVICE4); + } else if (d->Type == ACPI_IVRS_TYPE_ALL) { + ACPI_IVRS_DEVICE4 *d4; + + d4 = (ACPI_IVRS_DEVICE4 *)db; + len = sizeof(*d4); + ifu->dte = d4->Header.DataSetting; + } else if (d->Type == ACPI_IVRS_TYPE_SELECT) { + ACPI_IVRS_DEVICE4 *d4; + + d4 = (ACPI_IVRS_DEVICE4 *)db; + if (d4->Header.Id == ifu->rid) { + ifu->dte = d4->Header.DataSetting; + ifu->rid_real = ifu->rid; + return (true); + } + len = sizeof(*d4); + } else if (d->Type == ACPI_IVRS_TYPE_START) { + ACPI_IVRS_DEVICE4 *d4, *d4n; + + d4 = (ACPI_IVRS_DEVICE4 *)db; + d4n = d4 + 1; + if (d4n->Header.Type != ACPI_IVRS_TYPE_END) { + printf("IVRS dev4 start not followed by END " + "(%#x)\n", d4n->Header.Type); + return (false); + } + if (d4->Header.Id <= ifu->rid && + ifu->rid <= d4n->Header.Id) { + ifu->dte = d4->Header.DataSetting; + ifu->rid_real = ifu->rid; + return (true); + } + len = 2 * sizeof(*d4); + } else if (d->Type == ACPI_IVRS_TYPE_PAD8) { + len = sizeof(ACPI_IVRS_DEVICE8A); + } else if (d->Type == ACPI_IVRS_TYPE_ALIAS_SELECT) { + ACPI_IVRS_DEVICE8A *d8a; + + d8a = (ACPI_IVRS_DEVICE8A *)db; + if (d8a->Header.Id == ifu->rid) { + ifu->dte = d8a->Header.DataSetting; + ifu->rid_real = d8a->UsedId; + return 
(true); + } + len = sizeof(*d8a); + } else if (d->Type == ACPI_IVRS_TYPE_ALIAS_START) { + ACPI_IVRS_DEVICE8A *d8a; + ACPI_IVRS_DEVICE4 *d4; + + d8a = (ACPI_IVRS_DEVICE8A *)db; + d4 = (ACPI_IVRS_DEVICE4 *)(d8a + 1); + if (d4->Header.Type != ACPI_IVRS_TYPE_END) { + printf("IVRS alias start not followed by END " + "(%#x)\n", d4->Header.Type); + return (false); + } + if (d8a->Header.Id <= ifu->rid && + ifu->rid <= d4->Header.Id) { + ifu->dte = d8a->Header.DataSetting; + ifu->rid_real = d8a->UsedId; + return (true); + } + len = sizeof(*d8a) + sizeof(*d4); + } else if (d->Type == ACPI_IVRS_TYPE_EXT_SELECT) { + ACPI_IVRS_DEVICE8B *d8b; + + d8b = (ACPI_IVRS_DEVICE8B *)db; + if (d8b->Header.Id == ifu->rid) { + ifu->dte = d8b->Header.DataSetting; + ifu->rid_real = ifu->rid; + ifu->edte = d8b->ExtendedData; + return (true); + } + len = sizeof(*d8b); + } else if (d->Type == ACPI_IVRS_TYPE_EXT_START) { + ACPI_IVRS_DEVICE8B *d8b; + ACPI_IVRS_DEVICE4 *d4; + + d8b = (ACPI_IVRS_DEVICE8B *)db; + d4 = (ACPI_IVRS_DEVICE4 *)(db + sizeof(*d8b)); + if (d4->Header.Type != ACPI_IVRS_TYPE_END) { + printf("IVRS ext start not followed by END " + "(%#x)\n", d4->Header.Type); + return (false); + } + if (d8b->Header.Id >= ifu->rid && + ifu->rid <= d4->Header.Id) { + ifu->dte = d8b->Header.DataSetting; + ifu->rid_real = ifu->rid; + ifu->edte = d8b->ExtendedData; + return (true); + } + len = sizeof(*d8b) + sizeof(*d4); + } else if (d->Type == ACPI_IVRS_TYPE_SPECIAL) { + ACPI_IVRS_DEVICE8C *d8c; + + d8c = (ACPI_IVRS_DEVICE8C *)db; + if (((ifu->type == IFU_DEV_IOAPIC && + d8c->Variety == ACPI_IVHD_IOAPIC) || + (ifu->type == IFU_DEV_HPET && + d8c->Variety == ACPI_IVHD_HPET)) && + ifu->devno == d8c->Handle) { + ifu->dte = d8c->Header.DataSetting; + ifu->rid_real = d8c->UsedId; + return (true); + } + len = sizeof(*d8c); + } else if (d->Type == ACPI_IVRS_TYPE_HID) { + ACPI_IVRS_DEVICE_HID *dh; + + dh = (ACPI_IVRS_DEVICE_HID *)db; + len = sizeof(*dh) + dh->UidLength; + /* XXXKIB */ + } else { + printf("amdiommu: unknown IVRS device entry type %#x\n", + d->Type); + if (d->Type <= 63) + len = sizeof(ACPI_IVRS_DEVICE4); + else if (d->Type <= 127) + len = sizeof(ACPI_IVRS_DEVICE8A); + else { + printf("amdiommu: abort, cannot " + "advance iterator\n"); + return (false); + } + } + } + return (false); +} + +static bool +amdiommu_find_unit_scan_0x11(ACPI_IVRS_HARDWARE2 *ivrs, void *arg) +{ + struct ivhd_find_unit *ifu = arg; + ACPI_IVRS_DE_HEADER *d; + bool res; + + KASSERT(ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE2 || + ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE3, + ("Misparsed IVHD h2, ivrs type %#x", ivrs->Header.Type)); + + if (ifu->domain != ivrs->PciSegmentGroup) + return (false); + d = (ACPI_IVRS_DE_HEADER *)(ivrs + 1); + res = amdiommu_find_unit_scan_ivrs(d, ivrs->Header.Length, ifu); + if (res) + ifu->device_id = ivrs->Header.DeviceId; + return (res); +} + +static bool +amdiommu_find_unit_scan_0x10(ACPI_IVRS_HARDWARE1 *ivrs, void *arg) +{ + struct ivhd_find_unit *ifu = arg; + ACPI_IVRS_DE_HEADER *d; + bool res; + + KASSERT(ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE1, + ("Misparsed IVHD h1, ivrs type %#x", ivrs->Header.Type)); + + if (ifu->domain != ivrs->PciSegmentGroup) + return (false); + d = (ACPI_IVRS_DE_HEADER *)(ivrs + 1); + res = amdiommu_find_unit_scan_ivrs(d, ivrs->Header.Length, ifu); + if (res) + ifu->device_id = ivrs->Header.DeviceId; + return (res); +} + +static void +amdiommu_dev_prop_dtr(device_t dev, const char *name, void *val, void *dtr_ctx) +{ + free(val, M_DEVBUF); +} + +static int * 
+amdiommu_dev_fetch_flagsp(struct amdiommu_unit *unit, device_t dev) +{ + int *flagsp, error; + + bus_topo_assert(); + error = device_get_prop(dev, device_get_nameunit(unit->iommu.dev), + (void **)&flagsp); + if (error == ENOENT) { + flagsp = malloc(sizeof(int), M_DEVBUF, M_WAITOK | M_ZERO); + device_set_prop(dev, device_get_nameunit(unit->iommu.dev), + flagsp, amdiommu_dev_prop_dtr, unit); + } + return (flagsp); +} + +static int +amdiommu_get_dev_prop_flags(struct amdiommu_unit *unit, device_t dev) +{ + int *flagsp, flags; + + bus_topo_lock(); + flagsp = amdiommu_dev_fetch_flagsp(unit, dev); + flags = *flagsp; + bus_topo_unlock(); + return (flags); +} + +static void +amdiommu_set_dev_prop_flags(struct amdiommu_unit *unit, device_t dev, + int flag) +{ + int *flagsp; + + bus_topo_lock(); + flagsp = amdiommu_dev_fetch_flagsp(unit, dev); + *flagsp |= flag; + bus_topo_unlock(); +} + +int +amdiommu_find_unit(device_t dev, struct amdiommu_unit **unitp, uint16_t *ridp, + uint8_t *dtep, uint32_t *edtep, bool verbose) +{ + struct ivhd_find_unit ifu; + struct amdiommu_unit *unit; + int error, flags; + bool res; + + if (device_get_devclass(device_get_parent(dev)) != + devclass_find("pci")) + return (ENXIO); + + bzero(&ifu, sizeof(ifu)); + ifu.type = IFU_DEV_PCI; + + error = pci_get_id(dev, PCI_ID_RID, &ifu.rid); + if (error != 0) { + if (verbose) + device_printf(dev, + "amdiommu cannot get rid, error %d\n", error); + return (ENXIO); + } + + ifu.domain = pci_get_domain(dev); + res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11, + amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu); + if (!res) { + if (verbose) + device_printf(dev, + "(%#06x:%#06x) amdiommu cannot match rid in IVHD\n", + ifu.domain, (unsigned)ifu.rid); + return (ENXIO); + } + + unit = amdiommu_unit_by_device_id(ifu.domain, ifu.device_id); + if (unit == NULL) { + if (verbose) + device_printf(dev, + "(%#06x:%#06x) amdiommu cannot find unit\n", + ifu.domain, (unsigned)ifu.rid); + return (ENXIO); + } + *unitp = unit; + iommu_device_set_iommu_prop(dev, unit->iommu.dev); + if (ridp != NULL) + *ridp = ifu.rid_real; + if (dtep != NULL) + *dtep = ifu.dte; + if (edtep != NULL) + *edtep = ifu.edte; + if (verbose) { + flags = amdiommu_get_dev_prop_flags(unit, dev); + if ((flags & AMDIOMMU_DEV_REPORTED) == 0) { + amdiommu_set_dev_prop_flags(unit, dev, + AMDIOMMU_DEV_REPORTED); + device_printf(dev, "amdiommu%d " + "initiator rid %#06x dte %#x edte %#x\n", + unit->iommu.unit, ifu.rid_real, ifu.dte, ifu.edte); + } + } + return (0); +} + +int +amdiommu_find_unit_for_ioapic(int apic_id, struct amdiommu_unit **unitp, + uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose) +{ + struct ivhd_find_unit ifu; + struct amdiommu_unit *unit; + device_t apic_dev; + bool res; + + bzero(&ifu, sizeof(ifu)); + ifu.type = IFU_DEV_IOAPIC; + ifu.devno = apic_id; + ifu.rid = -1; + + res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11, + amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu); + if (!res) { + if (verbose) + printf("amdiommu cannot match ioapic no %d in IVHD\n", + apic_id); + return (ENXIO); + } + + unit = amdiommu_unit_by_device_id(0, ifu.device_id); + apic_dev = ioapic_get_dev(apic_id); + if (apic_dev != NULL) + iommu_device_set_iommu_prop(apic_dev, unit->iommu.dev); + if (unit == NULL) { + if (verbose) + printf("amdiommu cannot find unit by dev id %#x\n", + ifu.device_id); + return (ENXIO); + } + *unitp = unit; + if (ridp != NULL) + *ridp = ifu.rid_real; + if (dtep != NULL) + *dtep = ifu.dte; + if (edtep 
!= NULL) + *edtep = ifu.edte; + if (verbose) { + printf("amdiommu%d IOAPIC %d " + "initiator rid %#06x dte %#x edte %#x\n", + unit->iommu.unit, apic_id, ifu.rid_real, ifu.dte, + ifu.edte); + } + return (0); +} + +int +amdiommu_find_unit_for_hpet(device_t hpet, struct amdiommu_unit **unitp, + uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose) +{ + struct ivhd_find_unit ifu; + struct amdiommu_unit *unit; + int hpet_no; + bool res; + + hpet_no = hpet_get_uid(hpet); + bzero(&ifu, sizeof(ifu)); + ifu.type = IFU_DEV_HPET; + ifu.devno = hpet_no; + ifu.rid = -1; + + res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11, + amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu); + if (!res) { + printf("amdiommu cannot match hpet no %d in IVHD\n", + hpet_no); + return (ENXIO); + } + + unit = amdiommu_unit_by_device_id(0, ifu.device_id); + if (unit == NULL) { + if (verbose) + printf("amdiommu cannot find unit id %d\n", + hpet_no); + return (ENXIO); + } + *unitp = unit; + iommu_device_set_iommu_prop(hpet, unit->iommu.dev); + if (ridp != NULL) + *ridp = ifu.rid_real; + if (dtep != NULL) + *dtep = ifu.dte; + if (edtep != NULL) + *edtep = ifu.edte; + if (verbose) { + printf("amdiommu%d HPET no %d " + "initiator rid %#06x dte %#x edte %#x\n", + unit->iommu.unit, hpet_no, ifu.rid_real, ifu.dte, + ifu.edte); + } + return (0); +} + +static struct iommu_unit * +amdiommu_find_method(device_t dev, bool verbose) +{ + struct amdiommu_unit *unit; + int error; + uint32_t edte; + uint16_t rid; + uint8_t dte; + + error = amdiommu_find_unit(dev, &unit, &rid, &dte, &edte, verbose); + if (error != 0) { + if (verbose) + device_printf(dev, + "cannot find amdiommu unit, error %d\n", + error); + return (NULL); + } + return (&unit->iommu); +} + +static struct x86_unit_common * +amdiommu_get_x86_common(struct iommu_unit *unit) +{ + struct amdiommu_unit *iommu; + + iommu = IOMMU2AMD(unit); + return (&iommu->x86c); +} + +static void +amdiommu_unit_pre_instantiate_ctx(struct iommu_unit *unit) +{ +} + +static struct x86_iommu amd_x86_iommu = { + .get_x86_common = amdiommu_get_x86_common, + .unit_pre_instantiate_ctx = amdiommu_unit_pre_instantiate_ctx, + .find = amdiommu_find_method, + .domain_unload_entry = amdiommu_domain_unload_entry, + .domain_unload = amdiommu_domain_unload, + .get_ctx = amdiommu_get_ctx, + .free_ctx_locked = amdiommu_free_ctx_locked_method, + .free_ctx = amdiommu_free_ctx_method, + .alloc_msi_intr = amdiommu_alloc_msi_intr, + .map_msi_intr = amdiommu_map_msi_intr, + .unmap_msi_intr = amdiommu_unmap_msi_intr, + .map_ioapic_intr = amdiommu_map_ioapic_intr, + .unmap_ioapic_intr = amdiommu_unmap_ioapic_intr, +}; + +static void +x86_iommu_set_amd(void *arg __unused) +{ + if (cpu_vendor_id == CPU_VENDOR_AMD) + set_x86_iommu(&amd_x86_iommu); +} + +SYSINIT(x86_iommu, SI_SUB_TUNABLES, SI_ORDER_ANY, x86_iommu_set_amd, NULL); + +#ifdef DDB +#include +#include + +static void +amdiommu_print_domain(struct amdiommu_domain *domain, bool show_mappings) +{ + struct iommu_domain *iodom; + + iodom = DOM2IODOM(domain); + +#if 0 + db_printf( + " @%p dom %d mgaw %d agaw %d pglvl %d end %jx refs %d\n" + " ctx_cnt %d flags %x pgobj %p map_ents %u\n", + domain, domain->domain, domain->mgaw, domain->agaw, domain->pglvl, + (uintmax_t)domain->iodom.end, domain->refs, domain->ctx_cnt, + domain->iodom.flags, domain->pgtbl_obj, domain->iodom.entries_cnt); +#endif + + iommu_db_domain_print_contexts(iodom); + + if (show_mappings) + iommu_db_domain_print_mappings(iodom); +} + +static void 
+amdiommu_print_one(struct amdiommu_unit *unit, bool show_domains, + bool show_mappings, bool show_cmdq) +{ + struct amdiommu_domain *domain; + struct amdiommu_cmd_generic *cp; + u_int cmd_head, cmd_tail, ci; + + cmd_head = amdiommu_read4(unit, AMDIOMMU_CMDBUF_HEAD); + cmd_tail = amdiommu_read4(unit, AMDIOMMU_CMDBUF_TAIL); + db_printf("amdiommu%d at %p, mmio at %#jx/sz %#jx\n", + unit->iommu.unit, unit, (uintmax_t)unit->mmio_base, + (uintmax_t)unit->mmio_sz); + db_printf(" hw ctrl %#018jx cmdevst %#018jx\n", + (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CTRL), + (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS)); + db_printf(" devtbl at %p\n", unit->dev_tbl); + db_printf(" hwseq at %p phys %#jx val %#jx\n", + &unit->x86c.inv_waitd_seq_hw, + pmap_kextract((vm_offset_t)&unit->x86c.inv_waitd_seq_hw), + unit->x86c.inv_waitd_seq_hw); + db_printf(" invq at %p base %#jx hw head/tail %#x/%#x\n", + unit->x86c.inv_queue, + (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CMDBUF_BASE), + cmd_head, cmd_tail); + + if (show_cmdq) { + db_printf(" cmd q:\n"); + for (ci = cmd_head; ci != cmd_tail;) { + cp = (struct amdiommu_cmd_generic *)(unit-> + x86c.inv_queue + ci); + db_printf( + " idx %#x op %#x %#010x %#010x %#010x %#010x\n", + ci >> AMDIOMMU_CMD_SZ_SHIFT, cp->op, + cp->w0, cp->ww1, cp->w2, cp->w3); + + ci += AMDIOMMU_CMD_SZ; + if (ci == unit->x86c.inv_queue_size) + ci = 0; + } + } + + if (show_domains) { + db_printf(" domains:\n"); + LIST_FOREACH(domain, &unit->domains, link) { + amdiommu_print_domain(domain, show_mappings); + if (db_pager_quit) + break; + } + } +} + +DB_SHOW_COMMAND(amdiommu, db_amdiommu_print) +{ + struct amdiommu_unit *unit; + bool show_domains, show_mappings, show_cmdq; + + show_domains = strchr(modif, 'd') != NULL; + show_mappings = strchr(modif, 'm') != NULL; + show_cmdq = strchr(modif, 'q') != NULL; + if (!have_addr) { + db_printf("usage: show amdiommu [/d] [/m] [/q] index\n"); + return; + } + if ((vm_offset_t)addr < 0x10000) + unit = amdiommu_unit_by_device_id(0, (u_int)addr); + else + unit = (struct amdiommu_unit *)addr; + amdiommu_print_one(unit, show_domains, show_mappings, show_cmdq); +} + +DB_SHOW_ALL_COMMAND(amdiommus, db_show_all_amdiommus) +{ + struct amdiommu_unit *unit; + bool show_domains, show_mappings, show_cmdq; + + show_domains = strchr(modif, 'd') != NULL; + show_mappings = strchr(modif, 'm') != NULL; + show_cmdq = strchr(modif, 'q') != NULL; + + TAILQ_FOREACH(unit, &amdiommu_units, unit_next) { + amdiommu_print_one(unit, show_domains, show_mappings, + show_cmdq); + if (db_pager_quit) + break; + } +} +#endif diff --git a/sys/x86/iommu/amd_event.c b/sys/x86/iommu/amd_event.c new file mode 100644 --- /dev/null +++ b/sys/x86/iommu/amd_event.c @@ -0,0 +1,256 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "opt_acpi.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "pcib_if.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void +amdiommu_event_rearm_intr(struct amdiommu_unit *unit) +{ + amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS, + AMDIOMMU_CMDEVS_EVLOGINT); +} + +static void +amdiommu_event_log_inc_head(struct amdiommu_unit *unit) +{ + unit->event_log_head++; + if (unit->event_log_head >= unit->event_log_size) + unit->event_log_head = 0; +} + +static void +amdiommu_event_log_print(struct amdiommu_unit *unit, + const struct amdiommu_event_generic *evp) +{ + printf("amdiommu%d: event type 0x%x 0x%08x 0x%08x 0x%08x 0x%08x\n", + unit->iommu.unit, evp->code, evp->w0, evp->ww1, evp->w2, evp->w3); + + if (evp->code == AMDIOMMU_EV_ILL_DEV_TABLE_ENTRY) { + const struct amdiommu_event_ill_dev_table_entry *ev_dte_p; + const struct amdiommu_dte *dte; + const uint32_t *x; + int i; + + ev_dte_p = (const struct + amdiommu_event_ill_dev_table_entry *)evp; + dte = &unit->dev_tbl[ev_dte_p->devid]; + + printf("\tdte %p:", dte); + for (i = 0, x = (const uint32_t *)dte; i < sizeof(*dte) / + sizeof(uint32_t); i++, x++) + printf(" 0x%08x", *x); + printf("\n"); + } +} + +static u_int +amdiommu_event_log_tail(struct amdiommu_unit *unit) +{ + return (amdiommu_read8(unit, AMDIOMMU_EVNTLOG_TAIL) >> + AMDIOMMU_EV_SZ_SHIFT); +} + +void +amdiommu_event_intr(struct amdiommu_unit *unit, uint64_t status) +{ + struct amdiommu_event_generic *evp; + u_int hw_tail, hw_tail1; + + if ((status & AMDIOMMU_CMDEVS_EVOVRFLW) != 0) { + taskqueue_enqueue(unit->event_taskqueue, &unit->event_task); + return; + } + + hw_tail1 = amdiommu_event_log_tail(unit); + do { + hw_tail = hw_tail1; + for (; hw_tail != unit->event_log_head; + amdiommu_event_log_inc_head(unit)) { + evp = &unit->event_log[unit->event_log_head]; + amdiommu_event_log_print(unit, evp); + } + amdiommu_write8(unit, AMDIOMMU_EVNTLOG_HEAD, + unit->event_log_head << AMDIOMMU_EV_SZ_SHIFT); + hw_tail1 = amdiommu_event_log_tail(unit); + } while (hw_tail1 != hw_tail); + amdiommu_event_rearm_intr(unit); +} + +static void +amdiommu_event_task(void *arg, int pending __unused) +{ + struct amdiommu_unit *unit; + uint64_t hwev_status, status; + struct amdiommu_event_generic hwev; + + unit = arg; + AMDIOMMU_LOCK(unit); + + if ((unit->efr & AMDIOMMU_EFR_HWEV_SUP) != 0) { + hwev_status = amdiommu_read8(unit, AMDIOMMU_HWEV_STATUS); + if ((hwev_status & AMDIOMMU_HWEVS_HEV) != 0) { + *(uint64_t *)&hwev = amdiommu_read8(unit, 
+ AMDIOMMU_HWEV_LOWER);
+ *((uint64_t *)&hwev + 1) = amdiommu_read8(unit,
+ AMDIOMMU_HWEV_UPPER);
+ printf("amdiommu%d: hw event%s\n", unit->iommu.unit,
+ (hwev_status & AMDIOMMU_HWEVS_HEO) != 0 ?
+ " (overflown)" : "");
+ amdiommu_event_log_print(unit, &hwev);
+ amdiommu_write8(unit, AMDIOMMU_HWEV_STATUS,
+ hwev_status);
+ }
+ }
+
+ status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS);
+ if ((status & AMDIOMMU_CMDEVS_EVOVRFLW) != 0) {
+ printf("amdiommu%d: event log overflow\n", unit->iommu.unit);
+
+ while ((status & AMDIOMMU_CMDEVS_EVLOGRUN) != 0) {
+ DELAY(1);
+ status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS);
+ }
+
+ unit->hw_ctrl &= ~AMDIOMMU_CTRL_EVNTLOG_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+
+ unit->event_log_head = 0;
+ amdiommu_write8(unit, AMDIOMMU_EVNTLOG_HEAD, 0);
+
+ amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS,
+ AMDIOMMU_CMDEVS_EVOVRFLW); /* RW1C */
+
+ unit->hw_ctrl |= AMDIOMMU_CTRL_EVNTLOG_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+
+ amdiommu_event_rearm_intr(unit);
+ }
+ AMDIOMMU_UNLOCK(unit);
+}
+
+int
+amdiommu_init_event(struct amdiommu_unit *unit)
+{
+ uint64_t base_reg;
+
+ mtx_init(&unit->event_lock, "amdevl", NULL, MTX_SPIN);
+
+ /* event log entries */
+ unit->event_log_size = AMDIOMMU_EVNTLOG_MIN;
+ TUNABLE_INT_FETCH("hw.amdiommu.event_log_size", &unit->event_log_size);
+ if (unit->event_log_size < AMDIOMMU_EVNTLOG_MIN ||
+ unit->event_log_size > AMDIOMMU_EVNTLOG_MAX ||
+ !powerof2(unit->event_log_size))
+ panic("invalid hw.amdiommu.event_log_size");
+ unit->event_log = kmem_alloc_contig(AMDIOMMU_EV_SZ *
+ unit->event_log_size, M_WAITOK | M_ZERO, 0, ~0ull, PAGE_SIZE,
+ 0, VM_MEMATTR_DEFAULT);
+
+ TASK_INIT(&unit->event_task, 0, amdiommu_event_task, unit);
+ unit->event_taskqueue = taskqueue_create_fast("amdiommuff", M_WAITOK,
+ taskqueue_thread_enqueue, &unit->event_taskqueue);
+ taskqueue_start_threads(&unit->event_taskqueue, 1, PI_AV,
+ "amdiommu%d event taskq", unit->iommu.unit);
+
+ base_reg = pmap_kextract((vm_offset_t)unit->event_log) |
+ (((uint64_t)0x8 + ilog2(unit->event_log_size /
+ AMDIOMMU_EVNTLOG_MIN)) << AMDIOMMU_EVNTLOG_BASE_SZSHIFT);
+ AMDIOMMU_LOCK(unit);
+ /*
+ * Re-arm before enabling interrupt, to not lose it when
+ * re-arming in the interrupt handler.
+ */
+ amdiommu_event_rearm_intr(unit);
+ amdiommu_write8(unit, AMDIOMMU_EVNTLOG_BASE, base_reg);
+ unit->hw_ctrl |= AMDIOMMU_CTRL_EVNTLOG_EN | AMDIOMMU_CTRL_EVENTINT_EN;
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+ AMDIOMMU_UNLOCK(unit);
+
+ return (0);
+}
+
+void
+amdiommu_fini_event(struct amdiommu_unit *unit)
+{
+ AMDIOMMU_LOCK(unit);
+ unit->hw_ctrl &= ~(AMDIOMMU_CTRL_EVNTLOG_EN |
+ AMDIOMMU_CTRL_EVENTINT_EN);
+ amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl);
+ amdiommu_write8(unit, AMDIOMMU_EVNTLOG_BASE, 0);
+ AMDIOMMU_UNLOCK(unit);
+
+ taskqueue_drain(unit->event_taskqueue, &unit->event_task);
+ taskqueue_free(unit->event_taskqueue);
+ unit->event_taskqueue = NULL;
+
+ kmem_free(unit->event_log, unit->event_log_size * AMDIOMMU_EV_SZ);
+ unit->event_log = NULL;
+ unit->event_log_head = unit->event_log_tail = 0;
+
+ mtx_destroy(&unit->event_lock);
+}
diff --git a/sys/x86/iommu/amd_idpgtbl.c b/sys/x86/iommu/amd_idpgtbl.c
new file mode 100644
--- /dev/null
+++ b/sys/x86/iommu/amd_idpgtbl.c
@@ -0,0 +1,391 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2024 The FreeBSD Foundation
+ *
+ * This software was developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void amdiommu_unmap_clear_pte(struct amdiommu_domain *domain, + iommu_gaddr_t base, int lvl, int flags, iommu_pte_t *pte, + struct sf_buf **sf, struct iommu_map_entry *entry, bool free_sf); +static int amdiommu_unmap_buf_locked(struct amdiommu_domain *domain, + iommu_gaddr_t base, iommu_gaddr_t size, int flags, + struct iommu_map_entry *entry); + +int +amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain) +{ + vm_page_t m; + int dom; + + KASSERT(domain->pgtbl_obj == NULL, + ("already initialized %p", domain)); + + domain->pgtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, + IDX_TO_OFF(pglvl_max_pages(domain->pglvl)), 0, 0, NULL); + if (bus_get_domain(domain->iodom.iommu->dev, &dom) == 0) + domain->pgtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom); + AMDIOMMU_DOMAIN_PGLOCK(domain); + m = iommu_pgalloc(domain->pgtbl_obj, 0, IOMMU_PGF_WAITOK | + IOMMU_PGF_ZERO | IOMMU_PGF_OBJL); + /* No implicit free of the top level page table page. 
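amdiommu_unmap_clear_pte() frees a page table page once its wire count drops to zero; the extra wire taken here guarantees that this never happens to the root page, which is reclaimed only when the whole pgtbl_obj is destroyed in amdiommu_domain_free_pgtbl().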
*/
+ vm_page_wire(m);
+ domain->pgtblr = m;
+ AMDIOMMU_DOMAIN_PGUNLOCK(domain);
+ AMDIOMMU_LOCK(domain->unit);
+ domain->iodom.flags |= IOMMU_DOMAIN_PGTBL_INITED;
+ AMDIOMMU_UNLOCK(domain->unit);
+ return (0);
+}
+
+void
+amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain)
+{
+ vm_object_t obj;
+ vm_page_t m;
+
+ obj = domain->pgtbl_obj;
+ if (obj == NULL) {
+ KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) != 0,
+ ("lost pagetable object domain %p", domain));
+ return;
+ }
+ AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain);
+ domain->pgtbl_obj = NULL;
+ domain->pgtblr = NULL;
+
+ /* Obliterate ref_counts */
+ VM_OBJECT_ASSERT_WLOCKED(obj);
+ for (m = vm_page_lookup(obj, 0); m != NULL; m = vm_page_next(m))
+ vm_page_clearref(m);
+ VM_OBJECT_WUNLOCK(obj);
+ vm_object_deallocate(obj);
+}
+
+static iommu_pte_t *
+amdiommu_pgtbl_map_pte(struct amdiommu_domain *domain, iommu_gaddr_t base,
+ int lvl, int flags, vm_pindex_t *idxp, struct sf_buf **sf)
+{
+ iommu_pte_t *pte, *ptep;
+ struct sf_buf *sfp;
+ vm_page_t m;
+ vm_pindex_t idx, idx1;
+
+ idx = pglvl_pgtbl_get_pindex(domain->pglvl, base, lvl);
+ if (*sf != NULL && idx == *idxp) {
+ pte = (iommu_pte_t *)sf_buf_kva(*sf);
+ } else {
+ if (*sf != NULL)
+ iommu_unmap_pgtbl(*sf);
+ *idxp = idx;
+retry:
+ pte = iommu_map_pgtbl(domain->pgtbl_obj, idx, flags, sf);
+ if (pte == NULL) {
+ KASSERT(lvl > 0,
+ ("lost root page table page %p", domain));
+ /*
+ * Page table page does not exist, allocate
+ * it and create a pte in the preceding page level
+ * to reference the allocated page table page.
+ */
+ m = iommu_pgalloc(domain->pgtbl_obj, idx, flags |
+ IOMMU_PGF_ZERO);
+ if (m == NULL)
+ return (NULL);
+
+ vm_page_wire(m);
+
+ sfp = NULL;
+ ptep = amdiommu_pgtbl_map_pte(domain, base, lvl - 1,
+ flags, &idx1, &sfp);
+ if (ptep == NULL) {
+ KASSERT(m->pindex != 0,
+ ("losing root page %p", domain));
+ vm_page_unwire_noq(m);
+ iommu_pgfree(domain->pgtbl_obj, m->pindex,
+ flags, NULL);
+ return (NULL);
+ }
+ ptep->pte = VM_PAGE_TO_PHYS(m) | AMDIOMMU_PTE_IR |
+ AMDIOMMU_PTE_IW | AMDIOMMU_PTE_PR |
+ ((domain->pglvl - lvl) << AMDIOMMU_PTE_NLVL_SHIFT);
+ vm_page_wire(sf_buf_page(sfp));
+ vm_page_unwire_noq(m);
+ iommu_unmap_pgtbl(sfp);
+ /* Only executed once.
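The just-allocated page table page is now present in pgtbl_obj, so the retried iommu_map_pgtbl() call finds it and returns a non-NULL pte.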
*/ + goto retry; + } + } + pte += pglvl_pgtbl_pte_off(domain->pglvl, base, lvl); + return (pte); +} + +static int +amdiommu_map_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base, + iommu_gaddr_t size, vm_page_t *ma, uint64_t pflags, int flags, + struct iommu_map_entry *entry) +{ + iommu_pte_t *pte; + struct sf_buf *sf; + iommu_gaddr_t base1; + vm_pindex_t pi, idx; + + AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain); + + base1 = base; + flags |= IOMMU_PGF_OBJL; + idx = -1; + pte = NULL; + sf = NULL; + + for (pi = 0; size > 0; base += IOMMU_PAGE_SIZE, size -= IOMMU_PAGE_SIZE, + pi++) { + KASSERT(size >= IOMMU_PAGE_SIZE, + ("mapping loop overflow %p %jx %jx %jx", domain, + (uintmax_t)base, (uintmax_t)size, (uintmax_t)IOMMU_PAGE_SIZE)); + pte = amdiommu_pgtbl_map_pte(domain, base, domain->pglvl - 1, + flags, &idx, &sf); + if (pte == NULL) { + KASSERT((flags & IOMMU_PGF_WAITOK) == 0, + ("failed waitable pte alloc %p", domain)); + if (sf != NULL) + iommu_unmap_pgtbl(sf); + amdiommu_unmap_buf_locked(domain, base1, base - base1, + flags, entry); + return (ENOMEM); + } + /* next level 0, no superpages */ + pte->pte = VM_PAGE_TO_PHYS(ma[pi]) | pflags | AMDIOMMU_PTE_PR; + vm_page_wire(sf_buf_page(sf)); + } + if (sf != NULL) + iommu_unmap_pgtbl(sf); + return (0); +} + +static int +amdiommu_map_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry, + vm_page_t *ma, uint64_t eflags, int flags) +{ + struct amdiommu_domain *domain; + uint64_t pflags; + iommu_gaddr_t base, size; + int error; + + base = entry->start; + size = entry->end - entry->start; + pflags = ((eflags & IOMMU_MAP_ENTRY_READ) != 0 ? AMDIOMMU_PTE_IR : 0) | + ((eflags & IOMMU_MAP_ENTRY_WRITE) != 0 ? AMDIOMMU_PTE_IW : 0) | + ((eflags & IOMMU_MAP_ENTRY_SNOOP) != 0 ? AMDIOMMU_PTE_FC : 0); + /* IOMMU_MAP_ENTRY_TM ignored */ + + domain = IODOM2DOM(iodom); + + KASSERT((iodom->flags & IOMMU_DOMAIN_IDMAP) == 0, + ("modifying idmap pagetable domain %p", domain)); + KASSERT((base & IOMMU_PAGE_MASK) == 0, + ("non-aligned base %p %jx %jx", domain, (uintmax_t)base, + (uintmax_t)size)); + KASSERT((size & IOMMU_PAGE_MASK) == 0, + ("non-aligned size %p %jx %jx", domain, (uintmax_t)base, + (uintmax_t)size)); + KASSERT(size > 0, ("zero size %p %jx %jx", domain, (uintmax_t)base, + (uintmax_t)size)); + KASSERT(base < iodom->end, + ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base, + (uintmax_t)size, (uintmax_t)iodom->end)); + KASSERT(base + size < iodom->end, + ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base, + (uintmax_t)size, (uintmax_t)iodom->end)); + KASSERT(base + size > base, + ("size overflow %p %jx %jx", domain, (uintmax_t)base, + (uintmax_t)size)); + KASSERT((pflags & (AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW)) != 0, + ("neither read nor write %jx", (uintmax_t)pflags)); + KASSERT((pflags & ~(AMDIOMMU_PTE_IR | AMDIOMMU_PTE_IW | AMDIOMMU_PTE_FC + )) == 0, + ("invalid pte flags %jx", (uintmax_t)pflags)); + KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags)); + + AMDIOMMU_DOMAIN_PGLOCK(domain); + error = amdiommu_map_buf_locked(domain, base, size, ma, pflags, + flags, entry); + AMDIOMMU_DOMAIN_PGUNLOCK(domain); + iommu_qi_invalidate_sync(iodom, base, size, + (flags & IOMMU_PGF_WAITOK) != 0); /* XXXKIB seems to be needed */ + return (error); +} + +static void +amdiommu_free_pgtbl_pde(struct amdiommu_domain *domain, iommu_gaddr_t base, + int lvl, int flags, struct iommu_map_entry *entry) +{ + struct sf_buf *sf; + iommu_pte_t *pde; + vm_pindex_t idx; + + sf = NULL; + pde = amdiommu_pgtbl_map_pte(domain, base, 
lvl, flags, &idx, &sf); + amdiommu_unmap_clear_pte(domain, base, lvl, flags, pde, &sf, entry, + true); +} + +static void +amdiommu_unmap_clear_pte(struct amdiommu_domain *domain, iommu_gaddr_t base, + int lvl, int flags, iommu_pte_t *pte, struct sf_buf **sf, + struct iommu_map_entry *entry, bool free_sf) +{ + vm_page_t m; + + pte->pte = 0; + m = sf_buf_page(*sf); + if (free_sf) { + iommu_unmap_pgtbl(*sf); + *sf = NULL; + } + if (!vm_page_unwire_noq(m)) + return; + KASSERT(lvl != 0, + ("lost reference (lvl) on root pg domain %p base %jx lvl %d", + domain, (uintmax_t)base, lvl)); + KASSERT(m->pindex != 0, + ("lost reference (idx) on root pg domain %p base %jx lvl %d", + domain, (uintmax_t)base, lvl)); + iommu_pgfree(domain->pgtbl_obj, m->pindex, flags, entry); + amdiommu_free_pgtbl_pde(domain, base, lvl - 1, flags, entry); +} + +static int +amdiommu_unmap_buf_locked(struct amdiommu_domain *domain, iommu_gaddr_t base, + iommu_gaddr_t size, int flags, struct iommu_map_entry *entry) +{ + iommu_pte_t *pte; + struct sf_buf *sf; + vm_pindex_t idx; + iommu_gaddr_t pg_sz; + + AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(domain); + if (size == 0) + return (0); + + KASSERT((domain->iodom.flags & IOMMU_DOMAIN_IDMAP) == 0, + ("modifying idmap pagetable domain %p", domain)); + KASSERT((base & IOMMU_PAGE_MASK) == 0, + ("non-aligned base %p %jx %jx", domain, (uintmax_t)base, + (uintmax_t)size)); + KASSERT((size & IOMMU_PAGE_MASK) == 0, + ("non-aligned size %p %jx %jx", domain, (uintmax_t)base, + (uintmax_t)size)); + KASSERT(base < DOM2IODOM(domain)->end, + ("base too high %p %jx %jx end %jx", domain, (uintmax_t)base, + (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end)); + KASSERT(base + size < DOM2IODOM(domain)->end, + ("end too high %p %jx %jx end %jx", domain, (uintmax_t)base, + (uintmax_t)size, (uintmax_t)DOM2IODOM(domain)->end)); + KASSERT(base + size > base, + ("size overflow %p %jx %jx", domain, (uintmax_t)base, + (uintmax_t)size)); + KASSERT((flags & ~IOMMU_PGF_WAITOK) == 0, ("invalid flags %x", flags)); + + pg_sz = IOMMU_PAGE_SIZE; + flags |= IOMMU_PGF_OBJL; + + for (sf = NULL; size > 0; base += pg_sz, size -= pg_sz) { + pte = amdiommu_pgtbl_map_pte(domain, base, + domain->pglvl - 1, flags, &idx, &sf); + KASSERT(pte != NULL, + ("sleeping or page missed %p %jx %d 0x%x", + domain, (uintmax_t)base, domain->pglvl - 1, flags)); + amdiommu_unmap_clear_pte(domain, base, domain->pglvl - 1, + flags, pte, &sf, entry, false); + KASSERT(size >= pg_sz, + ("unmapping loop overflow %p %jx %jx %jx", domain, + (uintmax_t)base, (uintmax_t)size, (uintmax_t)pg_sz)); + } + if (sf != NULL) + iommu_unmap_pgtbl(sf); + return (0); +} + +static int +amdiommu_unmap_buf(struct iommu_domain *iodom, struct iommu_map_entry *entry, + int flags) +{ + struct amdiommu_domain *domain; + int error; + + domain = IODOM2DOM(iodom); + + AMDIOMMU_DOMAIN_PGLOCK(domain); + error = amdiommu_unmap_buf_locked(domain, entry->start, + entry->end - entry->start, flags, entry); + AMDIOMMU_DOMAIN_PGUNLOCK(domain); + return (error); +} + +const struct iommu_domain_map_ops amdiommu_domain_map_ops = { + .map = amdiommu_map_buf, + .unmap = amdiommu_unmap_buf, +}; diff --git a/sys/x86/iommu/amd_intrmap.c b/sys/x86/iommu/amd_intrmap.c new file mode 100644 --- /dev/null +++ b/sys/x86/iommu/amd_intrmap.c @@ -0,0 +1,392 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct amdiommu_ctx *amdiommu_ir_find(device_t src, uint16_t *rid, + bool *is_iommu); +static void amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src, + u_int cookie); + +int +amdiommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count) +{ + struct amdiommu_ctx *ctx; + vmem_addr_t vmem_res; + u_int idx, i; + int error; + + ctx = amdiommu_ir_find(src, NULL, NULL); + if (ctx == NULL || !CTX2AMD(ctx)->irte_enabled) { + for (i = 0; i < count; i++) + cookies[i] = -1; + return (EOPNOTSUPP); + } + + error = vmem_alloc(ctx->irtids, count, M_FIRSTFIT | M_NOWAIT, + &vmem_res); + if (error != 0) { + KASSERT(error != EOPNOTSUPP, + ("impossible EOPNOTSUPP from vmem")); + return (error); + } + idx = vmem_res; + for (i = 0; i < count; i++) + cookies[i] = idx + i; + return (0); +} + +int +amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector, + u_int cookie, uint64_t *addr, uint32_t *data) +{ + struct amdiommu_ctx *ctx; + struct amdiommu_unit *unit; + uint16_t rid; + bool is_iommu; + + ctx = amdiommu_ir_find(src, &rid, &is_iommu); + if (is_iommu) { + if (addr != NULL) { + *data = vector; + *addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12); + if (x2apic_mode) + *addr |= ((uint64_t)cpu & 0xffffff00) << 32; + else + KASSERT(cpu <= 0xff, + ("cpu id too big %d", cpu)); + } + return (0); + } + + if (ctx == NULL) + return (EOPNOTSUPP); + unit = CTX2AMD(ctx); + if (!unit->irte_enabled || cookie == -1) + return (EOPNOTSUPP); + if (cookie >= unit->irte_nentries) { + device_printf(src, "amdiommu%d: cookie %u irte max %u\n", + unit->iommu.unit, cookie, unit->irte_nentries); + return (EINVAL); + } + + if (unit->irte_x2apic) { + struct amdiommu_irte_basic_vapic_x2 *irte; + + irte = &ctx->irtx2[cookie]; + irte->supiopf = 0; + irte->inttype = 0; + irte->rqeoi = 0; + irte->dm = 
0; + irte->guestmode = 0; + irte->dest0 = cpu; + irte->rsrv0 = 0; + irte->vector = vector; + irte->rsrv1 = 0; + irte->rsrv2 = 0; + irte->dest1 = cpu >> 24; + atomic_thread_fence_rel(); + irte->remapen = 1; + } else { + struct amdiommu_irte_basic_novapic *irte; + + irte = &ctx->irtb[cookie]; + irte->supiopf = 0; + irte->inttype = 0; /* fixed */ + irte->rqeoi = 0; + irte->dm = 0; /* phys */ + irte->guestmode = 0; + irte->dest = cpu; + irte->vector = vector; + irte->rsrv = 0; + atomic_thread_fence_rel(); + irte->remapen = 1; + } + + if (addr != NULL) { + *data = cookie; + *addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12); + if (unit->irte_x2apic) + *addr |= ((uint64_t)cpu & 0xffffff00) << 32; + } + + iommu_get_requester(src, &rid); + AMDIOMMU_LOCK(unit); + amdiommu_qi_invalidate_ir_locked(unit, rid); + AMDIOMMU_UNLOCK(unit); + + return (0); +} + +int +amdiommu_unmap_msi_intr(device_t src, u_int cookie) +{ + struct amdiommu_ctx *ctx; + + if (cookie == -1) + return (0); + ctx = amdiommu_ir_find(src, NULL, NULL); + amdiommu_ir_free_irte(ctx, src, cookie); + return (0); +} + +int +amdiommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, + bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi, + uint32_t *lo) +{ + /* XXXKIB for early call from ioapic_create() */ + return (EOPNOTSUPP); +} + +int +amdiommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie) +{ + /* XXXKIB */ + return (0); +} + +static struct amdiommu_ctx * +amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu) +{ + devclass_t src_class; + struct amdiommu_unit *unit; + struct amdiommu_ctx *ctx; + struct iommu_ctx *ioctx; + uint16_t rid; + int error; + + /* + * We need to determine if the interrupt source generates FSB + * interrupts. If yes, it is either IOMMU, in which case + * interrupts are not remapped. Or it is HPET, and interrupts + * are remapped. For HPET, source id is reported by HPET + * record in IVHD ACPI table. 
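+ * For the IOMMU unit itself no context is allocated; the MSI
+ * address and data are composed directly in amdiommu_map_msi_intr().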
+ */ + if (is_iommu != NULL) + *is_iommu = false; + + ctx = NULL; + + src_class = device_get_devclass(src); + if (src_class == devclass_find("amdiommu")) { + if (is_iommu != NULL) + *is_iommu = true; + } else if (src_class == devclass_find("hpet")) { + error = amdiommu_find_unit_for_hpet(src, &unit, &rid, NULL, + NULL, bootverbose); + ctx = NULL; // XXXKIB allocate ctx + } else { + error = amdiommu_find_unit(src, &unit, &rid, NULL, NULL, + bootverbose); + if (error == 0) { + iommu_get_requester(src, &rid); + ioctx = amdiommu_get_ctx(AMD2IOMMU(unit), src, rid, + false /* XXXKIB */, false); + if (ioctx != NULL) + ctx = IOCTX2CTX(ioctx); + } + } + if (ridp != NULL) + *ridp = rid; + return (ctx); +} + +static void +amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src, + u_int cookie) +{ + struct amdiommu_unit *unit; + uint16_t rid; + + MPASS(ctx != NULL); + unit = CTX2AMD(ctx); + + KASSERT(unit->irte_enabled, + ("unmap: cookie %d ctx %p unit %p", cookie, ctx, unit)); + KASSERT(cookie < unit->irte_nentries, + ("bad cookie %u %u", cookie, unit->irte_nentries)); + + if (unit->irte_x2apic) { + struct amdiommu_irte_basic_vapic_x2 *irte; + + irte = &ctx->irtx2[cookie]; + irte->remapen = 0; + atomic_thread_fence_rel(); + bzero(irte, sizeof(*irte)); + } else { + struct amdiommu_irte_basic_novapic *irte; + + irte = &ctx->irtb[cookie]; + irte->remapen = 0; + atomic_thread_fence_rel(); + bzero(irte, sizeof(*irte)); + } + iommu_get_requester(src, &rid); + AMDIOMMU_LOCK(unit); + amdiommu_qi_invalidate_ir_locked(unit, rid); + AMDIOMMU_UNLOCK(unit); +} + +int +amdiommu_ctx_init_irte(struct amdiommu_ctx *ctx) +{ + struct amdiommu_unit *unit; + void *ptr; + unsigned long sz; + int dom; + + unit = CTX2AMD(ctx); + if (!unit->irte_enabled) + return (0); + + KASSERT(unit->irte_nentries > 0 && + unit->irte_nentries <= 2048 && + powerof2(unit->irte_nentries), + ("amdiommu%d: unit %p irte_nentries %u", unit->iommu.unit, + unit, unit->irte_nentries)); + + if (bus_get_domain(unit->iommu.dev, &dom) != 0) + dom = -1; + sz = unit->irte_nentries; + sz *= unit->irte_x2apic ? 
sizeof(struct amdiommu_irte_basic_vapic_x2) : + sizeof(struct amdiommu_irte_basic_novapic); + + if (dom != -1) { + ptr = contigmalloc_domainset(sz, M_DEVBUF, DOMAINSET_PREF(dom), + M_WAITOK | M_ZERO, 0, ~0ull, 128, 0); + } else { + ptr = contigmalloc(sz, M_DEVBUF, M_WAITOK | M_ZERO, + 0, ~0ull, 128, 0); + } + if (unit->irte_x2apic) + ctx->irtx2 = ptr; + else + ctx->irtb = ptr; + ctx->irtids = vmem_create("amdirt", 0, unit->irte_nentries, 1, 0, + M_FIRSTFIT | M_NOWAIT); + + intr_reprogram(); // XXXKIB + + return (0); +} + +void +amdiommu_ctx_fini_irte(struct amdiommu_ctx *ctx) +{ + struct amdiommu_unit *unit; + + unit = CTX2AMD(ctx); + if (!unit->irte_enabled) + return; + if (unit->irte_x2apic) + free(ctx->irtx2, M_DEVBUF); + else + free(ctx->irtb, M_DEVBUF); + vmem_destroy(ctx->irtids); +} + +int +amdiommu_init_irt(struct amdiommu_unit *unit) +{ + int enabled, nentries; + + SYSCTL_ADD_INT(&unit->iommu.sysctl_ctx, + SYSCTL_CHILDREN(device_get_sysctl_tree(unit->iommu.dev)), + OID_AUTO, "ir", CTLFLAG_RD, &unit->irte_enabled, 0, + "Interrupt remapping ops enabled"); + + enabled = 1; + TUNABLE_INT_FETCH("hw.iommu.ir", &enabled); + + unit->irte_enabled = enabled != 0; + if (!unit->irte_enabled) + return (0); + + nentries = 32; + TUNABLE_INT_FETCH("hw.iommu.amd.ir_num", &nentries); + nentries = roundup_pow_of_two(nentries); + if (nentries < 1) + nentries = 1; + if (nentries > 2048) + nentries = 2048; + unit->irte_nentries = nentries; + + unit->irte_x2apic = x2apic_mode; + return (0); +} + +void +amdiommu_fini_irt(struct amdiommu_unit *unit) +{ +} diff --git a/sys/x86/iommu/amd_iommu.h b/sys/x86/iommu/amd_iommu.h new file mode 100644 --- /dev/null +++ b/sys/x86/iommu/amd_iommu.h @@ -0,0 +1,235 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 The FreeBSD Foundation + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef __X86_IOMMU_AMD_IOMMU_H +#define __X86_IOMMU_AMD_IOMMU_H + +#include + +#define AMDIOMMU_DEV_REPORTED 0x00000001 + +struct amdiommu_unit; + +struct amdiommu_domain { + struct iommu_domain iodom; + int domain; /* (c) DID, written in context entry */ + struct amdiommu_unit *unit; /* (c) */ + + u_int ctx_cnt; /* (u) Number of contexts owned */ + u_int refs; /* (u) Refs, including ctx */ + LIST_ENTRY(amdiommu_domain) link;/* (u) Member in the iommu list */ + vm_object_t pgtbl_obj; /* (c) Page table pages */ + vm_page_t pgtblr; /* (c) Page table root page */ + u_int pglvl; /* (c) Page table levels */ +}; + +struct amdiommu_ctx { + struct iommu_ctx context; + struct amdiommu_irte_basic_novapic *irtb; + struct amdiommu_irte_basic_vapic_x2 *irtx2; + vmem_t *irtids; +}; + +struct amdiommu_unit { + struct iommu_unit iommu; + struct x86_unit_common x86c; + u_int unit_dom; /* Served PCI domain, from IVRS */ + u_int device_id; /* basically PCI RID */ + u_int unit_id; /* Hypertransport Unit ID, deprecated */ + TAILQ_ENTRY(amdiommu_unit) unit_next; + int seccap_reg; + uint64_t efr; + vm_paddr_t mmio_base; + vm_size_t mmio_sz; + struct resource *mmio_res; + int mmio_rid; + uint64_t hw_ctrl; + + u_int numirqs; + struct resource *msix_table; + int msix_table_rid; + int irq_cmdev_rid; + struct resource *irq_cmdev; + void *irq_cmdev_cookie; + + struct amdiommu_dte *dev_tbl; + vm_object_t devtbl_obj; + + LIST_HEAD(, amdiommu_domain) domains; + struct unrhdr *domids; + + struct mtx event_lock; + struct amdiommu_event_generic *event_log; + u_int event_log_size; + u_int event_log_head; + u_int event_log_tail; + struct task event_task; + struct taskqueue *event_taskqueue; + + int irte_enabled; /* int for sysctl type */ + bool irte_x2apic; + u_int irte_nentries; +}; + +#define AMD2IOMMU(unit) (&((unit)->iommu)) +#define IOMMU2AMD(unit) \ + __containerof((unit), struct amdiommu_unit, iommu) + +#define AMDIOMMU_LOCK(unit) mtx_lock(&AMD2IOMMU(unit)->lock) +#define AMDIOMMU_UNLOCK(unit) mtx_unlock(&AMD2IOMMU(unit)->lock) +#define AMDIOMMU_ASSERT_LOCKED(unit) mtx_assert(&AMD2IOMMU(unit)->lock, \ + MA_OWNED) + +#define AMDIOMMU_EVENT_LOCK(unit) mtx_lock_spin(&(unit)->event_lock) +#define AMDIOMMU_EVENT_UNLOCK(unit) mtx_unlock_spin(&(unit)->event_lock) +#define AMDIOMMU_EVENT_ASSERT_LOCKED(unit) \ + mtx_assert(&(unit)->event_lock, MA_OWNED) + +#define DOM2IODOM(domain) (&((domain)->iodom)) +#define IODOM2DOM(domain) \ + __containerof((domain), struct amdiommu_domain, iodom) + +#define CTX2IOCTX(ctx) (&((ctx)->context)) +#define IOCTX2CTX(ctx) \ + __containerof((ctx), struct amdiommu_ctx, context) + +#define CTX2DOM(ctx) IODOM2DOM((ctx)->context.domain) +#define CTX2AMD(ctx) (CTX2DOM(ctx)->unit) +#define DOM2AMD(domain) ((domain)->unit) + +#define AMDIOMMU_DOMAIN_LOCK(dom) mtx_lock(&(dom)->iodom.lock) +#define AMDIOMMU_DOMAIN_UNLOCK(dom) mtx_unlock(&(dom)->iodom.lock) +#define AMDIOMMU_DOMAIN_ASSERT_LOCKED(dom) \ + mtx_assert(&(dom)->iodom.lock, MA_OWNED) + +#define AMDIOMMU_DOMAIN_PGLOCK(dom) VM_OBJECT_WLOCK((dom)->pgtbl_obj) +#define AMDIOMMU_DOMAIN_PGTRYLOCK(dom) VM_OBJECT_TRYWLOCK((dom)->pgtbl_obj) +#define AMDIOMMU_DOMAIN_PGUNLOCK(dom) VM_OBJECT_WUNLOCK((dom)->pgtbl_obj) +#define AMDIOMMU_DOMAIN_ASSERT_PGLOCKED(dom) \ + VM_OBJECT_ASSERT_WLOCKED((dom)->pgtbl_obj) + +#define AMDIOMMU_RID 1001 + +static inline uint32_t +amdiommu_read4(const struct amdiommu_unit *unit, int reg) +{ + + return (bus_read_4(unit->mmio_res, reg)); +} + +static inline uint64_t +amdiommu_read8(const struct amdiommu_unit *unit, 
int reg) +{ +#ifdef __i386__ + uint32_t high, low; + + low = bus_read_4(unit->mmio_res, reg); + high = bus_read_4(unit->mmio_res, reg + 4); + return (low | ((uint64_t)high << 32)); +#else + return (bus_read_8(unit->mmio_res, reg)); +#endif +} + +static inline void +amdiommu_write4(const struct amdiommu_unit *unit, int reg, uint32_t val) +{ + bus_write_4(unit->mmio_res, reg, val); +} + +static inline void +amdiommu_write8(const struct amdiommu_unit *unit, int reg, uint64_t val) +{ +#ifdef __i386__ + uint32_t high, low; + + low = val; + high = val >> 32; + bus_write_4(unit->mmio_res, reg, low); + bus_write_4(unit->mmio_res, reg + 4, high); +#else + bus_write_8(unit->mmio_res, reg, val); +#endif +} + +int amdiommu_find_unit(device_t dev, struct amdiommu_unit **unitp, + uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose); +int amdiommu_find_unit_for_ioapic(int apic_id, struct amdiommu_unit **unitp, + uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose); +int amdiommu_find_unit_for_hpet(device_t hpet, struct amdiommu_unit **unitp, + uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose); + +int amdiommu_init_cmd(struct amdiommu_unit *unit); +void amdiommu_fini_cmd(struct amdiommu_unit *unit); + +void amdiommu_event_intr(struct amdiommu_unit *unit, uint64_t status); +int amdiommu_init_event(struct amdiommu_unit *unit); +void amdiommu_fini_event(struct amdiommu_unit *unit); + +int amdiommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count); +int amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector, + u_int cookie, uint64_t *addr, uint32_t *data); +int amdiommu_unmap_msi_intr(device_t src, u_int cookie); +int amdiommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, + bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi, + uint32_t *lo); +int amdiommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie); +int amdiommu_init_irt(struct amdiommu_unit *unit); +void amdiommu_fini_irt(struct amdiommu_unit *unit); +int amdiommu_ctx_init_irte(struct amdiommu_ctx *ctx); +void amdiommu_ctx_fini_irte(struct amdiommu_ctx *ctx); + +void amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, + bool cansleep); +void amdiommu_domain_unload(struct iommu_domain *iodom, + struct iommu_map_entries_tailq *entries, bool cansleep); +struct iommu_ctx *amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev, + uint16_t rid, bool id_mapped, bool rmrr_init); +void amdiommu_free_ctx_locked_method(struct iommu_unit *iommu, + struct iommu_ctx *context); +void amdiommu_free_ctx_method(struct iommu_ctx *context); +struct amdiommu_domain *amdiommu_find_domain(struct amdiommu_unit *unit, + uint16_t rid); + +void amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx); +void amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx); +void amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit, + uint16_t devid); +void amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit, + uint16_t devid); +void amdiommu_qi_invalidate_all_pages_locked_nowait( + struct amdiommu_domain *domain); +void amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu); + +int amdiommu_domain_alloc_pgtbl(struct amdiommu_domain *domain); +void amdiommu_domain_free_pgtbl(struct amdiommu_domain *domain); +extern const struct iommu_domain_map_ops amdiommu_domain_map_ops; + +#endif
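For reference, the user-visible knobs introduced above, with names taken verbatim from the hunks: the event log size comes from the loader tunable hw.amdiommu.event_log_size and must be a power of two between AMDIOMMU_EVNTLOG_MIN and AMDIOMMU_EVNTLOG_MAX, otherwise amdiommu_init_event() panics; interrupt remapping is governed by hw.iommu.ir (default enabled) and hw.iommu.amd.ir_num (rounded up to a power of two, clamped to at most 2048); the DDB commands "show amdiommu [/d] [/m] [/q] index" and "show all amdiommus" dump unit, domain, mapping, and command-queue state. A minimal loader.conf sketch follows; the event log value is illustrative only, since the AMDIOMMU_EVNTLOG_* limits are defined outside this diff:

hw.iommu.ir="1"                    # interrupt remapping, on by default
hw.iommu.amd.ir_num="32"           # IRTEs per context, rounded up to a power of two
hw.amdiommu.event_log_size="256"   # event log entries, must be a power of two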