diff --git a/sys/x86/iommu/amd_cmd.c b/sys/x86/iommu/amd_cmd.c index bbc2a8e0ad9f..384d92066e36 100644 --- a/sys/x86/iommu/amd_cmd.c +++ b/sys/x86/iommu/amd_cmd.c @@ -1,360 +1,360 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2024 The FreeBSD Foundation * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void amdiommu_enable_cmdbuf(struct amdiommu_unit *unit) { AMDIOMMU_ASSERT_LOCKED(unit); unit->hw_ctrl |= AMDIOMMU_CTRL_CMDBUF_EN; amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); } static void amdiommu_disable_cmdbuf(struct amdiommu_unit *unit) { AMDIOMMU_ASSERT_LOCKED(unit); unit->hw_ctrl &= ~AMDIOMMU_CTRL_CMDBUF_EN; amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); } static void amdiommu_enable_qi_intr(struct iommu_unit *iommu) { struct amdiommu_unit *unit; unit = IOMMU2AMD(iommu); AMDIOMMU_ASSERT_LOCKED(unit); unit->hw_ctrl |= AMDIOMMU_CTRL_COMWINT_EN; amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS, AMDIOMMU_CMDEVS_COMWAITINT); } static void amdiommu_disable_qi_intr(struct iommu_unit *iommu) { struct amdiommu_unit *unit; unit = IOMMU2AMD(iommu); AMDIOMMU_ASSERT_LOCKED(unit); unit->hw_ctrl &= ~AMDIOMMU_CTRL_COMWINT_EN; amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); } static void amdiommu_cmd_advance_tail(struct iommu_unit *iommu) { struct amdiommu_unit *unit; unit = IOMMU2AMD(iommu); AMDIOMMU_ASSERT_LOCKED(unit); amdiommu_write8(unit, AMDIOMMU_CMDBUF_TAIL, unit->x86c.inv_queue_tail); } static void amdiommu_cmd_ensure(struct iommu_unit *iommu, int descr_count) { struct amdiommu_unit *unit; uint64_t head; int bytes; unit = IOMMU2AMD(iommu); AMDIOMMU_ASSERT_LOCKED(unit); bytes = descr_count << AMDIOMMU_CMD_SZ_SHIFT; for (;;) { if (bytes <= unit->x86c.inv_queue_avail) break; /* refill */ head = amdiommu_read8(unit, AMDIOMMU_CMDBUF_HEAD); head &= AMDIOMMU_CMDPTR_MASK; unit->x86c.inv_queue_avail = head - unit->x86c.inv_queue_tail - AMDIOMMU_CMD_SZ; if (head <= 
unit->x86c.inv_queue_tail) unit->x86c.inv_queue_avail += unit->x86c.inv_queue_size; if (bytes <= unit->x86c.inv_queue_avail) break; /* * No space in the queue, do busy wait. Hardware must * make a progress. But first advance the tail to * inform the descriptor streamer about entries we * might have already filled, otherwise they could * clog the whole queue.. * * See dmar_qi_invalidate_locked() for a discussion * about data race prevention. */ amdiommu_cmd_advance_tail(iommu); unit->x86c.inv_queue_full++; cpu_spinwait(); } unit->x86c.inv_queue_avail -= bytes; } static void amdiommu_cmd_emit(struct amdiommu_unit *unit, const struct amdiommu_cmd_generic *cmd) { AMDIOMMU_ASSERT_LOCKED(unit); memcpy(unit->x86c.inv_queue + unit->x86c.inv_queue_tail, cmd, sizeof(*cmd)); unit->x86c.inv_queue_tail += AMDIOMMU_CMD_SZ; KASSERT(unit->x86c.inv_queue_tail <= unit->x86c.inv_queue_size, ("tail overflow 0x%x 0x%jx", unit->x86c.inv_queue_tail, (uintmax_t)unit->x86c.inv_queue_size)); unit->x86c.inv_queue_tail &= unit->x86c.inv_queue_size - 1; } static void amdiommu_cmd_emit_wait_descr(struct iommu_unit *iommu, uint32_t seq, bool intr, bool memw, bool fence) { struct amdiommu_unit *unit; struct amdiommu_cmd_completion_wait c; unit = IOMMU2AMD(iommu); AMDIOMMU_ASSERT_LOCKED(unit); bzero(&c, sizeof(c)); c.op = AMDIOMMU_CMD_COMPLETION_WAIT; if (memw) { uint32_t x; c.s = 1; x = unit->x86c.inv_waitd_seq_hw_phys; x >>= 3; c.address0 = x; x = unit->x86c.inv_waitd_seq_hw_phys >> 32; c.address1 = x; c.data0 = seq; } if (fence) c.f = 1; if (intr) c.i = 1; amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); } static void amdiommu_qi_invalidate_emit(struct iommu_domain *adomain, iommu_gaddr_t base, iommu_gaddr_t size, struct iommu_qi_genseq *pseq, bool emit_wait) { struct amdiommu_domain *domain; struct amdiommu_unit *unit; struct amdiommu_cmd_invalidate_iommu_pages c; u_int isize; domain = IODOM2DOM(adomain); unit = domain->unit; AMDIOMMU_ASSERT_LOCKED(unit); bzero(&c, sizeof(c)); c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES; c.domainid = domain->domain; isize = IOMMU_PAGE_SIZE; /* XXXKIB handle superpages */ for (; size > 0; base += isize, size -= isize) { amdiommu_cmd_ensure(AMD2IOMMU(unit), 1); c.s = 0; c.pde = 1; c.address = base >> IOMMU_PAGE_SHIFT; amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); } iommu_qi_emit_wait_seq(AMD2IOMMU(unit), pseq, emit_wait); } void amdiommu_qi_invalidate_all_pages_locked_nowait(struct amdiommu_domain *domain) { struct amdiommu_unit *unit; struct amdiommu_cmd_invalidate_iommu_pages c; unit = domain->unit; AMDIOMMU_ASSERT_LOCKED(unit); bzero(&c, sizeof(c)); c.op = AMDIOMMU_CMD_INVALIDATE_IOMMU_PAGES; c.domainid = domain->domain; /* * The magic specified in the note for INVALIDATE_IOMMU_PAGES * description. 
*/ c.s = 1; c.pde = 1; c.address = 0x7ffffffffffff; amdiommu_cmd_ensure(AMD2IOMMU(unit), 1); amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); } void amdiommu_qi_invalidate_wait_sync(struct iommu_unit *iommu) { struct iommu_qi_genseq gseq; amdiommu_cmd_ensure(iommu, 1); iommu_qi_emit_wait_seq(iommu, &gseq, true); IOMMU2AMD(iommu)->x86c.inv_seq_waiters++; amdiommu_cmd_advance_tail(iommu); iommu_qi_wait_for_seq(iommu, &gseq, true); } void amdiommu_qi_invalidate_ctx_locked_nowait(struct amdiommu_ctx *ctx) { struct amdiommu_cmd_invalidate_devtab_entry c; amdiommu_cmd_ensure(AMD2IOMMU(CTX2AMD(ctx)), 1); bzero(&c, sizeof(c)); c.op = AMDIOMMU_CMD_INVALIDATE_DEVTAB_ENTRY; c.devid = ctx->context.rid; amdiommu_cmd_emit(CTX2AMD(ctx), (struct amdiommu_cmd_generic *)&c); } void amdiommu_qi_invalidate_ctx_locked(struct amdiommu_ctx *ctx) { amdiommu_qi_invalidate_ctx_locked_nowait(ctx); amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx))); } void amdiommu_qi_invalidate_ir_locked_nowait(struct amdiommu_unit *unit, uint16_t devid) { struct amdiommu_cmd_invalidate_interrupt_table c; AMDIOMMU_ASSERT_LOCKED(unit); amdiommu_cmd_ensure(AMD2IOMMU(unit), 1); bzero(&c, sizeof(c)); c.op = AMDIOMMU_CMD_INVALIDATE_INTERRUPT_TABLE; c.devid = devid; amdiommu_cmd_emit(unit, (struct amdiommu_cmd_generic *)&c); } void amdiommu_qi_invalidate_ir_locked(struct amdiommu_unit *unit, uint16_t devid) { amdiommu_qi_invalidate_ir_locked_nowait(unit, devid); amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(unit)); } static void amdiommu_qi_task(void *arg, int pending __unused) { struct amdiommu_unit *unit; unit = IOMMU2AMD(arg); iommu_qi_drain_tlb_flush(AMD2IOMMU(unit)); AMDIOMMU_LOCK(unit); if (unit->x86c.inv_seq_waiters > 0) wakeup(&unit->x86c.inv_seq_waiters); AMDIOMMU_UNLOCK(unit); } int amdiommu_init_cmd(struct amdiommu_unit *unit) { uint64_t qi_sz, rv; - unit->x86c.qi_buf_maxsz = ilog2(AMDIOMMU_CMDBUF_MAX / PAGE_SIZE); + unit->x86c.qi_buf_maxsz = ilog2_local(AMDIOMMU_CMDBUF_MAX / PAGE_SIZE); unit->x86c.qi_cmd_sz = AMDIOMMU_CMD_SZ; iommu_qi_common_init(AMD2IOMMU(unit), amdiommu_qi_task); get_x86_iommu()->qi_ensure = amdiommu_cmd_ensure; get_x86_iommu()->qi_emit_wait_descr = amdiommu_cmd_emit_wait_descr; get_x86_iommu()->qi_advance_tail = amdiommu_cmd_advance_tail; get_x86_iommu()->qi_invalidate_emit = amdiommu_qi_invalidate_emit; rv = pmap_kextract((uintptr_t)unit->x86c.inv_queue); /* * See the description of the ComLen encoding for Command * buffer Base Address Register. */ - qi_sz = ilog2(unit->x86c.inv_queue_size / PAGE_SIZE) + 8; + qi_sz = ilog2_local(unit->x86c.inv_queue_size / PAGE_SIZE) + 8; rv |= qi_sz << AMDIOMMU_CMDBUF_BASE_SZSHIFT; AMDIOMMU_LOCK(unit); amdiommu_write8(unit, AMDIOMMU_CMDBUF_BASE, rv); amdiommu_enable_cmdbuf(unit); amdiommu_enable_qi_intr(AMD2IOMMU(unit)); AMDIOMMU_UNLOCK(unit); return (0); } static void amdiommu_fini_cmd_helper(struct iommu_unit *iommu) { amdiommu_disable_cmdbuf(IOMMU2AMD(iommu)); amdiommu_disable_qi_intr(iommu); } void amdiommu_fini_cmd(struct amdiommu_unit *unit) { iommu_qi_common_fini(AMD2IOMMU(unit), amdiommu_fini_cmd_helper); } diff --git a/sys/x86/iommu/amd_ctx.c b/sys/x86/iommu/amd_ctx.c index b3e85350a995..81e284373fc8 100644 --- a/sys/x86/iommu/amd_ctx.c +++ b/sys/x86/iommu/amd_ctx.c @@ -1,639 +1,639 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2024 The FreeBSD Foundation * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_AMDIOMMU_CTX, "amdiommu_ctx", "AMD IOMMU Context"); static MALLOC_DEFINE(M_AMDIOMMU_DOMAIN, "amdiommu_dom", "AMD IOMMU Domain"); static void amdiommu_unref_domain_locked(struct amdiommu_unit *unit, struct amdiommu_domain *domain); static struct amdiommu_dte * amdiommu_get_dtep(struct amdiommu_ctx *ctx) { return (&CTX2AMD(ctx)->dev_tbl[ctx->context.rid]); } void amdiommu_domain_unload_entry(struct iommu_map_entry *entry, bool free, bool cansleep) { struct amdiommu_domain *domain; struct amdiommu_unit *unit; domain = IODOM2DOM(entry->domain); unit = DOM2AMD(domain); /* * If "free" is false, then the IOTLB invalidation must be performed * synchronously. Otherwise, the caller might free the entry before * dmar_qi_task() is finished processing it. */ if (free) { AMDIOMMU_LOCK(unit); iommu_qi_invalidate_locked(&domain->iodom, entry, true); AMDIOMMU_UNLOCK(unit); } else { iommu_qi_invalidate_sync(&domain->iodom, entry->start, entry->end - entry->start, cansleep); iommu_domain_free_entry(entry, false); } } static bool amdiommu_domain_unload_emit_wait(struct amdiommu_domain *domain, struct iommu_map_entry *entry) { return (true); /* XXXKIB */ } void amdiommu_domain_unload(struct iommu_domain *iodom, struct iommu_map_entries_tailq *entries, bool cansleep) { struct amdiommu_domain *domain; struct amdiommu_unit *unit; struct iommu_map_entry *entry, *entry1; int error __diagused; domain = IODOM2DOM(iodom); unit = DOM2AMD(domain); TAILQ_FOREACH_SAFE(entry, entries, dmamap_link, entry1) { KASSERT((entry->flags & IOMMU_MAP_ENTRY_MAP) != 0, ("not mapped entry %p %p", domain, entry)); error = iodom->ops->unmap(iodom, entry, cansleep ? 
IOMMU_PGF_WAITOK : 0); KASSERT(error == 0, ("unmap %p error %d", domain, error)); } if (TAILQ_EMPTY(entries)) return; AMDIOMMU_LOCK(unit); while ((entry = TAILQ_FIRST(entries)) != NULL) { TAILQ_REMOVE(entries, entry, dmamap_link); iommu_qi_invalidate_locked(&domain->iodom, entry, amdiommu_domain_unload_emit_wait(domain, entry)); } AMDIOMMU_UNLOCK(unit); } static void amdiommu_domain_destroy(struct amdiommu_domain *domain) { struct iommu_domain *iodom; struct amdiommu_unit *unit; iodom = DOM2IODOM(domain); KASSERT(TAILQ_EMPTY(&domain->iodom.unload_entries), ("unfinished unloads %p", domain)); KASSERT(LIST_EMPTY(&iodom->contexts), ("destroying dom %p with contexts", domain)); KASSERT(domain->ctx_cnt == 0, ("destroying dom %p with ctx_cnt %d", domain, domain->ctx_cnt)); KASSERT(domain->refs == 0, ("destroying dom %p with refs %d", domain, domain->refs)); if ((domain->iodom.flags & IOMMU_DOMAIN_GAS_INITED) != 0) { AMDIOMMU_DOMAIN_LOCK(domain); iommu_gas_fini_domain(iodom); AMDIOMMU_DOMAIN_UNLOCK(domain); } if ((domain->iodom.flags & IOMMU_DOMAIN_PGTBL_INITED) != 0) { if (domain->pgtbl_obj != NULL) AMDIOMMU_DOMAIN_PGLOCK(domain); amdiommu_domain_free_pgtbl(domain); } iommu_domain_fini(iodom); unit = DOM2AMD(domain); free_unr(unit->domids, domain->domain); free(domain, M_AMDIOMMU_DOMAIN); } static iommu_gaddr_t lvl2addr(int lvl) { int x; x = IOMMU_PAGE_SHIFT + IOMMU_NPTEPGSHIFT * lvl; /* Level 6 has only 8 bits for page table index */ if (x >= NBBY * sizeof(uint64_t)) return (-1ull); return (1ull << x); } static void amdiommu_domain_init_pglvl(struct amdiommu_unit *unit, struct amdiommu_domain *domain) { iommu_gaddr_t end; int hats, i; uint64_t efr_hats; end = DOM2IODOM(domain)->end; for (i = AMDIOMMU_PGTBL_MAXLVL; i > 1; i--) { if (lvl2addr(i) >= end && lvl2addr(i - 1) < end) break; } domain->pglvl = i; efr_hats = unit->efr & AMDIOMMU_EFR_HATS_MASK; switch (efr_hats) { case AMDIOMMU_EFR_HATS_6LVL: hats = 6; break; case AMDIOMMU_EFR_HATS_5LVL: hats = 5; break; case AMDIOMMU_EFR_HATS_4LVL: hats = 4; break; default: printf("amdiommu%d: HATS %#jx (reserved) ignoring\n", unit->iommu.unit, (uintmax_t)efr_hats); return; } if (hats >= domain->pglvl) return; printf("amdiommu%d: domain %d HATS %d pglvl %d reducing to HATS\n", unit->iommu.unit, domain->domain, hats, domain->pglvl); domain->pglvl = hats; domain->iodom.end = lvl2addr(hats); } static struct amdiommu_domain * amdiommu_domain_alloc(struct amdiommu_unit *unit, bool id_mapped) { struct amdiommu_domain *domain; struct iommu_domain *iodom; int error, id; id = alloc_unr(unit->domids); if (id == -1) return (NULL); domain = malloc(sizeof(*domain), M_AMDIOMMU_DOMAIN, M_WAITOK | M_ZERO); iodom = DOM2IODOM(domain); domain->domain = id; LIST_INIT(&iodom->contexts); iommu_domain_init(AMD2IOMMU(unit), iodom, &amdiommu_domain_map_ops); domain->unit = unit; domain->iodom.end = id_mapped ?
ptoa(Maxmem) : BUS_SPACE_MAXADDR; amdiommu_domain_init_pglvl(unit, domain); iommu_gas_init_domain(DOM2IODOM(domain)); if (id_mapped) { domain->iodom.flags |= IOMMU_DOMAIN_IDMAP; } else { error = amdiommu_domain_alloc_pgtbl(domain); if (error != 0) goto fail; /* Disable local apic region access */ error = iommu_gas_reserve_region(iodom, 0xfee00000, 0xfeefffff + 1, &iodom->msi_entry); if (error != 0) goto fail; } return (domain); fail: amdiommu_domain_destroy(domain); return (NULL); } static struct amdiommu_ctx * amdiommu_ctx_alloc(struct amdiommu_domain *domain, uint16_t rid) { struct amdiommu_ctx *ctx; ctx = malloc(sizeof(*ctx), M_AMDIOMMU_CTX, M_WAITOK | M_ZERO); ctx->context.domain = DOM2IODOM(domain); ctx->context.tag = malloc(sizeof(struct bus_dma_tag_iommu), M_AMDIOMMU_CTX, M_WAITOK | M_ZERO); ctx->context.rid = rid; ctx->context.refs = 1; return (ctx); } static void amdiommu_ctx_link(struct amdiommu_ctx *ctx) { struct amdiommu_domain *domain; domain = CTX2DOM(ctx); IOMMU_ASSERT_LOCKED(domain->iodom.iommu); KASSERT(domain->refs >= domain->ctx_cnt, ("dom %p ref underflow %d %d", domain, domain->refs, domain->ctx_cnt)); domain->refs++; domain->ctx_cnt++; LIST_INSERT_HEAD(&domain->iodom.contexts, &ctx->context, link); } static void amdiommu_ctx_unlink(struct amdiommu_ctx *ctx) { struct amdiommu_domain *domain; domain = CTX2DOM(ctx); IOMMU_ASSERT_LOCKED(domain->iodom.iommu); KASSERT(domain->refs > 0, ("domain %p ctx dtr refs %d", domain, domain->refs)); KASSERT(domain->ctx_cnt >= domain->refs, ("domain %p ctx dtr refs %d ctx_cnt %d", domain, domain->refs, domain->ctx_cnt)); domain->refs--; domain->ctx_cnt--; LIST_REMOVE(&ctx->context, link); } struct amdiommu_ctx * amdiommu_find_ctx_locked(struct amdiommu_unit *unit, uint16_t rid) { struct amdiommu_domain *domain; struct iommu_ctx *ctx; AMDIOMMU_ASSERT_LOCKED(unit); LIST_FOREACH(domain, &unit->domains, link) { LIST_FOREACH(ctx, &domain->iodom.contexts, link) { if (ctx->rid == rid) return (IOCTX2CTX(ctx)); } } return (NULL); } struct amdiommu_domain * amdiommu_find_domain(struct amdiommu_unit *unit, uint16_t rid) { struct amdiommu_domain *domain; struct iommu_ctx *ctx; AMDIOMMU_LOCK(unit); LIST_FOREACH(domain, &unit->domains, link) { LIST_FOREACH(ctx, &domain->iodom.contexts, link) { if (ctx->rid == rid) break; } } AMDIOMMU_UNLOCK(unit); return (domain); } static void amdiommu_free_ctx_locked(struct amdiommu_unit *unit, struct amdiommu_ctx *ctx) { struct amdiommu_dte *dtep; struct amdiommu_domain *domain; AMDIOMMU_ASSERT_LOCKED(unit); KASSERT(ctx->context.refs >= 1, ("amdiommu %p ctx %p refs %u", unit, ctx, ctx->context.refs)); /* * If our reference is not last, only the dereference should * be performed. */ if (ctx->context.refs > 1) { ctx->context.refs--; AMDIOMMU_UNLOCK(unit); return; } KASSERT((ctx->context.flags & IOMMU_CTX_DISABLED) == 0, ("lost ref on disabled ctx %p", ctx)); /* * Otherwise, the device table entry must be cleared before * the page table is destroyed. 
*/ dtep = amdiommu_get_dtep(ctx); dtep->v = 0; atomic_thread_fence_rel(); memset(dtep, 0, sizeof(*dtep)); domain = CTX2DOM(ctx); amdiommu_qi_invalidate_ctx_locked_nowait(ctx); amdiommu_qi_invalidate_ir_locked_nowait(unit, ctx->context.rid); amdiommu_qi_invalidate_all_pages_locked_nowait(domain); amdiommu_qi_invalidate_wait_sync(AMD2IOMMU(CTX2AMD(ctx))); if (unit->irte_enabled) amdiommu_ctx_fini_irte(ctx); amdiommu_ctx_unlink(ctx); free(ctx->context.tag, M_AMDIOMMU_CTX); free(ctx, M_AMDIOMMU_CTX); amdiommu_unref_domain_locked(unit, domain); } static void amdiommu_free_ctx(struct amdiommu_ctx *ctx) { struct amdiommu_unit *unit; unit = CTX2AMD(ctx); AMDIOMMU_LOCK(unit); amdiommu_free_ctx_locked(unit, ctx); } static void amdiommu_unref_domain_locked(struct amdiommu_unit *unit, struct amdiommu_domain *domain) { AMDIOMMU_ASSERT_LOCKED(unit); KASSERT(domain->refs >= 1, ("amdiommu%d domain %p refs %u", unit->iommu.unit, domain, domain->refs)); KASSERT(domain->refs > domain->ctx_cnt, ("amdiommu%d domain %p refs %d ctx_cnt %d", unit->iommu.unit, domain, domain->refs, domain->ctx_cnt)); if (domain->refs > 1) { domain->refs--; AMDIOMMU_UNLOCK(unit); return; } LIST_REMOVE(domain, link); AMDIOMMU_UNLOCK(unit); taskqueue_drain(unit->iommu.delayed_taskqueue, &domain->iodom.unload_task); amdiommu_domain_destroy(domain); } static void dte_entry_init_one(struct amdiommu_dte *dtep, struct amdiommu_ctx *ctx, vm_page_t pgtblr, uint8_t dte, uint32_t edte) { struct amdiommu_domain *domain; struct amdiommu_unit *unit; domain = CTX2DOM(ctx); unit = DOM2AMD(domain); dtep->tv = 1; /* dtep->had not used for now */ dtep->ir = 1; dtep->iw = 1; dtep->domainid = domain->domain; dtep->pioctl = AMDIOMMU_DTE_PIOCTL_DIS; /* fill device interrupt passing hints from IVHD. */ dtep->initpass = (dte & ACPI_IVHD_INIT_PASS) != 0; dtep->eintpass = (dte & ACPI_IVHD_EINT_PASS) != 0; dtep->nmipass = (dte & ACPI_IVHD_NMI_PASS) != 0; dtep->sysmgt = (dte & ACPI_IVHD_SYSTEM_MGMT) >> 4; dtep->lint0pass = (dte & ACPI_IVHD_LINT0_PASS) != 0; dtep->lint1pass = (dte & ACPI_IVHD_LINT1_PASS) != 0; if (unit->irte_enabled) { dtep->iv = 1; dtep->i = 0; - dtep->inttablen = ilog2(unit->irte_nentries); + dtep->inttablen = ilog2_local(unit->irte_nentries); dtep->intrroot = pmap_kextract(unit->irte_x2apic ? 
(vm_offset_t)ctx->irtx2 : (vm_offset_t)ctx->irtb) >> 6; dtep->intctl = AMDIOMMU_DTE_INTCTL_MAP; } if ((DOM2IODOM(domain)->flags & IOMMU_DOMAIN_IDMAP) != 0) { dtep->pgmode = AMDIOMMU_DTE_PGMODE_1T1; } else { MPASS(domain->pglvl > 0 && domain->pglvl <= AMDIOMMU_PGTBL_MAXLVL); dtep->pgmode = domain->pglvl; dtep->ptroot = VM_PAGE_TO_PHYS(pgtblr) >> 12; } atomic_thread_fence_rel(); dtep->v = 1; } static void dte_entry_init(struct amdiommu_ctx *ctx, bool move, uint8_t dte, uint32_t edte) { struct amdiommu_dte *dtep; struct amdiommu_unit *unit; struct amdiommu_domain *domain; int i; domain = CTX2DOM(ctx); unit = DOM2AMD(domain); dtep = amdiommu_get_dtep(ctx); KASSERT(dtep->v == 0, ("amdiommu%d initializing valid dte @%p %#jx", CTX2AMD(ctx)->iommu.unit, dtep, (uintmax_t)(*(uint64_t *)dtep))); if (iommu_is_buswide_ctx(AMD2IOMMU(unit), PCI_RID2BUS(ctx->context.rid))) { MPASS(!move); for (i = 0; i <= PCI_BUSMAX; i++) { dte_entry_init_one(&dtep[i], ctx, domain->pgtblr, dte, edte); } } else { dte_entry_init_one(dtep, ctx, domain->pgtblr, dte, edte); } } struct amdiommu_ctx * amdiommu_get_ctx_for_dev(struct amdiommu_unit *unit, device_t dev, uint16_t rid, int dev_domain, bool id_mapped, bool rmrr_init, uint8_t dte, uint32_t edte) { struct amdiommu_domain *domain, *domain1; struct amdiommu_ctx *ctx, *ctx1; int bus, slot, func; if (dev != NULL) { bus = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); } else { bus = PCI_RID2BUS(rid); slot = PCI_RID2SLOT(rid); func = PCI_RID2FUNC(rid); } AMDIOMMU_LOCK(unit); KASSERT(!iommu_is_buswide_ctx(AMD2IOMMU(unit), bus) || (slot == 0 && func == 0), ("iommu%d pci%d:%d:%d get_ctx for buswide", AMD2IOMMU(unit)->unit, bus, slot, func)); ctx = amdiommu_find_ctx_locked(unit, rid); if (ctx == NULL) { /* * Perform the allocations which require sleep or have * higher chance to succeed if the sleep is allowed. */ AMDIOMMU_UNLOCK(unit); domain1 = amdiommu_domain_alloc(unit, id_mapped); if (domain1 == NULL) return (NULL); if (!id_mapped) { /* * XXXKIB IVMD seems to be less significant * and less used on AMD than RMRR on Intel. * Not implemented for now. */ } ctx1 = amdiommu_ctx_alloc(domain1, rid); amdiommu_ctx_init_irte(ctx1); AMDIOMMU_LOCK(unit); /* * Recheck the contexts, other thread might have * already allocated needed one. */ ctx = amdiommu_find_ctx_locked(unit, rid); if (ctx == NULL) { domain = domain1; ctx = ctx1; amdiommu_ctx_link(ctx); ctx->context.tag->owner = dev; iommu_device_tag_init(CTX2IOCTX(ctx), dev); LIST_INSERT_HEAD(&unit->domains, domain, link); dte_entry_init(ctx, false, dte, edte); amdiommu_qi_invalidate_ctx_locked(ctx); if (dev != NULL) { device_printf(dev, "amdiommu%d pci%d:%d:%d:%d rid %x domain %d " "%s-mapped\n", AMD2IOMMU(unit)->unit, unit->unit_dom, bus, slot, func, rid, domain->domain, id_mapped ? "id" : "re"); } } else { amdiommu_domain_destroy(domain1); /* Nothing needs to be done to destroy ctx1. 
*/ free(ctx1, M_AMDIOMMU_CTX); domain = CTX2DOM(ctx); ctx->context.refs++; /* tag referenced us */ } } else { domain = CTX2DOM(ctx); if (ctx->context.tag->owner == NULL) ctx->context.tag->owner = dev; ctx->context.refs++; /* tag referenced us */ } AMDIOMMU_UNLOCK(unit); return (ctx); } struct iommu_ctx * amdiommu_get_ctx(struct iommu_unit *iommu, device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init) { struct amdiommu_unit *unit; struct amdiommu_ctx *ret; int error; uint32_t edte; uint16_t rid1; uint8_t dte; error = amdiommu_find_unit(dev, &unit, &rid1, &dte, &edte, bootverbose); if (error != 0) return (NULL); if (AMD2IOMMU(unit) != iommu) /* XXX complain loudly */ return (NULL); ret = amdiommu_get_ctx_for_dev(unit, dev, rid1, pci_get_domain(dev), id_mapped, rmrr_init, dte, edte); return (CTX2IOCTX(ret)); } void amdiommu_free_ctx_locked_method(struct iommu_unit *iommu, struct iommu_ctx *context) { struct amdiommu_unit *unit; struct amdiommu_ctx *ctx; unit = IOMMU2AMD(iommu); ctx = IOCTX2CTX(context); amdiommu_free_ctx_locked(unit, ctx); } void amdiommu_free_ctx_method(struct iommu_ctx *context) { struct amdiommu_ctx *ctx; ctx = IOCTX2CTX(context); amdiommu_free_ctx(ctx); } diff --git a/sys/x86/iommu/amd_drv.c b/sys/x86/iommu/amd_drv.c index 395cefc65caa..5db7a5225538 100644 --- a/sys/x86/iommu/amd_drv.c +++ b/sys/x86/iommu/amd_drv.c @@ -1,1205 +1,1205 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2024 The FreeBSD Foundation * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_acpi.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" #include #include #include #include #include #include #include #include #include static int amdiommu_enable = 0; /* * All enumerated AMD IOMMU units. * Access is unlocked, the list is not modified after early * single-threaded startup. 
*/ static TAILQ_HEAD(, amdiommu_unit) amdiommu_units = TAILQ_HEAD_INITIALIZER(amdiommu_units); static u_int ivrs_info_to_unit_id(UINT32 info) { return ((info & ACPI_IVHD_UNIT_ID_MASK) >> 8); } typedef bool (*amdiommu_itercc_t)(void *, void *); typedef bool (*amdiommu_iter40_t)(ACPI_IVRS_HARDWARE2 *, void *); typedef bool (*amdiommu_iter11_t)(ACPI_IVRS_HARDWARE2 *, void *); typedef bool (*amdiommu_iter10_t)(ACPI_IVRS_HARDWARE1 *, void *); static bool amdiommu_ivrs_iterate_tbl_typed(amdiommu_itercc_t iter, void *arg, int type, ACPI_TABLE_IVRS *ivrs_tbl) { char *ptr, *ptrend; bool done; done = false; ptr = (char *)ivrs_tbl + sizeof(*ivrs_tbl); ptrend = (char *)ivrs_tbl + ivrs_tbl->Header.Length; for (;;) { ACPI_IVRS_HEADER *ivrsh; if (ptr >= ptrend) break; ivrsh = (ACPI_IVRS_HEADER *)ptr; if (ivrsh->Length <= 0) { printf("amdiommu_iterate_tbl: corrupted IVRS table, " "length %d\n", ivrsh->Length); break; } ptr += ivrsh->Length; if (ivrsh->Type == type) { done = iter((void *)ivrsh, arg); if (done) break; } } return (done); } /* * Walk over IVRS, calling callback iterators following priority: * 0x40, then 0x11, then 0x10 subtable. First iterator returning true * ends the walk. * Returns true if any iterator returned true, otherwise false. */ static bool amdiommu_ivrs_iterate_tbl(amdiommu_iter40_t iter40, amdiommu_iter11_t iter11, amdiommu_iter10_t iter10, void *arg) { ACPI_TABLE_IVRS *ivrs_tbl; ACPI_STATUS status; bool done; status = AcpiGetTable(ACPI_SIG_IVRS, 1, (ACPI_TABLE_HEADER **)&ivrs_tbl); if (ACPI_FAILURE(status)) return (false); done = false; if (iter40 != NULL) done = amdiommu_ivrs_iterate_tbl_typed( (amdiommu_itercc_t)iter40, arg, ACPI_IVRS_TYPE_HARDWARE3, ivrs_tbl); if (!done && iter11 != NULL) done = amdiommu_ivrs_iterate_tbl_typed( (amdiommu_itercc_t)iter11, arg, ACPI_IVRS_TYPE_HARDWARE2, ivrs_tbl); if (!done && iter10 != NULL) done = amdiommu_ivrs_iterate_tbl_typed( (amdiommu_itercc_t)iter10, arg, ACPI_IVRS_TYPE_HARDWARE1, ivrs_tbl); AcpiPutTable((ACPI_TABLE_HEADER *)ivrs_tbl); return (done); } struct ivhd_lookup_data { struct amdiommu_unit *sc; uint16_t devid; }; static bool ivrs_lookup_ivhd_0x40(ACPI_IVRS_HARDWARE2 *h2, void *arg) { struct ivhd_lookup_data *ildp; KASSERT(h2->Header.Type == ACPI_IVRS_TYPE_HARDWARE2 || h2->Header.Type == ACPI_IVRS_TYPE_HARDWARE3, ("Misparsed IVHD, h2 type %#x", h2->Header.Type)); ildp = arg; if (h2->Header.DeviceId != ildp->devid) return (false); ildp->sc->unit_dom = h2->PciSegmentGroup; ildp->sc->iommu.unit = ivrs_info_to_unit_id(h2->Info); ildp->sc->efr = h2->EfrRegisterImage; return (true); } static bool ivrs_lookup_ivhd_0x10(ACPI_IVRS_HARDWARE1 *h1, void *arg) { struct ivhd_lookup_data *ildp; KASSERT(h1->Header.Type == ACPI_IVRS_TYPE_HARDWARE1, ("Misparsed IVHD, h1 type %#x", h1->Header.Type)); ildp = arg; if (h1->Header.DeviceId != ildp->devid) return (false); ildp->sc->unit_dom = h1->PciSegmentGroup; ildp->sc->iommu.unit = ivrs_info_to_unit_id(h1->Info); return (true); } static u_int amdiommu_devtbl_sz(struct amdiommu_unit *sc __unused) { return (sizeof(struct amdiommu_dte) * (1 << 16)); } static void amdiommu_free_dev_tbl(struct amdiommu_unit *sc) { u_int devtbl_sz; devtbl_sz = amdiommu_devtbl_sz(sc); pmap_qremove((vm_offset_t)sc->dev_tbl, atop(devtbl_sz)); kva_free((vm_offset_t)sc->dev_tbl, devtbl_sz); sc->dev_tbl = NULL; vm_object_deallocate(sc->devtbl_obj); sc->devtbl_obj = NULL; } static int amdiommu_create_dev_tbl(struct amdiommu_unit *sc) { vm_offset_t seg_vaddr; u_int devtbl_sz, dom, i, reclaimno, segnum_log, segnum, seg_sz; int 
error; segnum_log = (sc->efr & AMDIOMMU_EFR_DEVTBLSEG_MASK) >> AMDIOMMU_EFR_DEVTBLSEG_SHIFT; segnum = 1 << segnum_log; devtbl_sz = amdiommu_devtbl_sz(sc); seg_sz = devtbl_sz / segnum; sc->devtbl_obj = vm_pager_allocate(OBJT_PHYS, NULL, atop(devtbl_sz), VM_PROT_ALL, 0, NULL); if (bus_get_domain(sc->iommu.dev, &dom) == 0) sc->devtbl_obj->domain.dr_policy = DOMAINSET_PREF(dom); sc->hw_ctrl &= ~AMDIOMMU_CTRL_DEVTABSEG_MASK; - sc->hw_ctrl |= (uint64_t)segnum_log << ilog2(AMDIOMMU_CTRL_DEVTABSEG_2); + sc->hw_ctrl |= (uint64_t)segnum_log << 34; /* ilog2(AMDIOMMU_CTRL_DEVTABSEG_2) */ sc->hw_ctrl |= AMDIOMMU_CTRL_COHERENT; amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl); seg_vaddr = kva_alloc(devtbl_sz); if (seg_vaddr == 0) return (ENOMEM); sc->dev_tbl = (void *)seg_vaddr; for (i = 0; i < segnum; i++) { vm_page_t m; uint64_t rval; u_int reg; for (reclaimno = 0; reclaimno < 3; reclaimno++) { VM_OBJECT_WLOCK(sc->devtbl_obj); m = vm_page_alloc_contig(sc->devtbl_obj, i * atop(seg_sz), VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY, atop(seg_sz), 0, ~0ul, IOMMU_PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); VM_OBJECT_WUNLOCK(sc->devtbl_obj); if (m != NULL) break; error = vm_page_reclaim_contig(VM_ALLOC_NORMAL, atop(seg_sz), 0, ~0ul, IOMMU_PAGE_SIZE, 0); if (error != 0) vm_wait(sc->devtbl_obj); } if (m == NULL) { amdiommu_free_dev_tbl(sc); return (ENOMEM); } rval = VM_PAGE_TO_PHYS(m) | (atop(seg_sz) - 1); for (u_int j = 0; j < atop(seg_sz); j++, seg_vaddr += PAGE_SIZE, m++) { pmap_zero_page(m); pmap_qenter(seg_vaddr, &m, 1); } reg = i == 0 ? AMDIOMMU_DEVTAB_BASE : AMDIOMMU_DEVTAB_S1_BASE + i - 1; amdiommu_write8(sc, reg, rval); } return (0); } static int amdiommu_cmd_event_intr(void *arg) { struct amdiommu_unit *unit; uint64_t status; unit = arg; status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS); if ((status & AMDIOMMU_CMDEVS_COMWAITINT) != 0) { amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS, AMDIOMMU_CMDEVS_COMWAITINT); taskqueue_enqueue(unit->x86c.qi_taskqueue, &unit->x86c.qi_task); } if ((status & (AMDIOMMU_CMDEVS_EVLOGINT | AMDIOMMU_CMDEVS_EVOVRFLW)) != 0) amdiommu_event_intr(unit, status); return (FILTER_HANDLED); } static int amdiommu_setup_intr(struct amdiommu_unit *sc) { int error, msi_count, msix_count; msi_count = pci_msi_count(sc->iommu.dev); msix_count = pci_msix_count(sc->iommu.dev); if (msi_count == 0 && msix_count == 0) { device_printf(sc->iommu.dev, "needs MSI-class intr\n"); return (ENXIO); } #if 0 /* * XXXKIB how MSI-X is supposed to be organized for BAR-less * function? Practically available hardware implements only * one IOMMU unit per function, and uses MSI. */ if (msix_count > 0) { sc->msix_table = bus_alloc_resource_any(sc->iommu.dev, SYS_RES_MEMORY, &sc->msix_tab_rid, RF_ACTIVE); if (sc->msix_table == NULL) return (ENXIO); if (sc->msix_pba_rid != sc->msix_tab_rid) { /* Separate BAR for PBA */ sc->msix_pba = bus_alloc_resource_any(sc->iommu.dev, SYS_RES_MEMORY, &sc->msix_pba_rid, RF_ACTIVE); if (sc->msix_pba == NULL) { bus_release_resource(sc->iommu.dev, SYS_RES_MEMORY, &sc->msix_tab_rid, sc->msix_table); return (ENXIO); } } } #endif error = ENXIO; if (msix_count > 0) { error = pci_alloc_msix(sc->iommu.dev, &msix_count); if (error == 0) sc->numirqs = msix_count; } if (error != 0 && msi_count > 0) { error = pci_alloc_msi(sc->iommu.dev, &msi_count); if (error == 0) sc->numirqs = msi_count; } if (error != 0) { device_printf(sc->iommu.dev, "Failed to allocate MSI/MSI-x (%d)\n", error); return (ENXIO); } /* * XXXKIB Spec states that MISC0.MsiNum must be zero for IOMMU * using MSI interrupts. 
But at least one BIOS programmed '2' * there, making driver use wrong rid and causing * command/event interrupt ignored as stray. Try to fix it * with dirty force by assuming MsiNum is zero for MSI. */ sc->irq_cmdev_rid = 1; if (msix_count > 0) { sc->irq_cmdev_rid += pci_read_config(sc->iommu.dev, sc->seccap_reg + PCIR_AMDIOMMU_MISC0, 4) & PCIM_AMDIOMMU_MISC0_MSINUM_MASK; } sc->irq_cmdev = bus_alloc_resource_any(sc->iommu.dev, SYS_RES_IRQ, &sc->irq_cmdev_rid, RF_SHAREABLE | RF_ACTIVE); if (sc->irq_cmdev == NULL) { device_printf(sc->iommu.dev, "unable to map CMD/EV interrupt\n"); return (ENXIO); } error = bus_setup_intr(sc->iommu.dev, sc->irq_cmdev, INTR_TYPE_MISC, amdiommu_cmd_event_intr, NULL, sc, &sc->irq_cmdev_cookie); if (error != 0) { device_printf(sc->iommu.dev, "unable to setup interrupt (%d)\n", error); return (ENXIO); } bus_describe_intr(sc->iommu.dev, sc->irq_cmdev, sc->irq_cmdev_cookie, "cmdev"); if (x2apic_mode) { AMDIOMMU_LOCK(sc); sc->hw_ctrl |= AMDIOMMU_CTRL_GA_EN | AMDIOMMU_CTRL_XT_EN; amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl); // XXXKIB AMDIOMMU_CTRL_INTCAPXT_EN and program x2APIC_CTRL AMDIOMMU_UNLOCK(sc); } return (0); } static int amdiommu_probe(device_t dev) { int seccap_reg; int error; uint32_t cap_h, cap_type, cap_rev; if (acpi_disabled("amdiommu")) return (ENXIO); TUNABLE_INT_FETCH("hw.amdiommu.enable", &amdiommu_enable); if (!amdiommu_enable) return (ENXIO); if (pci_get_class(dev) != PCIC_BASEPERIPH || pci_get_subclass(dev) != PCIS_BASEPERIPH_IOMMU) return (ENXIO); error = pci_find_cap(dev, PCIY_SECDEV, &seccap_reg); if (error != 0 || seccap_reg == 0) return (ENXIO); cap_h = pci_read_config(dev, seccap_reg + PCIR_AMDIOMMU_CAP_HEADER, 4); cap_type = cap_h & PCIM_AMDIOMMU_CAP_TYPE_MASK; cap_rev = cap_h & PCIM_AMDIOMMU_CAP_REV_MASK; if (cap_type != PCIM_AMDIOMMU_CAP_TYPE_VAL && cap_rev != PCIM_AMDIOMMU_CAP_REV_VAL) return (ENXIO); device_set_desc(dev, "DMA remap"); return (BUS_PROBE_SPECIFIC); } static int amdiommu_attach(device_t dev) { struct amdiommu_unit *sc; struct ivhd_lookup_data ild; int error; uint32_t base_low, base_high; bool res; sc = device_get_softc(dev); sc->iommu.dev = dev; error = pci_find_cap(dev, PCIY_SECDEV, &sc->seccap_reg); if (error != 0 || sc->seccap_reg == 0) return (ENXIO); base_low = pci_read_config(dev, sc->seccap_reg + PCIR_AMDIOMMU_BASE_LOW, 4); base_high = pci_read_config(dev, sc->seccap_reg + PCIR_AMDIOMMU_BASE_HIGH, 4); sc->mmio_base = (base_low & PCIM_AMDIOMMU_BASE_LOW_ADDRM) | ((uint64_t)base_high << 32); sc->device_id = pci_get_rid(dev); ild.sc = sc; ild.devid = sc->device_id; res = amdiommu_ivrs_iterate_tbl(ivrs_lookup_ivhd_0x40, ivrs_lookup_ivhd_0x40, ivrs_lookup_ivhd_0x10, &ild); if (!res) { device_printf(dev, "Cannot find IVHD\n"); return (ENXIO); } mtx_init(&sc->iommu.lock, "amdihw", NULL, MTX_DEF); sc->domids = new_unrhdr(0, 0xffff, &sc->iommu.lock); LIST_INIT(&sc->domains); sysctl_ctx_init(&sc->iommu.sysctl_ctx); sc->mmio_sz = ((sc->efr & AMDIOMMU_EFR_PC_SUP) != 0 ? 
512 : 16) * 1024; sc->mmio_rid = AMDIOMMU_RID; error = bus_set_resource(dev, SYS_RES_MEMORY, AMDIOMMU_RID, sc->mmio_base, sc->mmio_sz); if (error != 0) { device_printf(dev, "bus_set_resource %#jx-%#jx failed, error %d\n", (uintmax_t)sc->mmio_base, (uintmax_t)sc->mmio_base + sc->mmio_sz, error); error = ENXIO; goto errout1; } sc->mmio_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &sc->mmio_rid, sc->mmio_base, sc->mmio_base + sc->mmio_sz - 1, sc->mmio_sz, RF_ALLOCATED | RF_ACTIVE | RF_SHAREABLE); if (sc->mmio_res == NULL) { device_printf(dev, "bus_alloc_resource %#jx-%#jx failed\n", (uintmax_t)sc->mmio_base, (uintmax_t)sc->mmio_base + sc->mmio_sz); error = ENXIO; goto errout2; } sc->hw_ctrl = amdiommu_read8(sc, AMDIOMMU_CTRL); if (bootverbose) device_printf(dev, "ctrl reg %#jx\n", (uintmax_t)sc->hw_ctrl); if ((sc->hw_ctrl & AMDIOMMU_CTRL_EN) != 0) { device_printf(dev, "CTRL_EN is set, bailing out\n"); error = EBUSY; goto errout2; } iommu_high = BUS_SPACE_MAXADDR; error = amdiommu_create_dev_tbl(sc); if (error != 0) goto errout3; error = amdiommu_init_cmd(sc); if (error != 0) goto errout4; error = amdiommu_init_event(sc); if (error != 0) goto errout5; error = amdiommu_setup_intr(sc); if (error != 0) goto errout6; error = iommu_init_busdma(AMD2IOMMU(sc)); if (error != 0) goto errout7; error = amdiommu_init_irt(sc); if (error != 0) goto errout8; /* * Unlike DMAR, AMD IOMMU does not process command queue * unless IOMMU is enabled. But since non-present devtab * entry makes IOMMU ignore transactions from corresponding * initiator, de-facto IOMMU operations are disabled for the * DMA and intr remapping. */ AMDIOMMU_LOCK(sc); sc->hw_ctrl |= AMDIOMMU_CTRL_EN; amdiommu_write8(sc, AMDIOMMU_CTRL, sc->hw_ctrl); if (bootverbose) { printf("amdiommu%d: enabled translation\n", AMD2IOMMU(sc)->unit); } AMDIOMMU_UNLOCK(sc); TAILQ_INSERT_TAIL(&amdiommu_units, sc, unit_next); return (0); errout8: iommu_fini_busdma(&sc->iommu); errout7: pci_release_msi(dev); errout6: amdiommu_fini_event(sc); errout5: amdiommu_fini_cmd(sc); errout4: amdiommu_free_dev_tbl(sc); errout3: bus_release_resource(dev, SYS_RES_MEMORY, sc->mmio_rid, sc->mmio_res); errout2: bus_delete_resource(dev, SYS_RES_MEMORY, sc->mmio_rid); errout1: sysctl_ctx_free(&sc->iommu.sysctl_ctx); delete_unrhdr(sc->domids); mtx_destroy(&sc->iommu.lock); return (error); } static int amdiommu_detach(device_t dev) { return (EBUSY); } static int amdiommu_suspend(device_t dev) { /* XXXKIB */ return (0); } static int amdiommu_resume(device_t dev) { /* XXXKIB */ return (0); } static device_method_t amdiommu_methods[] = { DEVMETHOD(device_probe, amdiommu_probe), DEVMETHOD(device_attach, amdiommu_attach), DEVMETHOD(device_detach, amdiommu_detach), DEVMETHOD(device_suspend, amdiommu_suspend), DEVMETHOD(device_resume, amdiommu_resume), DEVMETHOD_END }; static driver_t amdiommu_driver = { "amdiommu", amdiommu_methods, sizeof(struct amdiommu_unit), }; EARLY_DRIVER_MODULE(amdiommu, pci, amdiommu_driver, 0, 0, BUS_PASS_SUPPORTDEV); MODULE_DEPEND(amdiommu, pci, 1, 1, 1); static struct amdiommu_unit * amdiommu_unit_by_device_id(u_int pci_seg, u_int device_id) { struct amdiommu_unit *unit; TAILQ_FOREACH(unit, &amdiommu_units, unit_next) { if (unit->unit_dom == pci_seg && unit->device_id == device_id) return (unit); } return (NULL); } struct ivhd_find_unit { u_int domain; uintptr_t rid; int devno; enum { IFU_DEV_PCI, IFU_DEV_IOAPIC, IFU_DEV_HPET, } type; u_int device_id; uint16_t rid_real; uint8_t dte; uint32_t edte; }; static bool amdiommu_find_unit_scan_ivrs(ACPI_IVRS_DE_HEADER 
*d, size_t tlen, struct ivhd_find_unit *ifu) { char *db, *de; size_t len; for (de = (char *)d + tlen; (char *)d < de; d = (ACPI_IVRS_DE_HEADER *)(db + len)) { db = (char *)d; if (d->Type == ACPI_IVRS_TYPE_PAD4) { len = sizeof(ACPI_IVRS_DEVICE4); } else if (d->Type == ACPI_IVRS_TYPE_ALL) { ACPI_IVRS_DEVICE4 *d4; d4 = (ACPI_IVRS_DEVICE4 *)db; len = sizeof(*d4); ifu->dte = d4->Header.DataSetting; } else if (d->Type == ACPI_IVRS_TYPE_SELECT) { ACPI_IVRS_DEVICE4 *d4; d4 = (ACPI_IVRS_DEVICE4 *)db; if (d4->Header.Id == ifu->rid) { ifu->dte = d4->Header.DataSetting; ifu->rid_real = ifu->rid; return (true); } len = sizeof(*d4); } else if (d->Type == ACPI_IVRS_TYPE_START) { ACPI_IVRS_DEVICE4 *d4, *d4n; d4 = (ACPI_IVRS_DEVICE4 *)db; d4n = d4 + 1; if (d4n->Header.Type != ACPI_IVRS_TYPE_END) { printf("IVRS dev4 start not followed by END " "(%#x)\n", d4n->Header.Type); return (false); } if (d4->Header.Id <= ifu->rid && ifu->rid <= d4n->Header.Id) { ifu->dte = d4->Header.DataSetting; ifu->rid_real = ifu->rid; return (true); } len = 2 * sizeof(*d4); } else if (d->Type == ACPI_IVRS_TYPE_PAD8) { len = sizeof(ACPI_IVRS_DEVICE8A); } else if (d->Type == ACPI_IVRS_TYPE_ALIAS_SELECT) { ACPI_IVRS_DEVICE8A *d8a; d8a = (ACPI_IVRS_DEVICE8A *)db; if (d8a->Header.Id == ifu->rid) { ifu->dte = d8a->Header.DataSetting; ifu->rid_real = d8a->UsedId; return (true); } len = sizeof(*d8a); } else if (d->Type == ACPI_IVRS_TYPE_ALIAS_START) { ACPI_IVRS_DEVICE8A *d8a; ACPI_IVRS_DEVICE4 *d4; d8a = (ACPI_IVRS_DEVICE8A *)db; d4 = (ACPI_IVRS_DEVICE4 *)(d8a + 1); if (d4->Header.Type != ACPI_IVRS_TYPE_END) { printf("IVRS alias start not followed by END " "(%#x)\n", d4->Header.Type); return (false); } if (d8a->Header.Id <= ifu->rid && ifu->rid <= d4->Header.Id) { ifu->dte = d8a->Header.DataSetting; ifu->rid_real = d8a->UsedId; return (true); } len = sizeof(*d8a) + sizeof(*d4); } else if (d->Type == ACPI_IVRS_TYPE_EXT_SELECT) { ACPI_IVRS_DEVICE8B *d8b; d8b = (ACPI_IVRS_DEVICE8B *)db; if (d8b->Header.Id == ifu->rid) { ifu->dte = d8b->Header.DataSetting; ifu->rid_real = ifu->rid; ifu->edte = d8b->ExtendedData; return (true); } len = sizeof(*d8b); } else if (d->Type == ACPI_IVRS_TYPE_EXT_START) { ACPI_IVRS_DEVICE8B *d8b; ACPI_IVRS_DEVICE4 *d4; d8b = (ACPI_IVRS_DEVICE8B *)db; d4 = (ACPI_IVRS_DEVICE4 *)(db + sizeof(*d8b)); if (d4->Header.Type != ACPI_IVRS_TYPE_END) { printf("IVRS ext start not followed by END " "(%#x)\n", d4->Header.Type); return (false); } if (d8b->Header.Id <= ifu->rid && ifu->rid <= d4->Header.Id) { ifu->dte = d8b->Header.DataSetting; ifu->rid_real = ifu->rid; ifu->edte = d8b->ExtendedData; return (true); } len = sizeof(*d8b) + sizeof(*d4); } else if (d->Type == ACPI_IVRS_TYPE_SPECIAL) { ACPI_IVRS_DEVICE8C *d8c; d8c = (ACPI_IVRS_DEVICE8C *)db; if (((ifu->type == IFU_DEV_IOAPIC && d8c->Variety == ACPI_IVHD_IOAPIC) || (ifu->type == IFU_DEV_HPET && d8c->Variety == ACPI_IVHD_HPET)) && ifu->devno == d8c->Handle) { ifu->dte = d8c->Header.DataSetting; ifu->rid_real = d8c->UsedId; return (true); } len = sizeof(*d8c); } else if (d->Type == ACPI_IVRS_TYPE_HID) { ACPI_IVRS_DEVICE_HID *dh; dh = (ACPI_IVRS_DEVICE_HID *)db; len = sizeof(*dh) + dh->UidLength; /* XXXKIB */ } else { #if 0 printf("amdiommu: unknown IVRS device entry type %#x\n", d->Type); #endif if (d->Type <= 63) len = sizeof(ACPI_IVRS_DEVICE4); else if (d->Type <= 127) len = sizeof(ACPI_IVRS_DEVICE8A); else { printf("amdiommu: abort, cannot " "advance iterator, item type %#x\n", d->Type); return (false); } } } return (false); } static bool
amdiommu_find_unit_scan_0x11(ACPI_IVRS_HARDWARE2 *ivrs, void *arg) { struct ivhd_find_unit *ifu = arg; ACPI_IVRS_DE_HEADER *d; bool res; KASSERT(ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE2 || ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE3, ("Misparsed IVHD h2, ivrs type %#x", ivrs->Header.Type)); if (ifu->domain != ivrs->PciSegmentGroup) return (false); d = (ACPI_IVRS_DE_HEADER *)(ivrs + 1); res = amdiommu_find_unit_scan_ivrs(d, ivrs->Header.Length, ifu); if (res) ifu->device_id = ivrs->Header.DeviceId; return (res); } static bool amdiommu_find_unit_scan_0x10(ACPI_IVRS_HARDWARE1 *ivrs, void *arg) { struct ivhd_find_unit *ifu = arg; ACPI_IVRS_DE_HEADER *d; bool res; KASSERT(ivrs->Header.Type == ACPI_IVRS_TYPE_HARDWARE1, ("Misparsed IVHD h1, ivrs type %#x", ivrs->Header.Type)); if (ifu->domain != ivrs->PciSegmentGroup) return (false); d = (ACPI_IVRS_DE_HEADER *)(ivrs + 1); res = amdiommu_find_unit_scan_ivrs(d, ivrs->Header.Length, ifu); if (res) ifu->device_id = ivrs->Header.DeviceId; return (res); } static void amdiommu_dev_prop_dtr(device_t dev, const char *name, void *val, void *dtr_ctx) { free(val, M_DEVBUF); } static int * amdiommu_dev_fetch_flagsp(struct amdiommu_unit *unit, device_t dev) { int *flagsp, error; bus_topo_assert(); error = device_get_prop(dev, device_get_nameunit(unit->iommu.dev), (void **)&flagsp); if (error == ENOENT) { flagsp = malloc(sizeof(int), M_DEVBUF, M_WAITOK | M_ZERO); device_set_prop(dev, device_get_nameunit(unit->iommu.dev), flagsp, amdiommu_dev_prop_dtr, unit); } return (flagsp); } static int amdiommu_get_dev_prop_flags(struct amdiommu_unit *unit, device_t dev) { int *flagsp, flags; bus_topo_lock(); flagsp = amdiommu_dev_fetch_flagsp(unit, dev); flags = *flagsp; bus_topo_unlock(); return (flags); } static void amdiommu_set_dev_prop_flags(struct amdiommu_unit *unit, device_t dev, int flag) { int *flagsp; bus_topo_lock(); flagsp = amdiommu_dev_fetch_flagsp(unit, dev); *flagsp |= flag; bus_topo_unlock(); } int amdiommu_find_unit(device_t dev, struct amdiommu_unit **unitp, uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose) { struct ivhd_find_unit ifu; struct amdiommu_unit *unit; int error, flags; bool res; if (device_get_devclass(device_get_parent(dev)) != devclass_find("pci")) return (ENXIO); bzero(&ifu, sizeof(ifu)); ifu.type = IFU_DEV_PCI; error = pci_get_id(dev, PCI_ID_RID, &ifu.rid); if (error != 0) { if (verbose) device_printf(dev, "amdiommu cannot get rid, error %d\n", error); return (ENXIO); } ifu.domain = pci_get_domain(dev); res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu); if (!res) { if (verbose) device_printf(dev, "(%#06x:%#06x) amdiommu cannot match rid in IVHD\n", ifu.domain, (unsigned)ifu.rid); return (ENXIO); } unit = amdiommu_unit_by_device_id(ifu.domain, ifu.device_id); if (unit == NULL) { if (verbose) device_printf(dev, "(%#06x:%#06x) amdiommu cannot find unit\n", ifu.domain, (unsigned)ifu.rid); return (ENXIO); } *unitp = unit; iommu_device_set_iommu_prop(dev, unit->iommu.dev); if (ridp != NULL) *ridp = ifu.rid_real; if (dtep != NULL) *dtep = ifu.dte; if (edtep != NULL) *edtep = ifu.edte; if (verbose) { flags = amdiommu_get_dev_prop_flags(unit, dev); if ((flags & AMDIOMMU_DEV_REPORTED) == 0) { amdiommu_set_dev_prop_flags(unit, dev, AMDIOMMU_DEV_REPORTED); device_printf(dev, "amdiommu%d " "initiator rid %#06x dte %#x edte %#x\n", unit->iommu.unit, ifu.rid_real, ifu.dte, ifu.edte); } } return (0); } int amdiommu_find_unit_for_ioapic(int apic_id, struct 
amdiommu_unit **unitp, uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose) { struct ivhd_find_unit ifu; struct amdiommu_unit *unit; device_t apic_dev; bool res; bzero(&ifu, sizeof(ifu)); ifu.type = IFU_DEV_IOAPIC; ifu.devno = apic_id; ifu.rid = -1; res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu); if (!res) { if (verbose) printf("amdiommu cannot match ioapic no %d in IVHD\n", apic_id); return (ENXIO); } unit = amdiommu_unit_by_device_id(0, ifu.device_id); apic_dev = ioapic_get_dev(apic_id); if (apic_dev != NULL) iommu_device_set_iommu_prop(apic_dev, unit->iommu.dev); if (unit == NULL) { if (verbose) printf("amdiommu cannot find unit by dev id %#x\n", ifu.device_id); return (ENXIO); } *unitp = unit; if (ridp != NULL) *ridp = ifu.rid_real; if (dtep != NULL) *dtep = ifu.dte; if (edtep != NULL) *edtep = ifu.edte; if (verbose) { printf("amdiommu%d IOAPIC %d " "initiator rid %#06x dte %#x edte %#x\n", unit->iommu.unit, apic_id, ifu.rid_real, ifu.dte, ifu.edte); } return (0); } int amdiommu_find_unit_for_hpet(device_t hpet, struct amdiommu_unit **unitp, uint16_t *ridp, uint8_t *dtep, uint32_t *edtep, bool verbose) { struct ivhd_find_unit ifu; struct amdiommu_unit *unit; int hpet_no; bool res; hpet_no = hpet_get_uid(hpet); bzero(&ifu, sizeof(ifu)); ifu.type = IFU_DEV_HPET; ifu.devno = hpet_no; ifu.rid = -1; res = amdiommu_ivrs_iterate_tbl(amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x11, amdiommu_find_unit_scan_0x10, &ifu); if (!res) { printf("amdiommu cannot match hpet no %d in IVHD\n", hpet_no); return (ENXIO); } unit = amdiommu_unit_by_device_id(0, ifu.device_id); if (unit == NULL) { if (verbose) printf("amdiommu cannot find unit id %d\n", hpet_no); return (ENXIO); } *unitp = unit; iommu_device_set_iommu_prop(hpet, unit->iommu.dev); if (ridp != NULL) *ridp = ifu.rid_real; if (dtep != NULL) *dtep = ifu.dte; if (edtep != NULL) *edtep = ifu.edte; if (verbose) { printf("amdiommu%d HPET no %d " "initiator rid %#06x dte %#x edte %#x\n", unit->iommu.unit, hpet_no, ifu.rid_real, ifu.dte, ifu.edte); } return (0); } static struct iommu_unit * amdiommu_find_method(device_t dev, bool verbose) { struct amdiommu_unit *unit; int error; uint32_t edte; uint16_t rid; uint8_t dte; error = amdiommu_find_unit(dev, &unit, &rid, &dte, &edte, verbose); if (error != 0) { if (verbose) device_printf(dev, "cannot find amdiommu unit, error %d\n", error); return (NULL); } return (&unit->iommu); } static struct x86_unit_common * amdiommu_get_x86_common(struct iommu_unit *unit) { struct amdiommu_unit *iommu; iommu = IOMMU2AMD(unit); return (&iommu->x86c); } static void amdiommu_unit_pre_instantiate_ctx(struct iommu_unit *unit) { } static struct x86_iommu amd_x86_iommu = { .get_x86_common = amdiommu_get_x86_common, .unit_pre_instantiate_ctx = amdiommu_unit_pre_instantiate_ctx, .find = amdiommu_find_method, .domain_unload_entry = amdiommu_domain_unload_entry, .domain_unload = amdiommu_domain_unload, .get_ctx = amdiommu_get_ctx, .free_ctx_locked = amdiommu_free_ctx_locked_method, .free_ctx = amdiommu_free_ctx_method, .alloc_msi_intr = amdiommu_alloc_msi_intr, .map_msi_intr = amdiommu_map_msi_intr, .unmap_msi_intr = amdiommu_unmap_msi_intr, .map_ioapic_intr = amdiommu_map_ioapic_intr, .unmap_ioapic_intr = amdiommu_unmap_ioapic_intr, }; static void x86_iommu_set_amd(void *arg __unused) { if (cpu_vendor_id == CPU_VENDOR_AMD) set_x86_iommu(&amd_x86_iommu); } SYSINIT(x86_iommu, SI_SUB_TUNABLES, SI_ORDER_ANY, x86_iommu_set_amd, NULL); 
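/*
 * Editor's illustration, not part of the patch: the +/- hunks in this diff
 * all replace the driver-local ilog2() with ilog2_local() where power-of-two
 * sizes are encoded into register fields (command buffer ComLen, event log
 * length code, interrupt table length, device-table segmentation).  The
 * standalone userland sketch below shows the assumed contract of that helper
 * (floor(log2(x)) for a power-of-two x) and the two size encodings visible in
 * this diff; ilog2_local() itself is defined elsewhere in the tree, and the
 * EX_* constants here are illustrative stand-ins, not kernel values.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define	EX_PAGE_SIZE	4096u	/* illustrative, matches amd64 PAGE_SIZE */
#define	EX_EVNTLOG_MIN	256u	/* illustrative stand-in for AMDIOMMU_EVNTLOG_MIN */

/* Assumed contract: floor(log2(x)); callers only pass powers of two. */
static unsigned
ilog2_local_sketch(uint64_t x)
{
	unsigned r;

	assert(x != 0);
	for (r = 0; (x >>= 1) != 0; r++)
		;
	return (r);
}

int
main(void)
{
	uint64_t cmdbuf_sz = 64 * 1024;		/* example command buffer size */
	uint64_t evntlog_entries = 512;		/* example event log entry count */

	/*
	 * Command Buffer Base register: ComLen is encoded as
	 * ilog2(buffer size in pages) + 8, as in amdiommu_init_cmd() above.
	 */
	printf("ComLen code: %u\n",
	    ilog2_local_sketch(cmdbuf_sz / EX_PAGE_SIZE) + 8);

	/*
	 * Event Log Base register: the length code is 0x8 plus ilog2 of the
	 * entry count relative to the minimum, as in amdiommu_init_event().
	 */
	printf("EventLen code: %u\n",
	    8 + ilog2_local_sketch(evntlog_entries / EX_EVNTLOG_MIN));
	return (0);
}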
#ifdef DDB #include #include static void amdiommu_print_domain(struct amdiommu_domain *domain, bool show_mappings) { struct iommu_domain *iodom; iodom = DOM2IODOM(domain); #if 0 db_printf( " @%p dom %d mgaw %d agaw %d pglvl %d end %jx refs %d\n" " ctx_cnt %d flags %x pgobj %p map_ents %u\n", domain, domain->domain, domain->mgaw, domain->agaw, domain->pglvl, (uintmax_t)domain->iodom.end, domain->refs, domain->ctx_cnt, domain->iodom.flags, domain->pgtbl_obj, domain->iodom.entries_cnt); #endif iommu_db_domain_print_contexts(iodom); if (show_mappings) iommu_db_domain_print_mappings(iodom); } static void amdiommu_print_one(struct amdiommu_unit *unit, bool show_domains, bool show_mappings, bool show_cmdq) { struct amdiommu_domain *domain; struct amdiommu_cmd_generic *cp; u_int cmd_head, cmd_tail, ci; cmd_head = amdiommu_read4(unit, AMDIOMMU_CMDBUF_HEAD); cmd_tail = amdiommu_read4(unit, AMDIOMMU_CMDBUF_TAIL); db_printf("amdiommu%d at %p, mmio at %#jx/sz %#jx\n", unit->iommu.unit, unit, (uintmax_t)unit->mmio_base, (uintmax_t)unit->mmio_sz); db_printf(" hw ctrl %#018jx cmdevst %#018jx\n", (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CTRL), (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS)); db_printf(" devtbl at %p\n", unit->dev_tbl); db_printf(" hwseq at %p phys %#jx val %#jx\n", &unit->x86c.inv_waitd_seq_hw, pmap_kextract((vm_offset_t)&unit->x86c.inv_waitd_seq_hw), unit->x86c.inv_waitd_seq_hw); db_printf(" invq at %p base %#jx hw head/tail %#x/%#x\n", unit->x86c.inv_queue, (uintmax_t)amdiommu_read8(unit, AMDIOMMU_CMDBUF_BASE), cmd_head, cmd_tail); if (show_cmdq) { db_printf(" cmd q:\n"); for (ci = cmd_head; ci != cmd_tail;) { cp = (struct amdiommu_cmd_generic *)(unit-> x86c.inv_queue + ci); db_printf( " idx %#x op %#x %#010x %#010x %#010x %#010x\n", ci >> AMDIOMMU_CMD_SZ_SHIFT, cp->op, cp->w0, cp->ww1, cp->w2, cp->w3); ci += AMDIOMMU_CMD_SZ; if (ci == unit->x86c.inv_queue_size) ci = 0; } } if (show_domains) { db_printf(" domains:\n"); LIST_FOREACH(domain, &unit->domains, link) { amdiommu_print_domain(domain, show_mappings); if (db_pager_quit) break; } } } DB_SHOW_COMMAND(amdiommu, db_amdiommu_print) { struct amdiommu_unit *unit; bool show_domains, show_mappings, show_cmdq; show_domains = strchr(modif, 'd') != NULL; show_mappings = strchr(modif, 'm') != NULL; show_cmdq = strchr(modif, 'q') != NULL; if (!have_addr) { db_printf("usage: show amdiommu [/d] [/m] [/q] index\n"); return; } if ((vm_offset_t)addr < 0x10000) unit = amdiommu_unit_by_device_id(0, (u_int)addr); else unit = (struct amdiommu_unit *)addr; amdiommu_print_one(unit, show_domains, show_mappings, show_cmdq); } DB_SHOW_ALL_COMMAND(amdiommus, db_show_all_amdiommus) { struct amdiommu_unit *unit; bool show_domains, show_mappings, show_cmdq; show_domains = strchr(modif, 'd') != NULL; show_mappings = strchr(modif, 'm') != NULL; show_cmdq = strchr(modif, 'q') != NULL; TAILQ_FOREACH(unit, &amdiommu_units, unit_next) { amdiommu_print_one(unit, show_domains, show_mappings, show_cmdq); if (db_pager_quit) break; } } #endif diff --git a/sys/x86/iommu/amd_event.c b/sys/x86/iommu/amd_event.c index 4a52e42260c2..d4be79474c6a 100644 --- a/sys/x86/iommu/amd_event.c +++ b/sys/x86/iommu/amd_event.c @@ -1,323 +1,323 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2024 The FreeBSD Foundation * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" #include #include #include #include #include #include #include #include #include #include static void amdiommu_event_rearm_intr(struct amdiommu_unit *unit) { amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS, AMDIOMMU_CMDEVS_EVLOGINT); } static void amdiommu_event_log_inc_head(struct amdiommu_unit *unit) { unit->event_log_head++; if (unit->event_log_head >= unit->event_log_size) unit->event_log_head = 0; } static void amdiommu_event_log_print(struct amdiommu_unit *unit, const struct amdiommu_event_generic *evp, bool fancy) { printf("amdiommu%d: event type 0x%x 0x%08x 0x%08x 0x%08x 0x%08x\n", unit->iommu.unit, evp->code, evp->w0, evp->ww1, evp->w2, evp->w3); if (!fancy) return; AMDIOMMU_ASSERT_LOCKED(unit); if (evp->code == AMDIOMMU_EV_ILL_DEV_TABLE_ENTRY) { const struct amdiommu_event_ill_dev_table_entry *ev_dte_p; const struct amdiommu_dte *dte; const uint32_t *x; int i; ev_dte_p = (const struct amdiommu_event_ill_dev_table_entry *)evp; dte = &unit->dev_tbl[ev_dte_p->devid]; printf("\tIllegal Dev Tab Entry dte@%p:", dte); for (i = 0, x = (const uint32_t *)dte; i < sizeof(*dte) / sizeof(uint32_t); i++, x++) printf(" 0x%08x", *x); printf("\n"); } else if (evp->code == AMDIOMMU_EV_IO_PAGE_FAULT) { const struct amdiommu_event_io_page_fault_entry *ev_iopf_p; struct amdiommu_ctx *ctx; device_t dev; ev_iopf_p = (const struct amdiommu_event_io_page_fault_entry *)evp; printf("\tPage Fault rid %#x dom %d", ev_iopf_p->devid, ev_iopf_p->pasid); ctx = amdiommu_find_ctx_locked(unit, ev_iopf_p->devid); if (ctx != NULL) { dev = ctx->context.tag->owner; if (dev != NULL) printf(" %s", device_get_nameunit(dev)); } printf("\n\t" "gn %d nx %d us %d i %d pr %d rw %d pe %d rz %d tr %d" "\n\tgaddr %#jx\n", ev_iopf_p->gn, ev_iopf_p->nx, ev_iopf_p->us, ev_iopf_p->i, ev_iopf_p->pr, ev_iopf_p->rw, ev_iopf_p->pe, ev_iopf_p->rz, ev_iopf_p->tr, (((uintmax_t)(ev_iopf_p->addr2)) << 32) | ev_iopf_p->addr1); } } static u_int amdiommu_event_log_tail(struct amdiommu_unit *unit) { return (amdiommu_read8(unit, 
AMDIOMMU_EVNTLOG_TAIL) >> AMDIOMMU_EV_SZ_SHIFT); } static u_int amdiommu_event_copy_log_inc(u_int idx) { idx++; if (idx == nitems(((struct amdiommu_unit *)NULL)->event_copy_log)) idx = 0; return (idx); } static bool amdiommu_event_copy_log_hasspace(struct amdiommu_unit *unit) { return (unit->event_copy_tail != amdiommu_event_copy_log_inc( unit->event_copy_head)); } void amdiommu_event_intr(struct amdiommu_unit *unit, uint64_t status) { struct amdiommu_event_generic *evp; u_int hw_tail, hw_tail1; bool enqueue; enqueue = (status & AMDIOMMU_CMDEVS_EVOVRFLW) != 0; hw_tail1 = amdiommu_event_log_tail(unit); do { hw_tail = hw_tail1; for (; hw_tail != unit->event_log_head; amdiommu_event_log_inc_head(unit)) { evp = &unit->event_log[unit->event_log_head]; mtx_lock_spin(&unit->event_lock); if (amdiommu_event_copy_log_hasspace(unit)) { unit->event_copy_log[unit->event_copy_head] = *evp; unit->event_copy_head = amdiommu_event_copy_log_inc(unit-> event_copy_head); enqueue = true; } else { amdiommu_event_log_print(unit, evp, false); } mtx_unlock_spin(&unit->event_lock); } amdiommu_write8(unit, AMDIOMMU_EVNTLOG_HEAD, unit->event_log_head << AMDIOMMU_EV_SZ_SHIFT); hw_tail1 = amdiommu_event_log_tail(unit); } while (hw_tail1 != hw_tail); amdiommu_event_rearm_intr(unit); if (enqueue) taskqueue_enqueue(unit->event_taskqueue, &unit->event_task); } static void amdiommu_event_task(void *arg, int pending __unused) { struct amdiommu_unit *unit; uint64_t hwev_status, status; struct amdiommu_event_generic hwev; unit = arg; AMDIOMMU_LOCK(unit); if ((unit->efr & AMDIOMMU_EFR_HWEV_SUP) != 0) { hwev_status = amdiommu_read8(unit, AMDIOMMU_HWEV_STATUS); if ((hwev_status & AMDIOMMU_HWEVS_HEV) != 0) { *(uint64_t *)&hwev = amdiommu_read8(unit, AMDIOMMU_HWEV_LOWER); *((uint64_t *)&hwev + 1) = amdiommu_read8(unit, AMDIOMMU_HWEV_UPPER); printf("amdiommu%d: hw event%s\n", unit->iommu.unit, (hwev_status & AMDIOMMU_HWEVS_HEO) != 0 ? 
" (overflown)" : ""); amdiommu_event_log_print(unit, &hwev, true); amdiommu_write8(unit, AMDIOMMU_HWEV_STATUS, hwev_status); } } status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS); if ((status & AMDIOMMU_CMDEVS_EVOVRFLW) != 0) { printf("amdiommu%d: event log overflow\n", unit->iommu.unit); while ((status & AMDIOMMU_CMDEVS_EVLOGRUN) != 0) { DELAY(1); status = amdiommu_read8(unit, AMDIOMMU_CMDEV_STATUS); } unit->hw_ctrl &= ~AMDIOMMU_CTRL_EVNTLOG_EN; amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); unit->event_log_head = 0; amdiommu_write8(unit, AMDIOMMU_EVNTLOG_HEAD, 0); amdiommu_write8(unit, AMDIOMMU_CMDEV_STATUS, AMDIOMMU_CMDEVS_EVOVRFLW); /* RW1C */ unit->hw_ctrl |= AMDIOMMU_CTRL_EVNTLOG_EN; amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); amdiommu_event_rearm_intr(unit); } mtx_lock_spin(&unit->event_lock); while (unit->event_copy_head != unit->event_copy_tail) { mtx_unlock_spin(&unit->event_lock); amdiommu_event_log_print(unit, &unit->event_copy_log[ unit->event_copy_tail], true); mtx_lock_spin(&unit->event_lock); unit->event_copy_tail = amdiommu_event_copy_log_inc(unit-> event_copy_tail); } mtx_unlock_spin(&unit->event_lock); AMDIOMMU_UNLOCK(unit); } int amdiommu_init_event(struct amdiommu_unit *unit) { uint64_t base_reg; mtx_init(&unit->event_lock, "amdevl", NULL, MTX_SPIN); /* event log entries */ unit->event_log_size = AMDIOMMU_EVNTLOG_MIN; TUNABLE_INT_FETCH("hw.amdiommu.event_log_size", &unit->event_log_size); if (unit->event_log_size < AMDIOMMU_EVNTLOG_MIN || unit->event_log_size > AMDIOMMU_EVNTLOG_MAX || !powerof2(unit->event_log_size)) panic("invalid hw.amdiommu.event_log_size"); unit->event_log = kmem_alloc_contig(AMDIOMMU_EV_SZ * unit->event_log_size, M_WAITOK | M_ZERO, 0, ~0ull, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); TASK_INIT(&unit->event_task, 0, amdiommu_event_task, unit); unit->event_taskqueue = taskqueue_create_fast("amdiommuff", M_WAITOK, taskqueue_thread_enqueue, &unit->event_taskqueue); taskqueue_start_threads(&unit->event_taskqueue, 1, PI_AV, "amdiommu%d event taskq", unit->iommu.unit); base_reg = pmap_kextract((vm_offset_t)unit->event_log) | - (((uint64_t)0x8 + ilog2(unit->event_log_size / + (((uint64_t)0x8 + ilog2_local(unit->event_log_size / AMDIOMMU_EVNTLOG_MIN)) << AMDIOMMU_EVNTLOG_BASE_SZSHIFT); AMDIOMMU_LOCK(unit); /* * Re-arm before enabling interrupt, to not loose it when * re-arming in the interrupt handler. 
*/ amdiommu_event_rearm_intr(unit); amdiommu_write8(unit, AMDIOMMU_EVNTLOG_BASE, base_reg); unit->hw_ctrl |= AMDIOMMU_CTRL_EVNTLOG_EN | AMDIOMMU_CTRL_EVENTINT_EN; amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); AMDIOMMU_UNLOCK(unit); return (0); } void amdiommu_fini_event(struct amdiommu_unit *unit) { AMDIOMMU_LOCK(unit); unit->hw_ctrl &= ~(AMDIOMMU_CTRL_EVNTLOG_EN | AMDIOMMU_CTRL_EVENTINT_EN); amdiommu_write8(unit, AMDIOMMU_CTRL, unit->hw_ctrl); amdiommu_write8(unit, AMDIOMMU_EVNTLOG_BASE, 0); AMDIOMMU_UNLOCK(unit); taskqueue_drain(unit->event_taskqueue, &unit->event_task); taskqueue_free(unit->event_taskqueue); unit->event_taskqueue = NULL; kmem_free(unit->event_log, unit->event_log_size * AMDIOMMU_EV_SZ); unit->event_log = NULL; unit->event_log_head = unit->event_log_tail = 0; mtx_destroy(&unit->event_lock); } diff --git a/sys/x86/iommu/amd_intrmap.c b/sys/x86/iommu/amd_intrmap.c index a4c1a7836268..c5c1706f1f3e 100644 --- a/sys/x86/iommu/amd_intrmap.c +++ b/sys/x86/iommu/amd_intrmap.c @@ -1,391 +1,391 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2024 The FreeBSD Foundation * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct amdiommu_ctx *amdiommu_ir_find(device_t src, uint16_t *rid, bool *is_iommu); static void amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src, u_int cookie); int amdiommu_alloc_msi_intr(device_t src, u_int *cookies, u_int count) { struct amdiommu_ctx *ctx; vmem_addr_t vmem_res; u_int idx, i; int error; ctx = amdiommu_ir_find(src, NULL, NULL); if (ctx == NULL || !CTX2AMD(ctx)->irte_enabled) { for (i = 0; i < count; i++) cookies[i] = -1; return (EOPNOTSUPP); } error = vmem_alloc(ctx->irtids, count, M_FIRSTFIT | M_NOWAIT, &vmem_res); if (error != 0) { KASSERT(error != EOPNOTSUPP, ("impossible EOPNOTSUPP from vmem")); return (error); } idx = vmem_res; for (i = 0; i < count; i++) cookies[i] = idx + i; return (0); } int amdiommu_map_msi_intr(device_t src, u_int cpu, u_int vector, u_int cookie, uint64_t *addr, uint32_t *data) { struct amdiommu_ctx *ctx; struct amdiommu_unit *unit; uint16_t rid; bool is_iommu; ctx = amdiommu_ir_find(src, &rid, &is_iommu); if (is_iommu) { if (addr != NULL) { *data = vector; *addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12); if (x2apic_mode) *addr |= ((uint64_t)cpu & 0xffffff00) << 32; else KASSERT(cpu <= 0xff, ("cpu id too big %d", cpu)); } return (0); } if (ctx == NULL) return (EOPNOTSUPP); unit = CTX2AMD(ctx); if (!unit->irte_enabled || cookie == -1) return (EOPNOTSUPP); if (cookie >= unit->irte_nentries) { device_printf(src, "amdiommu%d: cookie %u irte max %u\n", unit->iommu.unit, cookie, unit->irte_nentries); return (EINVAL); } if (unit->irte_x2apic) { struct amdiommu_irte_basic_vapic_x2 *irte; irte = &ctx->irtx2[cookie]; irte->supiopf = 0; irte->inttype = 0; irte->rqeoi = 0; irte->dm = 0; irte->guestmode = 0; irte->dest0 = cpu; irte->rsrv0 = 0; irte->vector = vector; irte->rsrv1 = 0; irte->rsrv2 = 0; irte->dest1 = cpu >> 24; atomic_thread_fence_rel(); irte->remapen = 1; } else { struct amdiommu_irte_basic_novapic *irte; irte = &ctx->irtb[cookie]; irte->supiopf = 0; irte->inttype = 0; /* fixed */ irte->rqeoi = 0; irte->dm = 0; /* phys */ irte->guestmode = 0; irte->dest = cpu; irte->vector = vector; irte->rsrv = 0; atomic_thread_fence_rel(); irte->remapen = 1; } if (addr != NULL) { *data = cookie; *addr = MSI_INTEL_ADDR_BASE | ((cpu & 0xff) << 12); if (unit->irte_x2apic) *addr |= ((uint64_t)cpu & 0xffffff00) << 32; } iommu_get_requester(src, &rid); AMDIOMMU_LOCK(unit); amdiommu_qi_invalidate_ir_locked(unit, rid); AMDIOMMU_UNLOCK(unit); return (0); } int amdiommu_unmap_msi_intr(device_t src, u_int cookie) { struct amdiommu_ctx *ctx; if (cookie == -1) return (0); ctx = amdiommu_ir_find(src, NULL, NULL); amdiommu_ir_free_irte(ctx, src, cookie); return (0); } int amdiommu_map_ioapic_intr(u_int ioapic_id, u_int cpu, u_int vector, bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo) { /* XXXKIB for early call from ioapic_create() */ return (EOPNOTSUPP); } int amdiommu_unmap_ioapic_intr(u_int ioapic_id, u_int *cookie) { /* XXXKIB */ return (0); } static struct amdiommu_ctx * amdiommu_ir_find(device_t src, uint16_t *ridp, bool *is_iommu) { devclass_t src_class; struct amdiommu_unit *unit; struct amdiommu_ctx *ctx; uint32_t edte; 
uint16_t rid; uint8_t dte; int error; /* * We need to determine if the interrupt source generates FSB * interrupts. If yes, it is either IOMMU, in which case * interrupts are not remapped. Or it is HPET, and interrupts * are remapped. For HPET, source id is reported by HPET * record in IVHD ACPI table. */ if (is_iommu != NULL) *is_iommu = false; ctx = NULL; src_class = device_get_devclass(src); if (src_class == devclass_find("amdiommu")) { if (is_iommu != NULL) *is_iommu = true; } else if (src_class == devclass_find("hpet")) { error = amdiommu_find_unit_for_hpet(src, &unit, &rid, &dte, &edte, bootverbose); ctx = NULL; // XXXKIB allocate ctx } else { error = amdiommu_find_unit(src, &unit, &rid, &dte, &edte, bootverbose); if (error == 0) { iommu_get_requester(src, &rid); ctx = amdiommu_get_ctx_for_dev(unit, src, rid, 0, false /* XXXKIB */, false, dte, edte); } } if (ridp != NULL) *ridp = rid; return (ctx); } static void amdiommu_ir_free_irte(struct amdiommu_ctx *ctx, device_t src, u_int cookie) { struct amdiommu_unit *unit; uint16_t rid; MPASS(ctx != NULL); unit = CTX2AMD(ctx); KASSERT(unit->irte_enabled, ("unmap: cookie %d ctx %p unit %p", cookie, ctx, unit)); KASSERT(cookie < unit->irte_nentries, ("bad cookie %u %u", cookie, unit->irte_nentries)); if (unit->irte_x2apic) { struct amdiommu_irte_basic_vapic_x2 *irte; irte = &ctx->irtx2[cookie]; irte->remapen = 0; atomic_thread_fence_rel(); bzero(irte, sizeof(*irte)); } else { struct amdiommu_irte_basic_novapic *irte; irte = &ctx->irtb[cookie]; irte->remapen = 0; atomic_thread_fence_rel(); bzero(irte, sizeof(*irte)); } iommu_get_requester(src, &rid); AMDIOMMU_LOCK(unit); amdiommu_qi_invalidate_ir_locked(unit, rid); AMDIOMMU_UNLOCK(unit); } int amdiommu_ctx_init_irte(struct amdiommu_ctx *ctx) { struct amdiommu_unit *unit; void *ptr; unsigned long sz; int dom; unit = CTX2AMD(ctx); if (!unit->irte_enabled) return (0); KASSERT(unit->irte_nentries > 0 && unit->irte_nentries <= 2048 && powerof2(unit->irte_nentries), ("amdiommu%d: unit %p irte_nentries %u", unit->iommu.unit, unit, unit->irte_nentries)); if (bus_get_domain(unit->iommu.dev, &dom) != 0) dom = -1; sz = unit->irte_nentries; sz *= unit->irte_x2apic ? 
sizeof(struct amdiommu_irte_basic_vapic_x2) : sizeof(struct amdiommu_irte_basic_novapic); if (dom != -1) { ptr = contigmalloc_domainset(sz, M_DEVBUF, DOMAINSET_PREF(dom), M_WAITOK | M_ZERO, 0, ~0ull, 128, 0); } else { ptr = contigmalloc(sz, M_DEVBUF, M_WAITOK | M_ZERO, 0, ~0ull, 128, 0); } if (unit->irte_x2apic) ctx->irtx2 = ptr; else ctx->irtb = ptr; ctx->irtids = vmem_create("amdirt", 0, unit->irte_nentries, 1, 0, M_FIRSTFIT | M_NOWAIT); intr_reprogram(); // XXXKIB return (0); } void amdiommu_ctx_fini_irte(struct amdiommu_ctx *ctx) { struct amdiommu_unit *unit; unit = CTX2AMD(ctx); if (!unit->irte_enabled) return; if (unit->irte_x2apic) free(ctx->irtx2, M_DEVBUF); else free(ctx->irtb, M_DEVBUF); vmem_destroy(ctx->irtids); } int amdiommu_init_irt(struct amdiommu_unit *unit) { int enabled, nentries; SYSCTL_ADD_INT(&unit->iommu.sysctl_ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(unit->iommu.dev)), OID_AUTO, "ir", CTLFLAG_RD, &unit->irte_enabled, 0, "Interrupt remapping ops enabled"); enabled = 1; TUNABLE_INT_FETCH("hw.iommu.ir", &enabled); unit->irte_enabled = enabled != 0; if (!unit->irte_enabled) return (0); nentries = 32; TUNABLE_INT_FETCH("hw.iommu.amd.ir_num", &nentries); - nentries = roundup_pow_of_two(nentries); + nentries = roundup_pow_of_two_local(nentries); if (nentries < 1) nentries = 1; if (nentries > 2048) nentries = 2048; unit->irte_nentries = nentries; unit->irte_x2apic = x2apic_mode; return (0); } void amdiommu_fini_irt(struct amdiommu_unit *unit) { } diff --git a/sys/x86/iommu/x86_iommu.h b/sys/x86/iommu/x86_iommu.h index eb4a9907a5d6..835a4623a95e 100644 --- a/sys/x86/iommu/x86_iommu.h +++ b/sys/x86/iommu/x86_iommu.h @@ -1,203 +1,213 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2013-2015, 2024 The FreeBSD Foundation * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef __X86_IOMMU_X86_IOMMU_H #define __X86_IOMMU_X86_IOMMU_H /* Both Intel and AMD are not too crazy to have different sizes. 
*/ typedef struct iommu_pte { uint64_t pte; } iommu_pte_t; #define IOMMU_PAGE_SIZE PAGE_SIZE #define IOMMU_PAGE_MASK (IOMMU_PAGE_SIZE - 1) #define IOMMU_PAGE_SHIFT PAGE_SHIFT #define IOMMU_NPTEPG (IOMMU_PAGE_SIZE / sizeof(iommu_pte_t)) #define IOMMU_NPTEPGSHIFT 9 #define IOMMU_PTEMASK (IOMMU_NPTEPG - 1) struct sf_buf; struct vm_object; struct vm_page *iommu_pgalloc(struct vm_object *obj, vm_pindex_t idx, int flags); void iommu_pgfree(struct vm_object *obj, vm_pindex_t idx, int flags, struct iommu_map_entry *entry); void *iommu_map_pgtbl(struct vm_object *obj, vm_pindex_t idx, int flags, struct sf_buf **sf); void iommu_unmap_pgtbl(struct sf_buf *sf); extern iommu_haddr_t iommu_high; extern int iommu_tbl_pagecnt; extern int iommu_qi_batch_coalesce; SYSCTL_DECL(_hw_iommu); struct x86_unit_common; struct x86_iommu { struct x86_unit_common *(*get_x86_common)(struct iommu_unit *iommu); void (*unit_pre_instantiate_ctx)(struct iommu_unit *iommu); void (*qi_ensure)(struct iommu_unit *unit, int descr_count); void (*qi_emit_wait_descr)(struct iommu_unit *unit, uint32_t seq, bool, bool, bool); void (*qi_advance_tail)(struct iommu_unit *unit); void (*qi_invalidate_emit)(struct iommu_domain *idomain, iommu_gaddr_t base, iommu_gaddr_t size, struct iommu_qi_genseq * pseq, bool emit_wait); void (*domain_unload_entry)(struct iommu_map_entry *entry, bool free, bool cansleep); void (*domain_unload)(struct iommu_domain *iodom, struct iommu_map_entries_tailq *entries, bool cansleep); struct iommu_ctx *(*get_ctx)(struct iommu_unit *iommu, device_t dev, uint16_t rid, bool id_mapped, bool rmrr_init); void (*free_ctx_locked)(struct iommu_unit *iommu, struct iommu_ctx *context); void (*free_ctx)(struct iommu_ctx *context); struct iommu_unit *(*find)(device_t dev, bool verbose); int (*alloc_msi_intr)(device_t src, u_int *cookies, u_int count); int (*map_msi_intr)(device_t src, u_int cpu, u_int vector, u_int cookie, uint64_t *addr, uint32_t *data); int (*unmap_msi_intr)(device_t src, u_int cookie); int (*map_ioapic_intr)(u_int ioapic_id, u_int cpu, u_int vector, bool edge, bool activehi, int irq, u_int *cookie, uint32_t *hi, uint32_t *lo); int (*unmap_ioapic_intr)(u_int ioapic_id, u_int *cookie); }; void set_x86_iommu(struct x86_iommu *); struct x86_iommu *get_x86_iommu(void); struct iommu_msi_data { int irq; int irq_rid; struct resource *irq_res; void *intr_handle; int (*handler)(void *); int msi_data_reg; int msi_addr_reg; int msi_uaddr_reg; uint64_t msi_addr; uint32_t msi_data; void (*enable_intr)(struct iommu_unit *); void (*disable_intr)(struct iommu_unit *); const char *name; }; #define IOMMU_MAX_MSI 3 struct x86_unit_common { uint32_t qi_buf_maxsz; uint32_t qi_cmd_sz; char *inv_queue; vm_size_t inv_queue_size; uint32_t inv_queue_avail; uint32_t inv_queue_tail; /* * Hw writes there on completion of wait descriptor * processing. Intel writes 4 bytes, while AMD does the * 8-bytes write. Due to little-endian, and use of 4-byte * sequence numbers, the difference does not matter for us. */ volatile uint64_t inv_waitd_seq_hw; uint64_t inv_waitd_seq_hw_phys; uint32_t inv_waitd_seq; /* next sequence number to use for wait descr */ u_int inv_waitd_gen; /* seq number generation AKA seq overflows */ u_int inv_seq_waiters; /* count of waiters for seq */ u_int inv_queue_full; /* informational counter */ /* * Delayed freeing of map entries queue processing: * * tlb_flush_head and tlb_flush_tail are used to implement a FIFO * queue that supports concurrent dequeues and enqueues. 
 * However, there can only be a single dequeuer (accessing tlb_flush_head)
 * and a single enqueuer (accessing tlb_flush_tail) at a time.  Since the
 * unit's qi_task is the only dequeuer, it can access tlb_flush_head
 * without any locking.  In contrast, there may be multiple enqueuers,
 * so the enqueuers acquire the iommu unit lock to serialize their
 * accesses to tlb_flush_tail.
 *
 * In this FIFO queue implementation, the key to enabling concurrent
 * dequeues and enqueues is that the dequeuer never needs to access
 * tlb_flush_tail and the enqueuer never needs to access
 * tlb_flush_head.  In particular, tlb_flush_head and tlb_flush_tail
 * are never NULL, so neither a dequeuer nor an enqueuer ever needs to
 * update both.  Instead, tlb_flush_head always points to a "zombie"
 * struct, which previously held the last dequeued item.  Thus, the
 * zombie's next field actually points to the struct holding the first
 * item in the queue.  When an item is dequeued, the current zombie is
 * finally freed, and the struct that held the just dequeued item
 * becomes the new zombie.  When the queue is empty, tlb_flush_tail
 * also points to the zombie.
 */
	struct iommu_map_entry *tlb_flush_head;
	struct iommu_map_entry *tlb_flush_tail;
	struct task qi_task;
	struct taskqueue *qi_taskqueue;
	struct iommu_msi_data intrs[IOMMU_MAX_MSI];
};

void iommu_domain_free_entry(struct iommu_map_entry *entry, bool free);
void iommu_qi_emit_wait_seq(struct iommu_unit *unit, struct iommu_qi_genseq *
    pseq, bool emit_wait);
void iommu_qi_wait_for_seq(struct iommu_unit *unit,
    const struct iommu_qi_genseq *gseq, bool nowait);
void iommu_qi_drain_tlb_flush(struct iommu_unit *unit);
void iommu_qi_invalidate_locked(struct iommu_domain *domain,
    struct iommu_map_entry *entry, bool emit_wait);
void iommu_qi_invalidate_sync(struct iommu_domain *domain, iommu_gaddr_t base,
    iommu_gaddr_t size, bool cansleep);
void iommu_qi_common_init(struct iommu_unit *unit, task_fn_t taskfunc);
void iommu_qi_common_fini(struct iommu_unit *unit, void (*disable_qi)(
    struct iommu_unit *));
int iommu_alloc_irq(struct iommu_unit *unit, int idx);
void iommu_release_intr(struct iommu_unit *unit, int idx);
void iommu_device_tag_init(struct iommu_ctx *ctx, device_t dev);
void iommu_device_set_iommu_prop(device_t dev, device_t iommu);
int pglvl_pgtbl_pte_off(int pglvl, iommu_gaddr_t base, int lvl);
vm_pindex_t pglvl_pgtbl_get_pindex(int pglvl, iommu_gaddr_t base, int lvl);
vm_pindex_t pglvl_max_pages(int pglvl);
iommu_gaddr_t pglvl_page_size(int total_pglvl, int lvl);
void iommu_db_print_domain_entry(const struct iommu_map_entry *entry);
void iommu_db_print_ctx(struct iommu_ctx *ctx);
void iommu_db_domain_print_contexts(struct iommu_domain *iodom);
void iommu_db_domain_print_mappings(struct iommu_domain *iodom);

+static __inline __pure2 int
+ilog2_local(int n)
+{
+	KASSERT(n != 0, ("ilog argument must be nonzero"));
+	return (8 * sizeof(n) - 1 - __builtin_clz((u_int)n));
+}
+
+#define	order_base_2_local(n)	ilog2_local(2*(n)-1)
+#define	roundup_pow_of_two_local(n) ((__typeof(n))1 << order_base_2_local(n))
+
 #endif
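Editor's illustration (not part of the patch): the x86_unit_common comment on inv_waitd_seq_hw notes that Intel hardware posts a 4-byte wait-descriptor completion write while AMD posts 8 bytes, and that little-endian layout plus 32-bit sequence numbers makes the two equivalent for the driver. The hedged user-space sketch below shows why: on little-endian x86 the low 32 bits of the 64-bit field sit at the field's base address, so the driver reads the same sequence value either way. The variable names are hypothetical.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	uint64_t seq_hw;		/* models inv_waitd_seq_hw */
	uint32_t seq = 0x12345678;	/* 32-bit wait-descriptor sequence */
	uint32_t seen;

	/* AMD-style completion: the hardware stores all 8 bytes. */
	seq_hw = (uint64_t)seq;
	memcpy(&seen, &seq_hw, sizeof(seen));	/* driver reads low 32 bits */
	assert(seen == seq);

	/* Intel-style completion: only the low 4 bytes are stored. */
	seq_hw = ~(uint64_t)0;			/* stale contents elsewhere */
	memcpy(&seq_hw, &seq, sizeof(seq));	/* 4-byte store at base address */
	memcpy(&seen, &seq_hw, sizeof(seen));
	assert(seen == seq);	/* same value seen on little-endian x86 */

	printf("ok\n");
	return (0);
}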
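Editor's illustration (not part of the patch): the long comment above tlb_flush_head/tlb_flush_tail describes a FIFO whose head always points at a "zombie" node, so the single dequeuer touches only the head while enqueuers touch only the tail. A minimal user-space model of that shape is sketched below; struct qent, the zfifo names, and calloc/free are stand-ins for the kernel's struct iommu_map_entry, unit lock, and entry allocator, and the memory-ordering needed for a truly concurrent dequeuer is omitted.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

struct qent {
	struct qent *next;
	int payload;
};

struct zfifo {
	struct qent *head;	/* always the zombie (last dequeued slot) */
	struct qent *tail;	/* newest entry; equals head when empty */
};

static void
zfifo_init(struct zfifo *q)
{
	/* Start with an empty zombie so head and tail are never NULL. */
	q->head = q->tail = calloc(1, sizeof(struct qent));
	assert(q->head != NULL);
}

/* Enqueue touches only the tail; concurrent enqueuers must serialize. */
static void
zfifo_enqueue(struct zfifo *q, int payload)
{
	struct qent *e;

	e = calloc(1, sizeof(*e));
	assert(e != NULL);
	e->payload = payload;
	q->tail->next = e;
	q->tail = e;
}

/*
 * Dequeue touches only the head: free the old zombie and promote the
 * node that held the first item to be the new zombie.  Returns 0 when
 * the queue is empty, i.e. the zombie's next pointer is NULL.
 */
static int
zfifo_dequeue(struct zfifo *q, int *payload)
{
	struct qent *zombie, *first;

	zombie = q->head;
	first = zombie->next;
	if (first == NULL)
		return (0);
	*payload = first->payload;
	free(zombie);
	q->head = first;
	return (1);
}

int
main(void)
{
	struct zfifo q;
	int v;

	zfifo_init(&q);
	zfifo_enqueue(&q, 1);
	zfifo_enqueue(&q, 2);
	while (zfifo_dequeue(&q, &v))
		printf("%d\n", v);	/* prints 1 then 2 */
	return (0);
}

As in the kernel scheme, the last dequeued node lives on as the zombie until the next dequeue, so neither side ever updates both head and tail.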
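Editor's illustration (not part of the patch): the ilog2_local()/order_base_2_local()/roundup_pow_of_two_local() helpers added above compute floor(log2(n)) from __builtin_clz, obtain ceil(log2(n)) by evaluating ilog2_local(2*n - 1), and then round n up to the next power of two. A small user-space check of that arithmetic, assuming a GCC/Clang toolchain (__builtin_clz, __typeof) and with assert() standing in for KASSERT():

#include <assert.h>
#include <stdio.h>

static inline int
ilog2_local(int n)
{
	assert(n != 0);
	/* floor(log2(n)): index of the highest set bit. */
	return (8 * (int)sizeof(n) - 1 - __builtin_clz((unsigned)n));
}

#define	order_base_2_local(n)	ilog2_local(2 * (n) - 1)	/* ceil(log2(n)) */
#define	roundup_pow_of_two_local(n) \
	((__typeof(n))1 << order_base_2_local(n))

int
main(void)
{
	assert(ilog2_local(1) == 0);
	assert(ilog2_local(32) == 5);
	assert(ilog2_local(33) == 5);			/* floor, not ceil */
	assert(roundup_pow_of_two_local(32) == 32);	/* already a power of two */
	assert(roundup_pow_of_two_local(33) == 64);	/* rounded up */
	printf("ok\n");
	return (0);
}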