diff --git a/sys/amd64/vmm/amd/amdiommu.c b/sys/amd64/vmm/amd/amdiommu.c new file mode 100644 index 000000000000..4ded23dff003 --- /dev/null +++ b/sys/amd64/vmm/amd/amdiommu.c @@ -0,0 +1,185 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2021 The FreeBSD Fondation + * + * Portions of this software were developed by Ka Ho Ng + * under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include + +#include "amdvi_priv.h" +#include "ivhd_if.h" + +struct amdiommu_softc { + struct resource *event_res; /* Event interrupt resource. */ + void *event_tag; /* Event interrupt tag. 
*/ + int event_rid; +}; + +static int amdiommu_probe(device_t); +static int amdiommu_attach(device_t); +static int amdiommu_detach(device_t); +static int ivhd_setup_intr(device_t, driver_intr_t, void *, + const char *); +static int ivhd_teardown_intr(device_t); + +static device_method_t amdiommu_methods[] = { + /* device interface */ + DEVMETHOD(device_probe, amdiommu_probe), + DEVMETHOD(device_attach, amdiommu_attach), + DEVMETHOD(device_detach, amdiommu_detach), + DEVMETHOD(ivhd_setup_intr, ivhd_setup_intr), + DEVMETHOD(ivhd_teardown_intr, ivhd_teardown_intr), + DEVMETHOD_END +}; +static driver_t amdiommu_driver = { + "amdiommu", + amdiommu_methods, + sizeof(struct amdiommu_softc), +}; + +static int +amdiommu_probe(device_t dev) +{ + int error; + int capoff; + + /* + * Check base class and sub-class + */ + if (pci_get_class(dev) != PCIC_BASEPERIPH || + pci_get_subclass(dev) != PCIS_BASEPERIPH_IOMMU) + return (ENXIO); + + /* + * A IOMMU capability block carries a 0Fh capid. + */ + error = pci_find_cap(dev, PCIY_SECDEV, &capoff); + if (error) + return (ENXIO); + + /* + * bit [18:16] == 011b indicates the capability block is IOMMU + * capability block. If the field is not set to 011b, bail out. 
+	 */
+	if ((pci_read_config(dev, capoff + 2, 2) & 0x7) != 0x3)
+		return (ENXIO);
+
+	return (BUS_PROBE_SPECIFIC);
+}
+
+/* Attach only sets the device description; it always returns 0. */
+static int
+amdiommu_attach(device_t dev)
+{
+
+	device_set_desc(dev, "AMD-Vi/IOMMU PCI function");
+	return (0);
+}
+
+/* Detach has nothing to undo here and always returns 0. */
+static int
+amdiommu_detach(device_t dev)
+{
+
+	return (0);
+}
+
+/*
+ * ivhd_setup_intr method: allocate one MSI message on this PCI function,
+ * allocate and activate the IRQ resource at rid 1, install 'handler'
+ * (invoked with 'arg') and label the interrupt with 'desc'.
+ *
+ * Panics if an event IRQ resource is already held.  Returns 0 on success
+ * and ENOENT when pci_alloc_msi() fails; any later failure is unwound
+ * through ivhd_teardown_intr() and its error code is returned.
+ */
+static int
+ivhd_setup_intr(device_t dev, driver_intr_t handler, void *arg,
+    const char *desc)
+{
+	struct amdiommu_softc *sc;
+	int error, msicnt;
+
+	sc = device_get_softc(dev);
+	msicnt = 1;
+	if (sc->event_res != NULL)
+		panic("%s is called without intr teardown", __func__);
+	sc->event_rid = 1;
+
+	error = pci_alloc_msi(dev, &msicnt);
+	if (error) {
+		device_printf(dev, "Couldn't find event MSI IRQ resource.\n");
+		return (ENOENT);
+	}
+
+	sc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+	    &sc->event_rid, RF_ACTIVE);
+	if (sc->event_res == NULL) {
+		device_printf(dev, "Unable to allocate event INTR resource.\n");
+		error = ENOMEM;
+		goto fail;
+	}
+
+	error = bus_setup_intr(dev, sc->event_res, INTR_TYPE_MISC | INTR_MPSAFE,
+	    NULL, handler, arg, &sc->event_tag);
+	if (error) {
+		device_printf(dev, "Fail to setup event intr\n");
+		goto fail;
+	}
+
+	bus_describe_intr(dev, sc->event_res, sc->event_tag, "%s", desc);
+	return (0);
+
+fail:
+	ivhd_teardown_intr(dev);
+	return (error);
+}
+
+/*
+ * ivhd_teardown_intr method: undo ivhd_setup_intr().  Each release step is
+ * guarded by the softc field it releases, so it can be called from the
+ * setup failure path where only part of the state exists.  Always
+ * returns 0.
+ */
+static int
+ivhd_teardown_intr(device_t dev)
+{
+	struct amdiommu_softc *sc;
+
+	sc = device_get_softc(dev);
+
+	/*
+	 * The handler cookie, not the IRQ resource, records whether a
+	 * handler was installed: the setup failure path gets here with
+	 * event_res allocated but no handler when bus_setup_intr() failed.
+	 */
+	if (sc->event_tag != NULL) {
+		bus_teardown_intr(dev, sc->event_res, sc->event_tag);
+		sc->event_tag = NULL;
+	}
+	if (sc->event_res != NULL) {
+		bus_release_resource(dev, SYS_RES_IRQ, sc->event_rid,
+		    sc->event_res);
+		sc->event_res = NULL;
+	}
+	pci_release_msi(dev);
+	return (0);
+}
+
+static devclass_t amdiommu_devclass;
+
+/* This driver has to be loaded before ivhd */
+DRIVER_MODULE(amdiommu, pci, amdiommu_driver, amdiommu_devclass, 0, 0);
+MODULE_DEPEND(amdiommu, pci, 1, 1, 1);
diff --git a/sys/amd64/vmm/amd/amdvi_hw.c b/sys/amd64/vmm/amd/amdvi_hw.c
index 62aba04de050..3fe84cc1445a 100644 --- a/sys/amd64/vmm/amd/amdvi_hw.c +++ b/sys/amd64/vmm/amd/amdvi_hw.c @@ -1,1456 +1,1391 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016, Anish Gupta (anish@freebsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include "ivhd_if.h" #include "pcib_if.h" #include "io/iommu.h" #include "amdvi_priv.h" SYSCTL_DECL(_hw_vmm); SYSCTL_NODE(_hw_vmm, OID_AUTO, amdvi, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, NULL); #define MOD_INC(a, s, m) (((a) + (s)) % ((m) * (s))) #define MOD_DEC(a, s, m) (((a) - (s)) % ((m) * (s))) /* Print RID or device ID in PCI string format. */ #define RID2PCI_STR(d) PCI_RID2BUS(d), PCI_RID2SLOT(d), PCI_RID2FUNC(d) static void amdvi_dump_cmds(struct amdvi_softc *softc, int count); static void amdvi_print_dev_cap(struct amdvi_softc *softc); MALLOC_DEFINE(M_AMDVI, "amdvi", "amdvi"); extern device_t *ivhd_devs; extern int ivhd_count; SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, count, CTLFLAG_RDTUN, &ivhd_count, 0, NULL); static int amdvi_enable_user = 0; SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, enable, CTLFLAG_RDTUN, &amdvi_enable_user, 0, NULL); TUNABLE_INT("hw.vmm.amdvi_enable", &amdvi_enable_user); #ifdef AMDVI_ATS_ENABLE /* XXX: ATS is not tested. */ static int amdvi_enable_iotlb = 1; SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, iotlb_enabled, CTLFLAG_RDTUN, &amdvi_enable_iotlb, 0, NULL); TUNABLE_INT("hw.vmm.enable_iotlb", &amdvi_enable_iotlb); #endif static int amdvi_host_ptp = 1; /* Use page tables for host. */ SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, host_ptp, CTLFLAG_RDTUN, &amdvi_host_ptp, 0, NULL); TUNABLE_INT("hw.vmm.amdvi.host_ptp", &amdvi_host_ptp); /* Page table level used <= supported by h/w[v1=7]. */ int amdvi_ptp_level = 4; SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, ptp_level, CTLFLAG_RDTUN, &amdvi_ptp_level, 0, NULL); TUNABLE_INT("hw.vmm.amdvi.ptp_level", &amdvi_ptp_level); /* Disable fault event reporting. 
*/ static int amdvi_disable_io_fault = 0; SYSCTL_INT(_hw_vmm_amdvi, OID_AUTO, disable_io_fault, CTLFLAG_RDTUN, &amdvi_disable_io_fault, 0, NULL); TUNABLE_INT("hw.vmm.amdvi.disable_io_fault", &amdvi_disable_io_fault); static uint32_t amdvi_dom_id = 0; /* 0 is reserved for host. */ SYSCTL_UINT(_hw_vmm_amdvi, OID_AUTO, domain_id, CTLFLAG_RD, &amdvi_dom_id, 0, NULL); /* * Device table entry. * Bus(256) x Dev(32) x Fun(8) x DTE(256 bits or 32 bytes). * = 256 * 2 * PAGE_SIZE. */ static struct amdvi_dte amdvi_dte[PCI_NUM_DEV_MAX] __aligned(PAGE_SIZE); CTASSERT(PCI_NUM_DEV_MAX == 0x10000); CTASSERT(sizeof(amdvi_dte) == 0x200000); static SLIST_HEAD (, amdvi_domain) dom_head; static inline uint32_t amdvi_pci_read(struct amdvi_softc *softc, int off) { return (pci_cfgregread(PCI_RID2BUS(softc->pci_rid), PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid), off, 4)); } #ifdef AMDVI_ATS_ENABLE /* XXX: Should be in pci.c */ /* * Check if device has ATS capability and its enabled. * If ATS is absent or disabled, return (-1), otherwise ATS * queue length. */ static int amdvi_find_ats_qlen(uint16_t devid) { device_t dev; uint32_t off, cap; int qlen = -1; dev = pci_find_bsf(PCI_RID2BUS(devid), PCI_RID2SLOT(devid), PCI_RID2FUNC(devid)); if (!dev) { return (-1); } #define PCIM_ATS_EN BIT(31) if (pci_find_extcap(dev, PCIZ_ATS, &off) == 0) { cap = pci_read_config(dev, off + 4, 4); qlen = (cap & 0x1F); qlen = qlen ? qlen : 32; printf("AMD-Vi: PCI device %d.%d.%d ATS %s qlen=%d\n", RID2PCI_STR(devid), (cap & PCIM_ATS_EN) ? "enabled" : "Disabled", qlen); qlen = (cap & PCIM_ATS_EN) ? qlen : -1; } return (qlen); } /* * Check if an endpoint device support device IOTLB or ATS. 
*/ static inline bool amdvi_dev_support_iotlb(struct amdvi_softc *softc, uint16_t devid) { struct ivhd_dev_cfg *cfg; int qlen, i; bool pci_ats, ivhd_ats; qlen = amdvi_find_ats_qlen(devid); if (qlen < 0) return (false); KASSERT(softc, ("softc is NULL")); cfg = softc->dev_cfg; ivhd_ats = false; for (i = 0; i < softc->dev_cfg_cnt; i++) { if ((cfg->start_id <= devid) && (cfg->end_id >= devid)) { ivhd_ats = cfg->enable_ats; break; } cfg++; } pci_ats = (qlen < 0) ? false : true; if (pci_ats != ivhd_ats) device_printf(softc->dev, "BIOS bug: mismatch in ATS setting for %d.%d.%d," "ATS inv qlen = %d\n", RID2PCI_STR(devid), qlen); /* Ignore IVRS setting and respect PCI setting. */ return (pci_ats); } #endif /* Enable IOTLB support for IOMMU if its supported. */ static inline void amdvi_hw_enable_iotlb(struct amdvi_softc *softc) { #ifndef AMDVI_ATS_ENABLE softc->iotlb = false; #else bool supported; supported = (softc->ivhd_flag & IVHD_FLAG_IOTLB) ? true : false; if (softc->pci_cap & AMDVI_PCI_CAP_IOTLB) { if (!supported) device_printf(softc->dev, "IOTLB disabled by BIOS.\n"); if (supported && !amdvi_enable_iotlb) { device_printf(softc->dev, "IOTLB disabled by user.\n"); supported = false; } } else supported = false; softc->iotlb = supported; #endif } static int amdvi_init_cmd(struct amdvi_softc *softc) { struct amdvi_ctrl *ctrl = softc->ctrl; ctrl->cmd.len = 8; /* Use 256 command buffer entries. */ softc->cmd_max = 1 << ctrl->cmd.len; softc->cmd = malloc(sizeof(struct amdvi_cmd) * softc->cmd_max, M_AMDVI, M_WAITOK | M_ZERO); if ((uintptr_t)softc->cmd & PAGE_MASK) panic("AMDVi: Command buffer not aligned on page boundary."); ctrl->cmd.base = vtophys(softc->cmd) / PAGE_SIZE; /* * XXX: Reset the h/w pointers in case IOMMU is restarting, * h/w doesn't clear these pointers based on empirical data. 
*/
 	ctrl->cmd_tail = 0;
 	ctrl->cmd_head = 0;

 	return (0);
 }

 /*
  * Note: Update tail pointer after we have written the command since tail
  * pointer update cause h/w to execute new commands, see section 3.3
  * of AMD IOMMU spec ver 2.0.
  */
 /* Get the command tail pointer w/o updating it. */
 static struct amdvi_cmd *
 amdvi_get_cmd_tail(struct amdvi_softc *softc)
 {
 	struct amdvi_ctrl *ctrl;
 	struct amdvi_cmd *tail;

 	KASSERT(softc, ("softc is NULL"));
 	KASSERT(softc->cmd != NULL, ("cmd is NULL"));

 	ctrl = softc->ctrl;
 	KASSERT(ctrl != NULL, ("ctrl is NULL"));

+	/* cmd_tail is used as a byte offset into the command buffer. */
 	tail = (struct amdvi_cmd *)((uint8_t *)softc->cmd +
 	    ctrl->cmd_tail);

 	return (tail);
 }

 /*
  * Update the command tail pointer which will start command execution.
  */
 static void
 amdvi_update_cmd_tail(struct amdvi_softc *softc)
 {
 	struct amdvi_ctrl *ctrl;
 	int size;

 	size = sizeof(struct amdvi_cmd);
 	KASSERT(softc->cmd != NULL, ("cmd is NULL"));

 	ctrl = softc->ctrl;
 	KASSERT(ctrl != NULL, ("ctrl is NULL"));

+	/* Advance by one entry, wrapping at cmd_max entries. */
 	ctrl->cmd_tail = MOD_INC(ctrl->cmd_tail, size, softc->cmd_max);
 	softc->total_cmd++;

 #ifdef AMDVI_DEBUG_CMD
-	device_printf(softc->dev, "cmd_tail: %s Tail:0x%x, Head:0x%x.\n",
+	/* Exactly two conversions for the two arguments (tail, head). */
+	device_printf(softc->dev, "cmd_tail: Tail:0x%x, Head:0x%x.\n",
 	    ctrl->cmd_tail, ctrl->cmd_head);
 #endif
 }

 /*
  * Various commands supported by IOMMU.
  */

 /* Completion wait command. */
+/*
+ * Fills the entry at the current tail with a completion-wait command that
+ * targets the physical address of softc->cmp_data and carries 'data' as
+ * its payload, then advances the tail.
+ */
 static void
 amdvi_cmd_cmp(struct amdvi_softc *softc, const uint64_t data)
 {
 	struct amdvi_cmd *cmd;
 	uint64_t pa;

 	cmd = amdvi_get_cmd_tail(softc);
 	KASSERT(cmd != NULL, ("Cmd is NULL"));

 	pa = vtophys(&softc->cmp_data);
 	cmd->opcode = AMDVI_CMP_WAIT_OPCODE;
 	cmd->word0 = (pa & 0xFFFFFFF8) | AMDVI_CMP_WAIT_STORE;
 	cmd->word1 = (pa >> 32) & 0xFFFFF;
 	cmd->addr = data;

 	amdvi_update_cmd_tail(softc);
 }

 /* Invalidate device table entry.
*/ static void amdvi_cmd_inv_dte(struct amdvi_softc *softc, uint16_t devid) { struct amdvi_cmd *cmd; cmd = amdvi_get_cmd_tail(softc); KASSERT(cmd != NULL, ("Cmd is NULL")); cmd->opcode = AMDVI_INVD_DTE_OPCODE; cmd->word0 = devid; amdvi_update_cmd_tail(softc); #ifdef AMDVI_DEBUG_CMD device_printf(softc->dev, "Invalidated DTE:0x%x\n", devid); #endif } /* Invalidate IOMMU page, use for invalidation of domain. */ static void amdvi_cmd_inv_iommu_pages(struct amdvi_softc *softc, uint16_t domain_id, uint64_t addr, bool guest_nested, bool pde, bool page) { struct amdvi_cmd *cmd; cmd = amdvi_get_cmd_tail(softc); KASSERT(cmd != NULL, ("Cmd is NULL")); cmd->opcode = AMDVI_INVD_PAGE_OPCODE; cmd->word1 = domain_id; /* * Invalidate all addresses for this domain. */ cmd->addr = addr; cmd->addr |= pde ? AMDVI_INVD_PAGE_PDE : 0; cmd->addr |= page ? AMDVI_INVD_PAGE_S : 0; amdvi_update_cmd_tail(softc); } #ifdef AMDVI_ATS_ENABLE /* Invalidate device IOTLB. */ static void amdvi_cmd_inv_iotlb(struct amdvi_softc *softc, uint16_t devid) { struct amdvi_cmd *cmd; int qlen; if (!softc->iotlb) return; qlen = amdvi_find_ats_qlen(devid); if (qlen < 0) { panic("AMDVI: Invalid ATS qlen(%d) for device %d.%d.%d\n", qlen, RID2PCI_STR(devid)); } cmd = amdvi_get_cmd_tail(softc); KASSERT(cmd != NULL, ("Cmd is NULL")); #ifdef AMDVI_DEBUG_CMD device_printf(softc->dev, "Invalidate IOTLB devID 0x%x" " Qlen:%d\n", devid, qlen); #endif cmd->opcode = AMDVI_INVD_IOTLB_OPCODE; cmd->word0 = devid; cmd->word1 = qlen; cmd->addr = AMDVI_INVD_IOTLB_ALL_ADDR | AMDVI_INVD_IOTLB_S; amdvi_update_cmd_tail(softc); } #endif #ifdef notyet /* For Interrupt Remap. 
*/
 static void
 amdvi_cmd_inv_intr_map(struct amdvi_softc *softc, uint16_t devid)
 {
 	struct amdvi_cmd *cmd;

 	cmd = amdvi_get_cmd_tail(softc);
 	KASSERT(cmd != NULL, ("Cmd is NULL"));
 	cmd->opcode = AMDVI_INVD_INTR_OPCODE;
 	cmd->word0 = devid;
 	amdvi_update_cmd_tail(softc);
 #ifdef AMDVI_DEBUG_CMD
 	device_printf(softc->dev, "Invalidate INTR map of devID 0x%x\n", devid);
 #endif
 }
 #endif

 /* Invalidate domain using INVALIDATE_IOMMU_PAGES command. */
 static void
 amdvi_inv_domain(struct amdvi_softc *softc, uint16_t domain_id)
 {
 	struct amdvi_cmd *cmd;

 	cmd = amdvi_get_cmd_tail(softc);
 	KASSERT(cmd != NULL, ("Cmd is NULL"));

 	/*
 	 * See section 3.3.3 of IOMMU spec rev 2.0, software note
 	 * for invalidating domain.
 	 */
 	amdvi_cmd_inv_iommu_pages(softc, domain_id, AMDVI_INVD_PAGE_ALL_ADDR,
 	    false, true, true);

 #ifdef AMDVI_DEBUG_CMD
 	device_printf(softc->dev, "Invalidate domain:0x%x\n", domain_id);
 #endif
 }

+/*
+ * Queue a completion-wait command carrying VERIFY and poll softc->cmp_data
+ * for at most 100 iterations of DELAY(1000).  Returns true when cmp_data
+ * holds VERIFY afterwards, false otherwise.
+ */
 static bool
 amdvi_cmp_wait(struct amdvi_softc *softc)
 {
 	struct amdvi_ctrl *ctrl;
 	const uint64_t VERIFY = 0xA5A5;
 	volatile uint64_t *read;
 	int i;
 	bool status;

 	ctrl = softc->ctrl;
 	read = &softc->cmp_data;
 	*read = 0;
 	amdvi_cmd_cmp(softc, VERIFY);
 	/* Wait for h/w to update completion data. */
 	for (i = 0; i < 100 && (*read != VERIFY); i++) {
 		DELAY(1000);	/* 1 ms */
 	}
 	status = (VERIFY == softc->cmp_data) ? true : false;

 #ifdef AMDVI_DEBUG_CMD
 	if (status)
 		device_printf(softc->dev, "CMD completion DONE Tail:0x%x, "
 		    "Head:0x%x, loop:%d.\n", ctrl->cmd_tail,
-		    ctrl->cmd_head, loop);
+		    ctrl->cmd_head, i);	/* i is the poll-loop counter */
 #endif
 	return (status);
 }

+/*
+ * Wait for the command queue to drain by retrying amdvi_cmp_wait() up to
+ * 10 times.  Returns immediately when AMDVI_CTRL_EN is clear in the
+ * control register; on repeated failure logs the tail/head pointers and
+ * dumps the last command.
+ */
 static void
 amdvi_wait(struct amdvi_softc *softc)
 {
 	struct amdvi_ctrl *ctrl;
 	int i;

 	KASSERT(softc, ("softc is NULL"));

 	ctrl = softc->ctrl;
 	KASSERT(ctrl != NULL, ("ctrl is NULL"));
 	/* Don't wait if h/w is not enabled. */
 	if ((ctrl->control & AMDVI_CTRL_EN) == 0)
 		return;

 	for (i = 0; i < 10; i++) {
 		if (amdvi_cmp_wait(softc))
 			return;
 	}

 	device_printf(softc->dev, "Error: completion failed"
 	    " tail:0x%x, head:0x%x.\n",
 	    ctrl->cmd_tail, ctrl->cmd_head);
 	/* Dump the last command.
*/
 	amdvi_dump_cmds(softc, 1);
 }

+/*
+ * Print up to 'count' command entries, starting one entry before the
+ * current head pointer and stopping early when the tail is reached.
+ */
 static void
 amdvi_dump_cmds(struct amdvi_softc *softc, int count)
 {
 	struct amdvi_ctrl *ctrl;
 	struct amdvi_cmd *cmd;
 	int off, i;

 	ctrl = softc->ctrl;
 	device_printf(softc->dev, "Dump last %d command(s):\n", count);
 	/*
 	 * If h/w is stuck in completion, it is the previous command,
 	 * start dumping from previous command onward.
 	 */
 	off = MOD_DEC(ctrl->cmd_head, sizeof(struct amdvi_cmd),
 	    softc->cmd_max);
 	for (i = 0; off != ctrl->cmd_tail && i < count; i++) {
 		cmd = (struct amdvi_cmd *)((uint8_t *)softc->cmd + off);
 		printf(" [CMD%d, off:0x%x] opcode= 0x%x 0x%x"
 		    " 0x%x 0x%lx\n", i, off, cmd->opcode,
 		    cmd->word0, cmd->word1, cmd->addr);
 		off = (off + sizeof(struct amdvi_cmd)) %
 		    (softc->cmd_max * sizeof(struct amdvi_cmd));
 	}
 }

+/*
+ * Allocate the event log (2^8 entries), program its page frame number
+ * into the control block and reset the head/tail pointers.  Returns 0 on
+ * success or an errno value when the buffer is not page aligned.
+ */
 static int
 amdvi_init_event(struct amdvi_softc *softc)
 {
 	struct amdvi_ctrl *ctrl;

 	ctrl = softc->ctrl;
 	ctrl->event.len = 8;
 	softc->event_max = 1 << ctrl->event.len;
 	softc->event = malloc(sizeof(struct amdvi_event) *
 	    softc->event_max, M_AMDVI, M_WAITOK | M_ZERO);
 	if ((uintptr_t)softc->event & PAGE_MASK) {
-		device_printf(softc->dev, "Event buffer not aligned on page.");
-		return (false);
+		device_printf(softc->dev,
+		    "Event buffer not aligned on page.\n");
+		/*
+		 * 'false' is 0, which the caller's "!= 0" check takes for
+		 * success; report a real error instead.
+		 */
+		return (EINVAL);
 	}
 	ctrl->event.base = vtophys(softc->event) / PAGE_SIZE;

 	/* Reset the pointers.
*/ ctrl->evt_head = 0; ctrl->evt_tail = 0; return (0); } static inline void amdvi_decode_evt_flag(uint16_t flag) { flag &= AMDVI_EVENT_FLAG_MASK; printf(" 0x%b]\n", flag, "\020" "\001GN" "\002NX" "\003US" "\004I" "\005PR" "\006RW" "\007PE" "\010RZ" "\011TR" ); } /* See section 2.5.4 of AMD IOMMU spec ver 2.62.*/ static inline void amdvi_decode_evt_flag_type(uint8_t type) { switch (AMDVI_EVENT_FLAG_TYPE(type)) { case 0: printf("RSVD\n"); break; case 1: printf("Master Abort\n"); break; case 2: printf("Target Abort\n"); break; case 3: printf("Data Err\n"); break; default: break; } } static void amdvi_decode_inv_dte_evt(uint16_t devid, uint16_t domid, uint64_t addr, uint16_t flag) { printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x" " Addr:0x%lx", devid, domid, addr); amdvi_decode_evt_flag(flag); } static void amdvi_decode_pf_evt(uint16_t devid, uint16_t domid, uint64_t addr, uint16_t flag) { printf("\t[IO_PAGE_FAULT EVT: devId:0x%x DomId:0x%x" " Addr:0x%lx", devid, domid, addr); amdvi_decode_evt_flag(flag); } static void amdvi_decode_dte_hwerr_evt(uint16_t devid, uint16_t domid, uint64_t addr, uint16_t flag) { printf("\t[DEV_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x" " Addr:0x%lx", devid, domid, addr); amdvi_decode_evt_flag(flag); amdvi_decode_evt_flag_type(flag); } static void amdvi_decode_page_hwerr_evt(uint16_t devid, uint16_t domid, uint64_t addr, uint16_t flag) { printf("\t[PAGE_TAB_HW_ERR EVT: devId:0x%x DomId:0x%x" " Addr:0x%lx", devid, domid, addr); amdvi_decode_evt_flag(flag); amdvi_decode_evt_flag_type(AMDVI_EVENT_FLAG_TYPE(flag)); } static void amdvi_decode_evt(struct amdvi_event *evt) { struct amdvi_cmd *cmd; switch (evt->opcode) { case AMDVI_EVENT_INVALID_DTE: amdvi_decode_inv_dte_evt(evt->devid, evt->pasid_domid, evt->addr, evt->flag); break; case AMDVI_EVENT_PFAULT: amdvi_decode_pf_evt(evt->devid, evt->pasid_domid, evt->addr, evt->flag); break; case AMDVI_EVENT_DTE_HW_ERROR: amdvi_decode_dte_hwerr_evt(evt->devid, evt->pasid_domid, evt->addr, evt->flag); 
break; case AMDVI_EVENT_PAGE_HW_ERROR: amdvi_decode_page_hwerr_evt(evt->devid, evt->pasid_domid, evt->addr, evt->flag); break; case AMDVI_EVENT_ILLEGAL_CMD: /* FALL THROUGH */ case AMDVI_EVENT_CMD_HW_ERROR: printf("\t[%s EVT]\n", (evt->opcode == AMDVI_EVENT_ILLEGAL_CMD) ? "ILLEGAL CMD" : "CMD HW ERR"); cmd = (struct amdvi_cmd *)PHYS_TO_DMAP(evt->addr); printf("\tCMD opcode= 0x%x 0x%x 0x%x 0x%lx\n", cmd->opcode, cmd->word0, cmd->word1, cmd->addr); break; case AMDVI_EVENT_IOTLB_TIMEOUT: printf("\t[IOTLB_INV_TIMEOUT devid:0x%x addr:0x%lx]\n", evt->devid, evt->addr); break; case AMDVI_EVENT_INVALID_DTE_REQ: printf("\t[INV_DTE devid:0x%x addr:0x%lx type:0x%x tr:%d]\n", evt->devid, evt->addr, evt->flag >> 9, (evt->flag >> 8) & 1); break; case AMDVI_EVENT_INVALID_PPR_REQ: case AMDVI_EVENT_COUNTER_ZERO: printf("AMD-Vi: v2 events.\n"); break; default: printf("Unsupported AMD-Vi event:%d\n", evt->opcode); } } static void amdvi_print_events(struct amdvi_softc *softc) { struct amdvi_ctrl *ctrl; struct amdvi_event *event; int i, size; ctrl = softc->ctrl; size = sizeof(struct amdvi_event); for (i = 0; i < softc->event_max; i++) { event = &softc->event[ctrl->evt_head / size]; if (!event->opcode) break; device_printf(softc->dev, "\t[Event%d: Head:0x%x Tail:0x%x]\n", i, ctrl->evt_head, ctrl->evt_tail); amdvi_decode_evt(event); ctrl->evt_head = MOD_INC(ctrl->evt_head, size, softc->event_max); } } static int amdvi_init_dte(struct amdvi_softc *softc) { struct amdvi_ctrl *ctrl; ctrl = softc->ctrl; ctrl->dte.base = vtophys(amdvi_dte) / PAGE_SIZE; ctrl->dte.size = 0x1FF; /* 2MB device table. */ return (0); } /* * Not all capabilities of IOMMU are available in ACPI IVHD flag * or EFR entry, read directly from device. */ static int amdvi_print_pci_cap(device_t dev) { struct amdvi_softc *softc; uint32_t off, cap; softc = device_get_softc(dev); off = softc->cap_off; /* * Section 3.7.1 of IOMMU sepc rev 2.0. * Read capability from device. 
*/ cap = amdvi_pci_read(softc, off); /* Make sure capability type[18:16] is 3. */ KASSERT((((cap >> 16) & 0x7) == 0x3), ("Not a IOMMU capability 0x%x@0x%x", cap, off)); softc->pci_cap = cap >> 24; device_printf(softc->dev, "PCI cap 0x%x@0x%x feature:%b\n", cap, off, softc->pci_cap, "\20\1IOTLB\2HT\3NPCache\4EFR\5CapExt"); return (0); } static void amdvi_event_intr(void *arg) { struct amdvi_softc *softc; struct amdvi_ctrl *ctrl; softc = (struct amdvi_softc *)arg; ctrl = softc->ctrl; device_printf(softc->dev, "EVT INTR %ld Status:0x%x" " EVT Head:0x%x Tail:0x%x]\n", softc->event_intr_cnt++, ctrl->status, ctrl->evt_head, ctrl->evt_tail); printf(" [CMD Total 0x%lx] Tail:0x%x, Head:0x%x.\n", softc->total_cmd, ctrl->cmd_tail, ctrl->cmd_head); amdvi_print_events(softc); ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR; } static void amdvi_free_evt_intr_res(device_t dev) { struct amdvi_softc *softc; + device_t mmio_dev; softc = device_get_softc(dev); - if (softc->event_tag != NULL) { - bus_teardown_intr(dev, softc->event_res, softc->event_tag); - } - if (softc->event_res != NULL) { - bus_release_resource(dev, SYS_RES_IRQ, softc->event_rid, - softc->event_res); - } - bus_delete_resource(dev, SYS_RES_IRQ, softc->event_rid); - PCIB_RELEASE_MSI(device_get_parent(device_get_parent(dev)), - dev, 1, &softc->event_irq); + mmio_dev = softc->pci_dev; + + IVHD_TEARDOWN_INTR(mmio_dev); } static bool amdvi_alloc_intr_resources(struct amdvi_softc *softc) { struct amdvi_ctrl *ctrl; - device_t dev, pcib; - device_t mmio_dev; - uint64_t msi_addr; - uint32_t msi_data; + device_t dev, mmio_dev; int err; dev = softc->dev; - pcib = device_get_parent(device_get_parent(dev)); - mmio_dev = pci_find_bsf(PCI_RID2BUS(softc->pci_rid), - PCI_RID2SLOT(softc->pci_rid), PCI_RID2FUNC(softc->pci_rid)); - if (device_is_attached(mmio_dev)) { - device_printf(dev, - "warning: IOMMU device is claimed by another driver %s\n", - device_get_driver(mmio_dev)->name); - } - - softc->event_irq = -1; - 
softc->event_rid = 0; - - /* - * Section 3.7.1 of IOMMU rev 2.0. With MSI, there is only one - * interrupt. XXX: Enable MSI/X support. - */ - err = PCIB_ALLOC_MSI(pcib, dev, 1, 1, &softc->event_irq); - if (err) { - device_printf(dev, - "Couldn't find event MSI IRQ resource.\n"); - return (ENOENT); - } - - err = bus_set_resource(dev, SYS_RES_IRQ, softc->event_rid, - softc->event_irq, 1); - if (err) { - device_printf(dev, "Couldn't set event MSI resource.\n"); - return (ENXIO); - } - - softc->event_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, - &softc->event_rid, RF_ACTIVE); - if (!softc->event_res) { - device_printf(dev, - "Unable to allocate event INTR resource.\n"); - return (ENOMEM); - } - - if (bus_setup_intr(dev, softc->event_res, - INTR_TYPE_MISC | INTR_MPSAFE, NULL, amdvi_event_intr, - softc, &softc->event_tag)) { - device_printf(dev, "Fail to setup event intr\n"); - bus_release_resource(softc->dev, SYS_RES_IRQ, - softc->event_rid, softc->event_res); - softc->event_res = NULL; - return (ENXIO); - } - - bus_describe_intr(dev, softc->event_res, softc->event_tag, - "fault"); - - err = PCIB_MAP_MSI(pcib, dev, softc->event_irq, &msi_addr, - &msi_data); - if (err) { - device_printf(dev, - "Event interrupt config failed, err=%d.\n", - err); - amdvi_free_evt_intr_res(softc->dev); - return (err); - } + mmio_dev = softc->pci_dev; /* Clear interrupt status bits. */ ctrl = softc->ctrl; ctrl->status &= AMDVI_STATUS_EV_OF | AMDVI_STATUS_EV_INTR; - /* Now enable MSI interrupt. 
*/
-	pci_enable_msi(mmio_dev, msi_addr, msi_data);
-	return (0);
+	err = IVHD_SETUP_INTR(mmio_dev, amdvi_event_intr, softc, "fault");
+	if (err)
+		device_printf(dev, "Interrupt setup failed on %s\n",
+		    device_get_nameunit(mmio_dev));
+	return (err);
 }

+/*
+ * Print every device-range entry in softc->dev_cfg: the start and end
+ * IDs, the data byte decoded with %b, and whether ATS is enabled.
+ */
 static void
 amdvi_print_dev_cap(struct amdvi_softc *softc)
 {
 	struct ivhd_dev_cfg *cfg;
 	int i;

 	cfg = softc->dev_cfg;
 	for (i = 0; i < softc->dev_cfg_cnt; i++) {
 		device_printf(softc->dev, "device [0x%x - 0x%x]"
 		    "config:%b%s\n", cfg->start_id, cfg->end_id,
 		    cfg->data,
 		    "\020\001INIT\002ExtInt\003NMI"
-		    "\007LINT0\008LINT1",
+		    /*
+		     * %b bit numbers are octal escapes.  "\008" is NUL
+		     * followed by '8' and would end the name list early;
+		     * bit number 8 is written "\010".
+		     */
+		    "\007LINT0\010LINT1",
 		    cfg->enable_ats ? "ATS enabled" : "");
 		cfg++;
 	}
 }

+/*
+ * Sysctl handler shared by the ring-pointer nodes.  arg1 is the softc and
+ * arg2 selects the value reported: 0 = command head, 1 = command tail,
+ * 2 = event head, 3 = event tail.  Any other selector is logged and 0 is
+ * returned.
+ */
 static int
 amdvi_handle_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct amdvi_softc *softc;
 	int result, type, error = 0;

 	softc = (struct amdvi_softc *)arg1;
 	type = arg2;

 	switch (type) {
 	case 0:
 		result = softc->ctrl->cmd_head;
 		error = sysctl_handle_int(oidp, &result, 0, req);
 		break;
 	case 1:
 		result = softc->ctrl->cmd_tail;
 		error = sysctl_handle_int(oidp, &result, 0, req);
 		break;
 	case 2:
 		result = softc->ctrl->evt_head;
 		error = sysctl_handle_int(oidp, &result, 0, req);
 		break;
 	case 3:
 		result = softc->ctrl->evt_tail;
 		error = sysctl_handle_int(oidp, &result, 0, req);
 		break;
 	default:
 		device_printf(softc->dev, "Unknown sysctl:%d\n", type);
 	}

 	return (error);
 }

 static void
 amdvi_add_sysctl(struct amdvi_softc *softc)
 {
 	struct sysctl_oid_list *child;
 	struct sysctl_ctx_list *ctx;
 	device_t dev;

 	dev = softc->dev;
 	ctx = device_get_sysctl_ctx(dev);
 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));

 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "event_intr_count", CTLFLAG_RD,
 	    &softc->event_intr_cnt, "Event interrupt count");
 	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "command_count", CTLFLAG_RD,
 	    &softc->total_cmd, "Command submitted count");
 	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "pci_rid", CTLFLAG_RD,
 	    &softc->pci_rid, 0, "IOMMU RID");
 	SYSCTL_ADD_U16(ctx, child, OID_AUTO, "start_dev_rid", CTLFLAG_RD,
 	    &softc->start_dev_rid, 0, "Start of device under this IOMMU");
 	SYSCTL_ADD_U16(ctx, child,
OID_AUTO, "end_dev_rid", CTLFLAG_RD,
 	    &softc->end_dev_rid, 0, "End of device under this IOMMU");
+	/*
+	 * The last argument before the handler is the selector that
+	 * amdvi_handle_sysctl() receives as arg2.
+	 */
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_head",
 	    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 0,
 	    amdvi_handle_sysctl, "IU", "Command head");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "command_tail",
 	    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 1,
 	    amdvi_handle_sysctl, "IU", "Command tail");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_head",
 	    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 2,
-	    amdvi_handle_sysctl, "IU", "Command head");
+	    amdvi_handle_sysctl, "IU", "Event head");
 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "event_tail",
 	    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, softc, 3,
-	    amdvi_handle_sysctl, "IU", "Command tail");
+	    amdvi_handle_sysctl, "IU", "Event tail");
 }

+/*
+ * Bring up one IOMMU: decide on IOTLB use, print the device and PCI
+ * capabilities, set up the command buffer, event log and device table,
+ * hook the event interrupt and register the sysctl nodes.  Returns 0 on
+ * success or the first non-zero status reported by a step.
+ */
 int
 amdvi_setup_hw(struct amdvi_softc *softc)
 {
 	device_t dev;
 	int status;

 	dev = softc->dev;

 	amdvi_hw_enable_iotlb(softc);

 	amdvi_print_dev_cap(softc);

 	if ((status = amdvi_print_pci_cap(dev)) != 0) {
 		device_printf(dev, "PCI capability.\n");
 		return (status);
 	}
 	if ((status = amdvi_init_cmd(softc)) != 0) {
 		device_printf(dev, "Couldn't configure command buffer.\n");
 		return (status);
 	}
 	if ((status = amdvi_init_event(softc)) != 0) {
 		device_printf(dev, "Couldn't configure event buffer.\n");
 		return (status);
 	}
 	if ((status = amdvi_init_dte(softc)) != 0) {
 		device_printf(dev, "Couldn't configure device table.\n");
 		return (status);
 	}
 	if ((status = amdvi_alloc_intr_resources(softc)) != 0) {
 		return (status);
 	}
 	amdvi_add_sysctl(softc);
 	return (0);
 }

 int
 amdvi_teardown_hw(struct amdvi_softc *softc)
 {
 	device_t dev;

 	dev = softc->dev;

 	/*
 	 * Called after disable, h/w is stopped by now, free all the resources.
*/ amdvi_free_evt_intr_res(dev); if (softc->cmd) free(softc->cmd, M_AMDVI); if (softc->event) free(softc->event, M_AMDVI); return (0); } /*********** bhyve interfaces *********************/ static int amdvi_init(void) { if (!ivhd_count) { return (EIO); } if (!amdvi_enable_user && ivhd_count) { printf("bhyve: Found %d AMD-Vi/IOMMU device(s), " "use hw.vmm.amdvi.enable=1 to enable pass-through.\n", ivhd_count); return (EINVAL); } return (0); } static void amdvi_cleanup(void) { /* Nothing. */ } static uint16_t amdvi_domainId(void) { /* * If we hit maximum domain limit, rollover leaving host * domain(0). * XXX: make sure that this domain is not used. */ if (amdvi_dom_id == AMDVI_MAX_DOMAIN) amdvi_dom_id = 1; return ((uint16_t)amdvi_dom_id++); } static void amdvi_do_inv_domain(uint16_t domain_id, bool create) { struct amdvi_softc *softc; int i; for (i = 0; i < ivhd_count; i++) { softc = device_get_softc(ivhd_devs[i]); KASSERT(softc, ("softc is NULL")); /* * If not present pages are cached, invalidate page after * creating domain. */ #if 0 if (create && ((softc->pci_cap & AMDVI_PCI_CAP_NPCACHE) == 0)) continue; #endif amdvi_inv_domain(softc, domain_id); amdvi_wait(softc); } } static void * amdvi_create_domain(vm_paddr_t maxaddr) { struct amdvi_domain *dom; dom = malloc(sizeof(struct amdvi_domain), M_AMDVI, M_ZERO | M_WAITOK); dom->id = amdvi_domainId(); //dom->maxaddr = maxaddr; #ifdef AMDVI_DEBUG_CMD printf("Created domain #%d\n", dom->id); #endif /* * Host domain(#0) don't create translation table. */ if (dom->id || amdvi_host_ptp) dom->ptp = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO); dom->ptp_level = amdvi_ptp_level; amdvi_do_inv_domain(dom->id, true); SLIST_INSERT_HEAD(&dom_head, dom, next); return (dom); } static void amdvi_free_ptp(uint64_t *ptp, int level) { int i; if (level < 1) return; for (i = 0; i < NPTEPG ; i++) { if ((ptp[i] & AMDVI_PT_PRESENT) == 0) continue; /* XXX: Add super-page or PTE mapping > 4KB. */ #ifdef notyet /* Super-page mapping. 
*/
 		if (AMDVI_PD_SUPER(ptp[i]))
 			continue;
 #endif

 		amdvi_free_ptp((uint64_t *)PHYS_TO_DMAP(ptp[i]
 		    & AMDVI_PT_MASK), level - 1);
 	}

 	free(ptp, M_AMDVI);
 }

+/*
+ * Destroy a domain created by amdvi_create_domain(): free its page-table
+ * pages if it has any, issue a domain invalidation on every IOMMU, unlink
+ * it from dom_head and free the structure.
+ */
 static void
 amdvi_destroy_domain(void *arg)
 {
 	struct amdvi_domain *domain;

 	domain = (struct amdvi_domain *)arg;
 	KASSERT(domain, ("domain is NULL"));
 #ifdef AMDVI_DEBUG_CMD
 	printf("Destroying domain %d\n", domain->id);
 #endif
 	if (domain->ptp)
 		amdvi_free_ptp(domain->ptp, domain->ptp_level);

 	amdvi_do_inv_domain(domain->id, false);
 	SLIST_REMOVE(&dom_head, domain, amdvi_domain, next);
 	free(domain, M_AMDVI);
 }

+/*
+ * Set (create == true) or clear (create == false) the leaf entry that
+ * maps 'gpa' in the table rooted at 'pt', allocating missing intermediate
+ * tables only when creating.  'hpa' and 'gpa' must both be aligned to
+ * 'pg_size'.  Returns the number of bytes covered by the leaf entry, or 0
+ * when pg_size is 0 or an address is misaligned.
+ */
 static uint64_t
 amdvi_set_pt(uint64_t *pt, int level, vm_paddr_t gpa,
     vm_paddr_t hpa, uint64_t pg_size, bool create)
 {
 	uint64_t *page, pa;
 	int shift, index;
 	const int PT_SHIFT = 9;
 	const int PT_INDEX_MASK = (1 << PT_SHIFT) - 1;	/* Based on PT_SHIFT */

 	if (!pg_size)
 		return (0);

 	if (hpa & (pg_size - 1)) {
 		printf("HPA is not size aligned.\n");
 		return (0);
 	}
 	if (gpa & (pg_size - 1)) {
-		printf("HPA is not size aligned.\n");
+		/* This branch checks the guest address, not the host one. */
+		printf("GPA is not size aligned.\n");
 		return (0);
 	}
 	shift = PML4SHIFT;
 	while ((shift > PAGE_SHIFT) && (pg_size < (1UL << shift))) {
 		index = (gpa >> shift) & PT_INDEX_MASK;

 		if ((pt[index] == 0) && create) {
 			page = malloc(PAGE_SIZE, M_AMDVI, M_WAITOK | M_ZERO);
 			pa = vtophys(page);
 			pt[index] = pa | AMDVI_PT_PRESENT | AMDVI_PT_RW |
 			    ((level - 1) << AMDVI_PD_LEVEL_SHIFT);
 		}
 #ifdef AMDVI_DEBUG_PTE
 		if ((gpa % 0x1000000) == 0)
 			printf("[level%d, shift = %d]PTE:0x%lx\n",
 			    level, shift, pt[index]);
 #endif
+		/*
+		 * NOTE(review): when create is false and pt[index] is 0, pa
+		 * is 0 here and the walk continues through PHYS_TO_DMAP(0).
+		 * Confirm callers never unmap a range that was not mapped.
+		 */
 #define PTE2PA(x)	((uint64_t)(x) & AMDVI_PT_MASK)
 		pa = PTE2PA(pt[index]);
 		pt = (uint64_t *)PHYS_TO_DMAP(pa);
 		shift -= PT_SHIFT;
 		level--;
 	}

 	/* Leaf entry.
*/ index = (gpa >> shift) & PT_INDEX_MASK; if (create) { pt[index] = hpa | AMDVI_PT_RW | AMDVI_PT_PRESENT; } else pt[index] = 0; #ifdef AMDVI_DEBUG_PTE if ((gpa % 0x1000000) == 0) printf("[Last level%d, shift = %d]PTE:0x%lx\n", level, shift, pt[index]); #endif return (1ULL << shift); } static uint64_t amdvi_update_mapping(struct amdvi_domain *domain, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t size, bool create) { uint64_t mapped, *ptp, len; int level; KASSERT(domain, ("domain is NULL")); level = domain->ptp_level; KASSERT(level, ("Page table level is 0")); ptp = domain->ptp; KASSERT(ptp, ("PTP is NULL")); mapped = 0; while (mapped < size) { len = amdvi_set_pt(ptp, level, gpa + mapped, hpa + mapped, PAGE_SIZE, create); if (!len) { printf("Error: Couldn't map HPA:0x%lx GPA:0x%lx\n", hpa, gpa); return (0); } mapped += len; } return (mapped); } static uint64_t amdvi_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len) { struct amdvi_domain *domain; domain = (struct amdvi_domain *)arg; if (domain->id && !domain->ptp) { printf("ptp is NULL"); return (-1); } /* * If host domain is created w/o page table, skip IOMMU page * table set-up. */ if (domain->ptp) return (amdvi_update_mapping(domain, gpa, hpa, len, true)); else return (len); } static uint64_t amdvi_destroy_mapping(void *arg, vm_paddr_t gpa, uint64_t len) { struct amdvi_domain *domain; domain = (struct amdvi_domain *)arg; /* * If host domain is created w/o page table, skip IOMMU page * table set-up. */ if (domain->ptp) return (amdvi_update_mapping(domain, gpa, 0, len, false)); return (len); } static struct amdvi_softc * amdvi_find_iommu(uint16_t devid) { struct amdvi_softc *softc; int i; for (i = 0; i < ivhd_count; i++) { softc = device_get_softc(ivhd_devs[i]); if ((devid >= softc->start_dev_rid) && (devid <= softc->end_dev_rid)) return (softc); } /* * XXX: BIOS bug, device not in IVRS table, assume its from first IOMMU. 
*/ printf("BIOS bug device(%d.%d.%d) doesn't have IVHD entry.\n", RID2PCI_STR(devid)); return (device_get_softc(ivhd_devs[0])); } /* * Set-up device table entry. * IOMMU spec Rev 2.0, section 3.2.2.2, some of the fields must * be set concurrently, e.g. read and write bits. */ static void amdvi_set_dte(struct amdvi_domain *domain, uint16_t devid, bool enable) { struct amdvi_softc *softc; struct amdvi_dte* temp; KASSERT(domain, ("domain is NULL for pci_rid:0x%x\n", devid)); softc = amdvi_find_iommu(devid); KASSERT(softc, ("softc is NULL for pci_rid:0x%x\n", devid)); temp = &amdvi_dte[devid]; #ifdef AMDVI_ATS_ENABLE /* If IOMMU and device support IOTLB, enable it. */ if (amdvi_dev_support_iotlb(softc, devid) && softc->iotlb) temp->iotlb_enable = 1; #endif /* Avoid duplicate I/O faults. */ temp->sup_second_io_fault = 1; temp->sup_all_io_fault = amdvi_disable_io_fault; temp->dt_valid = 1; temp->domain_id = domain->id; if (enable) { if (domain->ptp) { temp->pt_base = vtophys(domain->ptp) >> 12; temp->pt_level = amdvi_ptp_level; } /* * XXX: Page table valid[TV] bit must be set even if host domain * page tables are not enabled. 
*/ temp->pt_valid = 1; temp->read_allow = 1; temp->write_allow = 1; } } static void amdvi_inv_device(uint16_t devid) { struct amdvi_softc *softc; softc = amdvi_find_iommu(devid); KASSERT(softc, ("softc is NULL")); amdvi_cmd_inv_dte(softc, devid); #ifdef AMDVI_ATS_ENABLE if (amdvi_dev_support_iotlb(softc, devid)) amdvi_cmd_inv_iotlb(softc, devid); #endif amdvi_wait(softc); } static void amdvi_add_device(void *arg, uint16_t devid) { struct amdvi_domain *domain; domain = (struct amdvi_domain *)arg; KASSERT(domain != NULL, ("domain is NULL")); #ifdef AMDVI_DEBUG_CMD printf("Assigning device(%d.%d.%d) to domain:%d\n", RID2PCI_STR(devid), domain->id); #endif amdvi_set_dte(domain, devid, true); amdvi_inv_device(devid); } static void amdvi_remove_device(void *arg, uint16_t devid) { struct amdvi_domain *domain; domain = (struct amdvi_domain *)arg; #ifdef AMDVI_DEBUG_CMD printf("Remove device(0x%x) from domain:%d\n", devid, domain->id); #endif amdvi_set_dte(domain, devid, false); amdvi_inv_device(devid); } static void amdvi_enable(void) { struct amdvi_ctrl *ctrl; struct amdvi_softc *softc; uint64_t val; int i; for (i = 0; i < ivhd_count; i++) { softc = device_get_softc(ivhd_devs[i]); KASSERT(softc, ("softc is NULL\n")); ctrl = softc->ctrl; KASSERT(ctrl, ("ctrl is NULL\n")); val = ( AMDVI_CTRL_EN | AMDVI_CTRL_CMD | AMDVI_CTRL_ELOG | AMDVI_CTRL_ELOGINT | AMDVI_CTRL_INV_TO_1S); if (softc->ivhd_flag & IVHD_FLAG_COH) val |= AMDVI_CTRL_COH; if (softc->ivhd_flag & IVHD_FLAG_HTT) val |= AMDVI_CTRL_HTT; if (softc->ivhd_flag & IVHD_FLAG_RPPW) val |= AMDVI_CTRL_RPPW; if (softc->ivhd_flag & IVHD_FLAG_PPW) val |= AMDVI_CTRL_PPW; if (softc->ivhd_flag & IVHD_FLAG_ISOC) val |= AMDVI_CTRL_ISOC; ctrl->control = val; } } static void amdvi_disable(void) { struct amdvi_ctrl *ctrl; struct amdvi_softc *softc; int i; for (i = 0; i < ivhd_count; i++) { softc = device_get_softc(ivhd_devs[i]); KASSERT(softc, ("softc is NULL\n")); ctrl = softc->ctrl; KASSERT(ctrl, ("ctrl is NULL\n")); ctrl->control = 
0; } } static void amdvi_inv_tlb(void *arg) { struct amdvi_domain *domain; domain = (struct amdvi_domain *)arg; KASSERT(domain, ("domain is NULL")); amdvi_do_inv_domain(domain->id, false); } struct iommu_ops iommu_ops_amd = { amdvi_init, amdvi_cleanup, amdvi_enable, amdvi_disable, amdvi_create_domain, amdvi_destroy_domain, amdvi_create_mapping, amdvi_destroy_mapping, amdvi_add_device, amdvi_remove_device, amdvi_inv_tlb }; diff --git a/sys/amd64/vmm/amd/amdvi_priv.h b/sys/amd64/vmm/amd/amdvi_priv.h index 2db6914f08f4..0eae7ca6ca4c 100644 --- a/sys/amd64/vmm/amd/amdvi_priv.h +++ b/sys/amd64/vmm/amd/amdvi_priv.h @@ -1,413 +1,410 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016 Anish Gupta (anish@freebsd.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _AMDVI_PRIV_H_ #define _AMDVI_PRIV_H_ #include #define BIT(n) (1ULL << (n)) /* Return value of bits[n:m] where n and (n >= ) m are bit positions. */ #define REG_BITS(x, n, m) (((x) >> (m)) & \ ((1 << (((n) - (m)) + 1)) - 1)) /* * IOMMU PCI capability. */ #define AMDVI_PCI_CAP_IOTLB BIT(0) /* IOTLB is supported. */ #define AMDVI_PCI_CAP_HT BIT(1) /* HyperTransport tunnel support. */ #define AMDVI_PCI_CAP_NPCACHE BIT(2) /* Not present page cached. */ #define AMDVI_PCI_CAP_EFR BIT(3) /* Extended features. */ #define AMDVI_PCI_CAP_EXT BIT(4) /* Miscellaneous information reg. */ /* * IOMMU extended features. */ #define AMDVI_EX_FEA_PREFSUP BIT(0) /* Prefetch command support. */ #define AMDVI_EX_FEA_PPRSUP BIT(1) /* PPR support */ #define AMDVI_EX_FEA_XTSUP BIT(2) /* Reserved */ #define AMDVI_EX_FEA_NXSUP BIT(3) /* No-execute. */ #define AMDVI_EX_FEA_GTSUP BIT(4) /* Guest translation support. */ #define AMDVI_EX_FEA_EFRW BIT(5) /* Reserved */ #define AMDVI_EX_FEA_IASUP BIT(6) /* Invalidate all command supp. */ #define AMDVI_EX_FEA_GASUP BIT(7) /* Guest APIC or AVIC support. */ #define AMDVI_EX_FEA_HESUP BIT(8) /* Hardware Error. */ #define AMDVI_EX_FEA_PCSUP BIT(9) /* Performance counters support. */ /* XXX: add more EFER bits. */ /* * Device table entry or DTE * NOTE: Must be 256-bits/32 bytes aligned. */ struct amdvi_dte { uint32_t dt_valid:1; /* Device Table valid. */ uint32_t pt_valid:1; /* Page translation valid. 
*/ uint16_t :7; /* Reserved[8:2] */ uint8_t pt_level:3; /* Paging level, 0 to disable. */ uint64_t pt_base:40; /* Page table root pointer. */ uint8_t :3; /* Reserved[54:52] */ uint8_t gv_valid:1; /* Revision 2, GVA to SPA. */ uint8_t gv_level:2; /* Revision 2, GLX level. */ uint8_t gv_cr3_lsb:3; /* Revision 2, GCR3[14:12] */ uint8_t read_allow:1; /* I/O read enabled. */ uint8_t write_allow:1; /* I/O write enabled. */ uint8_t :1; /* Reserved[63] */ uint16_t domain_id:16; /* Domain ID */ uint16_t gv_cr3_lsb2:16; /* Revision 2, GCR3[30:15] */ uint8_t iotlb_enable:1; /* Device support IOTLB */ uint8_t sup_second_io_fault:1; /* Suppress subsequent I/O faults. */ uint8_t sup_all_io_fault:1; /* Suppress all I/O page faults. */ uint8_t IOctl:2; /* Port I/O control. */ uint8_t iotlb_cache_disable:1; /* IOTLB cache hints. */ uint8_t snoop_disable:1; /* Snoop disable. */ uint8_t allow_ex:1; /* Allow exclusion. */ uint8_t sysmgmt:2; /* System management message.*/ uint8_t :1; /* Reserved[106] */ uint32_t gv_cr3_msb:21; /* Revision 2, GCR3[51:31] */ uint8_t intmap_valid:1; /* Interrupt map valid. */ uint8_t intmap_len:4; /* Interrupt map table length. */ uint8_t intmap_ign:1; /* Ignore unmapped interrupts. */ uint64_t intmap_base:46; /* IntMap base. */ uint8_t :4; /* Reserved[183:180] */ uint8_t init_pass:1; /* INIT pass through or PT */ uint8_t extintr_pass:1; /* External Interrupt PT */ uint8_t nmi_pass:1; /* NMI PT */ uint8_t :1; /* Reserved[187] */ uint8_t intr_ctrl:2; /* Interrupt control */ uint8_t lint0_pass:1; /* LINT0 PT */ uint8_t lint1_pass:1; /* LINT1 PT */ uint64_t :64; /* Reserved[255:192] */ } __attribute__((__packed__)); CTASSERT(sizeof(struct amdvi_dte) == 32); /* * IOMMU command entry. */ struct amdvi_cmd { uint32_t word0; uint32_t word1:28; uint8_t opcode:4; uint64_t addr; } __attribute__((__packed__)); /* Command opcodes. */ #define AMDVI_CMP_WAIT_OPCODE 0x1 /* Completion wait. */ #define AMDVI_INVD_DTE_OPCODE 0x2 /* Invalidate device table entry. 
*/ #define AMDVI_INVD_PAGE_OPCODE 0x3 /* Invalidate pages. */ #define AMDVI_INVD_IOTLB_OPCODE 0x4 /* Invalidate IOTLB pages. */ #define AMDVI_INVD_INTR_OPCODE 0x5 /* Invalidate Interrupt table. */ #define AMDVI_PREFETCH_PAGES_OPCODE 0x6 /* Prefetch IOMMU pages. */ #define AMDVI_COMP_PPR_OPCODE 0x7 /* Complete PPR request. */ #define AMDVI_INV_ALL_OPCODE 0x8 /* Invalidate all. */ /* Completion wait attributes. */ #define AMDVI_CMP_WAIT_STORE BIT(0) /* Write back data. */ #define AMDVI_CMP_WAIT_INTR BIT(1) /* Completion wait interrupt. */ #define AMDVI_CMP_WAIT_FLUSH BIT(2) /* Flush queue. */ /* Invalidate page. */ #define AMDVI_INVD_PAGE_S BIT(0) /* Invalidation size. */ #define AMDVI_INVD_PAGE_PDE BIT(1) /* Invalidate PDE. */ #define AMDVI_INVD_PAGE_GN_GVA BIT(2) /* GPA or GVA. */ #define AMDVI_INVD_PAGE_ALL_ADDR (0x7FFFFFFFFFFFFULL << 12) /* Invalidate IOTLB. */ #define AMDVI_INVD_IOTLB_S BIT(0) /* Invalidation size 4k or addr */ #define AMDVI_INVD_IOTLB_GN_GVA BIT(2) /* GPA or GVA. */ #define AMDVI_INVD_IOTLB_ALL_ADDR (0x7FFFFFFFFFFFFULL << 12) /* XXX: add more command entries. */ /* * IOMMU event entry. */ struct amdvi_event { uint16_t devid; uint16_t pasid_hi; uint16_t pasid_domid; /* PASID low or DomainID */ uint16_t flag:12; uint8_t opcode:4; uint64_t addr; } __attribute__((__packed__)); /* The packed event entry must be exactly 16 bytes. */ CTASSERT(sizeof(struct amdvi_event) == 16); /* Various event types. */ #define AMDVI_EVENT_INVALID_DTE 0x1 #define AMDVI_EVENT_PFAULT 0x2 #define AMDVI_EVENT_DTE_HW_ERROR 0x3 #define AMDVI_EVENT_PAGE_HW_ERROR 0x4 #define AMDVI_EVENT_ILLEGAL_CMD 0x5 #define AMDVI_EVENT_CMD_HW_ERROR 0x6 #define AMDVI_EVENT_IOTLB_TIMEOUT 0x7 #define AMDVI_EVENT_INVALID_DTE_REQ 0x8 #define AMDVI_EVENT_INVALID_PPR_REQ 0x9 #define AMDVI_EVENT_COUNTER_ZERO 0xA #define AMDVI_EVENT_FLAG_MASK 0x1FF /* Mask for event flags. */ /* Extracts the two bits [10:9] of x. */ #define AMDVI_EVENT_FLAG_TYPE(x) (((x) >> 9) & 0x3) /* * IOMMU control block.
*/ struct amdvi_ctrl { struct { uint16_t size:9; uint16_t :3; uint64_t base:40; /* Devtable register base. */ uint16_t :12; } dte; struct { uint16_t :12; uint64_t base:40; uint8_t :4; uint8_t len:4; uint8_t :4; } cmd; struct { uint16_t :12; uint64_t base:40; uint8_t :4; uint8_t len:4; uint8_t :4; } event; /* NOTE(review): control is only 13 bits wide although AMDVI_CTRL_* flags are defined up to BIT(17) - confirm the intended width. */ uint16_t control :13; uint64_t :51; struct { uint8_t enable:1; uint8_t allow:1; uint16_t :10; uint64_t base:40; uint16_t :12; uint16_t :12; uint64_t limit:40; uint16_t :12; } excl; /* * Revision 2 only. */ uint64_t ex_feature; struct { uint16_t :12; uint64_t base:40; uint8_t :4; uint8_t len:4; uint8_t :4; } ppr; uint64_t first_event; uint64_t second_event; uint64_t event_status; /* Revision 2 only, end. */ uint8_t pad1[0x1FA8]; /* Padding. */ uint32_t cmd_head:19; uint64_t :45; uint32_t cmd_tail:19; uint64_t :45; uint32_t evt_head:19; uint64_t :45; uint32_t evt_tail:19; uint64_t :45; uint32_t status:19; uint64_t :45; uint64_t pad2; uint8_t :4; uint16_t ppr_head:15; uint64_t :45; uint8_t :4; uint16_t ppr_tail:15; uint64_t :45; uint8_t pad3[0x1FC0]; /* Padding. */ /* XXX: More for rev2. */ } __attribute__((__packed__)); /* Compile-time checks that pad1, pad2 and pad3 sit at offsets 0x58, 0x2028 and 0x2040. */ CTASSERT(offsetof(struct amdvi_ctrl, pad1)== 0x58); CTASSERT(offsetof(struct amdvi_ctrl, pad2)== 0x2028); CTASSERT(offsetof(struct amdvi_ctrl, pad3)== 0x2040); #define AMDVI_MMIO_V1_SIZE (4 * PAGE_SIZE) /* v1 size */ /* * AMD IOMMU v2 size including event counters */ #define AMDVI_MMIO_V2_SIZE (8 * PAGE_SIZE) /* The packed control block must be exactly 0x4000 bytes, which must also equal AMDVI_MMIO_V1_SIZE. */ CTASSERT(sizeof(struct amdvi_ctrl) == 0x4000); CTASSERT(sizeof(struct amdvi_ctrl) == AMDVI_MMIO_V1_SIZE); /* IVHD flag */ #define IVHD_FLAG_HTT BIT(0) /* Hypertransport Tunnel. */ #define IVHD_FLAG_PPW BIT(1) /* Pass posted write. */ #define IVHD_FLAG_RPPW BIT(2) /* Response pass posted write. */ #define IVHD_FLAG_ISOC BIT(3) /* Isoc support. */ #define IVHD_FLAG_IOTLB BIT(4) /* IOTLB support. */ #define IVHD_FLAG_COH BIT(5) /* Coherent control, default 1 */ #define IVHD_FLAG_PFS BIT(6) /* Prefetch IOMMU pages.
*/ #define IVHD_FLAG_PPRS BIT(7) /* Peripheral page support. */ /* IVHD device entry data setting. */ #define IVHD_DEV_LINT0_PASS BIT(6) /* LINT0 interrupts. */ #define IVHD_DEV_LINT1_PASS BIT(7) /* LINT1 interrupts. */ /* Bit[5:4] for System Mgmt. Bit3 is reserved. */ #define IVHD_DEV_INIT_PASS BIT(0) /* INIT */ #define IVHD_DEV_EXTINTR_PASS BIT(1) /* ExtInt */ #define IVHD_DEV_NMI_PASS BIT(2) /* NMI */ /* IVHD 8-byte extended data settings. */ #define IVHD_DEV_EXT_ATS_DISABLE BIT(31) /* Disable ATS */ /* IOMMU control register. */ #define AMDVI_CTRL_EN BIT(0) /* IOMMU enable. */ #define AMDVI_CTRL_HTT BIT(1) /* Hypertransport tunnel enable. */ #define AMDVI_CTRL_ELOG BIT(2) /* Event log enable. */ #define AMDVI_CTRL_ELOGINT BIT(3) /* Event log interrupt. */ #define AMDVI_CTRL_COMINT BIT(4) /* Completion wait interrupt. */ #define AMDVI_CTRL_PPW BIT(8) /* Pass posted write (mirrors IVHD_FLAG_PPW). */ #define AMDVI_CTRL_RPPW BIT(9) /* Response pass posted write (mirrors IVHD_FLAG_RPPW). */ #define AMDVI_CTRL_COH BIT(10) /* Coherent control (mirrors IVHD_FLAG_COH). */ #define AMDVI_CTRL_ISOC BIT(11) /* Isoc (mirrors IVHD_FLAG_ISOC). */ #define AMDVI_CTRL_CMD BIT(12) /* Command buffer enable. */ #define AMDVI_CTRL_PPRLOG BIT(13) #define AMDVI_CTRL_PPRINT BIT(14) #define AMDVI_CTRL_PPREN BIT(15) #define AMDVI_CTRL_GTE BIT(16) /* Guest translation enable. */ #define AMDVI_CTRL_GAE BIT(17) /* Guest APIC enable. */ /* Invalidation timeout. */ /* NOTE(review): the values below are raw field encodings, not BIT() masks; they must be shifted into the register's time-out field before being ORed with the flags above (unshifted, 4 is the same bit as AMDVI_CTRL_ELOG) - confirm the field position against the IOMMU specification. */ #define AMDVI_CTRL_INV_NO_TO 0 /* No timeout. */ #define AMDVI_CTRL_INV_TO_1ms 1 /* 1 ms */ #define AMDVI_CTRL_INV_TO_10ms 2 /* 10 ms */ #define AMDVI_CTRL_INV_TO_100ms 3 /* 100 ms */ #define AMDVI_CTRL_INV_TO_1S 4 /* 1 second */ #define AMDVI_CTRL_INV_TO_10S 5 /* 10 second */ #define AMDVI_CTRL_INV_TO_100S 6 /* 100 second */ /* * Max number of PCI devices. * 256 bus x 32 slot/devices x 8 functions. */ #define PCI_NUM_DEV_MAX 0x10000 /* Maximum number of domains supported by IOMMU. */ #define AMDVI_MAX_DOMAIN (BIT(16) - 1) /* * IOMMU Page Table attributes.
*/ #define AMDVI_PT_PRESENT BIT(0) #define AMDVI_PT_COHERENT BIT(60) #define AMDVI_PT_READ BIT(61) #define AMDVI_PT_WRITE BIT(62) #define AMDVI_PT_RW (AMDVI_PT_READ | AMDVI_PT_WRITE) #define AMDVI_PT_MASK 0xFFFFFFFFFF000UL /* Only [51:12] for PA */ #define AMDVI_PD_LEVEL_SHIFT 9 #define AMDVI_PD_SUPER(x) (((x) >> AMDVI_PD_LEVEL_SHIFT) == 7) /* * IOMMU Status, offset 0x2020 */ #define AMDVI_STATUS_EV_OF BIT(0) /* Event overflow. */ #define AMDVI_STATUS_EV_INTR BIT(1) /* Event interrupt. */ /* Completion wait command completed. */ #define AMDVI_STATUS_CMP BIT(2) #define IVRS_CTRL_RID 1 /* MMIO RID */ /* ACPI IVHD */ struct ivhd_dev_cfg { uint32_t start_id; uint32_t end_id; uint8_t data; /* Device configuration. */ bool enable_ats; /* ATS enabled for the device. */ int ats_qlen; /* ATS invalidation queue depth. */ }; struct amdvi_domain { uint64_t *ptp; /* Highest level page table */ int ptp_level; /* Level of page tables */ u_int id; /* Domain id */ SLIST_ENTRY (amdvi_domain) next; }; /* * Different type of IVHD. * XXX: Use AcpiIvrsType once new IVHD types are available. */ enum IvrsType { IVRS_TYPE_HARDWARE_LEGACY = ACPI_IVRS_TYPE_HARDWARE1, /* Legacy without EFRi support. */ IVRS_TYPE_HARDWARE_EFR = ACPI_IVRS_TYPE_HARDWARE2, /* With EFR support. */ IVRS_TYPE_HARDWARE_MIXED = 0x40, /* Mixed with EFR support. */ }; /* * AMD IOMMU softc. */ struct amdvi_softc { struct amdvi_ctrl *ctrl; /* Control area. */ device_t dev; /* IOMMU device. */ + device_t pci_dev; /* IOMMU PCI function device. */ enum IvrsType ivhd_type; /* IOMMU IVHD type. */ bool iotlb; /* IOTLB supported by IOMMU */ struct amdvi_cmd *cmd; /* Command descriptor area. */ int cmd_max; /* Max number of commands. */ uint64_t cmp_data; /* Command completion write back. */ struct amdvi_event *event; /* Event descriptor area. */ - struct resource *event_res; /* Event interrupt resource. */ - void *event_tag; /* Event interrupt tag. */ int event_max; /* Max number of events. 
*/ - int event_irq; - int event_rid; /* ACPI various flags. */ uint32_t ivhd_flag; /* ACPI IVHD flag. */ uint32_t ivhd_feature; /* ACPI v1 Reserved or v2 attribute. */ uint64_t ext_feature; /* IVHD EFR */ /* PCI related. */ uint16_t cap_off; /* PCI Capability offset. */ uint8_t pci_cap; /* PCI capability. */ uint16_t pci_seg; /* IOMMU PCI domain/segment. */ uint16_t pci_rid; /* PCI BDF of IOMMU */ /* Device range under this IOMMU. */ uint16_t start_dev_rid; /* First device under this IOMMU. */ uint16_t end_dev_rid; /* Last device under this IOMMU. */ /* BIOS provided device configuration for end points. */ struct ivhd_dev_cfg dev_cfg[10]; int dev_cfg_cnt; /* Software statistics. */ uint64_t event_intr_cnt; /* Total event INTR count. */ uint64_t total_cmd; /* Total number of commands. */ }; int amdvi_setup_hw(struct amdvi_softc *softc); int amdvi_teardown_hw(struct amdvi_softc *softc); #endif /* _AMDVI_PRIV_H_ */ diff --git a/sys/amd64/vmm/amd/ivhd_if.m b/sys/amd64/vmm/amd/ivhd_if.m new file mode 100644 index 000000000000..eaae01ae0131 --- /dev/null +++ b/sys/amd64/vmm/amd/ivhd_if.m @@ -0,0 +1,46 @@ +#- +# Copyright (c) 2021 The FreeBSD Fondation +# +# Portions of this software were developed by Ka Ho Ng +# under sponsorship from the FreeBSD Foundation. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ +# + +#include +#include +#include + +INTERFACE ivhd; + +METHOD int setup_intr { + device_t dev; + driver_intr_t handler; + void *arg; + const char *desc; +}; + +METHOD int teardown_intr { + device_t dev; +}; diff --git a/sys/amd64/vmm/amd/ivrs_drv.c b/sys/amd64/vmm/amd/ivrs_drv.c index d33229623a96..6291895c212f 100644 --- a/sys/amd64/vmm/amd/ivrs_drv.c +++ b/sys/amd64/vmm/amd/ivrs_drv.c @@ -1,734 +1,740 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2016, Anish Gupta (anish@freebsd.org) + * Copyright (c) 2021 The FreeBSD Foundation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include +#include +#include #include "io/iommu.h" #include "amdvi_priv.h" device_t *ivhd_devs; /* IVHD or AMD-Vi device list. */ int ivhd_count; /* Number of IVHD header. */ /* * Cached IVHD header list. * Single entry for each IVHD, filtered the legacy one. */ ACPI_IVRS_HARDWARE1 *ivhd_hdrs[10]; extern int amdvi_ptp_level; /* Page table levels. */ typedef int (*ivhd_iter_t)(ACPI_IVRS_HEADER *ptr, void *arg); /* * Iterate IVRS table for IVHD and IVMD device type. 
*/
/*
 * 'iter' is invoked for every IVHD and IVMD sub-table with 'arg' passed
 * through; the walk stops as soon as the callback returns 0.  Sub-tables
 * of any other type are reported and skipped.  A sub-table whose Length is
 * implausible ends the walk, because the table cannot be traversed safely
 * past it.
 */
static void
ivrs_hdr_iterate_tbl(ivhd_iter_t iter, void *arg)
{
	ACPI_TABLE_IVRS *ivrs;
	ACPI_IVRS_HEADER *ivrs_hdr, *end;
	ACPI_STATUS status;

	status = AcpiGetTable(ACPI_SIG_IVRS, 1, (ACPI_TABLE_HEADER **)&ivrs);
	if (ACPI_FAILURE(status))
		return;

	if (ivrs->Header.Length == 0) {
		return;
	}

	ivrs_hdr = (ACPI_IVRS_HEADER *)(ivrs + 1);
	end = (ACPI_IVRS_HEADER *)((char *)ivrs + ivrs->Header.Length);

	while (ivrs_hdr < end) {
		/* The header itself must fit before its Length is read. */
		if ((size_t)((uint8_t *)end - (uint8_t *)ivrs_hdr) <
		    sizeof(*ivrs_hdr)) {
			printf("AMD-Vi:IVHD/IVMD is corrupted, "
			    "truncated header\n");
			break;
		}

		/*
		 * A sub-table shorter than its own header (in particular a
		 * zero Length) would never advance the cursor below and the
		 * loop would spin forever, so treat it as corruption just
		 * like a sub-table that runs past the end of the IVRS.
		 */
		if (ivrs_hdr->Length < sizeof(*ivrs_hdr) ||
		    (uint8_t *)ivrs_hdr + ivrs_hdr->Length > (uint8_t *)end) {
			printf("AMD-Vi:IVHD/IVMD is corrupted, length : %d\n",
			    ivrs_hdr->Length);
			break;
		}

		switch (ivrs_hdr->Type) {
		case IVRS_TYPE_HARDWARE_LEGACY:	/* Legacy */
		case IVRS_TYPE_HARDWARE_EFR:
		case IVRS_TYPE_HARDWARE_MIXED:
			if (!iter(ivrs_hdr, arg))
				return;
			break;

		case ACPI_IVRS_TYPE_MEMORY1:
		case ACPI_IVRS_TYPE_MEMORY2:
		case ACPI_IVRS_TYPE_MEMORY3:
			if (!iter(ivrs_hdr, arg))
				return;

			break;

		default:
			printf("AMD-Vi:Not IVHD/IVMD type(%d)\n",
			    ivrs_hdr->Type);
		}

		ivrs_hdr = (ACPI_IVRS_HEADER *)((uint8_t *)ivrs_hdr +
		    ivrs_hdr->Length);
	}
}

/* Return true when 'type' is one of the three IVHD sub-table types. */
static bool
ivrs_is_ivhd(UINT8 type)
{

	switch(type) {
	case IVRS_TYPE_HARDWARE_LEGACY:
	case IVRS_TYPE_HARDWARE_EFR:
	case IVRS_TYPE_HARDWARE_MIXED:
		return (true);

	default:
		return (false);
	}
}

/* Count the number of AMD-Vi devices in the system.
*/ static int ivhd_count_iter(ACPI_IVRS_HEADER * ivrs_he, void *arg) { if (ivrs_is_ivhd(ivrs_he->Type)) ivhd_count++; return (1); } struct find_ivrs_hdr_args { int i; ACPI_IVRS_HEADER *ptr; }; static int ivrs_hdr_find_iter(ACPI_IVRS_HEADER * ivrs_hdr, void *args) { struct find_ivrs_hdr_args *fi; fi = (struct find_ivrs_hdr_args *)args; if (ivrs_is_ivhd(ivrs_hdr->Type)) { if (fi->i == 0) { fi->ptr = ivrs_hdr; return (0); } fi->i--; } return (1); } static ACPI_IVRS_HARDWARE1 * ivhd_find_by_index(int idx) { struct find_ivrs_hdr_args fi; fi.i = idx; fi.ptr = NULL; ivrs_hdr_iterate_tbl(ivrs_hdr_find_iter, &fi); return ((ACPI_IVRS_HARDWARE1 *)fi.ptr); } static void ivhd_dev_add_entry(struct amdvi_softc *softc, uint32_t start_id, uint32_t end_id, uint8_t cfg, bool ats) { struct ivhd_dev_cfg *dev_cfg; /* If device doesn't have special data, don't add it. */ if (!cfg) return; dev_cfg = &softc->dev_cfg[softc->dev_cfg_cnt++]; dev_cfg->start_id = start_id; dev_cfg->end_id = end_id; dev_cfg->data = cfg; dev_cfg->enable_ats = ats; } /* * Record device attributes as suggested by BIOS. 
*/ static int ivhd_dev_parse(ACPI_IVRS_HARDWARE1 *ivhd, struct amdvi_softc *softc) { ACPI_IVRS_DE_HEADER *de; uint8_t *p, *end; int range_start_id = 0, range_end_id = 0; uint32_t *extended; uint8_t all_data = 0, range_data = 0; bool range_enable_ats = false, enable_ats; softc->start_dev_rid = ~0; softc->end_dev_rid = 0; switch (ivhd->Header.Type) { case IVRS_TYPE_HARDWARE_LEGACY: p = (uint8_t *)ivhd + sizeof(ACPI_IVRS_HARDWARE1); break; case IVRS_TYPE_HARDWARE_EFR: case IVRS_TYPE_HARDWARE_MIXED: p = (uint8_t *)ivhd + sizeof(ACPI_IVRS_HARDWARE2); break; default: device_printf(softc->dev, "unknown type: 0x%x\n", ivhd->Header.Type); return (-1); } end = (uint8_t *)ivhd + ivhd->Header.Length; while (p < end) { de = (ACPI_IVRS_DE_HEADER *)p; softc->start_dev_rid = MIN(softc->start_dev_rid, de->Id); softc->end_dev_rid = MAX(softc->end_dev_rid, de->Id); switch (de->Type) { case ACPI_IVRS_TYPE_ALL: all_data = de->DataSetting; break; case ACPI_IVRS_TYPE_SELECT: case ACPI_IVRS_TYPE_ALIAS_SELECT: case ACPI_IVRS_TYPE_EXT_SELECT: enable_ats = false; if (de->Type == ACPI_IVRS_TYPE_EXT_SELECT) { extended = (uint32_t *)(de + 1); enable_ats = (*extended & IVHD_DEV_EXT_ATS_DISABLE) ? false : true; } ivhd_dev_add_entry(softc, de->Id, de->Id, de->DataSetting | all_data, enable_ats); break; case ACPI_IVRS_TYPE_START: case ACPI_IVRS_TYPE_ALIAS_START: case ACPI_IVRS_TYPE_EXT_START: range_start_id = de->Id; range_data = de->DataSetting; if (de->Type == ACPI_IVRS_TYPE_EXT_START) { extended = (uint32_t *)(de + 1); range_enable_ats = (*extended & IVHD_DEV_EXT_ATS_DISABLE) ? 
false : true; } break; case ACPI_IVRS_TYPE_END: range_end_id = de->Id; ivhd_dev_add_entry(softc, range_start_id, range_end_id, range_data | all_data, range_enable_ats); range_start_id = range_end_id = 0; range_data = 0; all_data = 0; break; case ACPI_IVRS_TYPE_PAD4: break; case ACPI_IVRS_TYPE_SPECIAL: /* HPET or IOAPIC */ break; default: if ((de->Type < 5) || (de->Type >= ACPI_IVRS_TYPE_PAD8)) device_printf(softc->dev, "Unknown dev entry:0x%x\n", de->Type); } if (softc->dev_cfg_cnt > (sizeof(softc->dev_cfg) / sizeof(softc->dev_cfg[0]))) { device_printf(softc->dev, "WARN Too many device entries.\n"); return (EINVAL); } if (de->Type < 0x40) p += sizeof(ACPI_IVRS_DEVICE4); else if (de->Type < 0x80) p += sizeof(ACPI_IVRS_DEVICE8A); else { printf("Variable size IVHD type 0x%x not supported\n", de->Type); break; } } KASSERT((softc->end_dev_rid >= softc->start_dev_rid), ("Device end[0x%x] < start[0x%x.\n", softc->end_dev_rid, softc->start_dev_rid)); return (0); } static bool ivhd_is_newer(ACPI_IVRS_HEADER *old, ACPI_IVRS_HEADER *new) { /* * Newer IVRS header type take precedence. 
*/ if ((old->DeviceId == new->DeviceId) && (old->Type == IVRS_TYPE_HARDWARE_LEGACY) && ((new->Type == IVRS_TYPE_HARDWARE_EFR) || (new->Type == IVRS_TYPE_HARDWARE_MIXED))) { return (true); } return (false); } static void ivhd_identify(driver_t *driver, device_t parent) { ACPI_TABLE_IVRS *ivrs; ACPI_IVRS_HARDWARE1 *ivhd; ACPI_STATUS status; int i, count = 0; uint32_t ivrs_ivinfo; if (acpi_disabled("ivhd")) return; status = AcpiGetTable(ACPI_SIG_IVRS, 1, (ACPI_TABLE_HEADER **)&ivrs); if (ACPI_FAILURE(status)) return; if (ivrs->Header.Length == 0) { return; } ivrs_ivinfo = ivrs->Info; printf("AMD-Vi: IVRS Info VAsize = %d PAsize = %d GVAsize = %d" " flags:%b\n", REG_BITS(ivrs_ivinfo, 21, 15), REG_BITS(ivrs_ivinfo, 14, 8), REG_BITS(ivrs_ivinfo, 7, 5), REG_BITS(ivrs_ivinfo, 22, 22), "\020\001EFRSup"); ivrs_hdr_iterate_tbl(ivhd_count_iter, NULL); if (!ivhd_count) return; for (i = 0; i < ivhd_count; i++) { ivhd = ivhd_find_by_index(i); KASSERT(ivhd, ("ivhd%d is NULL\n", i)); ivhd_hdrs[i] = ivhd; } /* * Scan for presence of legacy and non-legacy device type * for same AMD-Vi device and override the old one. */ for (i = ivhd_count - 1 ; i > 0 ; i--){ if (ivhd_is_newer(&ivhd_hdrs[i-1]->Header, &ivhd_hdrs[i]->Header)) { memmove(&ivhd_hdrs[i-1], &ivhd_hdrs[i], sizeof(void *) * (ivhd_count - i)); ivhd_count--; } } ivhd_devs = malloc(sizeof(device_t) * ivhd_count, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < ivhd_count; i++) { ivhd = ivhd_hdrs[i]; KASSERT(ivhd, ("ivhd%d is NULL\n", i)); /* * Use a high order to ensure that this driver is probed after * the Host-PCI bridge and the root PCI bus. */ ivhd_devs[i] = BUS_ADD_CHILD(parent, ACPI_DEV_BASE_ORDER + 10 * 10, "ivhd", i); /* * XXX: In case device was not destroyed before, add will fail. * locate the old device instance. 
*/ if (ivhd_devs[i] == NULL) { ivhd_devs[i] = device_find_child(parent, "ivhd", i); if (ivhd_devs[i] == NULL) { printf("AMD-Vi: cant find ivhd%d\n", i); break; } } count++; } /* * Update device count in case failed to attach. */ ivhd_count = count; } static int ivhd_probe(device_t dev) { ACPI_IVRS_HARDWARE1 *ivhd; int unit; if (acpi_get_handle(dev) != NULL) return (ENXIO); unit = device_get_unit(dev); KASSERT((unit < ivhd_count), ("ivhd unit %d > count %d", unit, ivhd_count)); ivhd = ivhd_hdrs[unit]; KASSERT(ivhd, ("ivhd is NULL")); switch (ivhd->Header.Type) { case IVRS_TYPE_HARDWARE_EFR: device_set_desc(dev, "AMD-Vi/IOMMU ivhd with EFR"); break; case IVRS_TYPE_HARDWARE_MIXED: device_set_desc(dev, "AMD-Vi/IOMMU ivhd in mixed format"); break; case IVRS_TYPE_HARDWARE_LEGACY: default: device_set_desc(dev, "AMD-Vi/IOMMU ivhd"); break; } return (BUS_PROBE_NOWILDCARD); } static void ivhd_print_flag(device_t dev, enum IvrsType ivhd_type, uint8_t flag) { /* * IVHD lgeacy type has two extra high bits in flag which has * been moved to EFR for non-legacy device. */ switch (ivhd_type) { case IVRS_TYPE_HARDWARE_LEGACY: device_printf(dev, "Flag:%b\n", flag, "\020" "\001HtTunEn" "\002PassPW" "\003ResPassPW" "\004Isoc" "\005IotlbSup" "\006Coherent" "\007PreFSup" "\008PPRSup"); break; case IVRS_TYPE_HARDWARE_EFR: case IVRS_TYPE_HARDWARE_MIXED: device_printf(dev, "Flag:%b\n", flag, "\020" "\001HtTunEn" "\002PassPW" "\003ResPassPW" "\004Isoc" "\005IotlbSup" "\006Coherent"); break; default: device_printf(dev, "Can't decode flag of ivhd type :0x%x\n", ivhd_type); break; } } /* * Feature in legacy IVHD type(0x10) and attribute in newer type(0x11 and 0x40). 
*/
static void
ivhd_print_feature(device_t dev, enum IvrsType ivhd_type, uint32_t feature)
{
	uint32_t msi_num_ppr, pn_banks, pn_counters;
	bool legacy;

	legacy = (ivhd_type == IVRS_TYPE_HARDWARE_LEGACY);
	if (!legacy && ivhd_type != IVRS_TYPE_HARDWARE_EFR &&
	    ivhd_type != IVRS_TYPE_HARDWARE_MIXED) {
		/* Other ivhd type features are not decoded. */
		device_printf(dev, "Can't decode ivhd type :0x%x\n",
			ivhd_type);
		return;
	}

	/* Fields reported by every decodable IVHD type. */
	msi_num_ppr = REG_BITS(feature, 27, 23);
	pn_banks = REG_BITS(feature, 22, 17);
	pn_counters = REG_BITS(feature, 16, 13);

	if (!legacy) {
		/*
		 * Fewer features or attributes are reported in non-legacy
		 * type.
		 */
		device_printf(dev, "Features(type:0x%x) MsiNumPPR = %d"
			" PNBanks= %d PNCounters= %d\n",
			ivhd_type,
			msi_num_ppr,
			pn_banks,
			pn_counters);
		return;
	}

	/* Legacy headers additionally carry the translation-size fields. */
	device_printf(dev, "Features(type:0x%x) HATS = %d GATS = %d"
		" MsiNumPPR = %d PNBanks= %d PNCounters= %d\n",
		ivhd_type,
		REG_BITS(feature, 31, 30),
		REG_BITS(feature, 29, 28),
		msi_num_ppr,
		pn_banks,
		pn_counters);
	device_printf(dev, "max PASID = %d GLXSup = %d Feature:%b\n",
		REG_BITS(feature, 12, 8),
		REG_BITS(feature, 4, 3),
		feature,
		"\020"
		"\002NXSup"
		"\003GTSup"
		"\004"
		"\005IASup"
		"\006GASup"
		"\007HESup");
}

/* Print extended features of IOMMU.
*/
/*
 * Print the 64-bit extended feature value as two 32-bit halves.
 *
 * dev         - device used for device_printf() output.
 * ext_feature - extended feature value; nothing is printed when it is 0.
 *
 * Each half is decoded with the kernel "%b" conversion.  In the label
 * string the leading "\020" selects hexadecimal output and every label
 * starts with its 1-based bit number written as an OCTAL escape.  Bit
 * numbers 8 and above must therefore be converted from decimal (8 ->
 * "\010", 9 -> "\011", 10 -> "\012", ...).  Spelling them as "\008" or
 * "\009" embeds a NUL byte and truncates the label list at that point.
 */
static void
ivhd_print_ext_feature(device_t dev, uint64_t ext_feature)
{
	uint32_t ext_low, ext_high;

	if (!ext_feature)
		return;

	ext_low = ext_feature;
	device_printf(dev, "Extended features[31:0]:%b "
	    "HATS = 0x%x GATS = 0x%x "
	    "GLXSup = 0x%x SmiFSup = 0x%x SmiFRC = 0x%x "
	    "GAMSup = 0x%x DualPortLogSup = 0x%x DualEventLogSup = 0x%x\n",
	    (int)ext_low,
	    "\020"
	    "\001PreFSup"
	    "\002PPRSup"
	    "\003"
	    "\004NXSup"
	    "\005GTSup"
	    "\006"
	    "\007IASup"
	    "\010GASup"		/* bit 8 */
	    "\011HESup"		/* bit 9 */
	    "\012PCSup",	/* bit 10 */
	    REG_BITS(ext_low, 11, 10),
	    REG_BITS(ext_low, 13, 12),
	    REG_BITS(ext_low, 15, 14),
	    REG_BITS(ext_low, 17, 16),
	    REG_BITS(ext_low, 20, 18),
	    REG_BITS(ext_low, 23, 21),
	    REG_BITS(ext_low, 25, 24),
	    REG_BITS(ext_low, 29, 28));

	/*
	 * NOTE(review): the field ranges [5:0] and [8:7] below overlap the
	 * single-bit labels for bits 6 (USSup) and 9 (PprOvrflwEarlySup);
	 * confirm the ranges against the IOMMU specification.
	 */
	ext_high = ext_feature >> 32;
	device_printf(dev, "Extended features[62:32]:%b "
	    "Max PASID: 0x%x DevTblSegSup = 0x%x "
	    "MarcSup = 0x%x\n",
	    (int)(ext_high),
	    "\020"
	    "\006USSup"			/* bit 6 */
	    "\011PprOvrflwEarlySup"	/* bit 9 */
	    "\012PPRAutoRspSup"		/* bit 10 */
	    "\015BlKStopMrkSup"		/* bit 13 */
	    "\016PerfOptSup"		/* bit 14 */
	    "\017MsiCapMmioSup"		/* bit 15 */
	    "\021GIOSup"		/* bit 17 */
	    "\022HASup"			/* bit 18 */
	    "\023EPHSup"		/* bit 19 */
	    "\024AttrFWSup"		/* bit 20 */
	    "\025HDSup"			/* bit 21 */
	    "\027InvIotlbSup",		/* bit 23 */
	    REG_BITS(ext_high, 5, 0),
	    REG_BITS(ext_high, 8, 7),
	    REG_BITS(ext_high, 11, 10));
}

static int
ivhd_print_cap(struct amdvi_softc *softc, ACPI_IVRS_HARDWARE1 * ivhd)
{
	device_t dev;
	int max_ptp_level;

	dev = softc->dev;

	ivhd_print_flag(dev, softc->ivhd_type, softc->ivhd_flag);
	ivhd_print_feature(dev, softc->ivhd_type, softc->ivhd_feature);
	ivhd_print_ext_feature(dev, softc->ext_feature);
	max_ptp_level = 7;
	/* Make sure device support minimum page level as requested by user.
*/
	if (max_ptp_level < amdvi_ptp_level) {
		device_printf(dev, "insufficient PTP level:%d\n",
			max_ptp_level);
		return (EINVAL);
	} else {
		device_printf(softc->dev, "supported paging level:%d, will use only: %d\n",
			max_ptp_level, amdvi_ptp_level);
	}

	/* Report the device ID range recorded in the softc. */
	device_printf(softc->dev, "device range: 0x%x - 0x%x\n",
			softc->start_dev_rid, softc->end_dev_rid);

	return (0);
}

/*
 * Attach method for "ivhd" devices.
 *
 * Copies the fields of the IVHD header recorded for this unit into the
 * softc, maps the control registers through PHYS_TO_DMAP(), parses the
 * IVHD device entries, prints the capabilities and finally calls
 * amdvi_setup_hw().
 *
 * Returns 0 on success, or the error returned by ivhd_print_cap() or
 * amdvi_setup_hw().  A failure of ivhd_dev_parse() is only logged and
 * does not abort the attach.
 */
static int
ivhd_attach(device_t dev)
{
	ACPI_IVRS_HARDWARE1 *ivhd;
	ACPI_IVRS_HARDWARE2 *ivhd_efr;
	struct amdvi_softc *softc;
	int status, unit;

	unit = device_get_unit(dev);
	KASSERT((unit < ivhd_count),
		("ivhd unit %d > count %d", unit, ivhd_count));
	/* Make sure it's the same device for which attach is called. */
	KASSERT((ivhd_devs[unit] == dev),
		("Not same device old %p new %p", ivhd_devs[unit], dev));

	softc = device_get_softc(dev);
	softc->dev = dev;
	ivhd = ivhd_hdrs[unit];
	KASSERT(ivhd, ("ivhd is NULL"));
+	softc->pci_dev = pci_find_bsf(PCI_RID2BUS(ivhd->Header.DeviceId),
+	    PCI_RID2SLOT(ivhd->Header.DeviceId),
+	    PCI_RID2FUNC(ivhd->Header.DeviceId));

	softc->ivhd_type = ivhd->Header.Type;
	softc->pci_seg = ivhd->PciSegmentGroup;
	softc->pci_rid = ivhd->Header.DeviceId;
	softc->ivhd_flag = ivhd->Header.Flags;
	/*
	 * On legacy IVHD type(0x10), it is documented as feature
	 * but in newer type it is attribute.
	 */
	softc->ivhd_feature = ivhd->FeatureReporting;
	/*
	 * PCI capability has more capabilities that are not part of IVRS.
	 */
	softc->cap_off = ivhd->CapabilityOffset;

#ifdef notyet
	/* IVHD Info bit[4:0] is event MSI/X number. */
	softc->event_msix = ivhd->Info & 0x1F;
#endif
	/* Only the EFR and mixed header layouts provide an EFR image. */
	switch (ivhd->Header.Type) {
	case IVRS_TYPE_HARDWARE_EFR:
	case IVRS_TYPE_HARDWARE_MIXED:
		ivhd_efr = (ACPI_IVRS_HARDWARE2 *)ivhd;
		softc->ext_feature = ivhd_efr->EfrRegisterImage;
		break;
	}

	softc->ctrl = (struct amdvi_ctrl *) PHYS_TO_DMAP(ivhd->BaseAddress);
	status = ivhd_dev_parse(ivhd, softc);
	if (status != 0) {
		/* Logged only; attach continues after a parse error. */
		device_printf(dev,
		    "endpoint device parsing error=%d\n", status);
	}

	status = ivhd_print_cap(softc, ivhd);
	if (status != 0) {
		return (status);
	}

	status = amdvi_setup_hw(softc);
	if (status != 0) {
		device_printf(dev, "couldn't be initialised, error=%d\n",
		    status);
		return (status);
	}

	return (0);
}

/*
 * Detach method: calls amdvi_teardown_hw() on the softc and returns 0.
 */
static int
ivhd_detach(device_t dev)
{
	struct amdvi_softc *softc;

	softc = device_get_softc(dev);

	amdvi_teardown_hw(softc);

	/*
	 * XXX: delete the device.
	 * don't allow detach, return EBUSY.
	 *
	 * NOTE(review): the code below returns 0, not EBUSY; confirm
	 * which behavior is intended.
	 */
	return (0);
}

/* Suspend method: no work is done, always returns 0. */
static int
ivhd_suspend(device_t dev)
{

	return (0);
}

/* Resume method: no work is done, always returns 0. */
static int
ivhd_resume(device_t dev)
{

	return (0);
}

/* Device method table for the "ivhd" driver. */
static device_method_t ivhd_methods[] = {
	DEVMETHOD(device_identify, ivhd_identify),
	DEVMETHOD(device_probe, ivhd_probe),
	DEVMETHOD(device_attach, ivhd_attach),
	DEVMETHOD(device_detach, ivhd_detach),
	DEVMETHOD(device_suspend, ivhd_suspend),
	DEVMETHOD(device_resume, ivhd_resume),
	DEVMETHOD_END
};

/* Driver description: name, method table and softc size. */
static driver_t ivhd_driver = {
	"ivhd",
	ivhd_methods,
	sizeof(struct amdvi_softc),
};

static devclass_t ivhd_devclass;

/*
 * Load this module at the end after PCI re-probing to configure interrupt.
*/ DRIVER_MODULE_ORDERED(ivhd, acpi, ivhd_driver, ivhd_devclass, 0, 0, SI_ORDER_ANY); MODULE_DEPEND(ivhd, acpi, 1, 1, 1); MODULE_DEPEND(ivhd, pci, 1, 1, 1); diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile index b5d62c358272..ef0d9dcb6786 100644 --- a/sys/modules/vmm/Makefile +++ b/sys/modules/vmm/Makefile @@ -1,90 +1,93 @@ # $FreeBSD$ .include KMOD= vmm SRCS= opt_acpi.h opt_bhyve_snapshot.h opt_ddb.h SRCS+= device_if.h bus_if.h pci_if.h pcib_if.h acpi_if.h vnode_if.h DPSRCS+= vmx_assym.h svm_assym.h DPSRCS+= vmx_genassym.c svm_genassym.c offset.inc CFLAGS+= -DVMM_KEEP_STATS CFLAGS+= -I${SRCTOP}/sys/amd64/vmm CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/io CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/intel CFLAGS+= -I${SRCTOP}/sys/amd64/vmm/amd # generic vmm support .PATH: ${SRCTOP}/sys/amd64/vmm SRCS+= vmm.c \ vmm_dev.c \ vmm_host.c \ vmm_instruction_emul.c \ vmm_ioport.c \ vmm_lapic.c \ vmm_mem.c \ vmm_stat.c \ vmm_util.c \ x86.c .PATH: ${SRCTOP}/sys/amd64/vmm/io SRCS+= iommu.c \ ppt.c \ vatpic.c \ vatpit.c \ vhpet.c \ vioapic.c \ vlapic.c \ vpmtmr.c \ vrtc.c # intel-specific files .PATH: ${SRCTOP}/sys/amd64/vmm/intel SRCS+= ept.c \ vmcs.c \ vmx_msr.c \ vmx_support.S \ vmx.c \ vtd.c # amd-specific files .PATH: ${SRCTOP}/sys/amd64/vmm/amd SRCS+= vmcb.c \ + amdiommu.c \ + ivhd_if.c \ + ivhd_if.h \ svm.c \ svm_support.S \ npt.c \ ivrs_drv.c \ amdvi_hw.c \ svm_msr.c .if ${KERN_OPTS:MBHYVE_SNAPSHOT} != "" SRCS+= vmm_snapshot.c .endif CLEANFILES= vmx_assym.h vmx_genassym.o svm_assym.h svm_genassym.o OBJS_DEPEND_GUESS.vmx_support.o+= vmx_assym.h OBJS_DEPEND_GUESS.svm_support.o+= svm_assym.h vmx_assym.h: vmx_genassym.o sh ${SYSDIR}/kern/genassym.sh vmx_genassym.o > ${.TARGET} svm_assym.h: svm_genassym.o sh ${SYSDIR}/kern/genassym.sh svm_genassym.o > ${.TARGET} vmx_support.o: ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ ${.IMPSRC} -o ${.TARGET} svm_support.o: ${CC} -c -x assembler-with-cpp -DLOCORE ${CFLAGS} \ ${.IMPSRC} -o ${.TARGET} vmx_genassym.o: offset.inc ${CC} 
-c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC} svm_genassym.o: offset.inc ${CC} -c ${CFLAGS:N-flto:N-fno-common} -fcommon ${.IMPSRC} .include