diff --git a/sys/arm64/arm64/gicv3_its.c b/sys/arm64/arm64/gicv3_its.c index 40e42bc5214e..9638046f4f1e 100644 --- a/sys/arm64/arm64/gicv3_its.c +++ b/sys/arm64/arm64/gicv3_its.c @@ -1,2058 +1,2059 @@ /*- * Copyright (c) 2015-2016 The FreeBSD Foundation * * This software was developed by Andrew Turner under * the sponsorship of the FreeBSD Foundation. * * This software was developed by Semihalf under * the sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_acpi.h" #include "opt_platform.h" #include "opt_iommu.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif #include #include #ifdef IOMMU #include #include #endif #include "pcib_if.h" #include "pic_if.h" #include "msi_if.h" MALLOC_DEFINE(M_GICV3_ITS, "GICv3 ITS", "ARM GICv3 Interrupt Translation Service"); #define LPI_NIRQS (64 * 1024) /* The size and alignment of the command circular buffer */ #define ITS_CMDQ_SIZE (64 * 1024) /* Must be a multiple of 4K */ #define ITS_CMDQ_ALIGN (64 * 1024) #define LPI_CONFTAB_SIZE LPI_NIRQS #define LPI_CONFTAB_ALIGN (64 * 1024) #define LPI_CONFTAB_MAX_ADDR ((1ul << 48) - 1) /* We need a 47 bit PA */ /* 1 bit per SPI, PPI, and SGI (8k), and 1 bit per LPI (LPI_CONFTAB_SIZE) */ #define LPI_PENDTAB_SIZE ((LPI_NIRQS + GIC_FIRST_LPI) / 8) #define LPI_PENDTAB_ALIGN (64 * 1024) #define LPI_PENDTAB_MAX_ADDR ((1ul << 48) - 1) /* We need a 47 bit PA */ #define LPI_INT_TRANS_TAB_ALIGN 256 #define LPI_INT_TRANS_TAB_MAX_ADDR ((1ul << 48) - 1) /* ITS commands encoding */ #define ITS_CMD_MOVI (0x01) #define ITS_CMD_SYNC (0x05) #define ITS_CMD_MAPD (0x08) #define ITS_CMD_MAPC (0x09) #define ITS_CMD_MAPTI (0x0a) #define ITS_CMD_MAPI (0x0b) #define ITS_CMD_INV (0x0c) #define ITS_CMD_INVALL (0x0d) /* Command */ #define CMD_COMMAND_MASK (0xFFUL) /* PCI device ID */ #define CMD_DEVID_SHIFT (32) #define CMD_DEVID_MASK (0xFFFFFFFFUL << CMD_DEVID_SHIFT) /* Size of IRQ ID bitfield */ #define CMD_SIZE_MASK (0xFFUL) /* Virtual LPI ID */ #define CMD_ID_MASK (0xFFFFFFFFUL) /* Physical LPI ID */ #define CMD_PID_SHIFT (32) #define CMD_PID_MASK (0xFFFFFFFFUL << CMD_PID_SHIFT) /* Collection */ #define CMD_COL_MASK (0xFFFFUL) /* Target (CPU or Re-Distributor) */ #define 
CMD_TARGET_SHIFT (16) #define CMD_TARGET_MASK (0xFFFFFFFFUL << CMD_TARGET_SHIFT) /* Interrupt Translation Table address */ #define CMD_ITT_MASK (0xFFFFFFFFFF00UL) /* Valid command bit */ #define CMD_VALID_SHIFT (63) #define CMD_VALID_MASK (1UL << CMD_VALID_SHIFT) #define ITS_TARGET_NONE 0xFBADBEEF /* LPI chunk owned by ITS device */ struct lpi_chunk { u_int lpi_base; u_int lpi_free; /* First free LPI in set */ u_int lpi_num; /* Total number of LPIs in chunk */ u_int lpi_busy; /* Number of busy LPIs in chink */ }; /* ITS device */ struct its_dev { TAILQ_ENTRY(its_dev) entry; /* PCI device */ device_t pci_dev; /* Device ID (i.e. PCI device ID) */ uint32_t devid; /* List of assigned LPIs */ struct lpi_chunk lpis; /* Virtual address of ITT */ vm_offset_t itt; size_t itt_size; }; /* * ITS command descriptor. * Idea for command description passing taken from Linux. */ struct its_cmd_desc { uint8_t cmd_type; union { struct { struct its_dev *its_dev; struct its_col *col; uint32_t id; } cmd_desc_movi; struct { struct its_col *col; } cmd_desc_sync; struct { struct its_col *col; uint8_t valid; } cmd_desc_mapc; struct { struct its_dev *its_dev; struct its_col *col; uint32_t pid; uint32_t id; } cmd_desc_mapvi; struct { struct its_dev *its_dev; struct its_col *col; uint32_t pid; } cmd_desc_mapi; struct { struct its_dev *its_dev; uint8_t valid; } cmd_desc_mapd; struct { struct its_dev *its_dev; struct its_col *col; uint32_t pid; } cmd_desc_inv; struct { struct its_col *col; } cmd_desc_invall; }; }; /* ITS command. Each command is 32 bytes long */ struct its_cmd { uint64_t cmd_dword[4]; /* ITS command double word */ }; /* An ITS private table */ struct its_ptable { vm_offset_t ptab_vaddr; unsigned long ptab_size; }; /* ITS collection description. 
*/ struct its_col { uint64_t col_target; /* Target Re-Distributor */ uint64_t col_id; /* Collection ID */ }; struct gicv3_its_irqsrc { struct intr_irqsrc gi_isrc; u_int gi_id; u_int gi_lpi; struct its_dev *gi_its_dev; TAILQ_ENTRY(gicv3_its_irqsrc) gi_link; }; struct gicv3_its_softc { device_t dev; struct intr_pic *sc_pic; struct resource *sc_its_res; cpuset_t sc_cpus; struct domainset *sc_ds; u_int gic_irq_cpu; struct its_ptable sc_its_ptab[GITS_BASER_NUM]; struct its_col *sc_its_cols[MAXCPU]; /* Per-CPU collections */ /* * TODO: We should get these from the parent as we only want a * single copy of each across the interrupt controller. */ uint8_t *sc_conf_base; vm_offset_t sc_pend_base[MAXCPU]; /* Command handling */ struct mtx sc_its_cmd_lock; struct its_cmd *sc_its_cmd_base; /* Command circular buffer address */ size_t sc_its_cmd_next_idx; vmem_t *sc_irq_alloc; struct gicv3_its_irqsrc **sc_irqs; u_int sc_irq_base; u_int sc_irq_length; u_int sc_irq_count; struct mtx sc_its_dev_lock; TAILQ_HEAD(its_dev_list, its_dev) sc_its_dev_list; TAILQ_HEAD(free_irqs, gicv3_its_irqsrc) sc_free_irqs; #define ITS_FLAGS_CMDQ_FLUSH 0x00000001 #define ITS_FLAGS_LPI_CONF_FLUSH 0x00000002 #define ITS_FLAGS_ERRATA_CAVIUM_22375 0x00000004 u_int sc_its_flags; bool trace_enable; vm_page_t ma; /* fake msi page */ }; static void *conf_base; typedef void (its_quirk_func_t)(device_t); static its_quirk_func_t its_quirk_cavium_22375; static const struct { const char *desc; uint32_t iidr; uint32_t iidr_mask; its_quirk_func_t *func; } its_quirks[] = { { /* Cavium ThunderX Pass 1.x */ .desc = "Cavium ThunderX errata: 22375, 24313", .iidr = GITS_IIDR_RAW(GITS_IIDR_IMPL_CAVIUM, GITS_IIDR_PROD_THUNDER, GITS_IIDR_VAR_THUNDER_1, 0), .iidr_mask = ~GITS_IIDR_REVISION_MASK, .func = its_quirk_cavium_22375, }, }; #define gic_its_read_4(sc, reg) \ bus_read_4((sc)->sc_its_res, (reg)) #define gic_its_read_8(sc, reg) \ bus_read_8((sc)->sc_its_res, (reg)) #define gic_its_write_4(sc, reg, val) \ 
bus_write_4((sc)->sc_its_res, (reg), (val)) #define gic_its_write_8(sc, reg, val) \ bus_write_8((sc)->sc_its_res, (reg), (val)) static device_attach_t gicv3_its_attach; static device_detach_t gicv3_its_detach; static pic_disable_intr_t gicv3_its_disable_intr; static pic_enable_intr_t gicv3_its_enable_intr; static pic_map_intr_t gicv3_its_map_intr; static pic_setup_intr_t gicv3_its_setup_intr; static pic_post_filter_t gicv3_its_post_filter; static pic_post_ithread_t gicv3_its_post_ithread; static pic_pre_ithread_t gicv3_its_pre_ithread; static pic_bind_intr_t gicv3_its_bind_intr; #ifdef SMP static pic_init_secondary_t gicv3_its_init_secondary; #endif static msi_alloc_msi_t gicv3_its_alloc_msi; static msi_release_msi_t gicv3_its_release_msi; static msi_alloc_msix_t gicv3_its_alloc_msix; static msi_release_msix_t gicv3_its_release_msix; static msi_map_msi_t gicv3_its_map_msi; #ifdef IOMMU static msi_iommu_init_t gicv3_iommu_init; static msi_iommu_deinit_t gicv3_iommu_deinit; #endif static void its_cmd_movi(device_t, struct gicv3_its_irqsrc *); static void its_cmd_mapc(device_t, struct its_col *, uint8_t); static void its_cmd_mapti(device_t, struct gicv3_its_irqsrc *); static void its_cmd_mapd(device_t, struct its_dev *, uint8_t); static void its_cmd_inv(device_t, struct its_dev *, struct gicv3_its_irqsrc *); static void its_cmd_invall(device_t, struct its_col *); static device_method_t gicv3_its_methods[] = { /* Device interface */ DEVMETHOD(device_detach, gicv3_its_detach), /* Interrupt controller interface */ DEVMETHOD(pic_disable_intr, gicv3_its_disable_intr), DEVMETHOD(pic_enable_intr, gicv3_its_enable_intr), DEVMETHOD(pic_map_intr, gicv3_its_map_intr), DEVMETHOD(pic_setup_intr, gicv3_its_setup_intr), DEVMETHOD(pic_post_filter, gicv3_its_post_filter), DEVMETHOD(pic_post_ithread, gicv3_its_post_ithread), DEVMETHOD(pic_pre_ithread, gicv3_its_pre_ithread), #ifdef SMP DEVMETHOD(pic_bind_intr, gicv3_its_bind_intr), DEVMETHOD(pic_init_secondary, 
gicv3_its_init_secondary), #endif /* MSI/MSI-X */ DEVMETHOD(msi_alloc_msi, gicv3_its_alloc_msi), DEVMETHOD(msi_release_msi, gicv3_its_release_msi), DEVMETHOD(msi_alloc_msix, gicv3_its_alloc_msix), DEVMETHOD(msi_release_msix, gicv3_its_release_msix), DEVMETHOD(msi_map_msi, gicv3_its_map_msi), #ifdef IOMMU DEVMETHOD(msi_iommu_init, gicv3_iommu_init), DEVMETHOD(msi_iommu_deinit, gicv3_iommu_deinit), #endif /* End */ DEVMETHOD_END }; static DEFINE_CLASS_0(gic, gicv3_its_driver, gicv3_its_methods, sizeof(struct gicv3_its_softc)); static void gicv3_its_cmdq_init(struct gicv3_its_softc *sc) { vm_paddr_t cmd_paddr; uint64_t reg, tmp; /* Set up the command circular buffer */ sc->sc_its_cmd_base = contigmalloc_domainset(ITS_CMDQ_SIZE, M_GICV3_ITS, sc->sc_ds, M_WAITOK | M_ZERO, 0, (1ul << 48) - 1, ITS_CMDQ_ALIGN, 0); sc->sc_its_cmd_next_idx = 0; cmd_paddr = vtophys(sc->sc_its_cmd_base); /* Set the base of the command buffer */ reg = GITS_CBASER_VALID | (GITS_CBASER_CACHE_NIWAWB << GITS_CBASER_CACHE_SHIFT) | cmd_paddr | (GITS_CBASER_SHARE_IS << GITS_CBASER_SHARE_SHIFT) | (ITS_CMDQ_SIZE / 4096 - 1); gic_its_write_8(sc, GITS_CBASER, reg); /* Read back to check for fixed value fields */ tmp = gic_its_read_8(sc, GITS_CBASER); if ((tmp & GITS_CBASER_SHARE_MASK) != (GITS_CBASER_SHARE_IS << GITS_CBASER_SHARE_SHIFT)) { /* Check if the hardware reported non-shareable */ if ((tmp & GITS_CBASER_SHARE_MASK) == (GITS_CBASER_SHARE_NS << GITS_CBASER_SHARE_SHIFT)) { /* If so remove the cache attribute */ reg &= ~GITS_CBASER_CACHE_MASK; reg &= ~GITS_CBASER_SHARE_MASK; /* Set to Non-cacheable, Non-shareable */ reg |= GITS_CBASER_CACHE_NIN << GITS_CBASER_CACHE_SHIFT; reg |= GITS_CBASER_SHARE_NS << GITS_CBASER_SHARE_SHIFT; gic_its_write_8(sc, GITS_CBASER, reg); } /* The command queue has to be flushed after each command */ sc->sc_its_flags |= ITS_FLAGS_CMDQ_FLUSH; } /* Get the next command from the start of the buffer */ gic_its_write_8(sc, GITS_CWRITER, 0x0); } static int 
gicv3_its_table_init(device_t dev, struct gicv3_its_softc *sc)
{
	/*
	 * Walk the GITS_BASER registers and, for each table type handled by
	 * the switch below, allocate zeroed contiguous backing memory and
	 * program the register, re-trying with the shareability and page
	 * size the hardware reports on read-back.  Returns 0 on success or
	 * ENXIO when a register cannot be updated to the requested value.
	 */
	vm_offset_t table;
	vm_paddr_t paddr;
	uint64_t cache, reg, share, tmp, type;
	size_t esize, its_tbl_size, nidents, nitspages, npages;
	int i, page_size;
	int devbits;

	if ((sc->sc_its_flags & ITS_FLAGS_ERRATA_CAVIUM_22375) != 0) {
		/*
		 * GITS_TYPER[17:13] of ThunderX reports that device IDs
		 * are to be 21 bits in length. The entry size of the ITS
		 * table can be read from GITS_BASERn[52:48] and on ThunderX
		 * is supposed to be 8 bytes in length (for device table).
		 * Finally the page size that is to be used by ITS to access
		 * this table will be set to 64KB.
		 *
		 * This gives 0x200000 entries of size 0x8 bytes covered by
		 * 256 pages each of which 64KB in size. The number of pages
		 * (minus 1) should then be written to GITS_BASERn[7:0]. In
		 * that case this value would be 0xFF but on ThunderX the
		 * maximum value that HW accepts is 0xFD.
		 *
		 * Set an arbitrary number of device ID bits to 20 in order
		 * to limit the number of entries in ITS device table to
		 * 0x100000 and the table size to 8MB.
		 */
		devbits = 20;
		cache = 0;
	} else {
		devbits = GITS_TYPER_DEVB(gic_its_read_8(sc, GITS_TYPER));
		cache = GITS_BASER_CACHE_WAWB;
	}
	/* Start with the most capable attributes and back off as needed */
	share = GITS_BASER_SHARE_IS;
	page_size = PAGE_SIZE_64K;

	for (i = 0; i < GITS_BASER_NUM; i++) {
		reg = gic_its_read_8(sc, GITS_BASER(i));
		/* The type of table */
		type = GITS_BASER_TYPE(reg);
		/* The table entry size */
		esize = GITS_BASER_ESIZE(reg);

		switch(type) {
		case GITS_BASER_TYPE_DEV:
			/*
			 * The Devbits field is five bits wide
			 * (GITS_TYPER[17:13]), so devbits can reach 31 or
			 * more.  Shift a size_t rather than an int so the
			 * shift is well defined and the entry count is not
			 * truncated before it is stored in nidents.
			 */
			nidents = ((size_t)1 << devbits);
			its_tbl_size = esize * nidents;
			its_tbl_size = roundup2(its_tbl_size, PAGE_SIZE_64K);
			break;
		case GITS_BASER_TYPE_VP:
		case GITS_BASER_TYPE_PP: /* Undocumented?
*/ case GITS_BASER_TYPE_IC: its_tbl_size = page_size; break; default: continue; } npages = howmany(its_tbl_size, PAGE_SIZE); /* Allocate the table */ table = (vm_offset_t)contigmalloc_domainset(npages * PAGE_SIZE, M_GICV3_ITS, sc->sc_ds, M_WAITOK | M_ZERO, 0, (1ul << 48) - 1, PAGE_SIZE_64K, 0); sc->sc_its_ptab[i].ptab_vaddr = table; sc->sc_its_ptab[i].ptab_size = npages * PAGE_SIZE; paddr = vtophys(table); while (1) { nitspages = howmany(its_tbl_size, page_size); /* Clear the fields we will be setting */ reg &= ~(GITS_BASER_VALID | GITS_BASER_CACHE_MASK | GITS_BASER_TYPE_MASK | GITS_BASER_ESIZE_MASK | GITS_BASER_PA_MASK | GITS_BASER_SHARE_MASK | GITS_BASER_PSZ_MASK | GITS_BASER_SIZE_MASK); /* Set the new values */ reg |= GITS_BASER_VALID | (cache << GITS_BASER_CACHE_SHIFT) | (type << GITS_BASER_TYPE_SHIFT) | ((esize - 1) << GITS_BASER_ESIZE_SHIFT) | paddr | (share << GITS_BASER_SHARE_SHIFT) | (nitspages - 1); switch (page_size) { case PAGE_SIZE_4K: /* 4KB */ reg |= GITS_BASER_PSZ_4K << GITS_BASER_PSZ_SHIFT; break; case PAGE_SIZE_16K: /* 16KB */ reg |= GITS_BASER_PSZ_16K << GITS_BASER_PSZ_SHIFT; break; case PAGE_SIZE_64K: /* 64KB */ reg |= GITS_BASER_PSZ_64K << GITS_BASER_PSZ_SHIFT; break; } gic_its_write_8(sc, GITS_BASER(i), reg); /* Read back to check */ tmp = gic_its_read_8(sc, GITS_BASER(i)); /* Do the shareability masks line up? 
*/ if ((tmp & GITS_BASER_SHARE_MASK) != (reg & GITS_BASER_SHARE_MASK)) { share = (tmp & GITS_BASER_SHARE_MASK) >> GITS_BASER_SHARE_SHIFT; continue; } if ((tmp & GITS_BASER_PSZ_MASK) != (reg & GITS_BASER_PSZ_MASK)) { switch (page_size) { case PAGE_SIZE_16K: page_size = PAGE_SIZE_4K; continue; case PAGE_SIZE_64K: page_size = PAGE_SIZE_16K; continue; } } if (tmp != reg) { device_printf(dev, "GITS_BASER%d: " "unable to be updated: %lx != %lx\n", i, reg, tmp); return (ENXIO); } /* We should have made all needed changes */ break; } } return (0); } static void gicv3_its_conftable_init(struct gicv3_its_softc *sc) { void *conf_table; conf_table = atomic_load_ptr(&conf_base); if (conf_table == NULL) { conf_table = contigmalloc(LPI_CONFTAB_SIZE, M_GICV3_ITS, M_WAITOK, 0, LPI_CONFTAB_MAX_ADDR, LPI_CONFTAB_ALIGN, 0); if (atomic_cmpset_ptr((uintptr_t *)&conf_base, (uintptr_t)NULL, (uintptr_t)conf_table) == 0) { contigfree(conf_table, LPI_CONFTAB_SIZE, M_GICV3_ITS); conf_table = atomic_load_ptr(&conf_base); } } sc->sc_conf_base = conf_table; /* Set the default configuration */ memset(sc->sc_conf_base, GIC_PRIORITY_MAX | LPI_CONF_GROUP1, LPI_CONFTAB_SIZE); /* Flush the table to memory */ cpu_dcache_wb_range((vm_offset_t)sc->sc_conf_base, LPI_CONFTAB_SIZE); } static void gicv3_its_pendtables_init(struct gicv3_its_softc *sc) { int i; for (i = 0; i <= mp_maxid; i++) { if (CPU_ISSET(i, &sc->sc_cpus) == 0) continue; sc->sc_pend_base[i] = (vm_offset_t)contigmalloc( LPI_PENDTAB_SIZE, M_GICV3_ITS, M_WAITOK | M_ZERO, 0, LPI_PENDTAB_MAX_ADDR, LPI_PENDTAB_ALIGN, 0); /* Flush so the ITS can see the memory */ cpu_dcache_wb_range((vm_offset_t)sc->sc_pend_base[i], LPI_PENDTAB_SIZE); } } static void its_init_cpu_lpi(device_t dev, struct gicv3_its_softc *sc) { device_t gicv3; uint64_t xbaser, tmp; uint32_t ctlr; u_int cpuid; gicv3 = device_get_parent(dev); cpuid = PCPU_GET(cpuid); /* Disable LPIs */ ctlr = gic_r_read_4(gicv3, GICR_CTLR); ctlr &= ~GICR_CTLR_LPI_ENABLE; gic_r_write_4(gicv3, 
GICR_CTLR, ctlr); /* Make sure changes are observable my the GIC */ dsb(sy); /* * Set the redistributor base */ xbaser = vtophys(sc->sc_conf_base) | (GICR_PROPBASER_SHARE_IS << GICR_PROPBASER_SHARE_SHIFT) | (GICR_PROPBASER_CACHE_NIWAWB << GICR_PROPBASER_CACHE_SHIFT) | (flsl(LPI_CONFTAB_SIZE | GIC_FIRST_LPI) - 1); gic_r_write_8(gicv3, GICR_PROPBASER, xbaser); /* Check the cache attributes we set */ tmp = gic_r_read_8(gicv3, GICR_PROPBASER); if ((tmp & GICR_PROPBASER_SHARE_MASK) != (xbaser & GICR_PROPBASER_SHARE_MASK)) { if ((tmp & GICR_PROPBASER_SHARE_MASK) == (GICR_PROPBASER_SHARE_NS << GICR_PROPBASER_SHARE_SHIFT)) { /* We need to mark as non-cacheable */ xbaser &= ~(GICR_PROPBASER_SHARE_MASK | GICR_PROPBASER_CACHE_MASK); /* Non-cacheable */ xbaser |= GICR_PROPBASER_CACHE_NIN << GICR_PROPBASER_CACHE_SHIFT; /* Non-sareable */ xbaser |= GICR_PROPBASER_SHARE_NS << GICR_PROPBASER_SHARE_SHIFT; gic_r_write_8(gicv3, GICR_PROPBASER, xbaser); } sc->sc_its_flags |= ITS_FLAGS_LPI_CONF_FLUSH; } /* * Set the LPI pending table base */ xbaser = vtophys(sc->sc_pend_base[cpuid]) | (GICR_PENDBASER_CACHE_NIWAWB << GICR_PENDBASER_CACHE_SHIFT) | (GICR_PENDBASER_SHARE_IS << GICR_PENDBASER_SHARE_SHIFT); gic_r_write_8(gicv3, GICR_PENDBASER, xbaser); tmp = gic_r_read_8(gicv3, GICR_PENDBASER); if ((tmp & GICR_PENDBASER_SHARE_MASK) == (GICR_PENDBASER_SHARE_NS << GICR_PENDBASER_SHARE_SHIFT)) { /* Clear the cahce and shareability bits */ xbaser &= ~(GICR_PENDBASER_CACHE_MASK | GICR_PENDBASER_SHARE_MASK); /* Mark as non-shareable */ xbaser |= GICR_PENDBASER_SHARE_NS << GICR_PENDBASER_SHARE_SHIFT; /* And non-cacheable */ xbaser |= GICR_PENDBASER_CACHE_NIN << GICR_PENDBASER_CACHE_SHIFT; } /* Enable LPIs */ ctlr = gic_r_read_4(gicv3, GICR_CTLR); ctlr |= GICR_CTLR_LPI_ENABLE; gic_r_write_4(gicv3, GICR_CTLR, ctlr); /* Make sure the GIC has seen everything */ dsb(sy); } static int its_init_cpu(device_t dev, struct gicv3_its_softc *sc) { device_t gicv3; vm_paddr_t target; u_int cpuid; struct 
redist_pcpu *rpcpu; gicv3 = device_get_parent(dev); cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &sc->sc_cpus)) return (0); /* Check if the ITS is enabled on this CPU */ if ((gic_r_read_8(gicv3, GICR_TYPER) & GICR_TYPER_PLPIS) == 0) return (ENXIO); rpcpu = gicv3_get_redist(dev); /* Do per-cpu LPI init once */ if (!rpcpu->lpi_enabled) { its_init_cpu_lpi(dev, sc); rpcpu->lpi_enabled = true; } if ((gic_its_read_8(sc, GITS_TYPER) & GITS_TYPER_PTA) != 0) { /* This ITS wants the redistributor physical address */ target = vtophys(rman_get_virtual(&rpcpu->res)); } else { /* This ITS wants the unique processor number */ target = GICR_TYPER_CPUNUM(gic_r_read_8(gicv3, GICR_TYPER)) << CMD_TARGET_SHIFT; } sc->sc_its_cols[cpuid]->col_target = target; sc->sc_its_cols[cpuid]->col_id = cpuid; its_cmd_mapc(dev, sc->sc_its_cols[cpuid], 1); its_cmd_invall(dev, sc->sc_its_cols[cpuid]); return (0); } static int gicv3_its_sysctl_trace_enable(SYSCTL_HANDLER_ARGS) { struct gicv3_its_softc *sc; int rv; sc = arg1; rv = sysctl_handle_bool(oidp, &sc->trace_enable, 0, req); if (rv != 0 || req->newptr == NULL) return (rv); if (sc->trace_enable) gic_its_write_8(sc, GITS_TRKCTLR, 3); else gic_its_write_8(sc, GITS_TRKCTLR, 0); return (0); } static int gicv3_its_sysctl_trace_regs(SYSCTL_HANDLER_ARGS) { struct gicv3_its_softc *sc; struct sbuf *sb; int err; sc = arg1; sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) { device_printf(sc->dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(sb, "\n"); sbuf_printf(sb, "GITS_TRKCTLR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKCTLR)); sbuf_printf(sb, "GITS_TRKR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKR)); sbuf_printf(sb, "GITS_TRKDIDR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKDIDR)); sbuf_printf(sb, "GITS_TRKPIDR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKPIDR)); sbuf_printf(sb, "GITS_TRKVIDR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKVIDR)); sbuf_printf(sb, "GITS_TRKTGTR: 0x%08X\n", gic_its_read_4(sc, GITS_TRKTGTR)); err = 
sbuf_finish(sb); if (err) device_printf(sc->dev, "Error finishing sbuf: %d\n", err); sbuf_delete(sb); return(err); } static int gicv3_its_init_sysctl(struct gicv3_its_softc *sc) { struct sysctl_oid *oid, *child; struct sysctl_ctx_list *ctx_list; ctx_list = device_get_sysctl_ctx(sc->dev); child = device_get_sysctl_tree(sc->dev); oid = SYSCTL_ADD_NODE(ctx_list, SYSCTL_CHILDREN(child), OID_AUTO, "tracing", CTLFLAG_RD| CTLFLAG_MPSAFE, NULL, "Messages tracing"); if (oid == NULL) return (ENXIO); /* Add registers */ SYSCTL_ADD_PROC(ctx_list, SYSCTL_CHILDREN(oid), OID_AUTO, "enable", CTLTYPE_U8 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, gicv3_its_sysctl_trace_enable, "CU", "Enable tracing"); SYSCTL_ADD_PROC(ctx_list, SYSCTL_CHILDREN(oid), OID_AUTO, "capture", CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, gicv3_its_sysctl_trace_regs, "", "Captured tracing registers."); return (0); } static int gicv3_its_attach(device_t dev) { struct gicv3_its_softc *sc; int domain, err, i, rid; uint64_t phys; uint32_t ctlr, iidr; sc = device_get_softc(dev); sc->sc_irq_length = gicv3_get_nirqs(dev); sc->sc_irq_base = GIC_FIRST_LPI; sc->sc_irq_base += device_get_unit(dev) * sc->sc_irq_length; rid = 0; sc->sc_its_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->sc_its_res == NULL) { device_printf(dev, "Could not allocate memory\n"); return (ENXIO); } phys = rounddown2(vtophys(rman_get_virtual(sc->sc_its_res)) + GITS_TRANSLATER, PAGE_SIZE); sc->ma = malloc(sizeof(struct vm_page), M_DEVBUF, M_WAITOK | M_ZERO); vm_page_initfake(sc->ma, phys, VM_MEMATTR_DEFAULT); CPU_COPY(&all_cpus, &sc->sc_cpus); iidr = gic_its_read_4(sc, GITS_IIDR); for (i = 0; i < nitems(its_quirks); i++) { if ((iidr & its_quirks[i].iidr_mask) == its_quirks[i].iidr) { if (bootverbose) { device_printf(dev, "Applying %s\n", its_quirks[i].desc); } its_quirks[i].func(dev); break; } } if (bus_get_domain(dev, &domain) == 0 && domain < MAXMEMDOM) { sc->sc_ds = DOMAINSET_PREF(domain); } else { sc->sc_ds = 
DOMAINSET_RR(); } /* * GIT_CTLR_EN is mandated to reset to 0 on a Warm reset, but we may be * coming in via, for instance, a kexec/kboot style setup where a * previous kernel has configured then relinquished control. Clear it * so that we can reconfigure GITS_BASER*. */ ctlr = gic_its_read_4(sc, GITS_CTLR); if ((ctlr & GITS_CTLR_EN) != 0) { ctlr &= ~GITS_CTLR_EN; gic_its_write_4(sc, GITS_CTLR, ctlr); } /* Allocate the private tables */ err = gicv3_its_table_init(dev, sc); if (err != 0) return (err); /* Protects access to the device list */ mtx_init(&sc->sc_its_dev_lock, "ITS device lock", NULL, MTX_SPIN); /* Protects access to the ITS command circular buffer. */ mtx_init(&sc->sc_its_cmd_lock, "ITS cmd lock", NULL, MTX_SPIN); /* Allocate the command circular buffer */ gicv3_its_cmdq_init(sc); /* Allocate the per-CPU collections */ for (int cpu = 0; cpu <= mp_maxid; cpu++) if (CPU_ISSET(cpu, &sc->sc_cpus) != 0) sc->sc_its_cols[cpu] = malloc_domainset( sizeof(*sc->sc_its_cols[0]), M_GICV3_ITS, DOMAINSET_PREF(pcpu_find(cpu)->pc_domain), M_WAITOK | M_ZERO); else sc->sc_its_cols[cpu] = NULL; /* Enable the ITS */ gic_its_write_4(sc, GITS_CTLR, ctlr | GITS_CTLR_EN); /* Create the LPI configuration table */ gicv3_its_conftable_init(sc); /* And the pending tebles */ gicv3_its_pendtables_init(sc); /* Enable LPIs on this CPU */ its_init_cpu(dev, sc); TAILQ_INIT(&sc->sc_its_dev_list); TAILQ_INIT(&sc->sc_free_irqs); /* * Create the vmem object to allocate INTRNG IRQs from. We try to * use all IRQs not already used by the GICv3. * XXX: This assumes there are no other interrupt controllers in the * system. */ sc->sc_irq_alloc = vmem_create(device_get_nameunit(dev), 0, gicv3_get_nirqs(dev), 1, 0, M_FIRSTFIT | M_WAITOK); sc->sc_irqs = malloc(sizeof(*sc->sc_irqs) * sc->sc_irq_length, M_GICV3_ITS, M_WAITOK | M_ZERO); /* For GIC-500 install tracking sysctls. 
*/ if ((iidr & (GITS_IIDR_PRODUCT_MASK | GITS_IIDR_IMPLEMENTOR_MASK)) == GITS_IIDR_RAW(GITS_IIDR_IMPL_ARM, GITS_IIDR_PROD_GIC500, 0, 0)) gicv3_its_init_sysctl(sc); return (0); } static int gicv3_its_detach(device_t dev) { return (ENXIO); } static void its_quirk_cavium_22375(device_t dev) { struct gicv3_its_softc *sc; int domain; sc = device_get_softc(dev); sc->sc_its_flags |= ITS_FLAGS_ERRATA_CAVIUM_22375; /* * We need to limit which CPUs we send these interrupts to on * the original dual socket ThunderX as it is unable to * forward them between the two sockets. */ if (bus_get_domain(dev, &domain) == 0) { if (domain < MAXMEMDOM) { CPU_COPY(&cpuset_domain[domain], &sc->sc_cpus); } else { CPU_ZERO(&sc->sc_cpus); } } } static void gicv3_its_disable_intr(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; uint8_t *conf; sc = device_get_softc(dev); girq = (struct gicv3_its_irqsrc *)isrc; conf = sc->sc_conf_base; conf[girq->gi_lpi] &= ~LPI_CONF_ENABLE; if ((sc->sc_its_flags & ITS_FLAGS_LPI_CONF_FLUSH) != 0) { /* Clean D-cache under command. */ cpu_dcache_wb_range((vm_offset_t)&conf[girq->gi_lpi], 1); } else { /* DSB inner shareable, store */ dsb(ishst); } its_cmd_inv(dev, girq->gi_its_dev, girq); } static void gicv3_its_enable_intr(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; uint8_t *conf; sc = device_get_softc(dev); girq = (struct gicv3_its_irqsrc *)isrc; conf = sc->sc_conf_base; conf[girq->gi_lpi] |= LPI_CONF_ENABLE; if ((sc->sc_its_flags & ITS_FLAGS_LPI_CONF_FLUSH) != 0) { /* Clean D-cache under command. 
*/ cpu_dcache_wb_range((vm_offset_t)&conf[girq->gi_lpi], 1); } else { /* DSB inner shareable, store */ dsb(ishst); } its_cmd_inv(dev, girq->gi_its_dev, girq); } static int gicv3_its_intr(void *arg, uintptr_t irq) { struct gicv3_its_softc *sc = arg; struct gicv3_its_irqsrc *girq; struct trapframe *tf; irq -= sc->sc_irq_base; girq = sc->sc_irqs[irq]; if (girq == NULL) panic("gicv3_its_intr: Invalid interrupt %ld", irq + sc->sc_irq_base); tf = curthread->td_intr_frame; intr_isrc_dispatch(&girq->gi_isrc, tf); return (FILTER_HANDLED); } static void gicv3_its_pre_ithread(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_irqsrc *girq; girq = (struct gicv3_its_irqsrc *)isrc; gic_icc_write(EOIR1, girq->gi_lpi + GIC_FIRST_LPI); } static void gicv3_its_post_ithread(device_t dev, struct intr_irqsrc *isrc) { } static void gicv3_its_post_filter(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_irqsrc *girq; girq = (struct gicv3_its_irqsrc *)isrc; gic_icc_write(EOIR1, girq->gi_lpi + GIC_FIRST_LPI); } static int gicv3_its_select_cpu(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_softc *sc; sc = device_get_softc(dev); if (CPU_EMPTY(&isrc->isrc_cpu)) { sc->gic_irq_cpu = intr_irq_next_cpu(sc->gic_irq_cpu, &sc->sc_cpus); CPU_SETOF(sc->gic_irq_cpu, &isrc->isrc_cpu); } return (0); } static int gicv3_its_bind_intr(device_t dev, struct intr_irqsrc *isrc) { struct gicv3_its_irqsrc *girq; gicv3_its_select_cpu(dev, isrc); girq = (struct gicv3_its_irqsrc *)isrc; its_cmd_movi(dev, girq); return (0); } static int gicv3_its_map_intr(device_t dev, struct intr_map_data *data, struct intr_irqsrc **isrcp) { /* * This should never happen, we only call this function to map * interrupts found before the controller driver is ready. 
*/ panic("gicv3_its_map_intr: Unable to map a MSI interrupt"); } static int gicv3_its_setup_intr(device_t dev, struct intr_irqsrc *isrc, struct resource *res, struct intr_map_data *data) { /* Bind the interrupt to a CPU */ gicv3_its_bind_intr(dev, isrc); return (0); } #ifdef SMP static void gicv3_its_init_secondary(device_t dev) { struct gicv3_its_softc *sc; sc = device_get_softc(dev); /* * This is fatal as otherwise we may bind interrupts to this CPU. * We need a way to tell the interrupt framework to only bind to a * subset of given CPUs when it performs the shuffle. */ if (its_init_cpu(dev, sc) != 0) panic("gicv3_its_init_secondary: No usable ITS on CPU%d", PCPU_GET(cpuid)); } #endif static uint32_t its_get_devid(device_t pci_dev) { uintptr_t id; if (pci_get_id(pci_dev, PCI_ID_MSI, &id) != 0) panic("%s: %s: Unable to get the MSI DeviceID", __func__, device_get_nameunit(pci_dev)); return (id); } static struct its_dev * its_device_find(device_t dev, device_t child) { struct gicv3_its_softc *sc; struct its_dev *its_dev = NULL; sc = device_get_softc(dev); mtx_lock_spin(&sc->sc_its_dev_lock); TAILQ_FOREACH(its_dev, &sc->sc_its_dev_list, entry) { if (its_dev->pci_dev == child) break; } mtx_unlock_spin(&sc->sc_its_dev_lock); return (its_dev); } static struct its_dev * its_device_get(device_t dev, device_t child, u_int nvecs) { struct gicv3_its_softc *sc; struct its_dev *its_dev; vmem_addr_t irq_base; size_t esize; sc = device_get_softc(dev); its_dev = its_device_find(dev, child); if (its_dev != NULL) return (its_dev); its_dev = malloc(sizeof(*its_dev), M_GICV3_ITS, M_NOWAIT | M_ZERO); if (its_dev == NULL) return (NULL); its_dev->pci_dev = child; its_dev->devid = its_get_devid(child); its_dev->lpis.lpi_busy = 0; its_dev->lpis.lpi_num = nvecs; its_dev->lpis.lpi_free = nvecs; if (vmem_alloc(sc->sc_irq_alloc, nvecs, M_FIRSTFIT | M_NOWAIT, &irq_base) != 0) { free(its_dev, M_GICV3_ITS); return (NULL); } its_dev->lpis.lpi_base = irq_base; /* Get ITT entry size */ esize = 
GITS_TYPER_ITTES(gic_its_read_8(sc, GITS_TYPER)); /* * Allocate ITT for this device. * PA has to be 256 B aligned. At least two entries for device. */ its_dev->itt_size = roundup2(MAX(nvecs, 2) * esize, 256); its_dev->itt = (vm_offset_t)contigmalloc_domainset(its_dev->itt_size, M_GICV3_ITS, sc->sc_ds, M_NOWAIT | M_ZERO, 0, LPI_INT_TRANS_TAB_MAX_ADDR, LPI_INT_TRANS_TAB_ALIGN, 0); if (its_dev->itt == 0) { vmem_free(sc->sc_irq_alloc, its_dev->lpis.lpi_base, nvecs); free(its_dev, M_GICV3_ITS); return (NULL); } /* Make sure device sees zeroed ITT. */ if ((sc->sc_its_flags & ITS_FLAGS_CMDQ_FLUSH) != 0) cpu_dcache_wb_range(its_dev->itt, its_dev->itt_size); mtx_lock_spin(&sc->sc_its_dev_lock); TAILQ_INSERT_TAIL(&sc->sc_its_dev_list, its_dev, entry); mtx_unlock_spin(&sc->sc_its_dev_lock); /* Map device to its ITT */ its_cmd_mapd(dev, its_dev, 1); return (its_dev); } static void its_device_release(device_t dev, struct its_dev *its_dev) { struct gicv3_its_softc *sc; KASSERT(its_dev->lpis.lpi_busy == 0, ("its_device_release: Trying to release an inuse ITS device")); /* Unmap device in ITS */ its_cmd_mapd(dev, its_dev, 0); sc = device_get_softc(dev); /* Remove the device from the list of devices */ mtx_lock_spin(&sc->sc_its_dev_lock); TAILQ_REMOVE(&sc->sc_its_dev_list, its_dev, entry); mtx_unlock_spin(&sc->sc_its_dev_lock); /* Free ITT */ KASSERT(its_dev->itt != 0, ("Invalid ITT in valid ITS device")); contigfree((void *)its_dev->itt, its_dev->itt_size, M_GICV3_ITS); /* Free the IRQ allocation */ vmem_free(sc->sc_irq_alloc, its_dev->lpis.lpi_base, its_dev->lpis.lpi_num); free(its_dev, M_GICV3_ITS); } static struct gicv3_its_irqsrc * gicv3_its_alloc_irqsrc(device_t dev, struct gicv3_its_softc *sc, u_int irq) { struct gicv3_its_irqsrc *girq = NULL; KASSERT(sc->sc_irqs[irq] == NULL, ("%s: Interrupt %u already allocated", __func__, irq)); mtx_lock_spin(&sc->sc_its_dev_lock); if (!TAILQ_EMPTY(&sc->sc_free_irqs)) { girq = TAILQ_FIRST(&sc->sc_free_irqs); 
TAILQ_REMOVE(&sc->sc_free_irqs, girq, gi_link); } mtx_unlock_spin(&sc->sc_its_dev_lock); if (girq == NULL) { girq = malloc(sizeof(*girq), M_GICV3_ITS, M_NOWAIT | M_ZERO); if (girq == NULL) return (NULL); girq->gi_id = -1; if (intr_isrc_register(&girq->gi_isrc, dev, 0, "%s,%u", device_get_nameunit(dev), irq) != 0) { free(girq, M_GICV3_ITS); return (NULL); } } girq->gi_lpi = irq + sc->sc_irq_base - GIC_FIRST_LPI; sc->sc_irqs[irq] = girq; return (girq); } static void gicv3_its_release_irqsrc(struct gicv3_its_softc *sc, struct gicv3_its_irqsrc *girq) { u_int irq; mtx_assert(&sc->sc_its_dev_lock, MA_OWNED); irq = girq->gi_lpi + GIC_FIRST_LPI - sc->sc_irq_base; sc->sc_irqs[irq] = NULL; girq->gi_id = -1; girq->gi_its_dev = NULL; TAILQ_INSERT_TAIL(&sc->sc_free_irqs, girq, gi_link); } static int gicv3_its_alloc_msi(device_t dev, device_t child, int count, int maxcount, device_t *pic, struct intr_irqsrc **srcs) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; struct its_dev *its_dev; u_int irq; int i; its_dev = its_device_get(dev, child, count); if (its_dev == NULL) return (ENXIO); KASSERT(its_dev->lpis.lpi_free >= count, ("gicv3_its_alloc_msi: No free LPIs")); sc = device_get_softc(dev); irq = its_dev->lpis.lpi_base + its_dev->lpis.lpi_num - its_dev->lpis.lpi_free; /* Allocate the irqsrc for each MSI */ for (i = 0; i < count; i++, irq++) { its_dev->lpis.lpi_free--; srcs[i] = (struct intr_irqsrc *)gicv3_its_alloc_irqsrc(dev, sc, irq); if (srcs[i] == NULL) break; } /* The allocation failed, release them */ if (i != count) { mtx_lock_spin(&sc->sc_its_dev_lock); for (i = 0; i < count; i++) { girq = (struct gicv3_its_irqsrc *)srcs[i]; if (girq == NULL) break; gicv3_its_release_irqsrc(sc, girq); srcs[i] = NULL; } mtx_unlock_spin(&sc->sc_its_dev_lock); return (ENXIO); } /* Finish the allocation now we have all MSI irqsrcs */ for (i = 0; i < count; i++) { girq = (struct gicv3_its_irqsrc *)srcs[i]; girq->gi_id = i; girq->gi_its_dev = its_dev; /* Map the message to the 
given IRQ */ gicv3_its_select_cpu(dev, (struct intr_irqsrc *)girq); its_cmd_mapti(dev, girq); } its_dev->lpis.lpi_busy += count; *pic = dev; return (0); } static int gicv3_its_release_msi(device_t dev, device_t child, int count, struct intr_irqsrc **isrc) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; struct its_dev *its_dev; int i; its_dev = its_device_find(dev, child); KASSERT(its_dev != NULL, ("gicv3_its_release_msi: Releasing a MSI interrupt with " "no ITS device")); KASSERT(its_dev->lpis.lpi_busy >= count, ("gicv3_its_release_msi: Releasing more interrupts than " "were allocated: releasing %d, allocated %d", count, its_dev->lpis.lpi_busy)); sc = device_get_softc(dev); mtx_lock_spin(&sc->sc_its_dev_lock); for (i = 0; i < count; i++) { girq = (struct gicv3_its_irqsrc *)isrc[i]; gicv3_its_release_irqsrc(sc, girq); } mtx_unlock_spin(&sc->sc_its_dev_lock); its_dev->lpis.lpi_busy -= count; if (its_dev->lpis.lpi_busy == 0) its_device_release(dev, its_dev); return (0); } static int gicv3_its_alloc_msix(device_t dev, device_t child, device_t *pic, struct intr_irqsrc **isrcp) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; struct its_dev *its_dev; u_int nvecs, irq; nvecs = pci_msix_count(child); its_dev = its_device_get(dev, child, nvecs); if (its_dev == NULL) return (ENXIO); KASSERT(its_dev->lpis.lpi_free > 0, ("gicv3_its_alloc_msix: No free LPIs")); sc = device_get_softc(dev); irq = its_dev->lpis.lpi_base + its_dev->lpis.lpi_num - its_dev->lpis.lpi_free; girq = gicv3_its_alloc_irqsrc(dev, sc, irq); if (girq == NULL) return (ENXIO); girq->gi_id = its_dev->lpis.lpi_busy; girq->gi_its_dev = its_dev; its_dev->lpis.lpi_free--; its_dev->lpis.lpi_busy++; /* Map the message to the given IRQ */ gicv3_its_select_cpu(dev, (struct intr_irqsrc *)girq); its_cmd_mapti(dev, girq); *pic = dev; *isrcp = (struct intr_irqsrc *)girq; return (0); } static int gicv3_its_release_msix(device_t dev, device_t child, struct intr_irqsrc *isrc) { struct gicv3_its_softc 
*sc; struct gicv3_its_irqsrc *girq; struct its_dev *its_dev; its_dev = its_device_find(dev, child); KASSERT(its_dev != NULL, ("gicv3_its_release_msix: Releasing a MSI-X interrupt with " "no ITS device")); KASSERT(its_dev->lpis.lpi_busy > 0, ("gicv3_its_release_msix: Releasing more interrupts than " "were allocated: allocated %d", its_dev->lpis.lpi_busy)); sc = device_get_softc(dev); girq = (struct gicv3_its_irqsrc *)isrc; mtx_lock_spin(&sc->sc_its_dev_lock); gicv3_its_release_irqsrc(sc, girq); mtx_unlock_spin(&sc->sc_its_dev_lock); its_dev->lpis.lpi_busy--; if (its_dev->lpis.lpi_busy == 0) its_device_release(dev, its_dev); return (0); } static int gicv3_its_map_msi(device_t dev, device_t child, struct intr_irqsrc *isrc, uint64_t *addr, uint32_t *data) { struct gicv3_its_softc *sc; struct gicv3_its_irqsrc *girq; sc = device_get_softc(dev); girq = (struct gicv3_its_irqsrc *)isrc; *addr = vtophys(rman_get_virtual(sc->sc_its_res)) + GITS_TRANSLATER; *data = girq->gi_id; return (0); } #ifdef IOMMU static int gicv3_iommu_init(device_t dev, device_t child, struct iommu_domain **domain) { struct gicv3_its_softc *sc; struct iommu_ctx *ctx; int error; sc = device_get_softc(dev); ctx = iommu_get_dev_ctx(child); if (ctx == NULL) return (ENXIO); - error = iommu_map_msi(ctx, PAGE_SIZE, GITS_TRANSLATER, + /* Map the page containing the GITS_TRANSLATER register. */ + error = iommu_map_msi(ctx, PAGE_SIZE, 0, IOMMU_MAP_ENTRY_WRITE, IOMMU_MF_CANWAIT, &sc->ma); *domain = iommu_get_ctx_domain(ctx); return (error); } static void gicv3_iommu_deinit(device_t dev, device_t child) { struct iommu_ctx *ctx; ctx = iommu_get_dev_ctx(child); if (ctx == NULL) return; iommu_unmap_msi(ctx); } #endif /* * Commands handling. 
*/

/*
 * The cmd_format_*() helpers each clear one field of an ITS command and
 * store a new value into it.  Command dwords are kept little-endian, hence
 * the htole64() on every mask and value.
 */

static __inline void
cmd_format_command(struct its_cmd *cmd, uint8_t cmd_type)
{
	/* Command field: DW0 [7:0] */
	cmd->cmd_dword[0] &= htole64(~CMD_COMMAND_MASK);
	cmd->cmd_dword[0] |= htole64(cmd_type);
}

static __inline void
cmd_format_devid(struct its_cmd *cmd, uint32_t devid)
{
	/* Device ID field: DW0 [63:32] */
	cmd->cmd_dword[0] &= htole64(~CMD_DEVID_MASK);
	cmd->cmd_dword[0] |= htole64((uint64_t)devid << CMD_DEVID_SHIFT);
}

static __inline void
cmd_format_size(struct its_cmd *cmd, uint16_t size)
{
	/* Size field: DW1 [4:0] */
	cmd->cmd_dword[1] &= htole64(~CMD_SIZE_MASK);
	cmd->cmd_dword[1] |= htole64((size & CMD_SIZE_MASK));
}

static __inline void
cmd_format_id(struct its_cmd *cmd, uint32_t id)
{
	/* ID field: DW1 [31:0] */
	cmd->cmd_dword[1] &= htole64(~CMD_ID_MASK);
	cmd->cmd_dword[1] |= htole64(id);
}

static __inline void
cmd_format_pid(struct its_cmd *cmd, uint32_t pid)
{
	/* Physical ID field: DW1 [63:32] */
	cmd->cmd_dword[1] &= htole64(~CMD_PID_MASK);
	cmd->cmd_dword[1] |= htole64((uint64_t)pid << CMD_PID_SHIFT);
}

static __inline void
cmd_format_col(struct its_cmd *cmd, uint16_t col_id)
{
	/* Collection field: DW2 [15:0] */
	cmd->cmd_dword[2] &= htole64(~CMD_COL_MASK);
	cmd->cmd_dword[2] |= htole64(col_id);
}

static __inline void
cmd_format_target(struct its_cmd *cmd, uint64_t target)
{
	/* Target Address field: DW2 [47:16] */
	cmd->cmd_dword[2] &= htole64(~CMD_TARGET_MASK);
	cmd->cmd_dword[2] |= htole64(target & CMD_TARGET_MASK);
}

static __inline void
cmd_format_itt(struct its_cmd *cmd, uint64_t itt)
{
	/* ITT Address field: DW2 [47:8] */
	cmd->cmd_dword[2] &= htole64(~CMD_ITT_MASK);
	cmd->cmd_dword[2] |= htole64(itt & CMD_ITT_MASK);
}

static __inline void
cmd_format_valid(struct its_cmd *cmd, uint8_t valid)
{
	/* Valid field: DW2 [63] */
	cmd->cmd_dword[2] &= htole64(~CMD_VALID_MASK);
	cmd->cmd_dword[2] |= htole64((uint64_t)valid << CMD_VALID_SHIFT);
}

/*
 * Return true when advancing the software write index by one slot would
 * make it equal to the index derived from GITS_CREADR.
 */
static inline bool
its_cmd_queue_full(struct gicv3_its_softc *sc)
{
	size_t read_idx, next_write_idx;

	/* Get the index of the next command */
	next_write_idx = (sc->sc_its_cmd_next_idx + 1) %
	    (ITS_CMDQ_SIZE / sizeof(struct its_cmd));
	/* And the index of the current command being read */
	read_idx = gic_its_read_4(sc, GITS_CREADR) / sizeof(struct its_cmd);

	/*
	 * The queue is full when the write offset points
	 * at the command before the current read offset.
	 */
	return (next_write_idx == read_idx);
}

/*
 * Push a freshly written command out: write back the data cache lines
 * covering it when ITS_FLAGS_CMDQ_FLUSH is set, otherwise issue a store
 * barrier.
 */
static inline void
its_cmd_sync(struct gicv3_its_softc *sc, struct its_cmd *cmd)
{

	if ((sc->sc_its_flags & ITS_FLAGS_CMDQ_FLUSH) != 0) {
		/* Clean D-cache under command. */
		cpu_dcache_wb_range((vm_offset_t)cmd, sizeof(*cmd));
	} else {
		/* DSB inner shareable, store */
		dsb(ishst);
	}

}

/* Byte offset of "cmd" from the start of the command queue. */
static inline uint64_t
its_cmd_cwriter_offset(struct gicv3_its_softc *sc, struct its_cmd *cmd)
{
	uint64_t off;

	off = (cmd - sc->sc_its_cmd_base) * sizeof(*cmd);

	return (off);
}

/*
 * Poll GITS_CREADR until it has left the window [cmd_first, cmd_last),
 * allowing for the window wrapping around the end of the queue.  Polls
 * once per microsecond and gives up, with a console message, after about
 * one second.  A timeout is not reported to the caller.
 */
static void
its_cmd_wait_completion(device_t dev, struct its_cmd *cmd_first,
    struct its_cmd *cmd_last)
{
	struct gicv3_its_softc *sc;
	uint64_t first, last, read;
	size_t us_left;

	sc = device_get_softc(dev);

	/*
	 * XXX ARM64TODO: This is obviously a significant delay.
	 * The reason for that is that currently the time frames for
	 * the command to complete are not known.
	 */
	us_left = 1000000;

	first = its_cmd_cwriter_offset(sc, cmd_first);
	last = its_cmd_cwriter_offset(sc, cmd_last);

	for (;;) {
		read = gic_its_read_8(sc, GITS_CREADR);
		if (first < last) {
			/* Window does not wrap. */
			if (read < first || read >= last)
				break;
		} else if (read < first && read >= last)
			/* Window wraps around the end of the queue. */
			break;

		if (us_left-- == 0) {
			/* This means timeout */
			device_printf(dev,
			    "Timeout while waiting for CMD completion.\n");
			return;
		}
		DELAY(1);
	}
}

/*
 * Reserve the next slot of the command queue and advance the software
 * write index.  Waits up to about one second for a free slot and returns
 * NULL on timeout.  The caller must hold sc->sc_its_cmd_lock.
 */
static struct its_cmd *
its_cmd_alloc_locked(device_t dev)
{
	struct gicv3_its_softc *sc;
	struct its_cmd *cmd;
	size_t us_left;

	sc = device_get_softc(dev);

	/*
	 * XXX ARM64TODO: This is obviously a significant delay.
	 * The reason for that is that currently the time frames for
	 * the command to complete (and therefore free the descriptor)
	 * are not known.
	 */
	us_left = 1000000;

	mtx_assert(&sc->sc_its_cmd_lock, MA_OWNED);
	while (its_cmd_queue_full(sc)) {
		if (us_left-- == 0) {
			/* Timeout while waiting for free command */
			device_printf(dev,
			    "Timeout while waiting for free command\n");
			return (NULL);
		}
		DELAY(1);
	}

	cmd = &sc->sc_its_cmd_base[sc->sc_its_cmd_next_idx];
	sc->sc_its_cmd_next_idx++;
	sc->sc_its_cmd_next_idx %= ITS_CMDQ_SIZE / sizeof(struct its_cmd);

	return (cmd);
}

/*
 * Encode the command described by "desc" into "cmd".
 *
 * Returns the collection target the command refers to, or ITS_TARGET_NONE
 * for commands that do not set one (MAPD and INVALL here); its_cmd_send()
 * uses the value to decide whether to queue a SYNC.  Panics on an unknown
 * command type.
 */
static uint64_t
its_cmd_prepare(struct its_cmd *cmd, struct its_cmd_desc *desc)
{
	uint64_t target;
	uint8_t cmd_type;
	u_int size;

	cmd_type = desc->cmd_type;
	target = ITS_TARGET_NONE;

	switch (cmd_type) {
	case ITS_CMD_MOVI:	/* Move interrupt ID to another collection */
		target = desc->cmd_desc_movi.col->col_target;
		cmd_format_command(cmd, ITS_CMD_MOVI);
		cmd_format_id(cmd, desc->cmd_desc_movi.id);
		cmd_format_col(cmd, desc->cmd_desc_movi.col->col_id);
		cmd_format_devid(cmd, desc->cmd_desc_movi.its_dev->devid);
		break;
	case ITS_CMD_SYNC:	/* Wait for previous commands completion */
		target = desc->cmd_desc_sync.col->col_target;
		cmd_format_command(cmd, ITS_CMD_SYNC);
		cmd_format_target(cmd, target);
		break;
	case ITS_CMD_MAPD:	/* Assign ITT to device */
		cmd_format_command(cmd, ITS_CMD_MAPD);
		cmd_format_itt(cmd, vtophys(desc->cmd_desc_mapd.its_dev->itt));
		/*
		 * Size describes number of bits to encode interrupt IDs
		 * supported by the device minus one.
		 * When V (valid) bit is zero, this field should be written
		 * as zero.
		 */
		if (desc->cmd_desc_mapd.valid != 0) {
			size = fls(desc->cmd_desc_mapd.its_dev->lpis.lpi_num);
			size = MAX(1, size) - 1;
		} else
			size = 0;

		cmd_format_size(cmd, size);
		cmd_format_devid(cmd, desc->cmd_desc_mapd.its_dev->devid);
		cmd_format_valid(cmd, desc->cmd_desc_mapd.valid);
		break;
	case ITS_CMD_MAPC:	/* Map collection to Re-Distributor */
		target = desc->cmd_desc_mapc.col->col_target;
		cmd_format_command(cmd, ITS_CMD_MAPC);
		cmd_format_col(cmd, desc->cmd_desc_mapc.col->col_id);
		cmd_format_valid(cmd, desc->cmd_desc_mapc.valid);
		cmd_format_target(cmd, target);
		break;
	case ITS_CMD_MAPTI:
		target = desc->cmd_desc_mapvi.col->col_target;
		cmd_format_command(cmd, ITS_CMD_MAPTI);
		cmd_format_devid(cmd, desc->cmd_desc_mapvi.its_dev->devid);
		cmd_format_id(cmd, desc->cmd_desc_mapvi.id);
		cmd_format_pid(cmd, desc->cmd_desc_mapvi.pid);
		cmd_format_col(cmd, desc->cmd_desc_mapvi.col->col_id);
		break;
	case ITS_CMD_MAPI:
		target = desc->cmd_desc_mapi.col->col_target;
		cmd_format_command(cmd, ITS_CMD_MAPI);
		cmd_format_devid(cmd, desc->cmd_desc_mapi.its_dev->devid);
		cmd_format_id(cmd, desc->cmd_desc_mapi.pid);
		cmd_format_col(cmd, desc->cmd_desc_mapi.col->col_id);
		break;
	case ITS_CMD_INV:
		target = desc->cmd_desc_inv.col->col_target;
		cmd_format_command(cmd, ITS_CMD_INV);
		cmd_format_devid(cmd, desc->cmd_desc_inv.its_dev->devid);
		cmd_format_id(cmd, desc->cmd_desc_inv.pid);
		break;
	case ITS_CMD_INVALL:
		cmd_format_command(cmd, ITS_CMD_INVALL);
		cmd_format_col(cmd, desc->cmd_desc_invall.col->col_id);
		break;
	default:
		panic("its_cmd_prepare: Invalid command: %x", cmd_type);
	}

	return (target);
}

/*
 * Queue the command described by "desc", follow it with a SYNC when the
 * command has a target, publish the new write offset through GITS_CWRITER
 * and wait for the ITS to consume the queued commands.
 *
 * Returns EBUSY when no command slot could be reserved, 0 otherwise.  A
 * failure to reserve the SYNC slot, or a completion timeout, is not
 * reported to the caller.
 */
static int
its_cmd_send(device_t dev, struct its_cmd_desc *desc)
{
	struct gicv3_its_softc *sc;
	struct its_cmd *cmd, *cmd_sync, *cmd_write;
	struct its_col col_sync;
	struct its_cmd_desc desc_sync;
	uint64_t target, cwriter;

	sc = device_get_softc(dev);
	mtx_lock_spin(&sc->sc_its_cmd_lock);
	cmd = its_cmd_alloc_locked(dev);
	if (cmd == NULL) {
		device_printf(dev, "could not allocate ITS command\n");
		mtx_unlock_spin(&sc->sc_its_cmd_lock);
		return (EBUSY);
	}

	target = its_cmd_prepare(cmd, desc);
	its_cmd_sync(sc, cmd);

	if (target != ITS_TARGET_NONE) {
		cmd_sync = its_cmd_alloc_locked(dev);
		if (cmd_sync != NULL) {
			/* Only col_target of the on-stack collection is read. */
			desc_sync.cmd_type = ITS_CMD_SYNC;
			col_sync.col_target = target;
			desc_sync.cmd_desc_sync.col = &col_sync;
			its_cmd_prepare(cmd_sync, &desc_sync);
			its_cmd_sync(sc, cmd_sync);
		}
	}

	/* Update GITS_CWRITER */
	cwriter = sc->sc_its_cmd_next_idx * sizeof(struct its_cmd);
	gic_its_write_8(sc, GITS_CWRITER, cwriter);
	cmd_write = &sc->sc_its_cmd_base[sc->sc_its_cmd_next_idx];
	mtx_unlock_spin(&sc->sc_its_cmd_lock);

	its_cmd_wait_completion(dev, cmd, cmd_write);

	return (0);
}

/* Handlers to send commands */

/*
 * MOVI: move the interrupt's event to the collection of the first CPU set
 * in its isrc_cpu mask.
 */
static void
its_cmd_movi(device_t dev, struct gicv3_its_irqsrc *girq)
{
	struct gicv3_its_softc *sc;
	struct its_cmd_desc desc;
	struct its_col *col;

	sc = device_get_softc(dev);
	col = sc->sc_its_cols[CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1];

	desc.cmd_type = ITS_CMD_MOVI;
	desc.cmd_desc_movi.its_dev = girq->gi_its_dev;
	desc.cmd_desc_movi.col = col;
	desc.cmd_desc_movi.id = girq->gi_id;

	its_cmd_send(dev, &desc);
}

/* MAPC: map (valid != 0) or unmap (valid == 0) a collection. */
static void
its_cmd_mapc(device_t dev, struct its_col *col, uint8_t valid)
{
	struct its_cmd_desc desc;

	desc.cmd_type = ITS_CMD_MAPC;
	desc.cmd_desc_mapc.col = col;
	/*
	 * Valid bit set - map the collection.
	 * Valid bit cleared - unmap the collection.
	 */
	desc.cmd_desc_mapc.valid = valid;

	its_cmd_send(dev, &desc);
}

/*
 * MAPTI: bind the interrupt's event ID on its device to its physical LPI
 * and to the collection of the first CPU set in its isrc_cpu mask.
 */
static void
its_cmd_mapti(device_t dev, struct gicv3_its_irqsrc *girq)
{
	struct gicv3_its_softc *sc;
	struct its_cmd_desc desc;
	struct its_col *col;
	u_int col_id;

	sc = device_get_softc(dev);

	col_id = CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1;
	col = sc->sc_its_cols[col_id];

	desc.cmd_type = ITS_CMD_MAPTI;
	desc.cmd_desc_mapvi.its_dev = girq->gi_its_dev;
	desc.cmd_desc_mapvi.col = col;
	/* The EventID sent to the device */
	desc.cmd_desc_mapvi.id = girq->gi_id;
	/* The physical interrupt presented to software */
	desc.cmd_desc_mapvi.pid = girq->gi_lpi + GIC_FIRST_LPI;

	its_cmd_send(dev, &desc);
}

/* MAPD: map (valid != 0) or unmap (valid == 0) a device and its ITT. */
static void
its_cmd_mapd(device_t dev, struct its_dev *its_dev, uint8_t valid)
{
	struct its_cmd_desc desc;

	desc.cmd_type = ITS_CMD_MAPD;
	desc.cmd_desc_mapd.its_dev = its_dev;
	desc.cmd_desc_mapd.valid = valid;

	its_cmd_send(dev, &desc);
}

/* INV: send an INV command for the interrupt's event ID on "its_dev". */
static void
its_cmd_inv(device_t dev, struct its_dev *its_dev,
    struct gicv3_its_irqsrc *girq)
{
	struct gicv3_its_softc *sc;
	struct its_cmd_desc desc;
	struct its_col *col;

	sc = device_get_softc(dev);
	col = sc->sc_its_cols[CPU_FFS(&girq->gi_isrc.isrc_cpu) - 1];

	desc.cmd_type = ITS_CMD_INV;
	/* The EventID sent to the device */
	desc.cmd_desc_inv.pid = girq->gi_id;
	desc.cmd_desc_inv.its_dev = its_dev;
	desc.cmd_desc_inv.col = col;

	its_cmd_send(dev, &desc);
}

/* INVALL: send an INVALL command for collection "col". */
static void
its_cmd_invall(device_t dev, struct its_col *col)
{
	struct its_cmd_desc desc;

	desc.cmd_type = ITS_CMD_INVALL;
	desc.cmd_desc_invall.col = col;

	its_cmd_send(dev, &desc);
}

#ifdef FDT
static device_probe_t gicv3_its_fdt_probe;
static device_attach_t gicv3_its_fdt_attach;

static device_method_t gicv3_its_fdt_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		gicv3_its_fdt_probe),
	DEVMETHOD(device_attach,	gicv3_its_fdt_attach),

	/* End */
	DEVMETHOD_END
};

#define its_baseclasses its_fdt_baseclasses
DEFINE_CLASS_1(its, gicv3_its_fdt_driver, gicv3_its_fdt_methods,
    sizeof(struct gicv3_its_softc), gicv3_its_driver);
#undef
its_baseclasses EARLY_DRIVER_MODULE(its_fdt, gic, gicv3_its_fdt_driver, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); static int gicv3_its_fdt_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "arm,gic-v3-its")) return (ENXIO); device_set_desc(dev, "ARM GIC Interrupt Translation Service"); return (BUS_PROBE_DEFAULT); } static int gicv3_its_fdt_attach(device_t dev) { struct gicv3_its_softc *sc; phandle_t xref; int err; sc = device_get_softc(dev); sc->dev = dev; err = gicv3_its_attach(dev); if (err != 0) return (err); /* Register this device as a interrupt controller */ xref = OF_xref_from_node(ofw_bus_get_node(dev)); sc->sc_pic = intr_pic_register(dev, xref); err = intr_pic_add_handler(device_get_parent(dev), sc->sc_pic, gicv3_its_intr, sc, sc->sc_irq_base, sc->sc_irq_length); if (err != 0) { device_printf(dev, "Failed to add PIC handler: %d\n", err); return (err); } /* Register this device to handle MSI interrupts */ err = intr_msi_register(dev, xref); if (err != 0) { device_printf(dev, "Failed to register for MSIs: %d\n", err); return (err); } return (0); } #endif #ifdef DEV_ACPI static device_probe_t gicv3_its_acpi_probe; static device_attach_t gicv3_its_acpi_attach; static device_method_t gicv3_its_acpi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, gicv3_its_acpi_probe), DEVMETHOD(device_attach, gicv3_its_acpi_attach), /* End */ DEVMETHOD_END }; #define its_baseclasses its_acpi_baseclasses DEFINE_CLASS_1(its, gicv3_its_acpi_driver, gicv3_its_acpi_methods, sizeof(struct gicv3_its_softc), gicv3_its_driver); #undef its_baseclasses EARLY_DRIVER_MODULE(its_acpi, gic, gicv3_its_acpi_driver, 0, 0, BUS_PASS_INTERRUPT + BUS_PASS_ORDER_MIDDLE); static int gicv3_its_acpi_probe(device_t dev) { if (gic_get_bus(dev) != GIC_BUS_ACPI) return (EINVAL); if (gic_get_hw_rev(dev) < 3) return (EINVAL); device_set_desc(dev, "ARM GIC Interrupt Translation Service"); return (BUS_PROBE_DEFAULT); } static int 
gicv3_its_acpi_attach(device_t dev) { struct gicv3_its_softc *sc; struct gic_v3_devinfo *di; int err; sc = device_get_softc(dev); sc->dev = dev; err = gicv3_its_attach(dev); if (err != 0) return (err); di = device_get_ivars(dev); sc->sc_pic = intr_pic_register(dev, di->msi_xref); err = intr_pic_add_handler(device_get_parent(dev), sc->sc_pic, gicv3_its_intr, sc, sc->sc_irq_base, sc->sc_irq_length); if (err != 0) { device_printf(dev, "Failed to add PIC handler: %d\n", err); return (err); } /* Register this device to handle MSI interrupts */ err = intr_msi_register(dev, di->msi_xref); if (err != 0) { device_printf(dev, "Failed to register for MSIs: %d\n", err); return (err); } return (0); } #endif diff --git a/sys/dev/iommu/busdma_iommu.c b/sys/dev/iommu/busdma_iommu.c index ad638e293324..e06d96dad027 100644 --- a/sys/dev/iommu/busdma_iommu.c +++ b/sys/dev/iommu/busdma_iommu.c @@ -1,1121 +1,1120 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 The FreeBSD Foundation * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * busdma_iommu.c, the implementation of the busdma(9) interface using * IOMMU units from Intel VT-d. */ static bool iommu_bus_dma_is_dev_disabled(int domain, int bus, int slot, int func) { char str[128], *env; int default_bounce; bool ret; static const char bounce_str[] = "bounce"; static const char iommu_str[] = "iommu"; static const char dmar_str[] = "dmar"; /* compatibility */ default_bounce = 0; env = kern_getenv("hw.busdma.default"); if (env != NULL) { if (strcmp(env, bounce_str) == 0) default_bounce = 1; else if (strcmp(env, iommu_str) == 0 || strcmp(env, dmar_str) == 0) default_bounce = 0; freeenv(env); } snprintf(str, sizeof(str), "hw.busdma.pci%d.%d.%d.%d", domain, bus, slot, func); env = kern_getenv(str); if (env == NULL) return (default_bounce != 0); if (strcmp(env, bounce_str) == 0) ret = true; else if (strcmp(env, iommu_str) == 0 || strcmp(env, dmar_str) == 0) ret = false; else ret = default_bounce != 0; freeenv(env); return (ret); } /* * Given original device, find the requester ID that will be seen by * the IOMMU unit and used for page table lookup. 
PCI bridges may take * ownership of transactions from downstream devices, so it may not be * the same as the BSF of the target device. In those cases, all * devices downstream of the bridge must share a single mapping * domain, and must collectively be assigned to use either IOMMU or * bounce mapping. */ device_t iommu_get_requester(device_t dev, uint16_t *rid) { devclass_t pci_class; device_t l, pci, pcib, pcip, pcibp, requester; int cap_offset; uint16_t pcie_flags; bool bridge_is_pcie; pci_class = devclass_find("pci"); l = requester = dev; *rid = pci_get_rid(dev); /* * Walk the bridge hierarchy from the target device to the * host port to find the translating bridge nearest the IOMMU * unit. */ for (;;) { pci = device_get_parent(l); KASSERT(pci != NULL, ("iommu_get_requester(%s): NULL parent " "for %s", device_get_name(dev), device_get_name(l))); KASSERT(device_get_devclass(pci) == pci_class, ("iommu_get_requester(%s): non-pci parent %s for %s", device_get_name(dev), device_get_name(pci), device_get_name(l))); pcib = device_get_parent(pci); KASSERT(pcib != NULL, ("iommu_get_requester(%s): NULL bridge " "for %s", device_get_name(dev), device_get_name(pci))); /* * The parent of our "bridge" isn't another PCI bus, * so pcib isn't a PCI->PCI bridge but rather a host * port, and the requester ID won't be translated * further. */ pcip = device_get_parent(pcib); if (device_get_devclass(pcip) != pci_class) break; pcibp = device_get_parent(pcip); if (pci_find_cap(l, PCIY_EXPRESS, &cap_offset) == 0) { /* * Do not stop the loop even if the target * device is PCIe, because it is possible (but * unlikely) to have a PCI->PCIe bridge * somewhere in the hierarchy. */ l = pcib; } else { /* * Device is not PCIe, it cannot be seen as a * requester by IOMMU unit. Check whether the * bridge is PCIe. */ bridge_is_pcie = pci_find_cap(pcib, PCIY_EXPRESS, &cap_offset) == 0; requester = pcib; /* * Check for a buggy PCIe/PCI bridge that * doesn't report the express capability. 
If * the bridge above it is express but isn't a * PCI bridge, then we know pcib is actually a * PCIe/PCI bridge. */ if (!bridge_is_pcie && pci_find_cap(pcibp, PCIY_EXPRESS, &cap_offset) == 0) { pcie_flags = pci_read_config(pcibp, cap_offset + PCIER_FLAGS, 2); if ((pcie_flags & PCIEM_FLAGS_TYPE) != PCIEM_TYPE_PCI_BRIDGE) bridge_is_pcie = true; } if (bridge_is_pcie) { /* * The current device is not PCIe, but * the bridge above it is. This is a * PCIe->PCI bridge. Assume that the * requester ID will be the secondary * bus number with slot and function * set to zero. * * XXX: Doesn't handle the case where * the bridge is PCIe->PCI-X, and the * bridge will only take ownership of * requests in some cases. We should * provide context entries with the * same page tables for taken and * non-taken transactions. */ *rid = PCI_RID(pci_get_bus(l), 0, 0); l = pcibp; } else { /* * Neither the device nor the bridge * above it are PCIe. This is a * conventional PCI->PCI bridge, which * will use the bridge's BSF as the * requester ID. */ *rid = pci_get_rid(pcib); l = pcib; } } } return (requester); } struct iommu_ctx * iommu_instantiate_ctx(struct iommu_unit *unit, device_t dev, bool rmrr) { device_t requester; struct iommu_ctx *ctx; bool disabled; uint16_t rid; requester = iommu_get_requester(dev, &rid); /* * If the user requested the IOMMU disabled for the device, we * cannot disable the IOMMU unit, due to possibility of other * devices on the same IOMMU unit still requiring translation. * Instead provide the identity mapping for the device * context. */ disabled = iommu_bus_dma_is_dev_disabled(pci_get_domain(requester), pci_get_bus(requester), pci_get_slot(requester), pci_get_function(requester)); ctx = iommu_get_ctx(unit, requester, rid, disabled, rmrr); if (ctx == NULL) return (NULL); if (disabled) { /* * Keep the first reference on context, release the * later refs. 
*/ IOMMU_LOCK(unit); if ((ctx->flags & IOMMU_CTX_DISABLED) == 0) { ctx->flags |= IOMMU_CTX_DISABLED; IOMMU_UNLOCK(unit); } else { iommu_free_ctx_locked(unit, ctx); } ctx = NULL; } return (ctx); } struct iommu_ctx * iommu_get_dev_ctx(device_t dev) { struct iommu_unit *unit; unit = iommu_find(dev, bootverbose); /* Not in scope of any IOMMU ? */ if (unit == NULL) return (NULL); if (!unit->dma_enabled) return (NULL); #if defined(__amd64__) || defined(__i386__) dmar_quirks_pre_use(unit); dmar_instantiate_rmrr_ctxs(unit); #endif return (iommu_instantiate_ctx(unit, dev, false)); } bus_dma_tag_t iommu_get_dma_tag(device_t dev, device_t child) { struct iommu_ctx *ctx; bus_dma_tag_t res; ctx = iommu_get_dev_ctx(child); if (ctx == NULL) return (NULL); res = (bus_dma_tag_t)ctx->tag; return (res); } bool bus_dma_iommu_set_buswide(device_t dev) { struct iommu_unit *unit; device_t parent; u_int busno, slot, func; parent = device_get_parent(dev); if (device_get_devclass(parent) != devclass_find("pci")) return (false); unit = iommu_find(dev, bootverbose); if (unit == NULL) return (false); busno = pci_get_bus(dev); slot = pci_get_slot(dev); func = pci_get_function(dev); if (slot != 0 || func != 0) { if (bootverbose) { device_printf(dev, "iommu%d pci%d:%d:%d requested buswide busdma\n", unit->unit, busno, slot, func); } return (false); } iommu_set_buswide_ctx(unit, busno); return (true); } void iommu_set_buswide_ctx(struct iommu_unit *unit, u_int busno) { MPASS(busno <= PCI_BUSMAX); IOMMU_LOCK(unit); unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] |= 1 << (busno % (NBBY * sizeof(uint32_t))); IOMMU_UNLOCK(unit); } bool iommu_is_buswide_ctx(struct iommu_unit *unit, u_int busno) { MPASS(busno <= PCI_BUSMAX); return ((unit->buswide_ctxs[busno / NBBY / sizeof(uint32_t)] & (1U << (busno % (NBBY * sizeof(uint32_t))))) != 0); } static MALLOC_DEFINE(M_IOMMU_DMAMAP, "iommu_dmamap", "IOMMU DMA Map"); static void iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu 
*map);

/*
 * Create a child busdma tag of "parent" through the common tag code and
 * make it inherit the parent's IOMMU context and owner.
 * NOTE(review): "parent" is dereferenced unconditionally after the common
 * create succeeds; presumably callers never pass NULL here - confirm.
 */
static int
iommu_bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
    bus_addr_t boundary, bus_addr_t lowaddr, bus_addr_t highaddr,
    bus_dma_filter_t *filter, void *filterarg, bus_size_t maxsize,
    int nsegments, bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc,
    void *lockfuncarg, bus_dma_tag_t *dmat)
{
	struct bus_dma_tag_iommu *newtag, *oldtag;
	int error;

	*dmat = NULL;
	error = common_bus_dma_tag_create(parent != NULL ?
	    &((struct bus_dma_tag_iommu *)parent)->common : NULL, alignment,
	    boundary, lowaddr, highaddr, filter, filterarg, maxsize,
	    nsegments, maxsegsz, flags, lockfunc, lockfuncarg,
	    sizeof(struct bus_dma_tag_iommu), (void **)&newtag);
	if (error != 0)
		goto out;

	oldtag = (struct bus_dma_tag_iommu *)parent;
	newtag->common.impl = &bus_dma_iommu_impl;
	newtag->ctx = oldtag->ctx;
	newtag->owner = oldtag->owner;

	*dmat = (bus_dma_tag_t)newtag;
out:
	CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d",
	    __func__, newtag, (newtag != NULL ? newtag->common.flags : 0),
	    error);
	return (error);
}

/* Nothing to do for IOMMU tags; always succeeds. */
static int
iommu_bus_dma_tag_set_domain(bus_dma_tag_t dmat)
{

	return (0);
}

/*
 * Drop one reference on the tag and walk up the parent chain, freeing
 * every tag whose reference count reaches zero.  Fails with EBUSY while
 * the tag still has maps.  When a freed tag is the one stored in its
 * context, the context is released as well.
 */
static int
iommu_bus_dma_tag_destroy(bus_dma_tag_t dmat1)
{
	struct bus_dma_tag_iommu *dmat, *parent;
	struct bus_dma_tag_iommu *dmat_copy __unused;
	int error;

	error = 0;
	dmat_copy = dmat = (struct bus_dma_tag_iommu *)dmat1;

	if (dmat != NULL) {
		if (dmat->map_count != 0) {
			error = EBUSY;
			goto out;
		}
		while (dmat != NULL) {
			parent = (struct bus_dma_tag_iommu *)dmat->common.parent;
			if (atomic_fetchadd_int(&dmat->common.ref_count, -1) ==
			    1) {
				if (dmat == dmat->ctx->tag)
					iommu_free_ctx(dmat->ctx);
				free(dmat->segments, M_IOMMU_DMAMAP);
				free(dmat, M_DEVBUF);
				dmat = parent;
			} else
				dmat = NULL;
		}
	}
out:
	CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error);
	return (error);
}

/* Always reports false for IOMMU tags. */
static bool
iommu_bus_dma_id_mapped(bus_dma_tag_t dmat, vm_paddr_t buf, bus_size_t buflen)
{

	return (false);
}

/*
 * Allocate a DMA map for the tag (M_NOWAIT) and, on first use, the tag's
 * segment array.  Returns ENOMEM with *mapp set to NULL when either
 * allocation fails.
 */
static int
iommu_bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = malloc_domainset(sizeof(*map), M_IOMMU_DMAMAP,
	    DOMAINSET_PREF(tag->common.domain), M_NOWAIT | M_ZERO);
	if (map == NULL) {
		*mapp = NULL;
		return (ENOMEM);
	}
	if (tag->segments == NULL) {
		tag->segments = malloc_domainset(sizeof(bus_dma_segment_t) *
		    tag->common.nsegments, M_IOMMU_DMAMAP,
		    DOMAINSET_PREF(tag->common.domain), M_NOWAIT);
		if (tag->segments == NULL) {
			free(map, M_IOMMU_DMAMAP);
			*mapp = NULL;
			return (ENOMEM);
		}
	}
	TAILQ_INIT(&map->map_entries);
	map->tag = tag;
	map->locked = true;
	map->cansleep = false;
	tag->map_count++;
	*mapp = (bus_dmamap_t)map;

	return (0);
}

/*
 * Free a DMA map.  Returns EBUSY when the map still has entries.  The
 * tag's map count is decremented even when "map1" is NULL.
 */
static int
iommu_bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_domain *domain;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	if (map != NULL) {
		domain = tag->ctx->domain;
		IOMMU_DOMAIN_LOCK(domain);
		if (!TAILQ_EMPTY(&map->map_entries)) {
			IOMMU_DOMAIN_UNLOCK(domain);
			return (EBUSY);
		}
		IOMMU_DOMAIN_UNLOCK(domain);
		free(map, M_IOMMU_DMAMAP);
	}
	tag->map_count--;
	return (0);
}

/*
 * Create a map and allocate tag->common.maxsize bytes of DMA memory for
 * it.  Requests smaller than a page, with default memory attributes and an
 * alignment no larger than the size, use malloc; everything else uses
 * kmem_alloc_attr_domainset().  The allocator used is recorded in
 * map->flags so that iommu_bus_dmamem_free() can match it.
 */
static int
iommu_bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
    bus_dmamap_t *mapp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	int error, mflags;
	vm_memattr_t attr;

	error = iommu_bus_dmamap_create(dmat, flags, mapp);
	if (error != 0)
		return (error);

	mflags = (flags & BUS_DMA_NOWAIT) != 0 ? M_NOWAIT : M_WAITOK;
	mflags |= (flags & BUS_DMA_ZERO) != 0 ? M_ZERO : 0;
	attr = (flags & BUS_DMA_NOCACHE) != 0 ? VM_MEMATTR_UNCACHEABLE :
	    VM_MEMATTR_DEFAULT;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)*mapp;

	if (tag->common.maxsize < PAGE_SIZE &&
	    tag->common.alignment <= tag->common.maxsize &&
	    attr == VM_MEMATTR_DEFAULT) {
		*vaddr = malloc_domainset(tag->common.maxsize, M_DEVBUF,
		    DOMAINSET_PREF(tag->common.domain), mflags);
		map->flags |= BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		*vaddr = (void *)kmem_alloc_attr_domainset(
		    DOMAINSET_PREF(tag->common.domain), tag->common.maxsize,
		    mflags, 0ul, BUS_SPACE_MAXADDR, attr);
		map->flags |= BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}
	if (*vaddr == NULL) {
		iommu_bus_dmamap_destroy(dmat, *mapp);
		*mapp = NULL;
		return (ENOMEM);
	}
	return (0);
}

/*
 * Free memory obtained from iommu_bus_dmamem_alloc() with the allocator
 * recorded in map->flags, then destroy the map.
 */
static void
iommu_bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map1)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;

	if ((map->flags & BUS_DMAMAP_IOMMU_MALLOC) != 0) {
		free(vaddr, M_DEVBUF);
		map->flags &= ~BUS_DMAMAP_IOMMU_MALLOC;
	} else {
		KASSERT((map->flags & BUS_DMAMAP_IOMMU_KMEM_ALLOC) != 0,
		    ("iommu_bus_dmamem_free for non alloced map %p", map));
		kmem_free((vm_offset_t)vaddr, tag->common.maxsize);
		map->flags &= ~BUS_DMAMAP_IOMMU_KMEM_ALLOC;
	}

	iommu_bus_dmamap_destroy(dmat, map1);
}

/*
 * Map "buflen" bytes, starting "offset" bytes into the page array "ma",
 * one busdma segment at a time.
 *
 * Every map entry created is linked onto both the map's entry list and
 * "unroll_list", so that the caller can undo a partial load.  *segp is
 * updated only on success.  Returns EFBIG when the tag's segment limit is
 * reached, or the error from iommu_map().
 */
static int
iommu_bus_dmamap_load_something1(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp,
    struct iommu_map_entries_tailq *unroll_list)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	bus_size_t buflen1;
	int error, e_flags, idx, gas_flags, seg;

	KASSERT(offset < IOMMU_PAGE_SIZE, ("offset %d", offset));
	if (segs == NULL)
		segs = tag->segments;
	ctx = tag->ctx;
	domain = ctx->domain;
	e_flags = IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) == 0 ? IOMMU_MAP_ENTRY_WRITE : 0);
	seg = *segp;
	error = 0;
	idx = 0;
	while (buflen > 0) {
		seg++;
		if (seg >= tag->common.nsegments) {
			error = EFBIG;
			break;
		}
		buflen1 = buflen > tag->common.maxsegsz ?
		    tag->common.maxsegsz : buflen;

		/*
		 * (Too) optimistically allow split if there is more
		 * than one segment left.
		 */
		gas_flags = map->cansleep ? IOMMU_MF_CANWAIT : 0;
		if (seg + 1 < tag->common.nsegments)
			gas_flags |= IOMMU_MF_CANSPLIT;

-		error = iommu_map(domain, &tag->common,
-		    round_page(offset + buflen1),
+		error = iommu_map(domain, &tag->common, buflen1,
		    offset, e_flags, gas_flags, ma + idx, &entry);
		if (error != 0)
			break;
		/* Update buflen1 in case buffer split. */
		if (buflen1 > entry->end - entry->start - offset)
			buflen1 = entry->end - entry->start - offset;

		KASSERT(vm_addr_align_ok(entry->start + offset,
		    tag->common.alignment),
		    ("alignment failed: ctx %p start 0x%jx offset %x "
		    "align 0x%jx", ctx, (uintmax_t)entry->start, offset,
		    (uintmax_t)tag->common.alignment));
		KASSERT(entry->end <= tag->common.lowaddr ||
		    entry->start >= tag->common.highaddr,
		    ("entry placement failed: ctx %p start 0x%jx end 0x%jx "
		    "lowaddr 0x%jx highaddr 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)tag->common.lowaddr,
		    (uintmax_t)tag->common.highaddr));
		KASSERT(vm_addr_bound_ok(entry->start + offset, buflen1,
		    tag->common.boundary),
		    ("boundary failed: ctx %p start 0x%jx end 0x%jx "
		    "boundary 0x%jx", ctx, (uintmax_t)entry->start,
		    (uintmax_t)entry->end,
		    (uintmax_t)tag->common.boundary));
		KASSERT(buflen1 <= tag->common.maxsegsz,
		    ("segment too large: ctx %p start 0x%jx end 0x%jx "
		    "buflen1 0x%jx maxsegsz 0x%jx", ctx,
		    (uintmax_t)entry->start, (uintmax_t)entry->end,
		    (uintmax_t)buflen1, (uintmax_t)tag->common.maxsegsz));

		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link);
		entry->flags |= IOMMU_MAP_ENTRY_MAP;
		IOMMU_DOMAIN_UNLOCK(domain);
		TAILQ_INSERT_TAIL(unroll_list, entry, unroll_link);

		segs[seg].ds_addr = entry->start + offset;
		segs[seg].ds_len = buflen1;

		/* Advance to the page and in-page offset of the next piece. */
		idx += OFF_TO_IDX(trunc_page(offset + buflen1));
		offset += buflen1;
		offset &= IOMMU_PAGE_MASK;
		buflen -= buflen1;
	}
	if (error == 0)
		*segp = seg;
	return (error);
}

/*
 * Load a page array into the map.  On failure every entry created by this
 * attempt is moved to the domain's unload list and the unload task is
 * queued.  ENOMEM is turned into EINPROGRESS, and the map is scheduled for
 * a deferred load, when the caller allows waiting but the map cannot sleep
 * yet.
 */
static int
iommu_bus_dmamap_load_something(struct bus_dma_tag_iommu *tag,
    struct bus_dmamap_iommu *map, vm_page_t *ma, int offset, bus_size_t buflen,
    int flags, bus_dma_segment_t *segs, int *segp)
{
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry, *entry1;
	struct iommu_map_entries_tailq unroll_list;
	int error;

	ctx = tag->ctx;
	domain = ctx->domain;
	atomic_add_long(&ctx->loads, 1);

	TAILQ_INIT(&unroll_list);
	error = iommu_bus_dmamap_load_something1(tag, map, ma, offset,
	    buflen, flags, segs, segp, &unroll_list);
	if (error != 0) {
		/*
		 * The busdma interface does not allow us to report
		 * partial buffer load, so unfortunately we have to
		 * revert all work done.
		 */
		IOMMU_DOMAIN_LOCK(domain);
		TAILQ_FOREACH_SAFE(entry, &unroll_list, unroll_link,
		    entry1) {
			/*
			 * No entries other than what we have created
			 * during the failed run might have been
			 * inserted there in between, since we own ctx
			 * pglock.
			 */
			TAILQ_REMOVE(&map->map_entries, entry, dmamap_link);
			TAILQ_REMOVE(&unroll_list, entry, unroll_link);
			TAILQ_INSERT_TAIL(&domain->unload_entries, entry,
			    dmamap_link);
		}
		IOMMU_DOMAIN_UNLOCK(domain);
		taskqueue_enqueue(domain->iommu->delayed_taskqueue,
		    &domain->unload_task);
	}

	if (error == ENOMEM && (flags & BUS_DMA_NOWAIT) == 0 &&
	    !map->cansleep)
		error = EINPROGRESS;
	if (error == EINPROGRESS)
		iommu_bus_schedule_dmamap(domain->iommu, map);
	return (error);
}

/* busdma load_ma method: forwards to iommu_bus_dmamap_load_something(). */
static int
iommu_bus_dmamap_load_ma(bus_dma_tag_t dmat, bus_dmamap_t map1,
    struct vm_page **ma, bus_size_t tlen, int ma_offs, int flags,
    bus_dma_segment_t *segs, int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	return (iommu_bus_dmamap_load_something(tag, map, ma, ma_offs, tlen,
	    flags, segs, segp));
}

static int
iommu_bus_dmamap_load_phys(bus_dma_tag_t dmat, bus_dmamap_t map1,
    vm_paddr_t buf, bus_size_t buflen, int flags, bus_dma_segment_t *segs,
    int *segp)
{
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	vm_page_t *ma, fma;
	vm_paddr_t pstart, pend, paddr;
	int error, i, ma_cnt, mflags, offset;

	tag = (struct bus_dma_tag_iommu *)dmat;
	map = (struct bus_dmamap_iommu *)map1;
	pstart = trunc_page(buf);
	pend = round_page(buf + buflen);
	offset = buf & PAGE_MASK;
	ma_cnt = OFF_TO_IDX(pend - pstart);
	mflags = map->cansleep ? M_WAITOK : M_NOWAIT;
	ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags);
	if (ma == NULL)
		return (ENOMEM);
	fma = NULL;
	for (i = 0; i < ma_cnt; i++) {
		paddr = pstart + ptoa(i);
		ma[i] = PHYS_TO_VM_PAGE(paddr);
		if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) {
			/*
			 * If PHYS_TO_VM_PAGE() returned NULL or the
			 * vm_page was not initialized we'll use a
			 * fake page.
*/ if (fma == NULL) { fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF, M_ZERO | mflags); if (fma == NULL) { free(ma, M_DEVBUF); return (ENOMEM); } } vm_page_initfake(&fma[i], pstart + ptoa(i), VM_MEMATTR_DEFAULT); ma[i] = &fma[i]; } } error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen, flags, segs, segp); free(fma, M_DEVBUF); free(ma, M_DEVBUF); return (error); } static int iommu_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dmamap_t map1, void *buf, bus_size_t buflen, pmap_t pmap, int flags, bus_dma_segment_t *segs, int *segp) { struct bus_dma_tag_iommu *tag; struct bus_dmamap_iommu *map; vm_page_t *ma, fma; vm_paddr_t pstart, pend, paddr; int error, i, ma_cnt, mflags, offset; tag = (struct bus_dma_tag_iommu *)dmat; map = (struct bus_dmamap_iommu *)map1; pstart = trunc_page((vm_offset_t)buf); pend = round_page((vm_offset_t)buf + buflen); offset = (vm_offset_t)buf & PAGE_MASK; ma_cnt = OFF_TO_IDX(pend - pstart); mflags = map->cansleep ? M_WAITOK : M_NOWAIT; ma = malloc(sizeof(vm_page_t) * ma_cnt, M_DEVBUF, mflags); if (ma == NULL) return (ENOMEM); fma = NULL; for (i = 0; i < ma_cnt; i++, pstart += PAGE_SIZE) { if (pmap == kernel_pmap) paddr = pmap_kextract(pstart); else paddr = pmap_extract(pmap, pstart); ma[i] = PHYS_TO_VM_PAGE(paddr); if (ma[i] == NULL || VM_PAGE_TO_PHYS(ma[i]) != paddr) { /* * If PHYS_TO_VM_PAGE() returned NULL or the * vm_page was not initialized we'll use a * fake page. 
*/ if (fma == NULL) { fma = malloc(sizeof(struct vm_page) * ma_cnt, M_DEVBUF, M_ZERO | mflags); if (fma == NULL) { free(ma, M_DEVBUF); return (ENOMEM); } } vm_page_initfake(&fma[i], paddr, VM_MEMATTR_DEFAULT); ma[i] = &fma[i]; } } error = iommu_bus_dmamap_load_something(tag, map, ma, offset, buflen, flags, segs, segp); free(ma, M_DEVBUF); free(fma, M_DEVBUF); return (error); } static void iommu_bus_dmamap_waitok(bus_dma_tag_t dmat, bus_dmamap_t map1, struct memdesc *mem, bus_dmamap_callback_t *callback, void *callback_arg) { struct bus_dmamap_iommu *map; if (map1 == NULL) return; map = (struct bus_dmamap_iommu *)map1; map->mem = *mem; map->tag = (struct bus_dma_tag_iommu *)dmat; map->callback = callback; map->callback_arg = callback_arg; } static bus_dma_segment_t * iommu_bus_dmamap_complete(bus_dma_tag_t dmat, bus_dmamap_t map1, bus_dma_segment_t *segs, int nsegs, int error) { struct bus_dma_tag_iommu *tag; struct bus_dmamap_iommu *map; tag = (struct bus_dma_tag_iommu *)dmat; map = (struct bus_dmamap_iommu *)map1; if (!map->locked) { KASSERT(map->cansleep, ("map not locked and not sleepable context %p", map)); /* * We are called from the delayed context. Relock the * driver. */ (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_LOCK); map->locked = true; } if (segs == NULL) segs = tag->segments; return (segs); } /* * The limitations of busdma KPI forces the iommu to perform the actual * unload, consisting of the unmapping of the map entries page tables, * from the delayed context on i386, since page table page mapping * might require a sleep to be successfull. The unfortunate * consequence is that the DMA requests can be served some time after * the bus_dmamap_unload() call returned. * * On amd64, we assume that sf allocation cannot fail. 
*/ static void iommu_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map1) { struct bus_dma_tag_iommu *tag; struct bus_dmamap_iommu *map; struct iommu_ctx *ctx; struct iommu_domain *domain; #ifndef IOMMU_DOMAIN_UNLOAD_SLEEP struct iommu_map_entries_tailq entries; #endif tag = (struct bus_dma_tag_iommu *)dmat; map = (struct bus_dmamap_iommu *)map1; ctx = tag->ctx; domain = ctx->domain; atomic_add_long(&ctx->unloads, 1); #if defined(IOMMU_DOMAIN_UNLOAD_SLEEP) IOMMU_DOMAIN_LOCK(domain); TAILQ_CONCAT(&domain->unload_entries, &map->map_entries, dmamap_link); IOMMU_DOMAIN_UNLOCK(domain); taskqueue_enqueue(domain->iommu->delayed_taskqueue, &domain->unload_task); #else TAILQ_INIT(&entries); IOMMU_DOMAIN_LOCK(domain); TAILQ_CONCAT(&entries, &map->map_entries, dmamap_link); IOMMU_DOMAIN_UNLOCK(domain); THREAD_NO_SLEEPING(); iommu_domain_unload(domain, &entries, false); THREAD_SLEEPING_OK(); KASSERT(TAILQ_EMPTY(&entries), ("lazy iommu_ctx_unload %p", ctx)); #endif } static void iommu_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map1, bus_dmasync_op_t op) { struct bus_dmamap_iommu *map __unused; map = (struct bus_dmamap_iommu *)map1; kmsan_bus_dmamap_sync(&map->kmsan_mem, op); } #ifdef KMSAN static void iommu_bus_dmamap_load_kmsan(bus_dmamap_t map1, struct memdesc *mem) { struct bus_dmamap_iommu *map; map = (struct bus_dmamap_iommu *)map1; if (map == NULL) return; memcpy(&map->kmsan_mem, mem, sizeof(struct memdesc)); } #endif struct bus_dma_impl bus_dma_iommu_impl = { .tag_create = iommu_bus_dma_tag_create, .tag_destroy = iommu_bus_dma_tag_destroy, .tag_set_domain = iommu_bus_dma_tag_set_domain, .id_mapped = iommu_bus_dma_id_mapped, .map_create = iommu_bus_dmamap_create, .map_destroy = iommu_bus_dmamap_destroy, .mem_alloc = iommu_bus_dmamem_alloc, .mem_free = iommu_bus_dmamem_free, .load_phys = iommu_bus_dmamap_load_phys, .load_buffer = iommu_bus_dmamap_load_buffer, .load_ma = iommu_bus_dmamap_load_ma, .map_waitok = iommu_bus_dmamap_waitok, .map_complete = 
iommu_bus_dmamap_complete, .map_unload = iommu_bus_dmamap_unload, .map_sync = iommu_bus_dmamap_sync, #ifdef KMSAN .load_kmsan = iommu_bus_dmamap_load_kmsan, #endif }; static void iommu_bus_task_dmamap(void *arg, int pending) { struct bus_dma_tag_iommu *tag; struct bus_dmamap_iommu *map; struct iommu_unit *unit; unit = arg; IOMMU_LOCK(unit); while ((map = TAILQ_FIRST(&unit->delayed_maps)) != NULL) { TAILQ_REMOVE(&unit->delayed_maps, map, delay_link); IOMMU_UNLOCK(unit); tag = map->tag; map->cansleep = true; map->locked = false; bus_dmamap_load_mem((bus_dma_tag_t)tag, (bus_dmamap_t)map, &map->mem, map->callback, map->callback_arg, BUS_DMA_WAITOK); map->cansleep = false; if (map->locked) { (tag->common.lockfunc)(tag->common.lockfuncarg, BUS_DMA_UNLOCK); } else map->locked = true; map->cansleep = false; IOMMU_LOCK(unit); } IOMMU_UNLOCK(unit); } static void iommu_bus_schedule_dmamap(struct iommu_unit *unit, struct bus_dmamap_iommu *map) { map->locked = false; IOMMU_LOCK(unit); TAILQ_INSERT_TAIL(&unit->delayed_maps, map, delay_link); IOMMU_UNLOCK(unit); taskqueue_enqueue(unit->delayed_taskqueue, &unit->dmamap_load_task); } int iommu_init_busdma(struct iommu_unit *unit) { int error; unit->dma_enabled = 1; error = TUNABLE_INT_FETCH("hw.iommu.dma", &unit->dma_enabled); if (error == 0) /* compatibility */ TUNABLE_INT_FETCH("hw.dmar.dma", &unit->dma_enabled); TAILQ_INIT(&unit->delayed_maps); TASK_INIT(&unit->dmamap_load_task, 0, iommu_bus_task_dmamap, unit); unit->delayed_taskqueue = taskqueue_create("iommu", M_WAITOK, taskqueue_thread_enqueue, &unit->delayed_taskqueue); taskqueue_start_threads(&unit->delayed_taskqueue, 1, PI_DISK, "iommu%d busdma taskq", unit->unit); return (0); } void iommu_fini_busdma(struct iommu_unit *unit) { if (unit->delayed_taskqueue == NULL) return; taskqueue_drain(unit->delayed_taskqueue, &unit->dmamap_load_task); taskqueue_free(unit->delayed_taskqueue); unit->delayed_taskqueue = NULL; } int bus_dma_iommu_load_ident(bus_dma_tag_t dmat, bus_dmamap_t 
map1, vm_paddr_t start, vm_size_t length, int flags)
{
	struct bus_dma_tag_common *tc;
	struct bus_dma_tag_iommu *tag;
	struct bus_dmamap_iommu *map;
	struct iommu_ctx *ctx;
	struct iommu_domain *domain;
	struct iommu_map_entry *entry;
	vm_page_t *ma;
	vm_size_t i;
	int error;
	bool waitok;

	/*
	 * Map the physical range [start, start + length) into the tag's
	 * IOMMU domain at the same addresses and, on success, link the
	 * resulting entry to the map.  start and length must be page
	 * aligned, length must be non-zero, and only BUS_DMA_NOWAIT and
	 * BUS_DMA_NOWRITE are accepted in flags.  Returns 0 on success, or
	 * when the tag is not handled by the IOMMU busdma implementation,
	 * and an errno value otherwise.
	 */
	MPASS((start & PAGE_MASK) == 0);
	MPASS((length & PAGE_MASK) == 0);
	MPASS(length > 0);
	MPASS(start + length >= start);
	MPASS((flags & ~(BUS_DMA_NOWAIT | BUS_DMA_NOWRITE)) == 0);

	/* Nothing to do for tags not handled by the IOMMU implementation. */
	tc = (struct bus_dma_tag_common *)dmat;
	if (tc->impl != &bus_dma_iommu_impl)
		return (0);

	tag = (struct bus_dma_tag_iommu *)dmat;
	ctx = tag->ctx;
	domain = ctx->domain;
	map = (struct bus_dmamap_iommu *)map1;

	/*
	 * Sleeping is allowed only when the caller did NOT pass
	 * BUS_DMA_NOWAIT.  The test used to be inverted, which made a
	 * BUS_DMA_NOWAIT caller allocate the page array with M_WAITOK and
	 * map with IOMMU_MF_CANWAIT, while a waitable caller got M_NOWAIT.
	 */
	waitok = (flags & BUS_DMA_NOWAIT) == 0;

	entry = iommu_map_alloc_entry(domain, waitok ? IOMMU_PGF_WAITOK : 0);
	if (entry == NULL)
		return (ENOMEM);
	entry->start = start;
	entry->end = start + length;

	ma = malloc(sizeof(vm_page_t) * atop(length), M_TEMP, waitok ?
	    M_WAITOK : M_NOWAIT);
	if (ma == NULL) {
		iommu_map_free_entry(domain, entry);
		return (ENOMEM);
	}

	/* One fake page per bus page, at the identical physical address. */
	for (i = 0; i < atop(length); i++) {
		ma[i] = vm_page_getfake(entry->start + PAGE_SIZE * i,
		    VM_MEMATTR_DEFAULT);
	}

	error = iommu_map_region(domain, entry, IOMMU_MAP_ENTRY_READ |
	    ((flags & BUS_DMA_NOWRITE) ? 0 : IOMMU_MAP_ENTRY_WRITE), waitok ?
IOMMU_MF_CANWAIT : 0, ma); if (error == 0) { IOMMU_DOMAIN_LOCK(domain); TAILQ_INSERT_TAIL(&map->map_entries, entry, dmamap_link); entry->flags |= IOMMU_MAP_ENTRY_MAP; IOMMU_DOMAIN_UNLOCK(domain); } else { iommu_domain_unload_entry(entry, true); } for (i = 0; i < atop(length); i++) vm_page_putfake(ma[i]); free(ma, M_TEMP); return (error); } static void iommu_domain_unload_task(void *arg, int pending) { struct iommu_domain *domain; struct iommu_map_entries_tailq entries; domain = arg; TAILQ_INIT(&entries); for (;;) { IOMMU_DOMAIN_LOCK(domain); TAILQ_SWAP(&domain->unload_entries, &entries, iommu_map_entry, dmamap_link); IOMMU_DOMAIN_UNLOCK(domain); if (TAILQ_EMPTY(&entries)) break; iommu_domain_unload(domain, &entries, true); } } void iommu_domain_init(struct iommu_unit *unit, struct iommu_domain *domain, const struct iommu_domain_map_ops *ops) { domain->ops = ops; domain->iommu = unit; TASK_INIT(&domain->unload_task, 0, iommu_domain_unload_task, domain); RB_INIT(&domain->rb_root); TAILQ_INIT(&domain->unload_entries); mtx_init(&domain->lock, "iodom", NULL, MTX_DEF); } void iommu_domain_fini(struct iommu_domain *domain) { mtx_destroy(&domain->lock); } diff --git a/sys/dev/iommu/iommu_gas.c b/sys/dev/iommu/iommu_gas.c index 5b589e999926..221b404f2a45 100644 --- a/sys/dev/iommu/iommu_gas.c +++ b/sys/dev/iommu/iommu_gas.c @@ -1,905 +1,907 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 The FreeBSD Foundation * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #define RB_AUGMENT(entry) iommu_gas_augment_entry(entry) #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Guest Address Space management. 
*/ static uma_zone_t iommu_map_entry_zone; #ifdef INVARIANTS static int iommu_check_free; #endif static void intel_gas_init(void) { iommu_map_entry_zone = uma_zcreate("IOMMU_MAP_ENTRY", sizeof(struct iommu_map_entry), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NODUMP); } SYSINIT(intel_gas, SI_SUB_DRIVERS, SI_ORDER_FIRST, intel_gas_init, NULL); struct iommu_map_entry * iommu_gas_alloc_entry(struct iommu_domain *domain, u_int flags) { struct iommu_map_entry *res; KASSERT((flags & ~(IOMMU_PGF_WAITOK)) == 0, ("unsupported flags %x", flags)); res = uma_zalloc(iommu_map_entry_zone, ((flags & IOMMU_PGF_WAITOK) != 0 ? M_WAITOK : M_NOWAIT) | M_ZERO); if (res != NULL) { res->domain = domain; atomic_add_int(&domain->entries_cnt, 1); } return (res); } void iommu_gas_free_entry(struct iommu_domain *domain, struct iommu_map_entry *entry) { KASSERT(domain == entry->domain, ("mismatched free domain %p entry %p entry->domain %p", domain, entry, entry->domain)); atomic_subtract_int(&domain->entries_cnt, 1); uma_zfree(iommu_map_entry_zone, entry); } static int iommu_gas_cmp_entries(struct iommu_map_entry *a, struct iommu_map_entry *b) { /* Last entry have zero size, so <= */ KASSERT(a->start <= a->end, ("inverted entry %p (%jx, %jx)", a, (uintmax_t)a->start, (uintmax_t)a->end)); KASSERT(b->start <= b->end, ("inverted entry %p (%jx, %jx)", b, (uintmax_t)b->start, (uintmax_t)b->end)); KASSERT(a->end <= b->start || b->end <= a->start || a->end == a->start || b->end == b->start, ("overlapping entries %p (%jx, %jx) %p (%jx, %jx)", a, (uintmax_t)a->start, (uintmax_t)a->end, b, (uintmax_t)b->start, (uintmax_t)b->end)); if (a->end < b->end) return (-1); else if (b->end < a->end) return (1); return (0); } static void iommu_gas_augment_entry(struct iommu_map_entry *entry) { struct iommu_map_entry *child; iommu_gaddr_t free_down; free_down = 0; if ((child = RB_LEFT(entry, rb_entry)) != NULL) { free_down = MAX(free_down, child->free_down); free_down = MAX(free_down, entry->start - 
child->last); entry->first = child->first; } else entry->first = entry->start; if ((child = RB_RIGHT(entry, rb_entry)) != NULL) { free_down = MAX(free_down, child->free_down); free_down = MAX(free_down, child->first - entry->end); entry->last = child->last; } else entry->last = entry->end; entry->free_down = free_down; } RB_GENERATE(iommu_gas_entries_tree, iommu_map_entry, rb_entry, iommu_gas_cmp_entries); #ifdef INVARIANTS static void iommu_gas_check_free(struct iommu_domain *domain) { struct iommu_map_entry *entry, *l, *r; iommu_gaddr_t v; RB_FOREACH(entry, iommu_gas_entries_tree, &domain->rb_root) { KASSERT(domain == entry->domain, ("mismatched free domain %p entry %p entry->domain %p", domain, entry, entry->domain)); l = RB_LEFT(entry, rb_entry); r = RB_RIGHT(entry, rb_entry); v = 0; if (l != NULL) { v = MAX(v, l->free_down); v = MAX(v, entry->start - l->last); } if (r != NULL) { v = MAX(v, r->free_down); v = MAX(v, r->first - entry->end); } MPASS(entry->free_down == v); } } #endif static bool iommu_gas_rb_insert(struct iommu_domain *domain, struct iommu_map_entry *entry) { struct iommu_map_entry *found; found = RB_INSERT(iommu_gas_entries_tree, &domain->rb_root, entry); return (found == NULL); } static void iommu_gas_rb_remove(struct iommu_domain *domain, struct iommu_map_entry *entry) { RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry); } struct iommu_domain * iommu_get_ctx_domain(struct iommu_ctx *ctx) { return (ctx->domain); } void iommu_gas_init_domain(struct iommu_domain *domain) { struct iommu_map_entry *begin, *end; begin = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK); end = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK); IOMMU_DOMAIN_LOCK(domain); KASSERT(domain->entries_cnt == 2, ("dirty domain %p", domain)); KASSERT(RB_EMPTY(&domain->rb_root), ("non-empty entries %p", domain)); begin->start = 0; begin->end = IOMMU_PAGE_SIZE; begin->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED; iommu_gas_rb_insert(domain, begin); end->start = 
domain->end; end->end = domain->end; end->flags = IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED; iommu_gas_rb_insert(domain, end); domain->first_place = begin; domain->last_place = end; domain->flags |= IOMMU_DOMAIN_GAS_INITED; IOMMU_DOMAIN_UNLOCK(domain); } void iommu_gas_fini_domain(struct iommu_domain *domain) { struct iommu_map_entry *entry, *entry1; IOMMU_DOMAIN_ASSERT_LOCKED(domain); KASSERT(domain->entries_cnt == 2, ("domain still in use %p", domain)); entry = RB_MIN(iommu_gas_entries_tree, &domain->rb_root); KASSERT(entry->start == 0, ("start entry start %p", domain)); KASSERT(entry->end == IOMMU_PAGE_SIZE, ("start entry end %p", domain)); KASSERT(entry->flags == (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED), ("start entry flags %p", domain)); RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry); iommu_gas_free_entry(domain, entry); entry = RB_MAX(iommu_gas_entries_tree, &domain->rb_root); KASSERT(entry->start == domain->end, ("end entry start %p", domain)); KASSERT(entry->end == domain->end, ("end entry end %p", domain)); KASSERT(entry->flags == (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_UNMAPPED), ("end entry flags %p", domain)); RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry); iommu_gas_free_entry(domain, entry); RB_FOREACH_SAFE(entry, iommu_gas_entries_tree, &domain->rb_root, entry1) { KASSERT((entry->flags & IOMMU_MAP_ENTRY_RMRR) != 0, ("non-RMRR entry left %p", domain)); RB_REMOVE(iommu_gas_entries_tree, &domain->rb_root, entry); iommu_gas_free_entry(domain, entry); } } struct iommu_gas_match_args { struct iommu_domain *domain; iommu_gaddr_t size; int offset; const struct bus_dma_tag_common *common; u_int gas_flags; struct iommu_map_entry *entry; }; /* * The interval [beg, end) is a free interval between two iommu_map_entries. * maxaddr is an upper bound on addresses that can be allocated. 
Try to * allocate space in the free interval, subject to the conditions expressed * by a, and return 'true' if and only if the allocation attempt succeeds. */ static bool iommu_gas_match_one(struct iommu_gas_match_args *a, iommu_gaddr_t beg, iommu_gaddr_t end, iommu_gaddr_t maxaddr) { iommu_gaddr_t bs, start; a->entry->start = roundup2(beg + IOMMU_PAGE_SIZE, a->common->alignment); if (a->entry->start + a->offset + a->size > maxaddr) return (false); /* IOMMU_PAGE_SIZE to create gap after new entry. */ if (a->entry->start < beg + IOMMU_PAGE_SIZE || a->entry->start + a->size + a->offset + IOMMU_PAGE_SIZE > end) return (false); /* No boundary crossing. */ if (vm_addr_bound_ok(a->entry->start + a->offset, a->size, a->common->boundary)) return (true); /* * The start + offset to start + offset + size region crosses * the boundary. Check if there is enough space after the * next boundary after the beg. */ bs = rounddown2(a->entry->start + a->offset + a->common->boundary, a->common->boundary); start = roundup2(bs, a->common->alignment); /* IOMMU_PAGE_SIZE to create gap after new entry. */ if (start + a->offset + a->size + IOMMU_PAGE_SIZE <= end && start + a->offset + a->size <= maxaddr && vm_addr_bound_ok(start + a->offset, a->size, a->common->boundary)) { a->entry->start = start; return (true); } /* * Not enough space to align at the requested boundary, or * boundary is smaller than the size, but allowed to split. * We already checked that start + size does not overlap maxaddr. * * XXXKIB. It is possible that bs is exactly at the start of * the next entry, then we do not have gap. Ignore for now. */ if ((a->gas_flags & IOMMU_MF_CANSPLIT) != 0) { a->size = bs - a->entry->start; return (true); } return (false); } static void iommu_gas_match_insert(struct iommu_gas_match_args *a) { bool found __diagused; /* * The prev->end is always aligned on the page size, which * causes page alignment for the entry->start too. The size * is checked to be multiple of the page size. 
* * The page sized gap is created between consequent * allocations to ensure that out-of-bounds accesses fault. */ - a->entry->end = a->entry->start + a->size; + a->entry->end = a->entry->start + + roundup2(a->size + a->offset, IOMMU_PAGE_SIZE); found = iommu_gas_rb_insert(a->domain, a->entry); KASSERT(found, ("found dup %p start %jx size %jx", a->domain, (uintmax_t)a->entry->start, (uintmax_t)a->size)); a->entry->flags = IOMMU_MAP_ENTRY_MAP; } static int iommu_gas_lowermatch(struct iommu_gas_match_args *a, struct iommu_map_entry *entry) { struct iommu_map_entry *child; /* * If the subtree doesn't have free space for the requested allocation * plus two guard pages, give up. */ - if (entry->free_down < a->size + a->offset + 2 * IOMMU_PAGE_SIZE) + if (entry->free_down < 2 * IOMMU_PAGE_SIZE + + roundup2(a->size + a->offset, IOMMU_PAGE_SIZE)) return (ENOMEM); if (entry->first >= a->common->lowaddr) return (ENOMEM); child = RB_RIGHT(entry, rb_entry); if (child != NULL && 0 == iommu_gas_lowermatch(a, child)) return (0); if (child != NULL && entry->end < a->common->lowaddr && iommu_gas_match_one(a, entry->end, child->first, a->common->lowaddr)) { iommu_gas_match_insert(a); return (0); } child = RB_LEFT(entry, rb_entry); if (child != NULL && child->last < a->common->lowaddr && iommu_gas_match_one(a, child->last, entry->start, a->common->lowaddr)) { iommu_gas_match_insert(a); return (0); } if (child != NULL && 0 == iommu_gas_lowermatch(a, child)) return (0); return (ENOMEM); } static int iommu_gas_uppermatch(struct iommu_gas_match_args *a, struct iommu_map_entry *entry) { struct iommu_map_entry *child; /* * If the subtree doesn't have free space for the requested allocation * plus two guard pages, give up. 
*/ - if (entry->free_down < a->size + a->offset + 2 * IOMMU_PAGE_SIZE) + if (entry->free_down < 2 * IOMMU_PAGE_SIZE + + roundup2(a->size + a->offset, IOMMU_PAGE_SIZE)) return (ENOMEM); if (entry->last < a->common->highaddr) return (ENOMEM); child = RB_LEFT(entry, rb_entry); if (child != NULL && 0 == iommu_gas_uppermatch(a, child)) return (0); if (child != NULL && child->last >= a->common->highaddr && iommu_gas_match_one(a, child->last, entry->start, a->domain->end)) { iommu_gas_match_insert(a); return (0); } child = RB_RIGHT(entry, rb_entry); if (child != NULL && entry->end >= a->common->highaddr && iommu_gas_match_one(a, entry->end, child->first, a->domain->end)) { iommu_gas_match_insert(a); return (0); } if (child != NULL && 0 == iommu_gas_uppermatch(a, child)) return (0); return (ENOMEM); } static int iommu_gas_find_space(struct iommu_domain *domain, const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset, u_int flags, struct iommu_map_entry *entry) { struct iommu_gas_match_args a; int error; IOMMU_DOMAIN_ASSERT_LOCKED(domain); KASSERT(entry->flags == 0, ("dirty entry %p %p", domain, entry)); - KASSERT((size & IOMMU_PAGE_MASK) == 0, ("size %jx", (uintmax_t)size)); a.domain = domain; a.size = size; a.offset = offset; a.common = common; a.gas_flags = flags; a.entry = entry; /* Handle lower region. */ if (common->lowaddr > 0) { error = iommu_gas_lowermatch(&a, RB_ROOT(&domain->rb_root)); if (error == 0) return (0); KASSERT(error == ENOMEM, ("error %d from iommu_gas_lowermatch", error)); } /* Handle upper region. 
*/ if (common->highaddr >= domain->end) return (ENOMEM); error = iommu_gas_uppermatch(&a, RB_ROOT(&domain->rb_root)); KASSERT(error == ENOMEM, ("error %d from iommu_gas_uppermatch", error)); return (error); } static int iommu_gas_alloc_region(struct iommu_domain *domain, struct iommu_map_entry *entry, u_int flags) { struct iommu_map_entry *next, *prev; bool found __diagused; IOMMU_DOMAIN_ASSERT_LOCKED(domain); if ((entry->start & IOMMU_PAGE_MASK) != 0 || (entry->end & IOMMU_PAGE_MASK) != 0) return (EINVAL); if (entry->start >= entry->end) return (EINVAL); if (entry->end >= domain->end) return (EINVAL); next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, entry); KASSERT(next != NULL, ("next must be non-null %p %jx", domain, (uintmax_t)entry->start)); prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next); /* prev could be NULL */ /* * Adapt to broken BIOSes which specify overlapping RMRR * entries. * * XXXKIB: this does not handle a case when prev or next * entries are completely covered by the current one, which * extends both ways. */ if (prev != NULL && prev->end > entry->start && (prev->flags & IOMMU_MAP_ENTRY_PLACE) == 0) { if ((flags & IOMMU_MF_RMRR) == 0 || (prev->flags & IOMMU_MAP_ENTRY_RMRR) == 0) return (EBUSY); entry->start = prev->end; } if (next->start < entry->end && (next->flags & IOMMU_MAP_ENTRY_PLACE) == 0) { if ((flags & IOMMU_MF_RMRR) == 0 || (next->flags & IOMMU_MAP_ENTRY_RMRR) == 0) return (EBUSY); entry->end = next->start; } if (entry->end == entry->start) return (0); if (prev != NULL && prev->end > entry->start) { /* This assumes that prev is the placeholder entry. 
*/ iommu_gas_rb_remove(domain, prev); prev = NULL; } if (next->start < entry->end) { iommu_gas_rb_remove(domain, next); next = NULL; } found = iommu_gas_rb_insert(domain, entry); KASSERT(found, ("found RMRR dup %p start %jx end %jx", domain, (uintmax_t)entry->start, (uintmax_t)entry->end)); if ((flags & IOMMU_MF_RMRR) != 0) entry->flags = IOMMU_MAP_ENTRY_RMRR; #ifdef INVARIANTS struct iommu_map_entry *ip, *in; ip = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry); in = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry); KASSERT(prev == NULL || ip == prev, ("RMRR %p (%jx %jx) prev %p (%jx %jx) ins prev %p (%jx %jx)", entry, entry->start, entry->end, prev, prev == NULL ? 0 : prev->start, prev == NULL ? 0 : prev->end, ip, ip == NULL ? 0 : ip->start, ip == NULL ? 0 : ip->end)); KASSERT(next == NULL || in == next, ("RMRR %p (%jx %jx) next %p (%jx %jx) ins next %p (%jx %jx)", entry, entry->start, entry->end, next, next == NULL ? 0 : next->start, next == NULL ? 0 : next->end, in, in == NULL ? 0 : in->start, in == NULL ? 
0 : in->end)); #endif return (0); } void iommu_gas_free_space(struct iommu_domain *domain, struct iommu_map_entry *entry) { IOMMU_DOMAIN_ASSERT_LOCKED(domain); KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR | IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_MAP, ("permanent entry %p %p", domain, entry)); iommu_gas_rb_remove(domain, entry); entry->flags &= ~IOMMU_MAP_ENTRY_MAP; #ifdef INVARIANTS if (iommu_check_free) iommu_gas_check_free(domain); #endif } void iommu_gas_free_region(struct iommu_domain *domain, struct iommu_map_entry *entry) { struct iommu_map_entry *next, *prev; IOMMU_DOMAIN_ASSERT_LOCKED(domain); KASSERT((entry->flags & (IOMMU_MAP_ENTRY_PLACE | IOMMU_MAP_ENTRY_RMRR | IOMMU_MAP_ENTRY_MAP)) == IOMMU_MAP_ENTRY_RMRR, ("non-RMRR entry %p %p", domain, entry)); prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, entry); next = RB_NEXT(iommu_gas_entries_tree, &domain->rb_root, entry); iommu_gas_rb_remove(domain, entry); entry->flags &= ~IOMMU_MAP_ENTRY_RMRR; if (prev == NULL) iommu_gas_rb_insert(domain, domain->first_place); if (next == NULL) iommu_gas_rb_insert(domain, domain->last_place); } int iommu_gas_map(struct iommu_domain *domain, const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset, u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res) { struct iommu_map_entry *entry; int error; KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_CANSPLIT)) == 0, ("invalid flags 0x%x", flags)); entry = iommu_gas_alloc_entry(domain, (flags & IOMMU_MF_CANWAIT) != 0 ? 
IOMMU_PGF_WAITOK : 0); if (entry == NULL) return (ENOMEM); IOMMU_DOMAIN_LOCK(domain); error = iommu_gas_find_space(domain, common, size, offset, flags, entry); if (error == ENOMEM) { IOMMU_DOMAIN_UNLOCK(domain); iommu_gas_free_entry(domain, entry); return (error); } #ifdef INVARIANTS if (iommu_check_free) iommu_gas_check_free(domain); #endif KASSERT(error == 0, ("unexpected error %d from iommu_gas_find_entry", error)); KASSERT(entry->end < domain->end, ("allocated GPA %jx, max GPA %jx", (uintmax_t)entry->end, (uintmax_t)domain->end)); entry->flags |= eflags; IOMMU_DOMAIN_UNLOCK(domain); error = domain->ops->map(domain, entry->start, entry->end - entry->start, ma, eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? IOMMU_PGF_WAITOK : 0)); if (error == ENOMEM) { iommu_domain_unload_entry(entry, true); return (error); } KASSERT(error == 0, ("unexpected error %d from domain_map_buf", error)); *res = entry; return (0); } int iommu_gas_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma) { iommu_gaddr_t start; int error; KASSERT(entry->flags == 0, ("used RMRR entry %p %p %x", domain, entry, entry->flags)); KASSERT((flags & ~(IOMMU_MF_CANWAIT | IOMMU_MF_RMRR)) == 0, ("invalid flags 0x%x", flags)); start = entry->start; IOMMU_DOMAIN_LOCK(domain); error = iommu_gas_alloc_region(domain, entry, flags); if (error != 0) { IOMMU_DOMAIN_UNLOCK(domain); return (error); } entry->flags |= eflags; IOMMU_DOMAIN_UNLOCK(domain); if (entry->end == entry->start) return (0); error = domain->ops->map(domain, entry->start, entry->end - entry->start, ma + OFF_TO_IDX(start - entry->start), eflags, ((flags & IOMMU_MF_CANWAIT) != 0 ? 
IOMMU_PGF_WAITOK : 0)); if (error == ENOMEM) { iommu_domain_unload_entry(entry, false); return (error); } KASSERT(error == 0, ("unexpected error %d from domain_map_buf", error)); return (0); } static int iommu_gas_reserve_region_locked(struct iommu_domain *domain, iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry *entry) { int error; IOMMU_DOMAIN_ASSERT_LOCKED(domain); entry->start = start; entry->end = end; error = iommu_gas_alloc_region(domain, entry, IOMMU_MF_CANWAIT); if (error == 0) entry->flags |= IOMMU_MAP_ENTRY_UNMAPPED; return (error); } int iommu_gas_reserve_region(struct iommu_domain *domain, iommu_gaddr_t start, iommu_gaddr_t end, struct iommu_map_entry **entry0) { struct iommu_map_entry *entry; int error; entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK); IOMMU_DOMAIN_LOCK(domain); error = iommu_gas_reserve_region_locked(domain, start, end, entry); IOMMU_DOMAIN_UNLOCK(domain); if (error != 0) iommu_gas_free_entry(domain, entry); else if (entry0 != NULL) *entry0 = entry; return (error); } /* * As in iommu_gas_reserve_region, reserve [start, end), but allow for existing * entries. */ int iommu_gas_reserve_region_extend(struct iommu_domain *domain, iommu_gaddr_t start, iommu_gaddr_t end) { struct iommu_map_entry *entry, *next, *prev, key = {}; iommu_gaddr_t entry_start, entry_end; int error; error = 0; entry = NULL; end = ummin(end, domain->end); while (start < end) { /* Preallocate an entry. */ if (entry == NULL) entry = iommu_gas_alloc_entry(domain, IOMMU_PGF_WAITOK); /* Calculate the free region from here to the next entry. 
*/ key.start = key.end = start; IOMMU_DOMAIN_LOCK(domain); next = RB_NFIND(iommu_gas_entries_tree, &domain->rb_root, &key); KASSERT(next != NULL, ("domain %p with end %#jx has no entry " "after %#jx", domain, (uintmax_t)domain->end, (uintmax_t)start)); entry_end = ummin(end, next->start); prev = RB_PREV(iommu_gas_entries_tree, &domain->rb_root, next); if (prev != NULL) entry_start = ummax(start, prev->end); else entry_start = start; start = next->end; /* Reserve the region if non-empty. */ if (entry_start != entry_end) { error = iommu_gas_reserve_region_locked(domain, entry_start, entry_end, entry); if (error != 0) break; entry = NULL; } IOMMU_DOMAIN_UNLOCK(domain); } /* Release a preallocated entry if it was not used. */ if (entry != NULL) iommu_gas_free_entry(domain, entry); return (error); } struct iommu_map_entry * iommu_map_alloc_entry(struct iommu_domain *domain, u_int flags) { struct iommu_map_entry *res; res = iommu_gas_alloc_entry(domain, flags); return (res); } void iommu_map_free_entry(struct iommu_domain *domain, struct iommu_map_entry *entry) { iommu_gas_free_entry(domain, entry); } int iommu_map(struct iommu_domain *domain, const struct bus_dma_tag_common *common, iommu_gaddr_t size, int offset, u_int eflags, u_int flags, vm_page_t *ma, struct iommu_map_entry **res) { int error; error = iommu_gas_map(domain, common, size, offset, eflags, flags, ma, res); return (error); } void iommu_unmap_msi(struct iommu_ctx *ctx) { struct iommu_map_entry *entry; struct iommu_domain *domain; domain = ctx->domain; entry = domain->msi_entry; if (entry == NULL) return; domain->ops->unmap(domain, entry->start, entry->end - entry->start, IOMMU_PGF_WAITOK); IOMMU_DOMAIN_LOCK(domain); iommu_gas_free_space(domain, entry); IOMMU_DOMAIN_UNLOCK(domain); iommu_gas_free_entry(domain, entry); domain->msi_entry = NULL; domain->msi_base = 0; domain->msi_phys = 0; } int iommu_map_msi(struct iommu_ctx *ctx, iommu_gaddr_t size, int offset, u_int eflags, u_int flags, vm_page_t *ma) { 
struct iommu_domain *domain; struct iommu_map_entry *entry; int error; error = 0; domain = ctx->domain; /* Check if there is already an MSI page allocated */ IOMMU_DOMAIN_LOCK(domain); entry = domain->msi_entry; IOMMU_DOMAIN_UNLOCK(domain); if (entry == NULL) { error = iommu_gas_map(domain, &ctx->tag->common, size, offset, eflags, flags, ma, &entry); IOMMU_DOMAIN_LOCK(domain); if (error == 0) { if (domain->msi_entry == NULL) { MPASS(domain->msi_base == 0); MPASS(domain->msi_phys == 0); domain->msi_entry = entry; domain->msi_base = entry->start; domain->msi_phys = VM_PAGE_TO_PHYS(ma[0]); } else { /* * We lost the race and already have an * MSI page allocated. Free the unneeded entry. */ iommu_gas_free_entry(domain, entry); } } else if (domain->msi_entry != NULL) { /* * The allocation failed, but another succeeded. * Return success as there is a valid MSI page. */ error = 0; } IOMMU_DOMAIN_UNLOCK(domain); } return (error); } void iommu_translate_msi(struct iommu_domain *domain, uint64_t *addr) { *addr = (*addr - domain->msi_phys) + domain->msi_base; KASSERT(*addr >= domain->msi_entry->start, ("%s: Address is below the MSI entry start address (%jx < %jx)", __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->start)); KASSERT(*addr + sizeof(*addr) <= domain->msi_entry->end, ("%s: Address is above the MSI entry end address (%jx < %jx)", __func__, (uintmax_t)*addr, (uintmax_t)domain->msi_entry->end)); } int iommu_map_region(struct iommu_domain *domain, struct iommu_map_entry *entry, u_int eflags, u_int flags, vm_page_t *ma) { int error; error = iommu_gas_map_region(domain, entry, eflags, flags, ma); return (error); } SYSCTL_NODE(_hw, OID_AUTO, iommu, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, ""); #ifdef INVARIANTS SYSCTL_INT(_hw_iommu, OID_AUTO, check_free, CTLFLAG_RWTUN, &iommu_check_free, 0, "Check the GPA RBtree for free_down and free_after validity"); #endif