Index: head/sys/dev/amdsmn/amdsmn.c =================================================================== --- head/sys/dev/amdsmn/amdsmn.c (revision 323585) +++ head/sys/dev/amdsmn/amdsmn.c (revision 323586) @@ -1,193 +1,195 @@ /*- * Copyright (c) 2017 Conrad Meyer * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Driver for the AMD Family 17h CPU System Management Network. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SMN_ADDR_REG 0x60 #define SMN_DATA_REG 0x64 struct amdsmn_softc { struct mtx smn_lock; }; static struct pciid { uint32_t device_id; } amdsmn_ids[] = { { 0x14501022 }, }; /* * Device methods. */ static void amdsmn_identify(driver_t *driver, device_t parent); static int amdsmn_probe(device_t dev); static int amdsmn_attach(device_t dev); static int amdsmn_detach(device_t dev); static device_method_t amdsmn_methods[] = { /* Device interface */ DEVMETHOD(device_identify, amdsmn_identify), DEVMETHOD(device_probe, amdsmn_probe), DEVMETHOD(device_attach, amdsmn_attach), DEVMETHOD(device_detach, amdsmn_detach), DEVMETHOD_END }; static driver_t amdsmn_driver = { "amdsmn", amdsmn_methods, sizeof(struct amdsmn_softc), }; static devclass_t amdsmn_devclass; DRIVER_MODULE(amdsmn, hostb, amdsmn_driver, amdsmn_devclass, NULL, NULL); MODULE_VERSION(amdsmn, 1); +MODULE_PNP_INFO("W32:vendor/device", pci, amdsmn, amdsmn_ids, + sizeof(amdsmn_ids[0]), nitems(amdsmn_ids)); static bool amdsmn_match(device_t parent) { uint32_t devid; size_t i; devid = pci_get_devid(parent); for (i = 0; i < nitems(amdsmn_ids); i++) if (amdsmn_ids[i].device_id == devid) return (true); return (false); } static void amdsmn_identify(driver_t *driver, device_t parent) { device_t child; /* Make sure we're not being doubly invoked. 
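/*
 * Editorial sketch (hypothetical "example" driver, not part of this
 * commit): the "W32:vendor/device" descriptor added above declares one
 * 32-bit field per table row, matched against the packed value that
 * pci_get_devid(9) returns -- device ID in the high 16 bits, vendor ID
 * in the low 16, hence 0x14501022 for device 0x1450, vendor 0x1022.
 * PNP-aware userland (devmatch(8) in later FreeBSD) reads the record
 * from the module's linker hints to pick a KLD for an unclaimed PCI
 * device.
 */
static const struct { uint32_t devid; } example_ids[] = {
	{ 0x14501022 },		/* AMD Family 17h host bridge */
};
MODULE_PNP_INFO("W32:vendor/device", pci, example, example_ids,
    sizeof(example_ids[0]), nitems(example_ids));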
*/ if (device_find_child(parent, "amdsmn", -1) != NULL) return; if (!amdsmn_match(parent)) return; child = device_add_child(parent, "amdsmn", -1); if (child == NULL) device_printf(parent, "add amdsmn child failed\n"); } static int amdsmn_probe(device_t dev) { uint32_t family; if (resource_disabled("amdsmn", 0)) return (ENXIO); if (!amdsmn_match(device_get_parent(dev))) return (ENXIO); family = CPUID_TO_FAMILY(cpu_id); switch (family) { case 0x17: break; default: return (ENXIO); } device_set_desc(dev, "AMD Family 17h System Management Network"); return (BUS_PROBE_GENERIC); } static int amdsmn_attach(device_t dev) { struct amdsmn_softc *sc = device_get_softc(dev); mtx_init(&sc->smn_lock, "SMN mtx", "SMN", MTX_DEF); return (0); } int amdsmn_detach(device_t dev) { struct amdsmn_softc *sc = device_get_softc(dev); mtx_destroy(&sc->smn_lock); return (0); } int amdsmn_read(device_t dev, uint32_t addr, uint32_t *value) { struct amdsmn_softc *sc = device_get_softc(dev); device_t parent; parent = device_get_parent(dev); mtx_lock(&sc->smn_lock); pci_write_config(parent, SMN_ADDR_REG, addr, 4); *value = pci_read_config(parent, SMN_DATA_REG, 4); mtx_unlock(&sc->smn_lock); return (0); } int amdsmn_write(device_t dev, uint32_t addr, uint32_t value) { struct amdsmn_softc *sc = device_get_softc(dev); device_t parent; parent = device_get_parent(dev); mtx_lock(&sc->smn_lock); pci_write_config(parent, SMN_ADDR_REG, addr, 4); pci_write_config(parent, SMN_DATA_REG, value, 4); mtx_unlock(&sc->smn_lock); return (0); } Index: head/sys/dev/amdtemp/amdtemp.c =================================================================== --- head/sys/dev/amdtemp/amdtemp.c (revision 323585) +++ head/sys/dev/amdtemp/amdtemp.c (revision 323586) @@ -1,601 +1,602 @@ /*- * Copyright (c) 2008, 2009 Rui Paulo * Copyright (c) 2009 Norikatsu Shigemura * Copyright (c) 2009-2012 Jung-uk Kim * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Driver for the AMD CPU on-die thermal sensors. * Initially based on the k8temp Linux driver. 
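/*
 * Editorial note: amdsmn(4) above is an index/data pair.  Each access
 * writes the 32-bit SMN address to config register 0x60 (SMN_ADDR_REG)
 * of the host bridge and moves data through 0x64 (SMN_DATA_REG), with
 * smn_lock serializing the two-step sequence.  A consumer holding the
 * amdsmn device_t -- as amdtemp(4) does below -- reads a register like
 * this (hypothetical helper; 0x59800 is the Family 17h
 * reported-temperature register used later in this commit):
 */
static int
example_read_smn(device_t smn, uint32_t *val)
{
	return (amdsmn_read(smn, 0x59800, val));
}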
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include typedef enum { CORE0_SENSOR0, CORE0_SENSOR1, CORE1_SENSOR0, CORE1_SENSOR1, CORE0, CORE1 } amdsensor_t; struct amdtemp_softc { int sc_ncores; int sc_ntemps; int sc_flags; #define AMDTEMP_FLAG_CS_SWAP 0x01 /* ThermSenseCoreSel is inverted. */ #define AMDTEMP_FLAG_CT_10BIT 0x02 /* CurTmp is 10-bit wide. */ #define AMDTEMP_FLAG_ALT_OFFSET 0x04 /* CurTmp starts at -28C. */ int32_t sc_offset; int32_t (*sc_gettemp)(device_t, amdsensor_t); struct sysctl_oid *sc_sysctl_cpu[MAXCPU]; struct intr_config_hook sc_ich; device_t sc_smn; }; #define VENDORID_AMD 0x1022 #define DEVICEID_AMD_MISC0F 0x1103 #define DEVICEID_AMD_MISC10 0x1203 #define DEVICEID_AMD_MISC11 0x1303 #define DEVICEID_AMD_MISC12 0x1403 #define DEVICEID_AMD_MISC14 0x1703 #define DEVICEID_AMD_MISC15 0x1603 #define DEVICEID_AMD_MISC16 0x1533 #define DEVICEID_AMD_MISC16_M30H 0x1583 #define DEVICEID_AMD_MISC17 0x141d #define DEVICEID_AMD_HOSTB17H 0x1450 static struct amdtemp_product { uint16_t amdtemp_vendorid; uint16_t amdtemp_deviceid; } amdtemp_products[] = { { VENDORID_AMD, DEVICEID_AMD_MISC0F }, { VENDORID_AMD, DEVICEID_AMD_MISC10 }, { VENDORID_AMD, DEVICEID_AMD_MISC11 }, { VENDORID_AMD, DEVICEID_AMD_MISC12 }, { VENDORID_AMD, DEVICEID_AMD_MISC14 }, { VENDORID_AMD, DEVICEID_AMD_MISC15 }, { VENDORID_AMD, DEVICEID_AMD_MISC16 }, { VENDORID_AMD, DEVICEID_AMD_MISC16_M30H }, { VENDORID_AMD, DEVICEID_AMD_MISC17 }, { VENDORID_AMD, DEVICEID_AMD_HOSTB17H }, - { 0, 0 } }; /* * Reported Temperature Control Register */ #define AMDTEMP_REPTMP_CTRL 0xa4 /* * Reported Temperature, Family 17h */ #define AMDTEMP_17H_CUR_TMP 0x59800 /* * Thermaltrip Status Register (Family 0Fh only) */ #define AMDTEMP_THERMTP_STAT 0xe4 #define AMDTEMP_TTSR_SELCORE 0x04 #define AMDTEMP_TTSR_SELSENSOR 0x40 /* * DRAM Configuration High Register */ #define AMDTEMP_DRAM_CONF_HIGH 0x94 /* Function 2 */ #define AMDTEMP_DRAM_MODE_DDR3 0x0100 /* * CPU Family/Model Register */ #define AMDTEMP_CPUID 0xfc /* * Device methods. 
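/*
 * Editorial note: dropping the { 0, 0 } terminator from
 * amdtemp_products above is what makes the table exportable.
 * MODULE_PNP_INFO (added just below) records an explicit entry size
 * and count, so the match loop can be bounded by nitems() instead of a
 * sentinel, and the PNP record carries no meaningless all-zero row.
 * The "U16:vendor;U16:device" descriptor mirrors the struct layout
 * field for field (hypothetical sketch):
 */
static const struct example_product {
	uint16_t vendorid;	/* U16:vendor -- pci_get_vendor() */
	uint16_t deviceid;	/* U16:device -- pci_get_device() */
} example_products[] = {
	{ 0x1022, 0x1450 },
};
MODULE_PNP_INFO("U16:vendor;U16:device", pci, example, example_products,
    sizeof(example_products[0]), nitems(example_products));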
*/ static void amdtemp_identify(driver_t *driver, device_t parent); static int amdtemp_probe(device_t dev); static int amdtemp_attach(device_t dev); static void amdtemp_intrhook(void *arg); static int amdtemp_detach(device_t dev); static int amdtemp_match(device_t dev); static int32_t amdtemp_gettemp0f(device_t dev, amdsensor_t sensor); static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor); static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor); static int amdtemp_sysctl(SYSCTL_HANDLER_ARGS); static device_method_t amdtemp_methods[] = { /* Device interface */ DEVMETHOD(device_identify, amdtemp_identify), DEVMETHOD(device_probe, amdtemp_probe), DEVMETHOD(device_attach, amdtemp_attach), DEVMETHOD(device_detach, amdtemp_detach), DEVMETHOD_END }; static driver_t amdtemp_driver = { "amdtemp", amdtemp_methods, sizeof(struct amdtemp_softc), }; static devclass_t amdtemp_devclass; DRIVER_MODULE(amdtemp, hostb, amdtemp_driver, amdtemp_devclass, NULL, NULL); MODULE_VERSION(amdtemp, 1); MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1); +MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdtemp, amdtemp_products, + sizeof(amdtemp_products[0]), nitems(amdtemp_products)); static int amdtemp_match(device_t dev) { int i; uint16_t vendor, devid; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); - for (i = 0; amdtemp_products[i].amdtemp_vendorid != 0; i++) { + for (i = 0; i < nitems(amdtemp_products); i++) { if (vendor == amdtemp_products[i].amdtemp_vendorid && devid == amdtemp_products[i].amdtemp_deviceid) return (1); } return (0); } static void amdtemp_identify(driver_t *driver, device_t parent) { device_t child; /* Make sure we're not being doubly invoked. */ if (device_find_child(parent, "amdtemp", -1) != NULL) return; if (amdtemp_match(parent)) { child = device_add_child(parent, "amdtemp", -1); if (child == NULL) device_printf(parent, "add amdtemp child failed\n"); } } static int amdtemp_probe(device_t dev) { uint32_t family, model; if (resource_disabled("amdtemp", 0)) return (ENXIO); if (!amdtemp_match(device_get_parent(dev))) return (ENXIO); family = CPUID_TO_FAMILY(cpu_id); model = CPUID_TO_MODEL(cpu_id); switch (family) { case 0x0f: if ((model == 0x04 && (cpu_id & CPUID_STEPPING) == 0) || (model == 0x05 && (cpu_id & CPUID_STEPPING) <= 1)) return (ENXIO); break; case 0x10: case 0x11: case 0x12: case 0x14: case 0x15: case 0x16: case 0x17: break; default: return (ENXIO); } device_set_desc(dev, "AMD CPU On-Die Thermal Sensors"); return (BUS_PROBE_GENERIC); } static int amdtemp_attach(device_t dev) { char tn[32]; u_int regs[4]; struct amdtemp_softc *sc = device_get_softc(dev); struct sysctl_ctx_list *sysctlctx; struct sysctl_oid *sysctlnode; uint32_t cpuid, family, model; u_int bid; int erratum319, unit; erratum319 = 0; /* * CPUID Register is available from Revision F. */ cpuid = cpu_id; family = CPUID_TO_FAMILY(cpuid); model = CPUID_TO_MODEL(cpuid); if ((family != 0x0f || model >= 0x40) && family != 0x17) { cpuid = pci_read_config(dev, AMDTEMP_CPUID, 4); family = CPUID_TO_FAMILY(cpuid); model = CPUID_TO_MODEL(cpuid); } switch (family) { case 0x0f: /* * Thermaltrip Status Register * * - ThermSenseCoreSel * * Revision F & G: 0 - Core1, 1 - Core0 * Other: 0 - Core0, 1 - Core1 * * - CurTmp * * Revision G: bits 23-14 * Other: bits 23-16 * * XXX According to the BKDG, CurTmp, ThermSenseSel and * ThermSenseCoreSel bits were introduced in Revision F * but CurTmp seems working fine as early as Revision C. 
* However, it is not clear whether ThermSenseSel and/or * ThermSenseCoreSel work in undocumented cases as well. * In fact, the Linux driver suggests it may not work but * we just assume it does until we find otherwise. * * XXX According to Linux, CurTmp starts at -28C on * Socket AM2 Revision G processors, which is not * documented anywhere. */ if (model >= 0x40) sc->sc_flags |= AMDTEMP_FLAG_CS_SWAP; if (model >= 0x60 && model != 0xc1) { do_cpuid(0x80000001, regs); bid = (regs[1] >> 9) & 0x1f; switch (model) { case 0x68: /* Socket S1g1 */ case 0x6c: case 0x7c: break; case 0x6b: /* Socket AM2 and ASB1 (2 cores) */ if (bid != 0x0b && bid != 0x0c) sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET; break; case 0x6f: /* Socket AM2 and ASB1 (1 core) */ case 0x7f: if (bid != 0x07 && bid != 0x09 && bid != 0x0c) sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET; break; default: sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET; } sc->sc_flags |= AMDTEMP_FLAG_CT_10BIT; } /* * There are two sensors per core. */ sc->sc_ntemps = 2; sc->sc_gettemp = amdtemp_gettemp0f; break; case 0x10: /* * Erratum 319 Inaccurate Temperature Measurement * * http://support.amd.com/us/Processor_TechDocs/41322.pdf */ do_cpuid(0x80000001, regs); switch ((regs[1] >> 28) & 0xf) { case 0: /* Socket F */ erratum319 = 1; break; case 1: /* Socket AM2+ or AM3 */ if ((pci_cfgregread(pci_get_bus(dev), pci_get_slot(dev), 2, AMDTEMP_DRAM_CONF_HIGH, 2) & AMDTEMP_DRAM_MODE_DDR3) != 0 || model > 0x04 || (model == 0x04 && (cpuid & CPUID_STEPPING) >= 3)) break; /* XXX 00100F42h (RB-C2) exists in both formats. */ erratum319 = 1; break; } /* FALLTHROUGH */ case 0x11: case 0x12: case 0x14: case 0x15: case 0x16: /* * There is only one sensor per package. */ sc->sc_ntemps = 1; sc->sc_gettemp = amdtemp_gettemp; break; case 0x17: sc->sc_ntemps = 1; sc->sc_gettemp = amdtemp_gettemp17h; sc->sc_smn = device_find_child( device_get_parent(dev), "amdsmn", -1); if (sc->sc_smn == NULL) { if (bootverbose) device_printf(dev, "No SMN device found\n"); return (ENXIO); } break; } /* Find number of cores per package. */ sc->sc_ncores = (amd_feature2 & AMDID2_CMP) != 0 ? (cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1; if (sc->sc_ncores > MAXCPU) return (ENXIO); if (erratum319) device_printf(dev, "Erratum 319: temperature measurement may be inaccurate\n"); if (bootverbose) device_printf(dev, "Found %d cores and %d sensors.\n", sc->sc_ncores, sc->sc_ntemps > 1 ? sc->sc_ntemps * sc->sc_ncores : 1); /* * dev.amdtemp.N tree. 
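/*
 * Editorial note: on Family 17h amdtemp has no directly mapped sensor
 * register, so attach (above) resolves the amdsmn(4) sibling that
 * amdsmn_identify() added under the same hostb parent, failing with
 * ENXIO when it is absent; MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1)
 * only guarantees the module is loaded, not that the device attached.
 * The lookup reduces to (hypothetical helper):
 */
static device_t
example_find_smn(device_t dev)
{
	return (device_find_child(device_get_parent(dev), "amdsmn", -1));
}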
*/ unit = device_get_unit(dev); snprintf(tn, sizeof(tn), "dev.amdtemp.%d.sensor_offset", unit); TUNABLE_INT_FETCH(tn, &sc->sc_offset); sysctlctx = device_get_sysctl_ctx(dev); SYSCTL_ADD_INT(sysctlctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "sensor_offset", CTLFLAG_RW, &sc->sc_offset, 0, "Temperature sensor offset"); sysctlnode = SYSCTL_ADD_NODE(sysctlctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "core0", CTLFLAG_RD, 0, "Core 0"); SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(sysctlnode), OID_AUTO, "sensor0", CTLTYPE_INT | CTLFLAG_RD, dev, CORE0_SENSOR0, amdtemp_sysctl, "IK", "Core 0 / Sensor 0 temperature"); if (sc->sc_ntemps > 1) { SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(sysctlnode), OID_AUTO, "sensor1", CTLTYPE_INT | CTLFLAG_RD, dev, CORE0_SENSOR1, amdtemp_sysctl, "IK", "Core 0 / Sensor 1 temperature"); if (sc->sc_ncores > 1) { sysctlnode = SYSCTL_ADD_NODE(sysctlctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "core1", CTLFLAG_RD, 0, "Core 1"); SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(sysctlnode), OID_AUTO, "sensor0", CTLTYPE_INT | CTLFLAG_RD, dev, CORE1_SENSOR0, amdtemp_sysctl, "IK", "Core 1 / Sensor 0 temperature"); SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(sysctlnode), OID_AUTO, "sensor1", CTLTYPE_INT | CTLFLAG_RD, dev, CORE1_SENSOR1, amdtemp_sysctl, "IK", "Core 1 / Sensor 1 temperature"); } } /* * Try to create dev.cpu sysctl entries and setup intrhook function. * This is needed because the cpu driver may be loaded late on boot, * after us. */ amdtemp_intrhook(dev); sc->sc_ich.ich_func = amdtemp_intrhook; sc->sc_ich.ich_arg = dev; if (config_intrhook_establish(&sc->sc_ich) != 0) { device_printf(dev, "config_intrhook_establish failed!\n"); return (ENXIO); } return (0); } void amdtemp_intrhook(void *arg) { struct amdtemp_softc *sc; struct sysctl_ctx_list *sysctlctx; device_t dev = (device_t)arg; device_t acpi, cpu, nexus; amdsensor_t sensor; int i; sc = device_get_softc(dev); /* * dev.cpu.N.temperature. */ nexus = device_find_child(root_bus, "nexus", 0); acpi = device_find_child(nexus, "acpi", 0); for (i = 0; i < sc->sc_ncores; i++) { if (sc->sc_sysctl_cpu[i] != NULL) continue; cpu = device_find_child(acpi, "cpu", device_get_unit(dev) * sc->sc_ncores + i); if (cpu != NULL) { sysctlctx = device_get_sysctl_ctx(cpu); sensor = sc->sc_ntemps > 1 ? (i == 0 ? CORE0 : CORE1) : CORE0_SENSOR0; sc->sc_sysctl_cpu[i] = SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(device_get_sysctl_tree(cpu)), OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, dev, sensor, amdtemp_sysctl, "IK", "Current temparature"); } } if (sc->sc_ich.ich_arg != NULL) config_intrhook_disestablish(&sc->sc_ich); } int amdtemp_detach(device_t dev) { struct amdtemp_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->sc_ncores; i++) if (sc->sc_sysctl_cpu[i] != NULL) sysctl_remove_oid(sc->sc_sysctl_cpu[i], 1, 0); /* NewBus removes the dev.amdtemp.N tree by itself. 
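/*
 * Editorial note: amdtemp_intrhook() above runs once directly, for the
 * common case where the acpi cpuN devices already exist, and once more
 * from the config_intrhook queue after boot-time attach settles;
 * sc_ich.ich_arg doubles as the "still registered" flag the hook
 * checks before calling config_intrhook_disestablish().
 *
 * Editorial sketch of the conversion the Family 10h-and-later
 * sc_gettemp paths perform just below (hypothetical helper): CurTmp is
 * an 11-bit field in 0.125 degC units and the sysctl "IK" format wants
 * deci-Kelvin, so scale by 5/4 and rebase by 273.1 K:
 */
static int32_t
example_decode_curtmp(uint32_t reg, int32_t offset_degc)
{
	int32_t t;

	t = ((reg >> 21) & 0x7ff) * 5 / 4;	/* 0.125 degC -> deci-degC */
	/* e.g. raw field 392 -> 490 deci-degC (49.0 degC) -> 3221 dK */
	return (t + 2731 + offset_degc * 10);
}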
*/ return (0); } static int amdtemp_sysctl(SYSCTL_HANDLER_ARGS) { device_t dev = (device_t)arg1; struct amdtemp_softc *sc = device_get_softc(dev); amdsensor_t sensor = (amdsensor_t)arg2; int32_t auxtemp[2], temp; int error; switch (sensor) { case CORE0: auxtemp[0] = sc->sc_gettemp(dev, CORE0_SENSOR0); auxtemp[1] = sc->sc_gettemp(dev, CORE0_SENSOR1); temp = imax(auxtemp[0], auxtemp[1]); break; case CORE1: auxtemp[0] = sc->sc_gettemp(dev, CORE1_SENSOR0); auxtemp[1] = sc->sc_gettemp(dev, CORE1_SENSOR1); temp = imax(auxtemp[0], auxtemp[1]); break; default: temp = sc->sc_gettemp(dev, sensor); break; } error = sysctl_handle_int(oidp, &temp, 0, req); return (error); } #define AMDTEMP_ZERO_C_TO_K 2731 static int32_t amdtemp_gettemp0f(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); uint32_t mask, offset, temp; /* Set Sensor/Core selector. */ temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 1); temp &= ~(AMDTEMP_TTSR_SELCORE | AMDTEMP_TTSR_SELSENSOR); switch (sensor) { case CORE0_SENSOR1: temp |= AMDTEMP_TTSR_SELSENSOR; /* FALLTHROUGH */ case CORE0_SENSOR0: case CORE0: if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) != 0) temp |= AMDTEMP_TTSR_SELCORE; break; case CORE1_SENSOR1: temp |= AMDTEMP_TTSR_SELSENSOR; /* FALLTHROUGH */ case CORE1_SENSOR0: case CORE1: if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) == 0) temp |= AMDTEMP_TTSR_SELCORE; break; } pci_write_config(dev, AMDTEMP_THERMTP_STAT, temp, 1); mask = (sc->sc_flags & AMDTEMP_FLAG_CT_10BIT) != 0 ? 0x3ff : 0x3fc; offset = (sc->sc_flags & AMDTEMP_FLAG_ALT_OFFSET) != 0 ? 28 : 49; temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 4); temp = ((temp >> 14) & mask) * 5 / 2; temp += AMDTEMP_ZERO_C_TO_K + (sc->sc_offset - offset) * 10; return (temp); } static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); uint32_t temp; temp = pci_read_config(dev, AMDTEMP_REPTMP_CTRL, 4); temp = ((temp >> 21) & 0x7ff) * 5 / 4; temp += AMDTEMP_ZERO_C_TO_K + sc->sc_offset * 10; return (temp); } static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); uint32_t temp; int error; error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &temp); KASSERT(error == 0, ("amdsmn_read")); temp = ((temp >> 21) & 0x7ff) * 5 / 4; temp += AMDTEMP_ZERO_C_TO_K + sc->sc_offset * 10; return (temp); } Index: head/sys/dev/intpm/intpm.c =================================================================== --- head/sys/dev/intpm/intpm.c (revision 323585) +++ head/sys/dev/intpm/intpm.c (revision 323586) @@ -1,893 +1,899 @@ /*- * Copyright (c) 1998, 1999 Takanori Watanabe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "smbus_if.h" #include #include #include #include #include "opt_intpm.h" struct intsmb_softc { device_t dev; struct resource *io_res; struct resource *irq_res; void *irq_hand; device_t smbus; int io_rid; int isbusy; int cfg_irq9; int sb8xx; int poll; struct mtx lock; }; #define INTSMB_LOCK(sc) mtx_lock(&(sc)->lock) #define INTSMB_UNLOCK(sc) mtx_unlock(&(sc)->lock) #define INTSMB_LOCK_ASSERT(sc) mtx_assert(&(sc)->lock, MA_OWNED) static int intsmb_probe(device_t); static int intsmb_attach(device_t); static int intsmb_detach(device_t); static int intsmb_intr(struct intsmb_softc *sc); static int intsmb_slvintr(struct intsmb_softc *sc); static void intsmb_alrintr(struct intsmb_softc *sc); static int intsmb_callback(device_t dev, int index, void *data); static int intsmb_quick(device_t dev, u_char slave, int how); static int intsmb_sendb(device_t dev, u_char slave, char byte); static int intsmb_recvb(device_t dev, u_char slave, char *byte); static int intsmb_writeb(device_t dev, u_char slave, char cmd, char byte); static int intsmb_writew(device_t dev, u_char slave, char cmd, short word); static int intsmb_readb(device_t dev, u_char slave, char cmd, char *byte); static int intsmb_readw(device_t dev, u_char slave, char cmd, short *word); static int intsmb_pcall(device_t dev, u_char slave, char cmd, short sdata, short *rdata); static int intsmb_bwrite(device_t dev, u_char slave, char cmd, u_char count, char *buf); static int intsmb_bread(device_t dev, u_char slave, char cmd, u_char *count, char *buf); static void intsmb_start(struct intsmb_softc *sc, u_char cmd, int nointr); static int intsmb_stop(struct intsmb_softc *sc); static int intsmb_stop_poll(struct intsmb_softc *sc); static int intsmb_free(struct intsmb_softc *sc); static void intsmb_rawintr(void *arg); +const struct intsmb_device { + uint32_t devid; + const char *description; +} intsmb_products[] = { + { 0x71138086, "Intel PIIX4 SMBUS Interface" }, + { 0x719b8086, "Intel PIIX4 SMBUS Interface" }, +#if 0 + /* Not a good idea yet, this stops isab0 functioning */ + { 0x02001166, "ServerWorks OSB4" }, +#endif + { 0x43721002, "ATI IXP400 SMBus Controller" }, + { AMDSB_SMBUS_DEVID, "AMD SB600/7xx/8xx/9xx SMBus Controller" }, + { AMDFCH_SMBUS_DEVID, "AMD FCH SMBus Controller" }, + { AMDCZ_SMBUS_DEVID, "AMD FCH SMBus Controller" }, +}; + static int intsmb_probe(device_t dev) { + const struct intsmb_device *isd; + uint32_t devid; + size_t i; - switch (pci_get_devid(dev)) { - case 0x71138086: /* Intel 82371AB */ - case 0x719b8086: /* Intel 82443MX */ -#if 0 - /* Not a good idea yet, this stops isab0 functioning */ - case 0x02001166: /* ServerWorks OSB4 */ -#endif - device_set_desc(dev, "Intel PIIX4 SMBUS Interface"); - break; - case 0x43721002: - device_set_desc(dev, "ATI IXP400 SMBus Controller"); - break; - case AMDSB_SMBUS_DEVID: - device_set_desc(dev, "AMD SB600/7xx/8xx/9xx SMBus Controller"); - break; - 
case AMDFCH_SMBUS_DEVID: /* AMD FCH */ - case AMDCZ_SMBUS_DEVID: /* AMD Carizzo FCH */ - device_set_desc(dev, "AMD FCH SMBus Controller"); - break; - default: - return (ENXIO); + devid = pci_get_devid(dev); + for (i = 0; i < nitems(intsmb_products); i++) { + isd = &intsmb_products[i]; + if (isd->devid == devid) { + device_set_desc(dev, isd->description); + return (BUS_PROBE_DEFAULT); + } } - - return (BUS_PROBE_DEFAULT); + return (ENXIO); } static uint8_t amd_pmio_read(struct resource *res, uint8_t reg) { bus_write_1(res, 0, reg); /* Index */ return (bus_read_1(res, 1)); /* Data */ } static int sb8xx_attach(device_t dev) { static const int AMDSB_SMBIO_WIDTH = 0x10; struct intsmb_softc *sc; struct resource *res; uint32_t devid; uint8_t revid; uint16_t addr; int rid; int rc; bool enabled; sc = device_get_softc(dev); rid = 0; rc = bus_set_resource(dev, SYS_RES_IOPORT, rid, AMDSB_PMIO_INDEX, AMDSB_PMIO_WIDTH); if (rc != 0) { device_printf(dev, "bus_set_resource for PM IO failed\n"); return (ENXIO); } res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); if (res == NULL) { device_printf(dev, "bus_alloc_resource for PM IO failed\n"); return (ENXIO); } devid = pci_get_devid(dev); revid = pci_get_revid(dev); if (devid == AMDSB_SMBUS_DEVID || (devid == AMDFCH_SMBUS_DEVID && revid < AMDFCH41_SMBUS_REVID) || (devid == AMDCZ_SMBUS_DEVID && revid < AMDCZ49_SMBUS_REVID)) { addr = amd_pmio_read(res, AMDSB8_PM_SMBUS_EN + 1); addr <<= 8; addr |= amd_pmio_read(res, AMDSB8_PM_SMBUS_EN); enabled = (addr & AMDSB8_SMBUS_EN) != 0; addr &= AMDSB8_SMBUS_ADDR_MASK; } else { addr = amd_pmio_read(res, AMDFCH41_PM_DECODE_EN0); enabled = (addr & AMDFCH41_SMBUS_EN) != 0; addr = amd_pmio_read(res, AMDFCH41_PM_DECODE_EN1); addr <<= 8; } bus_release_resource(dev, SYS_RES_IOPORT, rid, res); bus_delete_resource(dev, SYS_RES_IOPORT, rid); if (!enabled) { device_printf(dev, "SB8xx/SB9xx/FCH SMBus not enabled\n"); return (ENXIO); } sc->io_rid = 0; rc = bus_set_resource(dev, SYS_RES_IOPORT, sc->io_rid, addr, AMDSB_SMBIO_WIDTH); if (rc != 0) { device_printf(dev, "bus_set_resource for SMBus IO failed\n"); return (ENXIO); } sc->io_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &sc->io_rid, RF_ACTIVE); if (sc->io_res == NULL) { device_printf(dev, "Could not allocate I/O space\n"); return (ENXIO); } sc->poll = 1; return (0); } static void intsmb_release_resources(device_t dev) { struct intsmb_softc *sc = device_get_softc(dev); if (sc->smbus) device_delete_child(dev, sc->smbus); if (sc->irq_hand) bus_teardown_intr(dev, sc->irq_res, sc->irq_hand); if (sc->irq_res) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res); if (sc->io_res) bus_release_resource(dev, SYS_RES_IOPORT, sc->io_rid, sc->io_res); mtx_destroy(&sc->lock); } static int intsmb_attach(device_t dev) { struct intsmb_softc *sc = device_get_softc(dev); int error, rid, value; int intr; char *str; sc->dev = dev; mtx_init(&sc->lock, device_get_nameunit(dev), "intsmb", MTX_DEF); sc->cfg_irq9 = 0; switch (pci_get_devid(dev)) { #ifndef NO_CHANGE_PCICONF case 0x71138086: /* Intel 82371AB */ case 0x719b8086: /* Intel 82443MX */ /* Changing configuration is allowed. 
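 * (cfg_irq9 makes attach rewrite PCIR_INTLINE and the HST_CFG_SMB
 * register below so the SMBus interrupt is routed to IRQ 9; parts left
 * by firmware on SMI delivery are detected further down and handled by
 * falling back to polling.)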
*/ sc->cfg_irq9 = 1; break; #endif case AMDSB_SMBUS_DEVID: if (pci_get_revid(dev) >= AMDSB8_SMBUS_REVID) sc->sb8xx = 1; break; case AMDFCH_SMBUS_DEVID: case AMDCZ_SMBUS_DEVID: sc->sb8xx = 1; break; } if (sc->sb8xx) { error = sb8xx_attach(dev); if (error != 0) goto fail; else goto no_intr; } sc->io_rid = PCI_BASE_ADDR_SMB; sc->io_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &sc->io_rid, RF_ACTIVE); if (sc->io_res == NULL) { device_printf(dev, "Could not allocate I/O space\n"); error = ENXIO; goto fail; } if (sc->cfg_irq9) { pci_write_config(dev, PCIR_INTLINE, 0x9, 1); pci_write_config(dev, PCI_HST_CFG_SMB, PCI_INTR_SMB_IRQ9 | PCI_INTR_SMB_ENABLE, 1); } value = pci_read_config(dev, PCI_HST_CFG_SMB, 1); sc->poll = (value & PCI_INTR_SMB_ENABLE) == 0; intr = value & PCI_INTR_SMB_MASK; switch (intr) { case PCI_INTR_SMB_SMI: str = "SMI"; break; case PCI_INTR_SMB_IRQ9: str = "IRQ 9"; break; case PCI_INTR_SMB_IRQ_PCI: str = "PCI IRQ"; break; default: str = "BOGUS"; } device_printf(dev, "intr %s %s ", str, sc->poll == 0 ? "enabled" : "disabled"); printf("revision %d\n", pci_read_config(dev, PCI_REVID_SMB, 1)); if (!sc->poll && intr == PCI_INTR_SMB_SMI) { device_printf(dev, "using polling mode when configured interrupt is SMI\n"); sc->poll = 1; } if (sc->poll) goto no_intr; if (intr != PCI_INTR_SMB_IRQ9 && intr != PCI_INTR_SMB_IRQ_PCI) { device_printf(dev, "Unsupported interrupt mode\n"); error = ENXIO; goto fail; } /* Force IRQ 9. */ rid = 0; if (sc->cfg_irq9) bus_set_resource(dev, SYS_RES_IRQ, rid, 9, 1); sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(dev, "Could not allocate irq\n"); error = ENXIO; goto fail; } error = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, intsmb_rawintr, sc, &sc->irq_hand); if (error) { device_printf(dev, "Failed to map intr\n"); goto fail; } no_intr: sc->isbusy = 0; sc->smbus = device_add_child(dev, "smbus", -1); if (sc->smbus == NULL) { device_printf(dev, "failed to add smbus child\n"); error = ENXIO; goto fail; } error = device_probe_and_attach(sc->smbus); if (error) { device_printf(dev, "failed to probe+attach smbus child\n"); goto fail; } #ifdef ENABLE_ALART /* Enable Arart */ bus_write_1(sc->io_res, PIIX4_SMBSLVCNT, PIIX4_SMBSLVCNT_ALTEN); #endif return (0); fail: intsmb_release_resources(dev); return (error); } static int intsmb_detach(device_t dev) { int error; error = bus_generic_detach(dev); if (error) { device_printf(dev, "bus detach failed\n"); return (error); } intsmb_release_resources(dev); return (0); } static void intsmb_rawintr(void *arg) { struct intsmb_softc *sc = arg; INTSMB_LOCK(sc); intsmb_intr(sc); intsmb_slvintr(sc); INTSMB_UNLOCK(sc); } static int intsmb_callback(device_t dev, int index, void *data) { int error = 0; switch (index) { case SMB_REQUEST_BUS: break; case SMB_RELEASE_BUS: break; default: error = SMB_EINVAL; } return (error); } /* Counterpart of smbtx_smb_free(). */ static int intsmb_free(struct intsmb_softc *sc) { INTSMB_LOCK_ASSERT(sc); if ((bus_read_1(sc->io_res, PIIX4_SMBHSTSTS) & PIIX4_SMBHSTSTAT_BUSY) || #ifdef ENABLE_ALART (bus_read_1(sc->io_res, PIIX4_SMBSLVSTS) & PIIX4_SMBSLVSTS_BUSY) || #endif sc->isbusy) return (SMB_EBUSY); sc->isbusy = 1; /* Disable Interrupt in slave part. */ #ifndef ENABLE_ALART bus_write_1(sc->io_res, PIIX4_SMBSLVCNT, 0); #endif /* Reset INTR Flag to prepare INTR. 
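 * (These status bits are write-one-to-clear; setting them all rearms
 * the controller so the next transaction's completion can raise a
 * fresh interrupt.)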
*/ bus_write_1(sc->io_res, PIIX4_SMBHSTSTS, PIIX4_SMBHSTSTAT_INTR | PIIX4_SMBHSTSTAT_ERR | PIIX4_SMBHSTSTAT_BUSC | PIIX4_SMBHSTSTAT_FAIL); return (0); } static int intsmb_intr(struct intsmb_softc *sc) { int status, tmp; status = bus_read_1(sc->io_res, PIIX4_SMBHSTSTS); if (status & PIIX4_SMBHSTSTAT_BUSY) return (1); if (status & (PIIX4_SMBHSTSTAT_INTR | PIIX4_SMBHSTSTAT_ERR | PIIX4_SMBHSTSTAT_BUSC | PIIX4_SMBHSTSTAT_FAIL)) { tmp = bus_read_1(sc->io_res, PIIX4_SMBHSTCNT); bus_write_1(sc->io_res, PIIX4_SMBHSTCNT, tmp & ~PIIX4_SMBHSTCNT_INTREN); if (sc->isbusy) { sc->isbusy = 0; wakeup(sc); } return (0); } return (1); /* Not Completed */ } static int intsmb_slvintr(struct intsmb_softc *sc) { int status; status = bus_read_1(sc->io_res, PIIX4_SMBSLVSTS); if (status & PIIX4_SMBSLVSTS_BUSY) return (1); if (status & PIIX4_SMBSLVSTS_ALART) intsmb_alrintr(sc); else if (status & ~(PIIX4_SMBSLVSTS_ALART | PIIX4_SMBSLVSTS_SDW2 | PIIX4_SMBSLVSTS_SDW1)) { } /* Reset Status Register */ bus_write_1(sc->io_res, PIIX4_SMBSLVSTS, PIIX4_SMBSLVSTS_ALART | PIIX4_SMBSLVSTS_SDW2 | PIIX4_SMBSLVSTS_SDW1 | PIIX4_SMBSLVSTS_SLV); return (0); } static void intsmb_alrintr(struct intsmb_softc *sc) { int slvcnt; #ifdef ENABLE_ALART int error; uint8_t addr; #endif /* Stop generating INTR from ALART. */ slvcnt = bus_read_1(sc->io_res, PIIX4_SMBSLVCNT); #ifdef ENABLE_ALART bus_write_1(sc->io_res, PIIX4_SMBSLVCNT, slvcnt & ~PIIX4_SMBSLVCNT_ALTEN); #endif DELAY(5); /* Ask bus who asserted it and then ask it what's the matter. */ #ifdef ENABLE_ALART error = intsmb_free(sc); if (error) return; bus_write_1(sc->io_res, PIIX4_SMBHSTADD, SMBALTRESP | LSB); intsmb_start(sc, PIIX4_SMBHSTCNT_PROT_BYTE, 1); error = intsmb_stop_poll(sc); if (error) device_printf(sc->dev, "ALART: ERROR\n"); else { addr = bus_read_1(sc->io_res, PIIX4_SMBHSTDAT0); device_printf(sc->dev, "ALART_RESPONSE: 0x%x\n", addr); } /* Re-enable INTR from ALART. */ bus_write_1(sc->io_res, PIIX4_SMBSLVCNT, slvcnt | PIIX4_SMBSLVCNT_ALTEN); DELAY(5); #endif } static void intsmb_start(struct intsmb_softc *sc, unsigned char cmd, int nointr) { unsigned char tmp; INTSMB_LOCK_ASSERT(sc); tmp = bus_read_1(sc->io_res, PIIX4_SMBHSTCNT); tmp &= 0xe0; tmp |= cmd; tmp |= PIIX4_SMBHSTCNT_START; /* While not in autoconfiguration enable interrupts. */ if (!sc->poll && !cold && !nointr) tmp |= PIIX4_SMBHSTCNT_INTREN; bus_write_1(sc->io_res, PIIX4_SMBHSTCNT, tmp); } static int intsmb_error(device_t dev, int status) { int error = 0; if (status & PIIX4_SMBHSTSTAT_ERR) error |= SMB_EBUSERR; if (status & PIIX4_SMBHSTSTAT_BUSC) error |= SMB_ECOLLI; if (status & PIIX4_SMBHSTSTAT_FAIL) error |= SMB_ENOACK; if (error != 0 && bootverbose) device_printf(dev, "error = %d, status = %#x\n", error, status); return (error); } /* * Polling Code. * * Polling is not encouraged because it requires waiting for the * device if it is busy. * (29063505.pdf from Intel) But during boot, interrupt cannot be used, so use * polling code then. */ static int intsmb_stop_poll(struct intsmb_softc *sc) { int error, i, status, tmp; INTSMB_LOCK_ASSERT(sc); /* First, wait for busy to be set. */ for (i = 0; i < 0x7fff; i++) if (bus_read_1(sc->io_res, PIIX4_SMBHSTSTS) & PIIX4_SMBHSTSTAT_BUSY) break; /* Wait for busy to clear. */ for (i = 0; i < 0x7fff; i++) { status = bus_read_1(sc->io_res, PIIX4_SMBHSTSTS); if (!(status & PIIX4_SMBHSTSTAT_BUSY)) { sc->isbusy = 0; error = intsmb_error(sc->dev, status); return (error); } } /* Timed out waiting for busy to clear. 
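 * Clearing INTREN before returning matters here: the transaction may
 * still complete after we give up, and with sc->isbusy already reset a
 * late completion interrupt would be spurious.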
*/ sc->isbusy = 0; tmp = bus_read_1(sc->io_res, PIIX4_SMBHSTCNT); bus_write_1(sc->io_res, PIIX4_SMBHSTCNT, tmp & ~PIIX4_SMBHSTCNT_INTREN); return (SMB_ETIMEOUT); } /* * Wait for completion and return result. */ static int intsmb_stop(struct intsmb_softc *sc) { int error, status; INTSMB_LOCK_ASSERT(sc); if (sc->poll || cold) /* So that it can use device during device probe on SMBus. */ return (intsmb_stop_poll(sc)); error = msleep(sc, &sc->lock, PWAIT | PCATCH, "SMBWAI", hz / 8); if (error == 0) { status = bus_read_1(sc->io_res, PIIX4_SMBHSTSTS); if (!(status & PIIX4_SMBHSTSTAT_BUSY)) { error = intsmb_error(sc->dev, status); if (error == 0 && !(status & PIIX4_SMBHSTSTAT_INTR)) device_printf(sc->dev, "unknown cause why?\n"); #ifdef ENABLE_ALART bus_write_1(sc->io_res, PIIX4_SMBSLVCNT, PIIX4_SMBSLVCNT_ALTEN); #endif return (error); } } /* Timeout Procedure. */ sc->isbusy = 0; /* Re-enable suppressed interrupt from slave part. */ bus_write_1(sc->io_res, PIIX4_SMBSLVCNT, PIIX4_SMBSLVCNT_ALTEN); if (error == EWOULDBLOCK) return (SMB_ETIMEOUT); else return (SMB_EABORT); } static int intsmb_quick(device_t dev, u_char slave, int how) { struct intsmb_softc *sc = device_get_softc(dev); int error; u_char data; data = slave; /* Quick command is part of Address, I think. */ switch(how) { case SMB_QWRITE: data &= ~LSB; break; case SMB_QREAD: data |= LSB; break; default: return (SMB_EINVAL); } INTSMB_LOCK(sc); error = intsmb_free(sc); if (error) { INTSMB_UNLOCK(sc); return (error); } bus_write_1(sc->io_res, PIIX4_SMBHSTADD, data); intsmb_start(sc, PIIX4_SMBHSTCNT_PROT_QUICK, 0); error = intsmb_stop(sc); INTSMB_UNLOCK(sc); return (error); } static int intsmb_sendb(device_t dev, u_char slave, char byte) { struct intsmb_softc *sc = device_get_softc(dev); int error; INTSMB_LOCK(sc); error = intsmb_free(sc); if (error) { INTSMB_UNLOCK(sc); return (error); } bus_write_1(sc->io_res, PIIX4_SMBHSTADD, slave & ~LSB); bus_write_1(sc->io_res, PIIX4_SMBHSTCMD, byte); intsmb_start(sc, PIIX4_SMBHSTCNT_PROT_BYTE, 0); error = intsmb_stop(sc); INTSMB_UNLOCK(sc); return (error); } static int intsmb_recvb(device_t dev, u_char slave, char *byte) { struct intsmb_softc *sc = device_get_softc(dev); int error; INTSMB_LOCK(sc); error = intsmb_free(sc); if (error) { INTSMB_UNLOCK(sc); return (error); } bus_write_1(sc->io_res, PIIX4_SMBHSTADD, slave | LSB); intsmb_start(sc, PIIX4_SMBHSTCNT_PROT_BYTE, 0); error = intsmb_stop(sc); if (error == 0) { #ifdef RECV_IS_IN_CMD /* * Linux SMBus stuff also troubles * Because Intel's datasheet does not make clear. 
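 * (i.e. whether the received byte shows up in the command register or
 * in DAT0 after a receive-byte transaction; the two #ifdef arms below
 * cover both readings of the datasheet.)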
*/ *byte = bus_read_1(sc->io_res, PIIX4_SMBHSTCMD); #else *byte = bus_read_1(sc->io_res, PIIX4_SMBHSTDAT0); #endif } INTSMB_UNLOCK(sc); return (error); } static int intsmb_writeb(device_t dev, u_char slave, char cmd, char byte) { struct intsmb_softc *sc = device_get_softc(dev); int error; INTSMB_LOCK(sc); error = intsmb_free(sc); if (error) { INTSMB_UNLOCK(sc); return (error); } bus_write_1(sc->io_res, PIIX4_SMBHSTADD, slave & ~LSB); bus_write_1(sc->io_res, PIIX4_SMBHSTCMD, cmd); bus_write_1(sc->io_res, PIIX4_SMBHSTDAT0, byte); intsmb_start(sc, PIIX4_SMBHSTCNT_PROT_BDATA, 0); error = intsmb_stop(sc); INTSMB_UNLOCK(sc); return (error); } static int intsmb_writew(device_t dev, u_char slave, char cmd, short word) { struct intsmb_softc *sc = device_get_softc(dev); int error; INTSMB_LOCK(sc); error = intsmb_free(sc); if (error) { INTSMB_UNLOCK(sc); return (error); } bus_write_1(sc->io_res, PIIX4_SMBHSTADD, slave & ~LSB); bus_write_1(sc->io_res, PIIX4_SMBHSTCMD, cmd); bus_write_1(sc->io_res, PIIX4_SMBHSTDAT0, word & 0xff); bus_write_1(sc->io_res, PIIX4_SMBHSTDAT1, (word >> 8) & 0xff); intsmb_start(sc, PIIX4_SMBHSTCNT_PROT_WDATA, 0); error = intsmb_stop(sc); INTSMB_UNLOCK(sc); return (error); } static int intsmb_readb(device_t dev, u_char slave, char cmd, char *byte) { struct intsmb_softc *sc = device_get_softc(dev); int error; INTSMB_LOCK(sc); error = intsmb_free(sc); if (error) { INTSMB_UNLOCK(sc); return (error); } bus_write_1(sc->io_res, PIIX4_SMBHSTADD, slave | LSB); bus_write_1(sc->io_res, PIIX4_SMBHSTCMD, cmd); intsmb_start(sc, PIIX4_SMBHSTCNT_PROT_BDATA, 0); error = intsmb_stop(sc); if (error == 0) *byte = bus_read_1(sc->io_res, PIIX4_SMBHSTDAT0); INTSMB_UNLOCK(sc); return (error); } static int intsmb_readw(device_t dev, u_char slave, char cmd, short *word) { struct intsmb_softc *sc = device_get_softc(dev); int error; INTSMB_LOCK(sc); error = intsmb_free(sc); if (error) { INTSMB_UNLOCK(sc); return (error); } bus_write_1(sc->io_res, PIIX4_SMBHSTADD, slave | LSB); bus_write_1(sc->io_res, PIIX4_SMBHSTCMD, cmd); intsmb_start(sc, PIIX4_SMBHSTCNT_PROT_WDATA, 0); error = intsmb_stop(sc); if (error == 0) { *word = bus_read_1(sc->io_res, PIIX4_SMBHSTDAT0); *word |= bus_read_1(sc->io_res, PIIX4_SMBHSTDAT1) << 8; } INTSMB_UNLOCK(sc); return (error); } static int intsmb_pcall(device_t dev, u_char slave, char cmd, short sdata, short *rdata) { return (SMB_ENOTSUPP); } static int intsmb_bwrite(device_t dev, u_char slave, char cmd, u_char count, char *buf) { struct intsmb_softc *sc = device_get_softc(dev); int error, i; if (count > SMBBLOCKTRANS_MAX || count == 0) return (SMB_EINVAL); INTSMB_LOCK(sc); error = intsmb_free(sc); if (error) { INTSMB_UNLOCK(sc); return (error); } /* Reset internal array index. */ bus_read_1(sc->io_res, PIIX4_SMBHSTCNT); bus_write_1(sc->io_res, PIIX4_SMBHSTADD, slave & ~LSB); bus_write_1(sc->io_res, PIIX4_SMBHSTCMD, cmd); for (i = 0; i < count; i++) bus_write_1(sc->io_res, PIIX4_SMBBLKDAT, buf[i]); bus_write_1(sc->io_res, PIIX4_SMBHSTDAT0, count); intsmb_start(sc, PIIX4_SMBHSTCNT_PROT_BLOCK, 0); error = intsmb_stop(sc); INTSMB_UNLOCK(sc); return (error); } static int intsmb_bread(device_t dev, u_char slave, char cmd, u_char *count, char *buf) { struct intsmb_softc *sc = device_get_softc(dev); int error, i; u_char data, nread; INTSMB_LOCK(sc); error = intsmb_free(sc); if (error) { INTSMB_UNLOCK(sc); return (error); } /* Reset internal array index. 
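 * The PIIX4 block-data register walks an internal 32-byte buffer with
 * an autoincrementing index; reading SMBHSTCNT rewinds that index,
 * which is why both block paths start with a dummy bus_read_1() of it.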
*/ bus_read_1(sc->io_res, PIIX4_SMBHSTCNT); bus_write_1(sc->io_res, PIIX4_SMBHSTADD, slave | LSB); bus_write_1(sc->io_res, PIIX4_SMBHSTCMD, cmd); intsmb_start(sc, PIIX4_SMBHSTCNT_PROT_BLOCK, 0); error = intsmb_stop(sc); if (error == 0) { nread = bus_read_1(sc->io_res, PIIX4_SMBHSTDAT0); if (nread != 0 && nread <= SMBBLOCKTRANS_MAX) { *count = nread; for (i = 0; i < nread; i++) data = bus_read_1(sc->io_res, PIIX4_SMBBLKDAT); } else error = SMB_EBUSERR; } INTSMB_UNLOCK(sc); return (error); } static devclass_t intsmb_devclass; static device_method_t intsmb_methods[] = { /* Device interface */ DEVMETHOD(device_probe, intsmb_probe), DEVMETHOD(device_attach, intsmb_attach), DEVMETHOD(device_detach, intsmb_detach), /* SMBus interface */ DEVMETHOD(smbus_callback, intsmb_callback), DEVMETHOD(smbus_quick, intsmb_quick), DEVMETHOD(smbus_sendb, intsmb_sendb), DEVMETHOD(smbus_recvb, intsmb_recvb), DEVMETHOD(smbus_writeb, intsmb_writeb), DEVMETHOD(smbus_writew, intsmb_writew), DEVMETHOD(smbus_readb, intsmb_readb), DEVMETHOD(smbus_readw, intsmb_readw), DEVMETHOD(smbus_pcall, intsmb_pcall), DEVMETHOD(smbus_bwrite, intsmb_bwrite), DEVMETHOD(smbus_bread, intsmb_bread), DEVMETHOD_END }; static driver_t intsmb_driver = { "intsmb", intsmb_methods, sizeof(struct intsmb_softc), }; DRIVER_MODULE_ORDERED(intsmb, pci, intsmb_driver, intsmb_devclass, 0, 0, SI_ORDER_ANY); DRIVER_MODULE(smbus, intsmb, smbus_driver, smbus_devclass, 0, 0); MODULE_DEPEND(intsmb, smbus, SMBUS_MINVER, SMBUS_PREFVER, SMBUS_MAXVER); MODULE_VERSION(intsmb, 1); +MODULE_PNP_INFO("W32:vendor/device;D:human", pci, intpm, intsmb_products, + sizeof(intsmb_products[0]), nitems(intsmb_products)); Index: head/sys/dev/ioat/ioat.c =================================================================== --- head/sys/dev/ioat/ioat.c (revision 323585) +++ head/sys/dev/ioat/ioat.c (revision 323586) @@ -1,2076 +1,2077 @@ /*- * Copyright (C) 2012 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
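/*
 * Editorial sketch bridging the two remaining conversions (intpm
 * above, ioat below): both device tables carry a description string,
 * so the descriptor gains a ";D:human" column.  "D" marks a
 * human-readable field (a const char * in the row) that PNP consumers
 * can display but do not match on.  Note the macro's third argument is
 * the module name ("intpm", "ioat"), not the driver name ("intsmb").
 * Hypothetical row layout:
 */
static const struct example_pnp_row {
	uint32_t devid;		/* W32:vendor/device */
	const char *desc;	/* D:human */
} example_rows[] = {
	{ 0x71138086, "Intel PIIX4 SMBUS Interface" },
};
MODULE_PNP_INFO("W32:vendor/device;D:human", pci, example, example_rows,
    sizeof(example_rows[0]), nitems(example_rows));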
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif #include "ioat.h" #include "ioat_hw.h" #include "ioat_internal.h" #ifndef BUS_SPACE_MAXADDR_40BIT #define BUS_SPACE_MAXADDR_40BIT 0xFFFFFFFFFFULL #endif #define IOAT_REFLK (&ioat->submit_lock) static int ioat_probe(device_t device); static int ioat_attach(device_t device); static int ioat_detach(device_t device); static int ioat_setup_intr(struct ioat_softc *ioat); static int ioat_teardown_intr(struct ioat_softc *ioat); static int ioat3_attach(device_t device); static int ioat_start_channel(struct ioat_softc *ioat); static int ioat_map_pci_bar(struct ioat_softc *ioat); static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error); static void ioat_interrupt_handler(void *arg); static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat); static int chanerr_to_errno(uint32_t); static void ioat_process_events(struct ioat_softc *ioat); static inline uint32_t ioat_get_active(struct ioat_softc *ioat); static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat); static void ioat_free_ring(struct ioat_softc *, uint32_t size, struct ioat_descriptor *); static int ioat_reserve_space(struct ioat_softc *, uint32_t, int mflags); static union ioat_hw_descriptor *ioat_get_descriptor(struct ioat_softc *, uint32_t index); static struct ioat_descriptor *ioat_get_ring_entry(struct ioat_softc *, uint32_t index); static void ioat_halted_debug(struct ioat_softc *, uint32_t); static void ioat_poll_timer_callback(void *arg); static void dump_descriptor(void *hw_desc); static void ioat_submit_single(struct ioat_softc *ioat); static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error); static int ioat_reset_hw(struct ioat_softc *ioat); static void ioat_reset_hw_task(void *, int); static void ioat_setup_sysctl(device_t device); static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS); static inline struct ioat_softc *ioat_get(struct ioat_softc *, enum ioat_ref_kind); static inline void ioat_put(struct ioat_softc *, enum ioat_ref_kind); static inline void _ioat_putn(struct ioat_softc *, uint32_t, enum ioat_ref_kind, boolean_t); static inline void ioat_putn(struct ioat_softc *, uint32_t, enum ioat_ref_kind); static inline void ioat_putn_locked(struct ioat_softc *, uint32_t, enum ioat_ref_kind); static void ioat_drain_locked(struct ioat_softc *); #define ioat_log_message(v, ...) do { \ if ((v) <= g_ioat_debug_level) { \ device_printf(ioat->device, __VA_ARGS__); \ } \ } while (0) MALLOC_DEFINE(M_IOAT, "ioat", "ioat driver memory allocations"); SYSCTL_NODE(_hw, OID_AUTO, ioat, CTLFLAG_RD, 0, "ioat node"); static int g_force_legacy_interrupts; SYSCTL_INT(_hw_ioat, OID_AUTO, force_legacy_interrupts, CTLFLAG_RDTUN, &g_force_legacy_interrupts, 0, "Set to non-zero to force MSI-X disabled"); int g_ioat_debug_level = 0; SYSCTL_INT(_hw_ioat, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ioat_debug_level, 0, "Set log level (0-3) for ioat(4). Higher is more verbose."); unsigned g_ioat_ring_order = 13; SYSCTL_UINT(_hw_ioat, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ioat_ring_order, 0, "Set IOAT ring order. 
(1 << this) == ring size."); /* * OS <-> Driver interface structures */ static device_method_t ioat_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ioat_probe), DEVMETHOD(device_attach, ioat_attach), DEVMETHOD(device_detach, ioat_detach), DEVMETHOD_END }; static driver_t ioat_pci_driver = { "ioat", ioat_pci_methods, sizeof(struct ioat_softc), }; static devclass_t ioat_devclass; DRIVER_MODULE(ioat, pci, ioat_pci_driver, ioat_devclass, 0, 0); MODULE_VERSION(ioat, 1); /* * Private data structures */ static struct ioat_softc *ioat_channel[IOAT_MAX_CHANNELS]; static unsigned ioat_channel_index = 0; SYSCTL_UINT(_hw_ioat, OID_AUTO, channels, CTLFLAG_RD, &ioat_channel_index, 0, "Number of IOAT channels attached"); static struct _pcsid { u_int32_t type; const char *desc; } pci_ids[] = { { 0x34308086, "TBG IOAT Ch0" }, { 0x34318086, "TBG IOAT Ch1" }, { 0x34328086, "TBG IOAT Ch2" }, { 0x34338086, "TBG IOAT Ch3" }, { 0x34298086, "TBG IOAT Ch4" }, { 0x342a8086, "TBG IOAT Ch5" }, { 0x342b8086, "TBG IOAT Ch6" }, { 0x342c8086, "TBG IOAT Ch7" }, { 0x37108086, "JSF IOAT Ch0" }, { 0x37118086, "JSF IOAT Ch1" }, { 0x37128086, "JSF IOAT Ch2" }, { 0x37138086, "JSF IOAT Ch3" }, { 0x37148086, "JSF IOAT Ch4" }, { 0x37158086, "JSF IOAT Ch5" }, { 0x37168086, "JSF IOAT Ch6" }, { 0x37178086, "JSF IOAT Ch7" }, { 0x37188086, "JSF IOAT Ch0 (RAID)" }, { 0x37198086, "JSF IOAT Ch1 (RAID)" }, { 0x3c208086, "SNB IOAT Ch0" }, { 0x3c218086, "SNB IOAT Ch1" }, { 0x3c228086, "SNB IOAT Ch2" }, { 0x3c238086, "SNB IOAT Ch3" }, { 0x3c248086, "SNB IOAT Ch4" }, { 0x3c258086, "SNB IOAT Ch5" }, { 0x3c268086, "SNB IOAT Ch6" }, { 0x3c278086, "SNB IOAT Ch7" }, { 0x3c2e8086, "SNB IOAT Ch0 (RAID)" }, { 0x3c2f8086, "SNB IOAT Ch1 (RAID)" }, { 0x0e208086, "IVB IOAT Ch0" }, { 0x0e218086, "IVB IOAT Ch1" }, { 0x0e228086, "IVB IOAT Ch2" }, { 0x0e238086, "IVB IOAT Ch3" }, { 0x0e248086, "IVB IOAT Ch4" }, { 0x0e258086, "IVB IOAT Ch5" }, { 0x0e268086, "IVB IOAT Ch6" }, { 0x0e278086, "IVB IOAT Ch7" }, { 0x0e2e8086, "IVB IOAT Ch0 (RAID)" }, { 0x0e2f8086, "IVB IOAT Ch1 (RAID)" }, { 0x2f208086, "HSW IOAT Ch0" }, { 0x2f218086, "HSW IOAT Ch1" }, { 0x2f228086, "HSW IOAT Ch2" }, { 0x2f238086, "HSW IOAT Ch3" }, { 0x2f248086, "HSW IOAT Ch4" }, { 0x2f258086, "HSW IOAT Ch5" }, { 0x2f268086, "HSW IOAT Ch6" }, { 0x2f278086, "HSW IOAT Ch7" }, { 0x2f2e8086, "HSW IOAT Ch0 (RAID)" }, { 0x2f2f8086, "HSW IOAT Ch1 (RAID)" }, { 0x0c508086, "BWD IOAT Ch0" }, { 0x0c518086, "BWD IOAT Ch1" }, { 0x0c528086, "BWD IOAT Ch2" }, { 0x0c538086, "BWD IOAT Ch3" }, { 0x6f508086, "BDXDE IOAT Ch0" }, { 0x6f518086, "BDXDE IOAT Ch1" }, { 0x6f528086, "BDXDE IOAT Ch2" }, { 0x6f538086, "BDXDE IOAT Ch3" }, { 0x6f208086, "BDX IOAT Ch0" }, { 0x6f218086, "BDX IOAT Ch1" }, { 0x6f228086, "BDX IOAT Ch2" }, { 0x6f238086, "BDX IOAT Ch3" }, { 0x6f248086, "BDX IOAT Ch4" }, { 0x6f258086, "BDX IOAT Ch5" }, { 0x6f268086, "BDX IOAT Ch6" }, { 0x6f278086, "BDX IOAT Ch7" }, { 0x6f2e8086, "BDX IOAT Ch0 (RAID)" }, { 0x6f2f8086, "BDX IOAT Ch1 (RAID)" }, - - { 0x00000000, NULL } }; +MODULE_PNP_INFO("W32:vendor/device;D:human", pci, ioat, pci_ids, + sizeof(pci_ids[0]), nitems(pci_ids)); + /* * OS <-> Driver linkage functions */ static int ioat_probe(device_t device) { struct _pcsid *ep; u_int32_t type; type = pci_get_devid(device); - for (ep = pci_ids; ep->type; ep++) { + for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) { if (ep->type == type) { device_set_desc(device, ep->desc); return (0); } } return (ENXIO); } static int ioat_attach(device_t device) { struct ioat_softc *ioat; int error; ioat 
= DEVICE2SOFTC(device); ioat->device = device; error = ioat_map_pci_bar(ioat); if (error != 0) goto err; ioat->version = ioat_read_cbver(ioat); if (ioat->version < IOAT_VER_3_0) { error = ENODEV; goto err; } error = ioat3_attach(device); if (error != 0) goto err; error = pci_enable_busmaster(device); if (error != 0) goto err; error = ioat_setup_intr(ioat); if (error != 0) goto err; error = ioat_reset_hw(ioat); if (error != 0) goto err; ioat_process_events(ioat); ioat_setup_sysctl(device); ioat->chan_idx = ioat_channel_index; ioat_channel[ioat_channel_index++] = ioat; ioat_test_attach(); err: if (error != 0) ioat_detach(device); return (error); } static int ioat_detach(device_t device) { struct ioat_softc *ioat; ioat = DEVICE2SOFTC(device); ioat_test_detach(); taskqueue_drain(taskqueue_thread, &ioat->reset_task); mtx_lock(IOAT_REFLK); ioat->quiescing = TRUE; ioat->destroying = TRUE; wakeup(&ioat->quiescing); wakeup(&ioat->resetting); ioat_channel[ioat->chan_idx] = NULL; ioat_drain_locked(ioat); mtx_unlock(IOAT_REFLK); ioat_teardown_intr(ioat); callout_drain(&ioat->poll_timer); pci_disable_busmaster(device); if (ioat->pci_resource != NULL) bus_release_resource(device, SYS_RES_MEMORY, ioat->pci_resource_id, ioat->pci_resource); if (ioat->ring != NULL) ioat_free_ring(ioat, 1 << ioat->ring_size_order, ioat->ring); if (ioat->comp_update != NULL) { bus_dmamap_unload(ioat->comp_update_tag, ioat->comp_update_map); bus_dmamem_free(ioat->comp_update_tag, ioat->comp_update, ioat->comp_update_map); bus_dma_tag_destroy(ioat->comp_update_tag); } if (ioat->hw_desc_ring != NULL) { bus_dmamap_unload(ioat->hw_desc_tag, ioat->hw_desc_map); bus_dmamem_free(ioat->hw_desc_tag, ioat->hw_desc_ring, ioat->hw_desc_map); bus_dma_tag_destroy(ioat->hw_desc_tag); } return (0); } static int ioat_teardown_intr(struct ioat_softc *ioat) { if (ioat->tag != NULL) bus_teardown_intr(ioat->device, ioat->res, ioat->tag); if (ioat->res != NULL) bus_release_resource(ioat->device, SYS_RES_IRQ, rman_get_rid(ioat->res), ioat->res); pci_release_msi(ioat->device); return (0); } static int ioat_start_channel(struct ioat_softc *ioat) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct bus_dmadesc *dmadesc; uint64_t status; uint32_t chanerr; int i; ioat_acquire(&ioat->dmaengine); /* Submit 'NULL' operation manually to avoid quiescing flag */ desc = ioat_get_ring_entry(ioat, ioat->head); hw_desc = &ioat_get_descriptor(ioat, ioat->head)->dma; dmadesc = &desc->bus_dmadesc; dmadesc->callback_fn = NULL; dmadesc->callback_arg = NULL; hw_desc->u.control_raw = 0; hw_desc->u.control_generic.op = IOAT_OP_COPY; hw_desc->u.control_generic.completion_update = 1; hw_desc->size = 8; hw_desc->src_addr = 0; hw_desc->dest_addr = 0; hw_desc->u.control.null = 1; ioat_submit_single(ioat); ioat_release(&ioat->dmaengine); for (i = 0; i < 100; i++) { DELAY(1); status = ioat_get_chansts(ioat); if (is_ioat_idle(status)) return (0); } chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); ioat_log_message(0, "could not start channel: " "status = %#jx error = %b\n", (uintmax_t)status, (int)chanerr, IOAT_CHANERR_STR); return (ENXIO); } /* * Initialize Hardware */ static int ioat3_attach(device_t device) { struct ioat_softc *ioat; struct ioat_descriptor *ring; struct ioat_dma_hw_descriptor *dma_hw_desc; void *hw_desc; size_t ringsz; int i, num_descriptors; int error; uint8_t xfercap; error = 0; ioat = DEVICE2SOFTC(device); ioat->capabilities = ioat_read_dmacapability(ioat); ioat_log_message(0, "Capabilities: %b\n", (int)ioat->capabilities, 
IOAT_DMACAP_STR); xfercap = ioat_read_xfercap(ioat); ioat->max_xfer_size = 1 << xfercap; ioat->intrdelay_supported = (ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_SUPPORTED) != 0; if (ioat->intrdelay_supported) ioat->intrdelay_max = IOAT_INTRDELAY_US_MASK; /* TODO: need to check DCA here if we ever do XOR/PQ */ mtx_init(&ioat->submit_lock, "ioat_submit", NULL, MTX_DEF); mtx_init(&ioat->cleanup_lock, "ioat_cleanup", NULL, MTX_DEF); callout_init(&ioat->poll_timer, 1); TASK_INIT(&ioat->reset_task, 0, ioat_reset_hw_task, ioat); /* Establish lock order for Witness */ mtx_lock(&ioat->submit_lock); mtx_lock(&ioat->cleanup_lock); mtx_unlock(&ioat->cleanup_lock); mtx_unlock(&ioat->submit_lock); ioat->is_submitter_processing = FALSE; ioat->is_completion_pending = FALSE; ioat->is_reset_pending = FALSE; ioat->is_channel_running = FALSE; bus_dma_tag_create(bus_get_dma_tag(ioat->device), sizeof(uint64_t), 0x0, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(uint64_t), 1, sizeof(uint64_t), 0, NULL, NULL, &ioat->comp_update_tag); error = bus_dmamem_alloc(ioat->comp_update_tag, (void **)&ioat->comp_update, BUS_DMA_ZERO, &ioat->comp_update_map); if (ioat->comp_update == NULL) return (ENOMEM); error = bus_dmamap_load(ioat->comp_update_tag, ioat->comp_update_map, ioat->comp_update, sizeof(uint64_t), ioat_comp_update_map, ioat, 0); if (error != 0) return (error); ioat->ring_size_order = g_ioat_ring_order; num_descriptors = 1 << ioat->ring_size_order; ringsz = sizeof(struct ioat_dma_hw_descriptor) * num_descriptors; error = bus_dma_tag_create(bus_get_dma_tag(ioat->device), 2 * 1024 * 1024, 0x0, (bus_addr_t)BUS_SPACE_MAXADDR_40BIT, BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1, ringsz, 0, NULL, NULL, &ioat->hw_desc_tag); if (error != 0) return (error); error = bus_dmamem_alloc(ioat->hw_desc_tag, &hw_desc, BUS_DMA_ZERO | BUS_DMA_WAITOK, &ioat->hw_desc_map); if (error != 0) return (error); error = bus_dmamap_load(ioat->hw_desc_tag, ioat->hw_desc_map, hw_desc, ringsz, ioat_dmamap_cb, &ioat->hw_desc_bus_addr, BUS_DMA_WAITOK); if (error) return (error); ioat->hw_desc_ring = hw_desc; ioat->ring = malloc(num_descriptors * sizeof(*ring), M_IOAT, M_ZERO | M_WAITOK); ring = ioat->ring; for (i = 0; i < num_descriptors; i++) { memset(&ring[i].bus_dmadesc, 0, sizeof(ring[i].bus_dmadesc)); ring[i].id = i; } for (i = 0; i < num_descriptors; i++) { dma_hw_desc = &ioat->hw_desc_ring[i].dma; dma_hw_desc->next = RING_PHYS_ADDR(ioat, i + 1); } ioat->head = ioat->hw_head = 0; ioat->tail = 0; ioat->last_seen = 0; *ioat->comp_update = 0; return (0); } static int ioat_map_pci_bar(struct ioat_softc *ioat) { ioat->pci_resource_id = PCIR_BAR(0); ioat->pci_resource = bus_alloc_resource_any(ioat->device, SYS_RES_MEMORY, &ioat->pci_resource_id, RF_ACTIVE); if (ioat->pci_resource == NULL) { ioat_log_message(0, "unable to allocate pci resource\n"); return (ENODEV); } ioat->pci_bus_tag = rman_get_bustag(ioat->pci_resource); ioat->pci_bus_handle = rman_get_bushandle(ioat->pci_resource); return (0); } static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error) { struct ioat_softc *ioat = arg; KASSERT(error == 0, ("%s: error:%d", __func__, error)); ioat->comp_update_bus_addr = seg[0].ds_addr; } static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *baddr; KASSERT(error == 0, ("%s: error:%d", __func__, error)); baddr = arg; *baddr = segs->ds_addr; } /* * Interrupt setup and handlers */ static int ioat_setup_intr(struct ioat_softc *ioat) { uint32_t num_vectors; 
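	/*
	 * Editorial note: ioat_setup_intr() tries a single MSI-X vector
	 * first (pci_msix_count() >= 1, pci_alloc_msix() with
	 * num_vectors = 1) and otherwise falls back to a shared legacy
	 * IRQ (rid 0, RF_SHAREABLE); the hw.ioat.force_legacy_interrupts
	 * tunable skips MSI-X entirely.
	 */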
int error; boolean_t use_msix; boolean_t force_legacy_interrupts; use_msix = FALSE; force_legacy_interrupts = FALSE; if (!g_force_legacy_interrupts && pci_msix_count(ioat->device) >= 1) { num_vectors = 1; pci_alloc_msix(ioat->device, &num_vectors); if (num_vectors == 1) use_msix = TRUE; } if (use_msix) { ioat->rid = 1; ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ, &ioat->rid, RF_ACTIVE); } else { ioat->rid = 0; ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ, &ioat->rid, RF_SHAREABLE | RF_ACTIVE); } if (ioat->res == NULL) { ioat_log_message(0, "bus_alloc_resource failed\n"); return (ENOMEM); } ioat->tag = NULL; error = bus_setup_intr(ioat->device, ioat->res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ioat_interrupt_handler, ioat, &ioat->tag); if (error != 0) { ioat_log_message(0, "bus_setup_intr failed\n"); return (error); } ioat_write_intrctrl(ioat, IOAT_INTRCTRL_MASTER_INT_EN); return (0); } static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat) { u_int32_t pciid; pciid = pci_get_devid(ioat->device); switch (pciid) { /* BWD: */ case 0x0c508086: case 0x0c518086: case 0x0c528086: case 0x0c538086: /* BDXDE: */ case 0x6f508086: case 0x6f518086: case 0x6f528086: case 0x6f538086: return (TRUE); } return (FALSE); } static void ioat_interrupt_handler(void *arg) { struct ioat_softc *ioat = arg; ioat->stats.interrupts++; ioat_process_events(ioat); } static int chanerr_to_errno(uint32_t chanerr) { if (chanerr == 0) return (0); if ((chanerr & (IOAT_CHANERR_XSADDERR | IOAT_CHANERR_XDADDERR)) != 0) return (EFAULT); if ((chanerr & (IOAT_CHANERR_RDERR | IOAT_CHANERR_WDERR)) != 0) return (EIO); /* This one is probably our fault: */ if ((chanerr & IOAT_CHANERR_NDADDERR) != 0) return (EIO); return (EIO); } static void ioat_process_events(struct ioat_softc *ioat) { struct ioat_descriptor *desc; struct bus_dmadesc *dmadesc; uint64_t comp_update, status; uint32_t completed, chanerr; boolean_t pending; int error; mtx_lock(&ioat->cleanup_lock); /* * Don't run while the hardware is being reset. Reset is responsible * for blocking new work and draining & completing existing work, so * there is nothing to do until new work is queued after reset anyway. */ if (ioat->resetting_cleanup) { mtx_unlock(&ioat->cleanup_lock); return; } completed = 0; comp_update = *ioat->comp_update; status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK; if (status < ioat->hw_desc_bus_addr || status >= ioat->hw_desc_bus_addr + (1 << ioat->ring_size_order) * sizeof(struct ioat_generic_hw_descriptor)) panic("Bogus completion address %jx (channel %u)", (uintmax_t)status, ioat->chan_idx); if (status == ioat->last_seen) { /* * If we landed in process_events and nothing has been * completed, check for a timeout due to channel halt. 
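	 * A halted channel stops advancing the completion address, so the CHANSTS poll at the bottom of this function is what detects that case and kicks off recovery.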
*/ goto out; } CTR4(KTR_IOAT, "%s channel=%u hw_status=0x%lx last_seen=0x%lx", __func__, ioat->chan_idx, comp_update, ioat->last_seen); while (RING_PHYS_ADDR(ioat, ioat->tail - 1) != status) { desc = ioat_get_ring_entry(ioat, ioat->tail); dmadesc = &desc->bus_dmadesc; CTR5(KTR_IOAT, "channel=%u completing desc idx %u (%p) ok cb %p(%p)", ioat->chan_idx, ioat->tail, dmadesc, dmadesc->callback_fn, dmadesc->callback_arg); if (dmadesc->callback_fn != NULL) dmadesc->callback_fn(dmadesc->callback_arg, 0); completed++; ioat->tail++; } CTR5(KTR_IOAT, "%s channel=%u head=%u tail=%u active=%u", __func__, ioat->chan_idx, ioat->head, ioat->tail, ioat_get_active(ioat)); if (completed != 0) { ioat->last_seen = RING_PHYS_ADDR(ioat, ioat->tail - 1); ioat->stats.descriptors_processed += completed; } out: ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); /* Perform a racy check first; only take the locks if it passes. */ pending = (ioat_get_active(ioat) != 0); if (!pending && ioat->is_completion_pending) { mtx_unlock(&ioat->cleanup_lock); mtx_lock(&ioat->submit_lock); mtx_lock(&ioat->cleanup_lock); pending = (ioat_get_active(ioat) != 0); if (!pending && ioat->is_completion_pending) { ioat->is_completion_pending = FALSE; callout_stop(&ioat->poll_timer); } mtx_unlock(&ioat->submit_lock); } mtx_unlock(&ioat->cleanup_lock); if (pending) callout_reset(&ioat->poll_timer, 1, ioat_poll_timer_callback, ioat); if (completed != 0) { ioat_putn(ioat, completed, IOAT_ACTIVE_DESCR_REF); wakeup(&ioat->tail); } /* * The device doesn't seem to reliably push suspend/halt statuses to * the channel completion memory address, so poll the device register * here. */ comp_update = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS; if (!is_ioat_halted(comp_update) && !is_ioat_suspended(comp_update)) return; ioat->stats.channel_halts++; /* * Fatal programming error on this DMA channel. Flush any outstanding * work with error status and restart the engine. */ mtx_lock(&ioat->submit_lock); mtx_lock(&ioat->cleanup_lock); ioat->quiescing = TRUE; /* * This is safe to do here because we have both locks and the submit * queue is quiesced. We know that we will drain all outstanding * events, so ioat_reset_hw can't deadlock. It is necessary to * protect other ioat_process_event threads from racing ioat_reset_hw, * reading an indeterminate hw state, and attempting to continue * issuing completions. 
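	 * (Those threads check the resetting_cleanup flag, set just below, at the top of this function and return early.)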
*/ ioat->resetting_cleanup = TRUE; chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); if (1 <= g_ioat_debug_level) ioat_halted_debug(ioat, chanerr); ioat->stats.last_halt_chanerr = chanerr; while (ioat_get_active(ioat) > 0) { desc = ioat_get_ring_entry(ioat, ioat->tail); dmadesc = &desc->bus_dmadesc; CTR5(KTR_IOAT, "channel=%u completing desc idx %u (%p) err cb %p(%p)", ioat->chan_idx, ioat->tail, dmadesc, dmadesc->callback_fn, dmadesc->callback_arg); if (dmadesc->callback_fn != NULL) dmadesc->callback_fn(dmadesc->callback_arg, chanerr_to_errno(chanerr)); ioat_putn_locked(ioat, 1, IOAT_ACTIVE_DESCR_REF); ioat->tail++; ioat->stats.descriptors_processed++; ioat->stats.descriptors_error++; } CTR5(KTR_IOAT, "%s channel=%u head=%u tail=%u active=%u", __func__, ioat->chan_idx, ioat->head, ioat->tail, ioat_get_active(ioat)); if (ioat->is_completion_pending) { ioat->is_completion_pending = FALSE; callout_stop(&ioat->poll_timer); } /* Clear error status */ ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr); mtx_unlock(&ioat->cleanup_lock); mtx_unlock(&ioat->submit_lock); ioat_log_message(0, "Resetting channel to recover from error\n"); error = taskqueue_enqueue(taskqueue_thread, &ioat->reset_task); KASSERT(error == 0, ("%s: taskqueue_enqueue failed: %d", __func__, error)); } static void ioat_reset_hw_task(void *ctx, int pending __unused) { struct ioat_softc *ioat; int error; ioat = ctx; ioat_log_message(1, "%s: Resetting channel\n", __func__); error = ioat_reset_hw(ioat); KASSERT(error == 0, ("%s: reset failed: %d", __func__, error)); (void)error; } /* * User API functions */ unsigned ioat_get_nchannels(void) { return (ioat_channel_index); } bus_dmaengine_t ioat_get_dmaengine(uint32_t index, int flags) { struct ioat_softc *ioat; KASSERT((flags & ~(M_NOWAIT | M_WAITOK)) == 0, ("invalid flags: 0x%08x", flags)); KASSERT((flags & (M_NOWAIT | M_WAITOK)) != (M_NOWAIT | M_WAITOK), ("invalid wait | nowait")); if (index >= ioat_channel_index) return (NULL); ioat = ioat_channel[index]; if (ioat == NULL || ioat->destroying) return (NULL); if (ioat->quiescing) { if ((flags & M_NOWAIT) != 0) return (NULL); mtx_lock(IOAT_REFLK); while (ioat->quiescing && !ioat->destroying) msleep(&ioat->quiescing, IOAT_REFLK, 0, "getdma", 0); mtx_unlock(IOAT_REFLK); if (ioat->destroying) return (NULL); } /* * There's a race here between the quiescing check and HW reset or * module destroy. 
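	 * Taking the DMAENGINE reference below narrows that window: reset and destroy both drain outstanding references before proceeding (see ioat_drain_locked()).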
*/ return (&ioat_get(ioat, IOAT_DMAENGINE_REF)->dmaengine); } void ioat_put_dmaengine(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); ioat_put(ioat, IOAT_DMAENGINE_REF); } int ioat_get_hwversion(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->version); } size_t ioat_get_max_io_size(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->max_xfer_size); } uint32_t ioat_get_capabilities(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->capabilities); } int ioat_set_interrupt_coalesce(bus_dmaengine_t dmaengine, uint16_t delay) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); if (!ioat->intrdelay_supported) return (ENODEV); if (delay > ioat->intrdelay_max) return (ERANGE); ioat_write_2(ioat, IOAT_INTRDELAY_OFFSET, delay); ioat->cached_intrdelay = ioat_read_2(ioat, IOAT_INTRDELAY_OFFSET) & IOAT_INTRDELAY_US_MASK; return (0); } uint16_t ioat_get_max_coalesce_period(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); return (ioat->intrdelay_max); } void ioat_acquire(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); mtx_lock(&ioat->submit_lock); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); ioat->acq_head = ioat->head; } int ioat_acquire_reserve(bus_dmaengine_t dmaengine, unsigned n, int mflags) { struct ioat_softc *ioat; int error; ioat = to_ioat_softc(dmaengine); ioat_acquire(dmaengine); error = ioat_reserve_space(ioat, n, mflags); if (error != 0) ioat_release(dmaengine); return (error); } void ioat_release(bus_dmaengine_t dmaengine) { struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); CTR4(KTR_IOAT, "%s channel=%u dispatch1 hw_head=%u head=%u", __func__, ioat->chan_idx, ioat->hw_head & UINT16_MAX, ioat->head); KFAIL_POINT_CODE(DEBUG_FP, ioat_release, /* do nothing */); CTR4(KTR_IOAT, "%s channel=%u dispatch2 hw_head=%u head=%u", __func__, ioat->chan_idx, ioat->hw_head & UINT16_MAX, ioat->head); if (ioat->acq_head != ioat->head) { ioat_write_2(ioat, IOAT_DMACOUNT_OFFSET, (uint16_t)ioat->hw_head); if (!ioat->is_completion_pending) { ioat->is_completion_pending = TRUE; callout_reset(&ioat->poll_timer, 1, ioat_poll_timer_callback, ioat); } } mtx_unlock(&ioat->submit_lock); } static struct ioat_descriptor * ioat_op_generic(struct ioat_softc *ioat, uint8_t op, uint32_t size, uint64_t src, uint64_t dst, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_generic_hw_descriptor *hw_desc; struct ioat_descriptor *desc; int mflags; mtx_assert(&ioat->submit_lock, MA_OWNED); KASSERT((flags & ~_DMA_GENERIC_FLAGS) == 0, ("Unrecognized flag(s): %#x", flags & ~_DMA_GENERIC_FLAGS)); if ((flags & DMA_NO_WAIT) != 0) mflags = M_NOWAIT; else mflags = M_WAITOK; if (size > ioat->max_xfer_size) { ioat_log_message(0, "%s: max_xfer_size = %d, requested = %u\n", __func__, ioat->max_xfer_size, (unsigned)size); return (NULL); } if (ioat_reserve_space(ioat, 1, mflags) != 0) return (NULL); desc = ioat_get_ring_entry(ioat, ioat->head); hw_desc = &ioat_get_descriptor(ioat, ioat->head)->generic; hw_desc->u.control_raw = 0; hw_desc->u.control_generic.op = op; hw_desc->u.control_generic.completion_update = 1; if ((flags & DMA_INT_EN) != 0) hw_desc->u.control_generic.int_enable = 1; if ((flags & DMA_FENCE) != 0) hw_desc->u.control_generic.fence = 1; hw_desc->size = size; hw_desc->src_addr = src; 
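	/* Note that 'src' is not always a bus address: for IOAT_OP_FILL, ioat_blockfill() passes its 64-bit fill pattern through this parameter, e.g. ioat_op_generic(ioat, IOAT_OP_FILL, len, fillpattern, dst, ...). */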
hw_desc->dest_addr = dst; desc->bus_dmadesc.callback_fn = callback_fn; desc->bus_dmadesc.callback_arg = callback_arg; return (desc); } struct bus_dmadesc * ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); desc = ioat_op_generic(ioat, IOAT_OP_COPY, 8, 0, 0, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma; hw_desc->u.control.null = 1; ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); if (((src | dst) & (0xffffull << 48)) != 0) { ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n", __func__); return (NULL); } desc = ioat_op_generic(ioat, IOAT_OP_COPY, len, src, dst, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma; if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); CTR6(KTR_IOAT, "%s channel=%u desc=%p dest=%lx src=%lx len=%lx", __func__, ioat->chan_idx, &desc->bus_dmadesc, dst, src, len); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_copy_8k_aligned(bus_dmaengine_t dmaengine, bus_addr_t dst1, bus_addr_t dst2, bus_addr_t src1, bus_addr_t src2, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_dma_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); if (((src1 | src2 | dst1 | dst2) & (0xffffull << 48)) != 0) { ioat_log_message(0, "%s: High 16 bits of src/dst invalid\n", __func__); return (NULL); } if (((src1 | src2 | dst1 | dst2) & PAGE_MASK) != 0) { ioat_log_message(0, "%s: Addresses must be page-aligned\n", __func__); return (NULL); } desc = ioat_op_generic(ioat, IOAT_OP_COPY, 2 * PAGE_SIZE, src1, dst1, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->dma; if (src2 != src1 + PAGE_SIZE) { hw_desc->u.control.src_page_break = 1; hw_desc->next_src_addr = src2; } if (dst2 != dst1 + PAGE_SIZE) { hw_desc->u.control.dest_page_break = 1; hw_desc->next_dest_addr = dst2; } if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_copy_crc(bus_dmaengine_t dmaengine, bus_addr_t dst, bus_addr_t src, bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_crc32_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; uint32_t teststore; uint8_t op; ioat = to_ioat_softc(dmaengine); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); if ((ioat->capabilities & IOAT_DMACAP_MOVECRC) == 0) { ioat_log_message(0, "%s: Device lacks MOVECRC capability\n", __func__); return (NULL); } if (((src | dst) & (0xffffffull << 40)) != 0) { ioat_log_message(0, "%s: High 24 bits of src/dst invalid\n", __func__); return (NULL); } teststore = (flags & 
_DMA_CRC_TESTSTORE); if (teststore == _DMA_CRC_TESTSTORE) { ioat_log_message(0, "%s: TEST and STORE invalid\n", __func__); return (NULL); } if (teststore == 0 && (flags & DMA_CRC_INLINE) != 0) { ioat_log_message(0, "%s: INLINE invalid without TEST or STORE\n", __func__); return (NULL); } switch (teststore) { case DMA_CRC_STORE: op = IOAT_OP_MOVECRC_STORE; break; case DMA_CRC_TEST: op = IOAT_OP_MOVECRC_TEST; break; default: KASSERT(teststore == 0, ("bogus")); op = IOAT_OP_MOVECRC; break; } if ((flags & DMA_CRC_INLINE) == 0 && (crcptr & (0xffffffull << 40)) != 0) { ioat_log_message(0, "%s: High 24 bits of crcptr invalid\n", __func__); return (NULL); } desc = ioat_op_generic(ioat, op, len, src, dst, callback_fn, callback_arg, flags & ~_DMA_CRC_FLAGS); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->crc32; if ((flags & DMA_CRC_INLINE) == 0) hw_desc->crc_address = crcptr; else hw_desc->u.control.crc_location = 1; if (initialseed != NULL) { hw_desc->u.control.use_seed = 1; hw_desc->seed = *initialseed; } if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_crc(bus_dmaengine_t dmaengine, bus_addr_t src, bus_size_t len, uint32_t *initialseed, bus_addr_t crcptr, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_crc32_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; uint32_t teststore; uint8_t op; ioat = to_ioat_softc(dmaengine); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); if ((ioat->capabilities & IOAT_DMACAP_CRC) == 0) { ioat_log_message(0, "%s: Device lacks CRC capability\n", __func__); return (NULL); } if ((src & (0xffffffull << 40)) != 0) { ioat_log_message(0, "%s: High 24 bits of src invalid\n", __func__); return (NULL); } teststore = (flags & _DMA_CRC_TESTSTORE); if (teststore == _DMA_CRC_TESTSTORE) { ioat_log_message(0, "%s: TEST and STORE invalid\n", __func__); return (NULL); } if (teststore == 0 && (flags & DMA_CRC_INLINE) != 0) { ioat_log_message(0, "%s: INLINE invalid without TEST or STORE\n", __func__); return (NULL); } switch (teststore) { case DMA_CRC_STORE: op = IOAT_OP_CRC_STORE; break; case DMA_CRC_TEST: op = IOAT_OP_CRC_TEST; break; default: KASSERT(teststore == 0, ("bogus")); op = IOAT_OP_CRC; break; } if ((flags & DMA_CRC_INLINE) == 0 && (crcptr & (0xffffffull << 40)) != 0) { ioat_log_message(0, "%s: High 24 bits of crcptr invalid\n", __func__); return (NULL); } desc = ioat_op_generic(ioat, op, len, src, 0, callback_fn, callback_arg, flags & ~_DMA_CRC_FLAGS); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->crc32; if ((flags & DMA_CRC_INLINE) == 0) hw_desc->crc_address = crcptr; else hw_desc->u.control.crc_location = 1; if (initialseed != NULL) { hw_desc->u.control.use_seed = 1; hw_desc->seed = *initialseed; } if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } struct bus_dmadesc * ioat_blockfill(bus_dmaengine_t dmaengine, bus_addr_t dst, uint64_t fillpattern, bus_size_t len, bus_dmaengine_callback_t callback_fn, void *callback_arg, uint32_t flags) { struct ioat_fill_hw_descriptor *hw_desc; struct ioat_descriptor *desc; struct ioat_softc *ioat; ioat = to_ioat_softc(dmaengine); CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); if ((ioat->capabilities & IOAT_DMACAP_BFILL) == 0) { ioat_log_message(0, "%s: Device lacks BFILL capability\n", __func__); return (NULL); } if ((dst & 
(0xffffull << 48)) != 0) { ioat_log_message(0, "%s: High 16 bits of dst invalid\n", __func__); return (NULL); } desc = ioat_op_generic(ioat, IOAT_OP_FILL, len, fillpattern, dst, callback_fn, callback_arg, flags); if (desc == NULL) return (NULL); hw_desc = &ioat_get_descriptor(ioat, desc->id)->fill; if (g_ioat_debug_level >= 3) dump_descriptor(hw_desc); ioat_submit_single(ioat); return (&desc->bus_dmadesc); } /* * Ring Management */ static inline uint32_t ioat_get_active(struct ioat_softc *ioat) { return ((ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1)); } static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat) { return ((1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1); } /* * Reserves space in this IOAT descriptor ring by ensuring enough slots remain * for 'num_descs'. * * If mflags contains M_WAITOK, blocks until enough space is available. * * Returns zero on success, or an errno on error. If num_descs is beyond the * maximum ring size, returns EINVAL; if allocation would block and mflags * contains M_NOWAIT, returns EAGAIN. * * Must be called with the submit_lock held; returns with the lock held. The * lock may be dropped to allocate the ring. * * (The submit_lock is needed to add any entries to the ring, so callers are * assured enough room is available.) */ static int ioat_reserve_space(struct ioat_softc *ioat, uint32_t num_descs, int mflags) { boolean_t dug; int error; mtx_assert(&ioat->submit_lock, MA_OWNED); error = 0; dug = FALSE; if (num_descs < 1 || num_descs >= (1 << ioat->ring_size_order)) { error = EINVAL; goto out; } for (;;) { if (ioat->quiescing) { error = ENXIO; goto out; } if (ioat_get_ring_space(ioat) >= num_descs) goto out; CTR3(KTR_IOAT, "%s channel=%u starved (%u)", __func__, ioat->chan_idx, num_descs); if (!dug && !ioat->is_submitter_processing) { ioat->is_submitter_processing = TRUE; mtx_unlock(&ioat->submit_lock); CTR2(KTR_IOAT, "%s channel=%u attempting to process events", __func__, ioat->chan_idx); ioat_process_events(ioat); mtx_lock(&ioat->submit_lock); dug = TRUE; KASSERT(ioat->is_submitter_processing == TRUE, ("is_submitter_processing")); ioat->is_submitter_processing = FALSE; wakeup(&ioat->tail); continue; } if ((mflags & M_WAITOK) == 0) { error = EAGAIN; break; } CTR2(KTR_IOAT, "%s channel=%u blocking on completions", __func__, ioat->chan_idx); msleep(&ioat->tail, &ioat->submit_lock, 0, "ioat_full", 0); continue; } out: mtx_assert(&ioat->submit_lock, MA_OWNED); KASSERT(!ioat->quiescing || error == ENXIO, ("reserved during quiesce")); return (error); } static void ioat_free_ring(struct ioat_softc *ioat, uint32_t size, struct ioat_descriptor *ring) { free(ring, M_IOAT); } static struct ioat_descriptor * ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index) { return (&ioat->ring[index % (1 << ioat->ring_size_order)]); } static union ioat_hw_descriptor * ioat_get_descriptor(struct ioat_softc *ioat, uint32_t index) { return (&ioat->hw_desc_ring[index % (1 << ioat->ring_size_order)]); } static void ioat_halted_debug(struct ioat_softc *ioat, uint32_t chanerr) { union ioat_hw_descriptor *desc; ioat_log_message(0, "Channel halted (%b)\n", (int)chanerr, IOAT_CHANERR_STR); if (chanerr == 0) return; mtx_assert(&ioat->cleanup_lock, MA_OWNED); desc = ioat_get_descriptor(ioat, ioat->tail + 0); dump_descriptor(desc); desc = ioat_get_descriptor(ioat, ioat->tail + 1); dump_descriptor(desc); } static void ioat_poll_timer_callback(void *arg) { struct ioat_softc *ioat; ioat = arg; ioat_log_message(3, "%s\n", __func__);
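	/* No explicit reschedule is needed here: ioat_process_events() re-arms this callout itself whenever descriptors remain active. */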
ioat_process_events(ioat); } /* * Support Functions */ static void ioat_submit_single(struct ioat_softc *ioat) { mtx_assert(&ioat->submit_lock, MA_OWNED); ioat_get(ioat, IOAT_ACTIVE_DESCR_REF); atomic_add_rel_int(&ioat->head, 1); atomic_add_rel_int(&ioat->hw_head, 1); CTR5(KTR_IOAT, "%s channel=%u head=%u hw_head=%u tail=%u", __func__, ioat->chan_idx, ioat->head, ioat->hw_head & UINT16_MAX, ioat->tail); ioat->stats.descriptors_submitted++; } static int ioat_reset_hw(struct ioat_softc *ioat) { uint64_t status; uint32_t chanerr; unsigned timeout; int error; CTR2(KTR_IOAT, "%s channel=%u", __func__, ioat->chan_idx); mtx_lock(IOAT_REFLK); while (ioat->resetting && !ioat->destroying) msleep(&ioat->resetting, IOAT_REFLK, 0, "IRH_drain", 0); if (ioat->destroying) { mtx_unlock(IOAT_REFLK); return (ENXIO); } ioat->resetting = TRUE; ioat->quiescing = TRUE; ioat_drain_locked(ioat); mtx_unlock(IOAT_REFLK); /* * Suspend ioat_process_events while the hardware and softc are in an * indeterminate state. */ mtx_lock(&ioat->cleanup_lock); ioat->resetting_cleanup = TRUE; mtx_unlock(&ioat->cleanup_lock); CTR2(KTR_IOAT, "%s channel=%u quiesced and drained", __func__, ioat->chan_idx); status = ioat_get_chansts(ioat); if (is_ioat_active(status) || is_ioat_idle(status)) ioat_suspend(ioat); /* Wait at most 20 ms */ for (timeout = 0; (is_ioat_active(status) || is_ioat_idle(status)) && timeout < 20; timeout++) { DELAY(1000); status = ioat_get_chansts(ioat); } if (timeout == 20) { error = ETIMEDOUT; goto out; } KASSERT(ioat_get_active(ioat) == 0, ("active after quiesce")); chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr); CTR2(KTR_IOAT, "%s channel=%u hardware suspended", __func__, ioat->chan_idx); /* * IOAT v3 workaround - write CHANERRMSK_INT with 3E07h to mask out * errors that can cause stability issues for IOAT v3. */ pci_write_config(ioat->device, IOAT_CFG_CHANERRMASK_INT_OFFSET, 0x3e07, 4); chanerr = pci_read_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, 4); pci_write_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, chanerr, 4); /* * BDXDE and BWD models reset MSI-X registers on device reset. * Save/restore their contents manually. */ if (ioat_model_resets_msix(ioat)) { ioat_log_message(1, "device resets MSI-X registers; saving\n"); pci_save_state(ioat->device); } ioat_reset(ioat); CTR2(KTR_IOAT, "%s channel=%u hardware reset", __func__, ioat->chan_idx); /* Wait at most 20 ms */ for (timeout = 0; ioat_reset_pending(ioat) && timeout < 20; timeout++) DELAY(1000); if (timeout == 20) { error = ETIMEDOUT; goto out; } if (ioat_model_resets_msix(ioat)) { ioat_log_message(1, "device resets MSI-X registers; restoring\n"); pci_restore_state(ioat->device); } /* Reset attempts to return the hardware to "halted." */ status = ioat_get_chansts(ioat); if (is_ioat_active(status) || is_ioat_idle(status)) { /* So this really shouldn't happen... */ ioat_log_message(0, "Device is active after a reset?\n"); ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); error = 0; goto out; } chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET); if (chanerr != 0) { mtx_lock(&ioat->cleanup_lock); ioat_halted_debug(ioat, chanerr); mtx_unlock(&ioat->cleanup_lock); error = EIO; goto out; } /* * Bring device back online after reset. Writing CHAINADDR brings the * device back to active. * * The internal ring counter resets to zero, so we have to start over * at zero as well.
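	 * (head, hw_head, and tail are cleared together below so that the DMACOUNT register and the software indices stay in agreement.)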
*/ ioat->tail = ioat->head = ioat->hw_head = 0; ioat->last_seen = 0; *ioat->comp_update = 0; KASSERT(!ioat->is_completion_pending, ("bogus completion_pending")); ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN); ioat_write_chancmp(ioat, ioat->comp_update_bus_addr); ioat_write_chainaddr(ioat, RING_PHYS_ADDR(ioat, 0)); error = 0; CTR2(KTR_IOAT, "%s channel=%u configured channel", __func__, ioat->chan_idx); out: /* Enqueues a null operation and ensures it completes. */ if (error == 0) { error = ioat_start_channel(ioat); CTR2(KTR_IOAT, "%s channel=%u started channel", __func__, ioat->chan_idx); } /* * Resume completions now that ring state is consistent. */ mtx_lock(&ioat->cleanup_lock); ioat->resetting_cleanup = FALSE; mtx_unlock(&ioat->cleanup_lock); /* Unblock submission of new work */ mtx_lock(IOAT_REFLK); ioat->quiescing = FALSE; wakeup(&ioat->quiescing); ioat->resetting = FALSE; wakeup(&ioat->resetting); if (ioat->is_completion_pending) callout_reset(&ioat->poll_timer, 1, ioat_poll_timer_callback, ioat); CTR2(KTR_IOAT, "%s channel=%u reset done", __func__, ioat->chan_idx); mtx_unlock(IOAT_REFLK); return (error); } static int sysctl_handle_chansts(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; struct sbuf sb; uint64_t status; int error; ioat = arg1; status = ioat_get_chansts(ioat) & IOAT_CHANSTS_STATUS; sbuf_new_for_sysctl(&sb, NULL, 256, req); switch (status) { case IOAT_CHANSTS_ACTIVE: sbuf_printf(&sb, "ACTIVE"); break; case IOAT_CHANSTS_IDLE: sbuf_printf(&sb, "IDLE"); break; case IOAT_CHANSTS_SUSPENDED: sbuf_printf(&sb, "SUSPENDED"); break; case IOAT_CHANSTS_HALTED: sbuf_printf(&sb, "HALTED"); break; case IOAT_CHANSTS_ARMED: sbuf_printf(&sb, "ARMED"); break; default: sbuf_printf(&sb, "UNKNOWN"); break; } error = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0 || req->newptr == NULL) return (error); return (EINVAL); } static int sysctl_handle_dpi(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; struct sbuf sb; #define PRECISION "1" const uintmax_t factor = 10; uintmax_t rate; int error; ioat = arg1; sbuf_new_for_sysctl(&sb, NULL, 16, req); if (ioat->stats.interrupts == 0) { sbuf_printf(&sb, "NaN"); goto out; } rate = ioat->stats.descriptors_processed * factor / ioat->stats.interrupts; sbuf_printf(&sb, "%ju.%." 
PRECISION "ju", rate / factor, rate % factor); #undef PRECISION out: error = sbuf_finish(&sb); sbuf_delete(&sb); if (error != 0 || req->newptr == NULL) return (error); return (EINVAL); } static int sysctl_handle_reset(SYSCTL_HANDLER_ARGS) { struct ioat_softc *ioat; int error, arg; ioat = arg1; arg = 0; error = SYSCTL_OUT(req, &arg, sizeof(arg)); if (error != 0 || req->newptr == NULL) return (error); error = SYSCTL_IN(req, &arg, sizeof(arg)); if (error != 0) return (error); if (arg != 0) error = ioat_reset_hw(ioat); return (error); } static void dump_descriptor(void *hw_desc) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 8; j++) printf("%08x ", ((uint32_t *)hw_desc)[i * 8 + j]); printf("\n"); } } static void ioat_setup_sysctl(device_t device) { struct sysctl_oid_list *par, *statpar, *state, *hammer; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree, *tmp; struct ioat_softc *ioat; ioat = DEVICE2SOFTC(device); ctx = device_get_sysctl_ctx(device); tree = device_get_sysctl_tree(device); par = SYSCTL_CHILDREN(tree); SYSCTL_ADD_INT(ctx, par, OID_AUTO, "version", CTLFLAG_RD, &ioat->version, 0, "HW version (0xMM form)"); SYSCTL_ADD_UINT(ctx, par, OID_AUTO, "max_xfer_size", CTLFLAG_RD, &ioat->max_xfer_size, 0, "HW maximum transfer size"); SYSCTL_ADD_INT(ctx, par, OID_AUTO, "intrdelay_supported", CTLFLAG_RD, &ioat->intrdelay_supported, 0, "Is INTRDELAY supported"); SYSCTL_ADD_U16(ctx, par, OID_AUTO, "intrdelay_max", CTLFLAG_RD, &ioat->intrdelay_max, 0, "Maximum configurable INTRDELAY on this channel (microseconds)"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "state", CTLFLAG_RD, NULL, "IOAT channel internal state"); state = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "ring_size_order", CTLFLAG_RD, &ioat->ring_size_order, 0, "SW descriptor ring size order"); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "head", CTLFLAG_RD, &ioat->head, 0, "SW descriptor head pointer index"); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "tail", CTLFLAG_RD, &ioat->tail, 0, "SW descriptor tail pointer index"); SYSCTL_ADD_UINT(ctx, state, OID_AUTO, "hw_head", CTLFLAG_RD, &ioat->hw_head, 0, "HW DMACOUNT"); SYSCTL_ADD_UQUAD(ctx, state, OID_AUTO, "last_completion", CTLFLAG_RD, ioat->comp_update, "HW addr of last completion"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_submitter_processing", CTLFLAG_RD, &ioat->is_submitter_processing, 0, "submitter processing"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_completion_pending", CTLFLAG_RD, &ioat->is_completion_pending, 0, "completion pending"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_reset_pending", CTLFLAG_RD, &ioat->is_reset_pending, 0, "reset pending"); SYSCTL_ADD_INT(ctx, state, OID_AUTO, "is_channel_running", CTLFLAG_RD, &ioat->is_channel_running, 0, "channel running"); SYSCTL_ADD_PROC(ctx, state, OID_AUTO, "chansts", CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_chansts, "A", "String of the channel status"); SYSCTL_ADD_U16(ctx, state, OID_AUTO, "intrdelay", CTLFLAG_RD, &ioat->cached_intrdelay, 0, "Current INTRDELAY on this channel (cached, microseconds)"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "hammer", CTLFLAG_RD, NULL, "Big hammers (mostly for testing)"); hammer = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_PROC(ctx, hammer, OID_AUTO, "force_hw_reset", CTLTYPE_INT | CTLFLAG_RW, ioat, 0, sysctl_handle_reset, "I", "Set to non-zero to reset the hardware"); tmp = SYSCTL_ADD_NODE(ctx, par, OID_AUTO, "stats", CTLFLAG_RD, NULL, "IOAT channel statistics"); statpar = SYSCTL_CHILDREN(tmp); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "interrupts", CTLFLAG_RW, 
&ioat->stats.interrupts, "Number of interrupts processed on this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "descriptors", CTLFLAG_RW, &ioat->stats.descriptors_processed, "Number of descriptors processed on this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "submitted", CTLFLAG_RW, &ioat->stats.descriptors_submitted, "Number of descriptors submitted to this channel"); SYSCTL_ADD_UQUAD(ctx, statpar, OID_AUTO, "errored", CTLFLAG_RW, &ioat->stats.descriptors_error, "Number of descriptors failed by channel errors"); SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "halts", CTLFLAG_RW, &ioat->stats.channel_halts, 0, "Number of times the channel has halted"); SYSCTL_ADD_U32(ctx, statpar, OID_AUTO, "last_halt_chanerr", CTLFLAG_RW, &ioat->stats.last_halt_chanerr, 0, "The raw CHANERR when the channel was last halted"); SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "desc_per_interrupt", CTLTYPE_STRING | CTLFLAG_RD, ioat, 0, sysctl_handle_dpi, "A", "Descriptors per interrupt"); } static inline struct ioat_softc * ioat_get(struct ioat_softc *ioat, enum ioat_ref_kind kind) { uint32_t old; KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus")); old = atomic_fetchadd_32(&ioat->refcnt, 1); KASSERT(old < UINT32_MAX, ("refcnt overflow")); #ifdef INVARIANTS old = atomic_fetchadd_32(&ioat->refkinds[kind], 1); KASSERT(old < UINT32_MAX, ("refcnt kind overflow")); #endif return (ioat); } static inline void ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind) { _ioat_putn(ioat, n, kind, FALSE); } static inline void ioat_putn_locked(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind) { _ioat_putn(ioat, n, kind, TRUE); } static inline void _ioat_putn(struct ioat_softc *ioat, uint32_t n, enum ioat_ref_kind kind, boolean_t locked) { uint32_t old; KASSERT(kind < IOAT_NUM_REF_KINDS, ("bogus")); if (n == 0) return; #ifdef INVARIANTS old = atomic_fetchadd_32(&ioat->refkinds[kind], -n); KASSERT(old >= n, ("refcnt kind underflow")); #endif /* Skip acquiring the lock if resulting refcnt > 0. 
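	 * This is a lock-free fast path: the cmpset loop below retries on races and only falls back to IOAT_REFLK when the count may reach zero and a sleeper in ioat_drain_locked() must be woken.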
*/ for (;;) { old = ioat->refcnt; if (old <= n) break; if (atomic_cmpset_32(&ioat->refcnt, old, old - n)) return; } if (locked) mtx_assert(IOAT_REFLK, MA_OWNED); else mtx_lock(IOAT_REFLK); old = atomic_fetchadd_32(&ioat->refcnt, -n); KASSERT(old >= n, ("refcnt error")); if (old == n) wakeup(IOAT_REFLK); if (!locked) mtx_unlock(IOAT_REFLK); } static inline void ioat_put(struct ioat_softc *ioat, enum ioat_ref_kind kind) { ioat_putn(ioat, 1, kind); } static void ioat_drain_locked(struct ioat_softc *ioat) { mtx_assert(IOAT_REFLK, MA_OWNED); while (ioat->refcnt > 0) msleep(IOAT_REFLK, IOAT_REFLK, 0, "ioat_drain", 0); } #ifdef DDB #define _db_show_lock(lo) LOCK_CLASS(lo)->lc_ddb_show(lo) #define db_show_lock(lk) _db_show_lock(&(lk)->lock_object) DB_SHOW_COMMAND(ioat, db_show_ioat) { struct ioat_softc *sc; unsigned idx; if (!have_addr) goto usage; idx = (unsigned)addr; if (idx >= ioat_channel_index) goto usage; sc = ioat_channel[idx]; db_printf("ioat softc at %p\n", sc); if (sc == NULL) return; db_printf(" version: %d\n", sc->version); db_printf(" chan_idx: %u\n", sc->chan_idx); db_printf(" submit_lock: "); db_show_lock(&sc->submit_lock); db_printf(" capabilities: %b\n", (int)sc->capabilities, IOAT_DMACAP_STR); db_printf(" cached_intrdelay: %u\n", sc->cached_intrdelay); db_printf(" *comp_update: 0x%jx\n", (uintmax_t)*sc->comp_update); db_printf(" poll_timer:\n"); db_printf(" c_time: %ju\n", (uintmax_t)sc->poll_timer.c_time); db_printf(" c_arg: %p\n", sc->poll_timer.c_arg); db_printf(" c_func: %p\n", sc->poll_timer.c_func); db_printf(" c_lock: %p\n", sc->poll_timer.c_lock); db_printf(" c_flags: 0x%x\n", (unsigned)sc->poll_timer.c_flags); db_printf(" quiescing: %d\n", (int)sc->quiescing); db_printf(" destroying: %d\n", (int)sc->destroying); db_printf(" is_submitter_processing: %d\n", (int)sc->is_submitter_processing); db_printf(" is_completion_pending: %d\n", (int)sc->is_completion_pending); db_printf(" is_reset_pending: %d\n", (int)sc->is_reset_pending); db_printf(" is_channel_running: %d\n", (int)sc->is_channel_running); db_printf(" intrdelay_supported: %d\n", (int)sc->intrdelay_supported); db_printf(" resetting: %d\n", (int)sc->resetting); db_printf(" head: %u\n", sc->head); db_printf(" tail: %u\n", sc->tail); db_printf(" hw_head: %u\n", sc->hw_head); db_printf(" ring_size_order: %u\n", sc->ring_size_order); db_printf(" last_seen: 0x%lx\n", sc->last_seen); db_printf(" ring: %p\n", sc->ring); db_printf(" descriptors: %p\n", sc->hw_desc_ring); db_printf(" descriptors (phys): 0x%jx\n", (uintmax_t)sc->hw_desc_bus_addr); db_printf(" ring[%u] (tail):\n", sc->tail % (1 << sc->ring_size_order)); db_printf(" id: %u\n", ioat_get_ring_entry(sc, sc->tail)->id); db_printf(" addr: 0x%lx\n", RING_PHYS_ADDR(sc, sc->tail)); db_printf(" next: 0x%lx\n", ioat_get_descriptor(sc, sc->tail)->generic.next); db_printf(" ring[%u] (head - 1):\n", (sc->head - 1) % (1 << sc->ring_size_order)); db_printf(" id: %u\n", ioat_get_ring_entry(sc, sc->head - 1)->id); db_printf(" addr: 0x%lx\n", RING_PHYS_ADDR(sc, sc->head - 1)); db_printf(" next: 0x%lx\n", ioat_get_descriptor(sc, sc->head - 1)->generic.next); db_printf(" ring[%u] (head):\n", (sc->head) % (1 << sc->ring_size_order)); db_printf(" id: %u\n", ioat_get_ring_entry(sc, sc->head)->id); db_printf(" addr: 0x%lx\n", RING_PHYS_ADDR(sc, sc->head)); db_printf(" next: 0x%lx\n", ioat_get_descriptor(sc, sc->head)->generic.next); for (idx = 0; idx < (1 << sc->ring_size_order); idx++) if ((*sc->comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK) == RING_PHYS_ADDR(sc, idx)) 
db_printf(" ring[%u] == hardware tail\n", idx); db_printf(" cleanup_lock: "); db_show_lock(&sc->cleanup_lock); db_printf(" refcnt: %u\n", sc->refcnt); #ifdef INVARIANTS CTASSERT(IOAT_NUM_REF_KINDS == 2); db_printf(" refkinds: [ENG=%u, DESCR=%u]\n", sc->refkinds[0], sc->refkinds[1]); #endif db_printf(" stats:\n"); db_printf(" interrupts: %lu\n", sc->stats.interrupts); db_printf(" descriptors_processed: %lu\n", sc->stats.descriptors_processed); db_printf(" descriptors_error: %lu\n", sc->stats.descriptors_error); db_printf(" descriptors_submitted: %lu\n", sc->stats.descriptors_submitted); db_printf(" channel_halts: %u\n", sc->stats.channel_halts); db_printf(" last_halt_chanerr: %u\n", sc->stats.last_halt_chanerr); if (db_pager_quit) return; db_printf(" hw status:\n"); db_printf(" status: 0x%lx\n", ioat_get_chansts(sc)); db_printf(" chanctrl: 0x%x\n", (unsigned)ioat_read_2(sc, IOAT_CHANCTRL_OFFSET)); db_printf(" chancmd: 0x%x\n", (unsigned)ioat_read_1(sc, IOAT_CHANCMD_OFFSET)); db_printf(" dmacount: 0x%x\n", (unsigned)ioat_read_2(sc, IOAT_DMACOUNT_OFFSET)); db_printf(" chainaddr: 0x%lx\n", ioat_read_double_4(sc, IOAT_CHAINADDR_OFFSET_LOW)); db_printf(" chancmp: 0x%lx\n", ioat_read_double_4(sc, IOAT_CHANCMP_OFFSET_LOW)); db_printf(" chanerr: %b\n", (int)ioat_read_4(sc, IOAT_CHANERR_OFFSET), IOAT_CHANERR_STR); return; usage: db_printf("usage: show ioat <0-%u>\n", ioat_channel_index); return; } #endif /* DDB */ Index: head/sys/dev/ntb/ntb_hw/ntb_hw_intel.c =================================================================== --- head/sys/dev/ntb/ntb_hw/ntb_hw_intel.c (revision 323585) +++ head/sys/dev/ntb/ntb_hw/ntb_hw_intel.c (revision 323586) @@ -1,3124 +1,3123 @@ /*- * Copyright (c) 2016-2017 Alexander Motin * Copyright (C) 2013 Intel Corporation * Copyright (C) 2015 EMC Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * The Non-Transparent Bridge (NTB) is a device that allows you to connect * two or more systems using a PCI-e links, providing remote memory access. * * This module contains a driver for NTB hardware in Intel Xeon/Atom CPUs. * * NOTE: Much of the code in this module is shared with Linux. Any patches may * be picked up and redistributed in Linux with a dual GPL/BSD license. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ntb_hw_intel.h" #include "../ntb.h" #define MAX_MSIX_INTERRUPTS MAX(XEON_DB_COUNT, ATOM_DB_COUNT) #define NTB_HB_TIMEOUT 1 /* second */ #define ATOM_LINK_RECOVERY_TIME 500 /* ms */ #define BAR_HIGH_MASK (~((1ull << 12) - 1)) #define NTB_MSIX_VER_GUARD 0xaabbccdd #define NTB_MSIX_RECEIVED 0xe0f0e0f0 /* * PCI constants could be somewhere more generic, but aren't defined/used in * pci.c. */ #define PCI_MSIX_ENTRY_SIZE 16 #define PCI_MSIX_ENTRY_LOWER_ADDR 0 #define PCI_MSIX_ENTRY_UPPER_ADDR 4 #define PCI_MSIX_ENTRY_DATA 8 enum ntb_device_type { NTB_XEON, NTB_ATOM }; /* ntb_conn_type are hardware numbers, cannot change. */ enum ntb_conn_type { NTB_CONN_TRANSPARENT = 0, NTB_CONN_B2B = 1, NTB_CONN_RP = 2, }; enum ntb_b2b_direction { NTB_DEV_USD = 0, NTB_DEV_DSD = 1, }; enum ntb_bar { NTB_CONFIG_BAR = 0, NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3, NTB_MAX_BARS }; enum { NTB_MSIX_GUARD = 0, NTB_MSIX_DATA0, NTB_MSIX_DATA1, NTB_MSIX_DATA2, NTB_MSIX_OFS0, NTB_MSIX_OFS1, NTB_MSIX_OFS2, NTB_MSIX_DONE, NTB_MAX_MSIX_SPAD }; /* Device features and workarounds */ #define HAS_FEATURE(ntb, feature) \ (((ntb)->features & (feature)) != 0) struct ntb_hw_info { uint32_t device_id; const char *desc; enum ntb_device_type type; uint32_t features; }; struct ntb_pci_bar_info { bus_space_tag_t pci_bus_tag; bus_space_handle_t pci_bus_handle; int pci_resource_id; struct resource *pci_resource; vm_paddr_t pbase; caddr_t vbase; vm_size_t size; vm_memattr_t map_mode; /* Configuration register offsets */ uint32_t psz_off; uint32_t ssz_off; uint32_t pbarxlat_off; }; struct ntb_int_info { struct resource *res; int rid; void *tag; }; struct ntb_vec { struct ntb_softc *ntb; uint32_t num; unsigned masked; }; struct ntb_reg { uint32_t ntb_ctl; uint32_t lnk_sta; uint8_t db_size; unsigned mw_bar[NTB_MAX_BARS]; }; struct ntb_alt_reg { uint32_t db_bell; uint32_t db_mask; uint32_t spad; }; struct ntb_xlat_reg { uint32_t bar0_base; uint32_t bar2_base; uint32_t bar4_base; uint32_t bar5_base; uint32_t bar2_xlat; uint32_t bar4_xlat; uint32_t bar5_xlat; uint32_t bar2_limit; uint32_t bar4_limit; uint32_t bar5_limit; }; struct ntb_b2b_addr { uint64_t bar0_addr; uint64_t bar2_addr64; uint64_t bar4_addr64; uint64_t bar4_addr32; uint64_t bar5_addr32; }; struct ntb_msix_data { uint32_t nmd_ofs; uint32_t nmd_data; }; struct ntb_softc { /* ntb.c context. Do not move! Must go first! */ void *ntb_store; device_t device; enum ntb_device_type type; uint32_t features; struct ntb_pci_bar_info bar_info[NTB_MAX_BARS]; struct ntb_int_info int_info[MAX_MSIX_INTERRUPTS]; uint32_t allocated_interrupts; struct ntb_msix_data peer_msix_data[XEON_NONLINK_DB_MSIX_BITS]; struct ntb_msix_data msix_data[XEON_NONLINK_DB_MSIX_BITS]; bool peer_msix_good; bool peer_msix_done; struct ntb_pci_bar_info *peer_lapic_bar; struct callout peer_msix_work; struct callout heartbeat_timer; struct callout lr_timer; struct ntb_vec *msix_vec; uint32_t ppd; enum ntb_conn_type conn_type; enum ntb_b2b_direction dev_type; /* Offset of peer bar0 in B2B BAR */ uint64_t b2b_off; /* Memory window used to access peer bar0 */ #define B2B_MW_DISABLED UINT8_MAX uint8_t b2b_mw_idx; uint32_t msix_xlat; uint8_t msix_mw_idx; uint8_t mw_count; uint8_t spad_count; uint8_t db_count; uint8_t db_vec_count; uint8_t db_vec_shift; /* Protects local db_mask. 
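	 * (A spin mutex, per the MTX_SPIN init in intel_ntb_attach(), so it is safe to take from the interrupt handlers.)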
*/ #define DB_MASK_LOCK(sc) mtx_lock_spin(&(sc)->db_mask_lock) #define DB_MASK_UNLOCK(sc) mtx_unlock_spin(&(sc)->db_mask_lock) #define DB_MASK_ASSERT(sc,f) mtx_assert(&(sc)->db_mask_lock, (f)) struct mtx db_mask_lock; volatile uint32_t ntb_ctl; volatile uint32_t lnk_sta; uint64_t db_valid_mask; uint64_t db_link_mask; uint64_t db_mask; uint64_t fake_db; /* NTB_SB01BASE_LOCKUP*/ uint64_t force_db; /* NTB_SB01BASE_LOCKUP*/ int last_ts; /* ticks @ last irq */ const struct ntb_reg *reg; const struct ntb_alt_reg *self_reg; const struct ntb_alt_reg *peer_reg; const struct ntb_xlat_reg *xlat_reg; }; #ifdef __i386__ static __inline uint64_t bus_space_read_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset) { return (bus_space_read_4(tag, handle, offset) | ((uint64_t)bus_space_read_4(tag, handle, offset + 4)) << 32); } static __inline void bus_space_write_8(bus_space_tag_t tag, bus_space_handle_t handle, bus_size_t offset, uint64_t val) { bus_space_write_4(tag, handle, offset, val); bus_space_write_4(tag, handle, offset + 4, val >> 32); } #endif #define intel_ntb_bar_read(SIZE, bar, offset) \ bus_space_read_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \ ntb->bar_info[(bar)].pci_bus_handle, (offset)) #define intel_ntb_bar_write(SIZE, bar, offset, val) \ bus_space_write_ ## SIZE (ntb->bar_info[(bar)].pci_bus_tag, \ ntb->bar_info[(bar)].pci_bus_handle, (offset), (val)) #define intel_ntb_reg_read(SIZE, offset) \ intel_ntb_bar_read(SIZE, NTB_CONFIG_BAR, offset) #define intel_ntb_reg_write(SIZE, offset, val) \ intel_ntb_bar_write(SIZE, NTB_CONFIG_BAR, offset, val) #define intel_ntb_mw_read(SIZE, offset) \ intel_ntb_bar_read(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \ offset) #define intel_ntb_mw_write(SIZE, offset, val) \ intel_ntb_bar_write(SIZE, intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx), \ offset, val) static int intel_ntb_probe(device_t device); static int intel_ntb_attach(device_t device); static int intel_ntb_detach(device_t device); static uint64_t intel_ntb_db_valid_mask(device_t dev); static void intel_ntb_spad_clear(device_t dev); static uint64_t intel_ntb_db_vector_mask(device_t dev, uint32_t vector); static bool intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed, enum ntb_width *width); static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed, enum ntb_width width); static int intel_ntb_link_disable(device_t dev); static int intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val); static int intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val); static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *, unsigned uidx); static inline enum ntb_bar intel_ntb_mw_to_bar(struct ntb_softc *, unsigned mw); static inline bool bar_is_64bit(struct ntb_softc *, enum ntb_bar); static inline void bar_get_xlat_params(struct ntb_softc *, enum ntb_bar, uint32_t *base, uint32_t *xlat, uint32_t *lmt); static int intel_ntb_map_pci_bars(struct ntb_softc *ntb); static int intel_ntb_mw_set_wc_internal(struct ntb_softc *, unsigned idx, vm_memattr_t); static void print_map_success(struct ntb_softc *, struct ntb_pci_bar_info *, const char *); static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar); static int map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar); static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb); static int intel_ntb_remap_msix(device_t, uint32_t desired, uint32_t avail); static int intel_ntb_init_isr(struct ntb_softc *ntb); static int intel_ntb_setup_legacy_interrupt(struct ntb_softc 
*ntb); static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors); static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb); static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *, uint64_t db_vector); static void intel_ntb_interrupt(struct ntb_softc *, uint32_t vec); static void ndev_vec_isr(void *arg); static void ndev_irq_isr(void *arg); static inline uint64_t db_ioread(struct ntb_softc *, uint64_t regoff); static inline void db_iowrite(struct ntb_softc *, uint64_t regoff, uint64_t); static inline void db_iowrite_raw(struct ntb_softc *, uint64_t regoff, uint64_t); static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors); static void intel_ntb_free_msix_vec(struct ntb_softc *ntb); static void intel_ntb_get_msix_info(struct ntb_softc *ntb); static void intel_ntb_exchange_msix(void *); static struct ntb_hw_info *intel_ntb_get_device_info(uint32_t device_id); static void intel_ntb_detect_max_mw(struct ntb_softc *ntb); static int intel_ntb_detect_xeon(struct ntb_softc *ntb); static int intel_ntb_detect_atom(struct ntb_softc *ntb); static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb); static int intel_ntb_atom_init_dev(struct ntb_softc *ntb); static void intel_ntb_teardown_xeon(struct ntb_softc *ntb); static void configure_atom_secondary_side_bars(struct ntb_softc *ntb); static void xeon_reset_sbar_size(struct ntb_softc *, enum ntb_bar idx, enum ntb_bar regbar); static void xeon_set_sbar_base_and_limit(struct ntb_softc *, uint64_t base_addr, enum ntb_bar idx, enum ntb_bar regbar); static void xeon_set_pbar_xlat(struct ntb_softc *, uint64_t base_addr, enum ntb_bar idx); static int xeon_setup_b2b_mw(struct ntb_softc *, const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr); static inline bool link_is_up(struct ntb_softc *ntb); static inline bool _xeon_link_is_up(struct ntb_softc *ntb); static inline bool atom_link_is_err(struct ntb_softc *ntb); static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *); static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *); static void atom_link_hb(void *arg); static void recover_atom_link(void *arg); static bool intel_ntb_poll_link(struct ntb_softc *ntb); static void save_bar_parameters(struct ntb_pci_bar_info *bar); static void intel_ntb_sysctl_init(struct ntb_softc *); static int sysctl_handle_features(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS); static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS); static int sysctl_handle_register(SYSCTL_HANDLER_ARGS); static unsigned g_ntb_hw_debug_level; SYSCTL_UINT(_hw_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ntb_hw_debug_level, 0, "ntb_hw log level -- higher is more verbose"); #define intel_ntb_printf(lvl, ...) 
do { \ if ((lvl) <= g_ntb_hw_debug_level) { \ device_printf(ntb->device, __VA_ARGS__); \ } \ } while (0) #define _NTB_PAT_UC 0 #define _NTB_PAT_WC 1 #define _NTB_PAT_WT 4 #define _NTB_PAT_WP 5 #define _NTB_PAT_WB 6 #define _NTB_PAT_UCM 7 static unsigned g_ntb_mw_pat = _NTB_PAT_UC; SYSCTL_UINT(_hw_ntb, OID_AUTO, default_mw_pat, CTLFLAG_RDTUN, &g_ntb_mw_pat, 0, "Configure the default memory window cache flags (PAT): " "UC: " __XSTRING(_NTB_PAT_UC) ", " "WC: " __XSTRING(_NTB_PAT_WC) ", " "WT: " __XSTRING(_NTB_PAT_WT) ", " "WP: " __XSTRING(_NTB_PAT_WP) ", " "WB: " __XSTRING(_NTB_PAT_WB) ", " "UC-: " __XSTRING(_NTB_PAT_UCM)); static inline vm_memattr_t intel_ntb_pat_flags(void) { switch (g_ntb_mw_pat) { case _NTB_PAT_WC: return (VM_MEMATTR_WRITE_COMBINING); case _NTB_PAT_WT: return (VM_MEMATTR_WRITE_THROUGH); case _NTB_PAT_WP: return (VM_MEMATTR_WRITE_PROTECTED); case _NTB_PAT_WB: return (VM_MEMATTR_WRITE_BACK); case _NTB_PAT_UCM: return (VM_MEMATTR_WEAK_UNCACHEABLE); case _NTB_PAT_UC: /* FALLTHROUGH */ default: return (VM_MEMATTR_UNCACHEABLE); } } /* * Well, this obviously doesn't belong here, but it doesn't seem to exist * anywhere better yet. */ static inline const char * intel_ntb_vm_memattr_to_str(vm_memattr_t pat) { switch (pat) { case VM_MEMATTR_WRITE_COMBINING: return ("WRITE_COMBINING"); case VM_MEMATTR_WRITE_THROUGH: return ("WRITE_THROUGH"); case VM_MEMATTR_WRITE_PROTECTED: return ("WRITE_PROTECTED"); case VM_MEMATTR_WRITE_BACK: return ("WRITE_BACK"); case VM_MEMATTR_WEAK_UNCACHEABLE: return ("UNCACHED"); case VM_MEMATTR_UNCACHEABLE: return ("UNCACHEABLE"); default: return ("UNKNOWN"); } } static int g_ntb_msix_idx = 1; SYSCTL_INT(_hw_ntb, OID_AUTO, msix_mw_idx, CTLFLAG_RDTUN, &g_ntb_msix_idx, 0, "Use this memory window to access the peer MSIX message complex on " "certain Xeon-based NTB systems, as a workaround for a hardware errata. " "Like b2b_mw_idx, negative values index from the last available memory " "window. (Applies on Xeon platforms with SB01BASE_LOCKUP errata.)"); static int g_ntb_mw_idx = -1; SYSCTL_INT(_hw_ntb, OID_AUTO, b2b_mw_idx, CTLFLAG_RDTUN, &g_ntb_mw_idx, 0, "Use this memory window to access the peer NTB registers. A " "non-negative value starts from the first MW index; a negative value " "starts from the last MW index. The default is -1, i.e., the last " "available memory window. Both sides of the NTB MUST set the same " "value here! (Applies on Xeon platforms with SDOORBELL_LOCKUP errata.)"); /* Hardware owns the low 16 bits of features. */ #define NTB_BAR_SIZE_4K (1 << 0) #define NTB_SDOORBELL_LOCKUP (1 << 1) #define NTB_SB01BASE_LOCKUP (1 << 2) #define NTB_B2BDOORBELL_BIT14 (1 << 3) /* Software/configuration owns the top 16 bits. */ #define NTB_SPLIT_BAR (1ull << 16) #define NTB_FEATURES_STR \ "\20\21SPLIT_BAR4\04B2B_DOORBELL_BIT14\03SB01BASE_LOCKUP" \ "\02SDOORBELL_LOCKUP\01BAR_SIZE_4K" static struct ntb_hw_info pci_ids[] = { /* XXX: PS/SS IDs left out until they are supported. 
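	 * (PS/SS presumably refer to the primary-/secondary-side device IDs; only the B2B variants below are matched for now.)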
*/ { 0x0C4E8086, "BWD Atom Processor S1200 Non-Transparent Bridge B2B", NTB_ATOM, 0 }, { 0x37258086, "JSF Xeon C35xx/C55xx Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 }, { 0x3C0D8086, "SNB Xeon E5/Core i7 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 }, { 0x0E0D8086, "IVT Xeon E5 V2 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP | NTB_BAR_SIZE_4K }, { 0x2F0D8086, "HSX Xeon E5 V3 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP }, { 0x6F0D8086, "BDX Xeon E5 V4 Non-Transparent Bridge B2B", NTB_XEON, NTB_SDOORBELL_LOCKUP | NTB_B2BDOORBELL_BIT14 | NTB_SB01BASE_LOCKUP }, - - { 0x00000000, NULL, NTB_ATOM, 0 } }; static const struct ntb_reg atom_reg = { .ntb_ctl = ATOM_NTBCNTL_OFFSET, .lnk_sta = ATOM_LINK_STATUS_OFFSET, .db_size = sizeof(uint64_t), .mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2 }, }; static const struct ntb_alt_reg atom_pri_reg = { .db_bell = ATOM_PDOORBELL_OFFSET, .db_mask = ATOM_PDBMSK_OFFSET, .spad = ATOM_SPAD_OFFSET, }; static const struct ntb_alt_reg atom_b2b_reg = { .db_bell = ATOM_B2B_DOORBELL_OFFSET, .spad = ATOM_B2B_SPAD_OFFSET, }; static const struct ntb_xlat_reg atom_sec_xlat = { #if 0 /* "FIXME" says the Linux driver. */ .bar0_base = ATOM_SBAR0BASE_OFFSET, .bar2_base = ATOM_SBAR2BASE_OFFSET, .bar4_base = ATOM_SBAR4BASE_OFFSET, .bar2_limit = ATOM_SBAR2LMT_OFFSET, .bar4_limit = ATOM_SBAR4LMT_OFFSET, #endif .bar2_xlat = ATOM_SBAR2XLAT_OFFSET, .bar4_xlat = ATOM_SBAR4XLAT_OFFSET, }; static const struct ntb_reg xeon_reg = { .ntb_ctl = XEON_NTBCNTL_OFFSET, .lnk_sta = XEON_LINK_STATUS_OFFSET, .db_size = sizeof(uint16_t), .mw_bar = { NTB_B2B_BAR_1, NTB_B2B_BAR_2, NTB_B2B_BAR_3 }, }; static const struct ntb_alt_reg xeon_pri_reg = { .db_bell = XEON_PDOORBELL_OFFSET, .db_mask = XEON_PDBMSK_OFFSET, .spad = XEON_SPAD_OFFSET, }; static const struct ntb_alt_reg xeon_b2b_reg = { .db_bell = XEON_B2B_DOORBELL_OFFSET, .spad = XEON_B2B_SPAD_OFFSET, }; static const struct ntb_xlat_reg xeon_sec_xlat = { .bar0_base = XEON_SBAR0BASE_OFFSET, .bar2_base = XEON_SBAR2BASE_OFFSET, .bar4_base = XEON_SBAR4BASE_OFFSET, .bar5_base = XEON_SBAR5BASE_OFFSET, .bar2_limit = XEON_SBAR2LMT_OFFSET, .bar4_limit = XEON_SBAR4LMT_OFFSET, .bar5_limit = XEON_SBAR5LMT_OFFSET, .bar2_xlat = XEON_SBAR2XLAT_OFFSET, .bar4_xlat = XEON_SBAR4XLAT_OFFSET, .bar5_xlat = XEON_SBAR5XLAT_OFFSET, }; static struct ntb_b2b_addr xeon_b2b_usd_addr = { .bar0_addr = XEON_B2B_BAR0_ADDR, .bar2_addr64 = XEON_B2B_BAR2_ADDR64, .bar4_addr64 = XEON_B2B_BAR4_ADDR64, .bar4_addr32 = XEON_B2B_BAR4_ADDR32, .bar5_addr32 = XEON_B2B_BAR5_ADDR32, }; static struct ntb_b2b_addr xeon_b2b_dsd_addr = { .bar0_addr = XEON_B2B_BAR0_ADDR, .bar2_addr64 = XEON_B2B_BAR2_ADDR64, .bar4_addr64 = XEON_B2B_BAR4_ADDR64, .bar4_addr32 = XEON_B2B_BAR4_ADDR32, .bar5_addr32 = XEON_B2B_BAR5_ADDR32, }; SYSCTL_NODE(_hw_ntb, OID_AUTO, xeon_b2b, CTLFLAG_RW, 0, "B2B MW segment overrides -- MUST be the same on both sides"); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar2_addr64, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar2_addr64, 0, "If using B2B topology on Xeon " "hardware, use this 64-bit address on the bus between the NTB devices for " "the window at BAR2, on the upstream side of the link. 
MUST be the same " "address on both sides."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr64, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar4_addr64, 0, "See usd_bar2_addr64, but BAR4."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar4_addr32, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar4_addr32, 0, "See usd_bar2_addr64, but BAR4 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, usd_bar5_addr32, CTLFLAG_RDTUN, &xeon_b2b_usd_addr.bar5_addr32, 0, "See usd_bar2_addr64, but BAR5 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar2_addr64, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar2_addr64, 0, "If using B2B topology on Xeon " "hardware, use this 64-bit address on the bus between the NTB devices for " "the window at BAR2, on the downstream side of the link. MUST be the same" " address on both sides."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr64, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar4_addr64, 0, "See dsd_bar2_addr64, but BAR4."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar4_addr32, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar4_addr32, 0, "See dsd_bar2_addr64, but BAR4 " "(split-BAR mode)."); SYSCTL_UQUAD(_hw_ntb_xeon_b2b, OID_AUTO, dsd_bar5_addr32, CTLFLAG_RDTUN, &xeon_b2b_dsd_addr.bar5_addr32, 0, "See dsd_bar2_addr64, but BAR5 " "(split-BAR mode)."); /* * OS <-> Driver interface structures */ MALLOC_DEFINE(M_NTB, "ntb_hw", "ntb_hw driver memory allocations"); /* * OS <-> Driver linkage functions */ static int intel_ntb_probe(device_t device) { struct ntb_hw_info *p; p = intel_ntb_get_device_info(pci_get_devid(device)); if (p == NULL) return (ENXIO); device_set_desc(device, p->desc); return (0); } static int intel_ntb_attach(device_t device) { struct ntb_softc *ntb; struct ntb_hw_info *p; int error; ntb = device_get_softc(device); p = intel_ntb_get_device_info(pci_get_devid(device)); ntb->device = device; ntb->type = p->type; ntb->features = p->features; ntb->b2b_mw_idx = B2B_MW_DISABLED; ntb->msix_mw_idx = B2B_MW_DISABLED; /* Heartbeat timer for NTB_ATOM since there is no link interrupt */ callout_init(&ntb->heartbeat_timer, 1); callout_init(&ntb->lr_timer, 1); callout_init(&ntb->peer_msix_work, 1); mtx_init(&ntb->db_mask_lock, "ntb hw bits", NULL, MTX_SPIN); if (ntb->type == NTB_ATOM) error = intel_ntb_detect_atom(ntb); else error = intel_ntb_detect_xeon(ntb); if (error != 0) goto out; intel_ntb_detect_max_mw(ntb); pci_enable_busmaster(ntb->device); error = intel_ntb_map_pci_bars(ntb); if (error != 0) goto out; if (ntb->type == NTB_ATOM) error = intel_ntb_atom_init_dev(ntb); else error = intel_ntb_xeon_init_dev(ntb); if (error != 0) goto out; intel_ntb_spad_clear(device); intel_ntb_poll_link(ntb); intel_ntb_sysctl_init(ntb); /* Attach children to this controller */ error = ntb_register_device(device); out: if (error != 0) intel_ntb_detach(device); return (error); } static int intel_ntb_detach(device_t device) { struct ntb_softc *ntb; ntb = device_get_softc(device); /* Detach & delete all children */ ntb_unregister_device(device); if (ntb->self_reg != NULL) { DB_MASK_LOCK(ntb); db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_valid_mask); DB_MASK_UNLOCK(ntb); } callout_drain(&ntb->heartbeat_timer); callout_drain(&ntb->lr_timer); callout_drain(&ntb->peer_msix_work); pci_disable_busmaster(ntb->device); if (ntb->type == NTB_XEON) intel_ntb_teardown_xeon(ntb); intel_ntb_teardown_interrupts(ntb); mtx_destroy(&ntb->db_mask_lock); intel_ntb_unmap_pci_bar(ntb); return (0); } /* * Driver internal routines */ static inline enum ntb_bar intel_ntb_mw_to_bar(struct 
ntb_softc *ntb, unsigned mw) { KASSERT(mw < ntb->mw_count, ("%s: mw:%u > count:%u", __func__, mw, (unsigned)ntb->mw_count)); KASSERT(ntb->reg->mw_bar[mw] != 0, ("invalid mw")); return (ntb->reg->mw_bar[mw]); } static inline bool bar_is_64bit(struct ntb_softc *ntb, enum ntb_bar bar) { /* XXX This assertion could be stronger. */ KASSERT(bar < NTB_MAX_BARS, ("bogus bar")); return (bar < NTB_B2B_BAR_2 || !HAS_FEATURE(ntb, NTB_SPLIT_BAR)); } static inline void bar_get_xlat_params(struct ntb_softc *ntb, enum ntb_bar bar, uint32_t *base, uint32_t *xlat, uint32_t *lmt) { uint32_t basev, lmtv, xlatv; switch (bar) { case NTB_B2B_BAR_1: basev = ntb->xlat_reg->bar2_base; lmtv = ntb->xlat_reg->bar2_limit; xlatv = ntb->xlat_reg->bar2_xlat; break; case NTB_B2B_BAR_2: basev = ntb->xlat_reg->bar4_base; lmtv = ntb->xlat_reg->bar4_limit; xlatv = ntb->xlat_reg->bar4_xlat; break; case NTB_B2B_BAR_3: basev = ntb->xlat_reg->bar5_base; lmtv = ntb->xlat_reg->bar5_limit; xlatv = ntb->xlat_reg->bar5_xlat; break; default: KASSERT(bar >= NTB_B2B_BAR_1 && bar < NTB_MAX_BARS, ("bad bar")); basev = lmtv = xlatv = 0; break; } if (base != NULL) *base = basev; if (xlat != NULL) *xlat = xlatv; if (lmt != NULL) *lmt = lmtv; } static int intel_ntb_map_pci_bars(struct ntb_softc *ntb) { int rc; ntb->bar_info[NTB_CONFIG_BAR].pci_resource_id = PCIR_BAR(0); rc = map_mmr_bar(ntb, &ntb->bar_info[NTB_CONFIG_BAR]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_1].pci_resource_id = PCIR_BAR(2); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_1]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_1].psz_off = XEON_PBAR23SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_1].ssz_off = XEON_SBAR23SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off = XEON_PBAR2XLAT_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].pci_resource_id = PCIR_BAR(4); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_2]); if (rc != 0) goto out; ntb->bar_info[NTB_B2B_BAR_2].psz_off = XEON_PBAR4SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].ssz_off = XEON_SBAR4SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off = XEON_PBAR4XLAT_OFFSET; if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR)) goto out; ntb->bar_info[NTB_B2B_BAR_3].pci_resource_id = PCIR_BAR(5); rc = map_memory_window_bar(ntb, &ntb->bar_info[NTB_B2B_BAR_3]); ntb->bar_info[NTB_B2B_BAR_3].psz_off = XEON_PBAR5SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_3].ssz_off = XEON_SBAR5SZ_OFFSET; ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off = XEON_PBAR5XLAT_OFFSET; out: if (rc != 0) device_printf(ntb->device, "unable to allocate pci resource\n"); return (rc); } static void print_map_success(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar, const char *kind) { device_printf(ntb->device, "Mapped BAR%d v:[%p-%p] p:[%p-%p] (0x%jx bytes) (%s)\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), (uintmax_t)bar->size, kind); } static int map_mmr_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar) { bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY, &bar->pci_resource_id, RF_ACTIVE); if (bar->pci_resource == NULL) return (ENXIO); save_bar_parameters(bar); bar->map_mode = VM_MEMATTR_UNCACHEABLE; print_map_success(ntb, bar, "mmr"); return (0); } static int map_memory_window_bar(struct ntb_softc *ntb, struct ntb_pci_bar_info *bar) { int rc; vm_memattr_t mapmode; uint8_t bar_size_bits = 0; bar->pci_resource = bus_alloc_resource_any(ntb->device, SYS_RES_MEMORY, &bar->pci_resource_id, RF_ACTIVE); if (bar->pci_resource == NULL) return (ENXIO); 
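/* * Note on the workaround below: the PBARxxSZ config registers hold a BAR * size as a log2 exponent, so when NTB_BAR_SIZE_4K is set the resource is * grown to span exactly 1ul << bar_size_bits bytes from its existing base. */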
save_bar_parameters(bar); /* * Ivytown NTB BAR sizes are misreported by the hardware due to a * hardware issue. To work around this, query the size it should be * configured to by the device and modify the resource to correspond to * this new size. The BIOS on systems with this problem is required to * provide enough address space to allow the driver to make this change * safely. * * Ideally I could have just specified the size when I allocated the * resource like: * bus_alloc_resource(ntb->device, * SYS_RES_MEMORY, &bar->pci_resource_id, 0ul, ~0ul, * 1ul << bar_size_bits, RF_ACTIVE); * but the PCI driver does not honor the size in this call, so we have * to modify it after the fact. */ if (HAS_FEATURE(ntb, NTB_BAR_SIZE_4K)) { if (bar->pci_resource_id == PCIR_BAR(2)) bar_size_bits = pci_read_config(ntb->device, XEON_PBAR23SZ_OFFSET, 1); else bar_size_bits = pci_read_config(ntb->device, XEON_PBAR45SZ_OFFSET, 1); rc = bus_adjust_resource(ntb->device, SYS_RES_MEMORY, bar->pci_resource, bar->pbase, bar->pbase + (1ul << bar_size_bits) - 1); if (rc != 0) { device_printf(ntb->device, "unable to resize bar\n"); return (rc); } save_bar_parameters(bar); } bar->map_mode = VM_MEMATTR_UNCACHEABLE; print_map_success(ntb, bar, "mw"); /* * Optionally, mark MW BARs as anything other than UC to improve * performance. */ mapmode = intel_ntb_pat_flags(); if (mapmode == bar->map_mode) return (0); rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mapmode); if (rc == 0) { bar->map_mode = mapmode; device_printf(ntb->device, "Marked BAR%d v:[%p-%p] p:[%p-%p] as " "%s.\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), intel_ntb_vm_memattr_to_str(mapmode)); } else device_printf(ntb->device, "Unable to mark BAR%d v:[%p-%p] p:[%p-%p] as " "%s: %d\n", PCI_RID2BAR(bar->pci_resource_id), bar->vbase, (char *)bar->vbase + bar->size - 1, (void *)bar->pbase, (void *)(bar->pbase + bar->size - 1), intel_ntb_vm_memattr_to_str(mapmode), rc); /* Proceed anyway */ return (0); } static void intel_ntb_unmap_pci_bar(struct ntb_softc *ntb) { struct ntb_pci_bar_info *current_bar; int i; for (i = 0; i < NTB_MAX_BARS; i++) { current_bar = &ntb->bar_info[i]; if (current_bar->pci_resource != NULL) bus_release_resource(ntb->device, SYS_RES_MEMORY, current_bar->pci_resource_id, current_bar->pci_resource); } } static int intel_ntb_setup_msix(struct ntb_softc *ntb, uint32_t num_vectors) { uint32_t i; int rc; for (i = 0; i < num_vectors; i++) { ntb->int_info[i].rid = i + 1; ntb->int_info[i].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ, &ntb->int_info[i].rid, RF_ACTIVE); if (ntb->int_info[i].res == NULL) { device_printf(ntb->device, "bus_alloc_resource failed\n"); return (ENOMEM); } ntb->int_info[i].tag = NULL; ntb->allocated_interrupts++; rc = bus_setup_intr(ntb->device, ntb->int_info[i].res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_vec_isr, &ntb->msix_vec[i], &ntb->int_info[i].tag); if (rc != 0) { device_printf(ntb->device, "bus_setup_intr failed\n"); return (ENXIO); } } return (0); } /* * The Linux NTB driver drops from MSI-X to legacy INTx if a unique vector * cannot be allocated for each MSI-X message. JHB seems to think remapping * should be okay. This tunable should enable us to test that hypothesis * when someone gets their hands on some Xeon hardware. 
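 * * As a concrete illustration of the remapping below: with four desired
 * messages and only two available ithreads, intel_ntb_remap_msix() builds
 * the table { 1, 2, 1, 2 }, i.e. message i is routed to ithread
 * (i % avail) + 1 in round-robin order.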
*/ static int ntb_force_remap_mode; SYSCTL_INT(_hw_ntb, OID_AUTO, force_remap_mode, CTLFLAG_RDTUN, &ntb_force_remap_mode, 0, "If enabled, force MSI-X messages to be remapped" " to a smaller number of ithreads, even if the desired number are " "available"); /* * In case it is NOT ok, give consumers an abort button. */ static int ntb_prefer_intx; SYSCTL_INT(_hw_ntb, OID_AUTO, prefer_intx_to_remap, CTLFLAG_RDTUN, &ntb_prefer_intx, 0, "If enabled, prefer to use legacy INTx mode rather " "than remapping MSI-X messages over available slots (match Linux driver " "behavior)"); /* * Remap the desired number of MSI-X messages to available ithreads in a simple * round-robin fashion. */ static int intel_ntb_remap_msix(device_t dev, uint32_t desired, uint32_t avail) { u_int *vectors; uint32_t i; int rc; if (ntb_prefer_intx != 0) return (ENXIO); vectors = malloc(desired * sizeof(*vectors), M_NTB, M_ZERO | M_WAITOK); for (i = 0; i < desired; i++) vectors[i] = (i % avail) + 1; rc = pci_remap_msix(dev, desired, vectors); free(vectors, M_NTB); return (rc); } static int intel_ntb_init_isr(struct ntb_softc *ntb) { uint32_t desired_vectors, num_vectors; int rc; ntb->allocated_interrupts = 0; ntb->last_ts = ticks; /* * Mask all doorbell interrupts. (Except link events!) */ DB_MASK_LOCK(ntb); ntb->db_mask = ntb->db_valid_mask; db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); num_vectors = desired_vectors = MIN(pci_msix_count(ntb->device), ntb->db_count); if (desired_vectors >= 1) { rc = pci_alloc_msix(ntb->device, &num_vectors); if (ntb_force_remap_mode != 0 && rc == 0 && num_vectors == desired_vectors) num_vectors--; if (rc == 0 && num_vectors < desired_vectors) { rc = intel_ntb_remap_msix(ntb->device, desired_vectors, num_vectors); if (rc == 0) num_vectors = desired_vectors; else pci_release_msi(ntb->device); } if (rc != 0) num_vectors = 1; } else num_vectors = 1; if (ntb->type == NTB_XEON && num_vectors < ntb->db_vec_count) { if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { device_printf(ntb->device, "Errata workaround does not support MSI or INTX\n"); return (EINVAL); } ntb->db_vec_count = 1; ntb->db_vec_shift = XEON_DB_TOTAL_SHIFT; rc = intel_ntb_setup_legacy_interrupt(ntb); } else { if (num_vectors - 1 != XEON_NONLINK_DB_MSIX_BITS && HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { device_printf(ntb->device, "Errata workaround expects %d doorbell bits\n", XEON_NONLINK_DB_MSIX_BITS); return (EINVAL); } intel_ntb_create_msix_vec(ntb, num_vectors); rc = intel_ntb_setup_msix(ntb, num_vectors); } if (rc != 0) { device_printf(ntb->device, "Error allocating interrupts: %d\n", rc); intel_ntb_free_msix_vec(ntb); } return (rc); } static int intel_ntb_setup_legacy_interrupt(struct ntb_softc *ntb) { int rc; ntb->int_info[0].rid = 0; ntb->int_info[0].res = bus_alloc_resource_any(ntb->device, SYS_RES_IRQ, &ntb->int_info[0].rid, RF_SHAREABLE|RF_ACTIVE); if (ntb->int_info[0].res == NULL) { device_printf(ntb->device, "bus_alloc_resource failed\n"); return (ENOMEM); } ntb->int_info[0].tag = NULL; ntb->allocated_interrupts = 1; rc = bus_setup_intr(ntb->device, ntb->int_info[0].res, INTR_MPSAFE | INTR_TYPE_MISC, NULL, ndev_irq_isr, ntb, &ntb->int_info[0].tag); if (rc != 0) { device_printf(ntb->device, "bus_setup_intr failed\n"); return (ENXIO); } return (0); } static void intel_ntb_teardown_interrupts(struct ntb_softc *ntb) { struct ntb_int_info *current_int; int i; for (i = 0; i < ntb->allocated_interrupts; i++) { current_int = &ntb->int_info[i]; if (current_int->tag != NULL) bus_teardown_intr(ntb->device, 
current_int->res, current_int->tag); if (current_int->res != NULL) bus_release_resource(ntb->device, SYS_RES_IRQ, rman_get_rid(current_int->res), current_int->res); } intel_ntb_free_msix_vec(ntb); pci_release_msi(ntb->device); } /* * Doorbell register and mask are 64-bit on Atom, 16-bit on Xeon. Abstract it * out to make code clearer. */ static inline uint64_t db_ioread(struct ntb_softc *ntb, uint64_t regoff) { if (ntb->type == NTB_ATOM) return (intel_ntb_reg_read(8, regoff)); KASSERT(ntb->type == NTB_XEON, ("bad ntb type")); return (intel_ntb_reg_read(2, regoff)); } static inline void db_iowrite(struct ntb_softc *ntb, uint64_t regoff, uint64_t val) { KASSERT((val & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(val & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); if (regoff == ntb->self_reg->db_mask) DB_MASK_ASSERT(ntb, MA_OWNED); db_iowrite_raw(ntb, regoff, val); } static inline void db_iowrite_raw(struct ntb_softc *ntb, uint64_t regoff, uint64_t val) { if (ntb->type == NTB_ATOM) { intel_ntb_reg_write(8, regoff, val); return; } KASSERT(ntb->type == NTB_XEON, ("bad ntb type")); intel_ntb_reg_write(2, regoff, (uint16_t)val); } static void intel_ntb_db_set_mask(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); DB_MASK_LOCK(ntb); ntb->db_mask |= bits; if (!HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); } static void intel_ntb_db_clear_mask(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); uint64_t ibits; int i; KASSERT((bits & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(bits & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); DB_MASK_LOCK(ntb); ibits = ntb->fake_db & ntb->db_mask & bits; ntb->db_mask &= ~bits; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { /* Simulate fake interrupts if unmasked DB bits are set. */ ntb->force_db |= ibits; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { if ((ibits & intel_ntb_db_vector_mask(dev, i)) != 0) swi_sched(ntb->int_info[i].tag, 0); } } else { db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); } DB_MASK_UNLOCK(ntb); } static uint64_t intel_ntb_db_read(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) return (ntb->fake_db); return (db_ioread(ntb, ntb->self_reg->db_bell)); } static void intel_ntb_db_clear(device_t dev, uint64_t bits) { struct ntb_softc *ntb = device_get_softc(dev); KASSERT((bits & ~ntb->db_valid_mask) == 0, ("%s: Invalid bits 0x%jx (valid: 0x%jx)", __func__, (uintmax_t)(bits & ~ntb->db_valid_mask), (uintmax_t)ntb->db_valid_mask)); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { DB_MASK_LOCK(ntb); ntb->fake_db &= ~bits; DB_MASK_UNLOCK(ntb); return; } db_iowrite(ntb, ntb->self_reg->db_bell, bits); } static inline uint64_t intel_ntb_vec_mask(struct ntb_softc *ntb, uint64_t db_vector) { uint64_t shift, mask; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { /* * Remap vectors in a custom way so that at least the first * three doorbells do not generate stray events. * This breaks Linux compatibility (if one existed) * when more than one DB is used (not by if_ntb).
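 * * Concretely (see the code below): vectors 0 through
 * XEON_NONLINK_DB_MSIX_BITS - 2 each carry a single doorbell bit
 * (1 << db_vector), while the last vector, XEON_NONLINK_DB_MSIX_BITS - 1,
 * collects all the remaining doorbells via the fixed mask 0x7ffc.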
*/ if (db_vector < XEON_NONLINK_DB_MSIX_BITS - 1) return (1 << db_vector); if (db_vector == XEON_NONLINK_DB_MSIX_BITS - 1) return (0x7ffc); } shift = ntb->db_vec_shift; mask = (1ull << shift) - 1; return (mask << (shift * db_vector)); } static void intel_ntb_interrupt(struct ntb_softc *ntb, uint32_t vec) { uint64_t vec_mask; ntb->last_ts = ticks; vec_mask = intel_ntb_vec_mask(ntb, vec); if ((vec_mask & ntb->db_link_mask) != 0) { if (intel_ntb_poll_link(ntb)) ntb_link_event(ntb->device); } if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) && (vec_mask & ntb->db_link_mask) == 0) { DB_MASK_LOCK(ntb); /* * Do not report same DB events again if not cleared yet, * unless the mask was just cleared for them and this * interrupt handler call can be the consequence of it. */ vec_mask &= ~ntb->fake_db | ntb->force_db; ntb->force_db &= ~vec_mask; /* Update our internal doorbell register. */ ntb->fake_db |= vec_mask; /* Do not report masked DB events. */ vec_mask &= ~ntb->db_mask; DB_MASK_UNLOCK(ntb); } if ((vec_mask & ntb->db_valid_mask) != 0) ntb_db_event(ntb->device, vec); } static void ndev_vec_isr(void *arg) { struct ntb_vec *nvec = arg; intel_ntb_interrupt(nvec->ntb, nvec->num); } static void ndev_irq_isr(void *arg) { /* If we couldn't set up MSI-X, we only have the one vector. */ intel_ntb_interrupt(arg, 0); } static int intel_ntb_create_msix_vec(struct ntb_softc *ntb, uint32_t num_vectors) { uint32_t i; ntb->msix_vec = malloc(num_vectors * sizeof(*ntb->msix_vec), M_NTB, M_ZERO | M_WAITOK); for (i = 0; i < num_vectors; i++) { ntb->msix_vec[i].num = i; ntb->msix_vec[i].ntb = ntb; } return (0); } static void intel_ntb_free_msix_vec(struct ntb_softc *ntb) { if (ntb->msix_vec == NULL) return; free(ntb->msix_vec, M_NTB); ntb->msix_vec = NULL; } static void intel_ntb_get_msix_info(struct ntb_softc *ntb) { struct pci_devinfo *dinfo; struct pcicfg_msix *msix; uint32_t laddr, data, i, offset; dinfo = device_get_ivars(ntb->device); msix = &dinfo->cfg.msix; CTASSERT(XEON_NONLINK_DB_MSIX_BITS == nitems(ntb->msix_data)); for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { offset = msix->msix_table_offset + i * PCI_MSIX_ENTRY_SIZE; laddr = bus_read_4(msix->msix_table_res, offset + PCI_MSIX_ENTRY_LOWER_ADDR); intel_ntb_printf(2, "local MSIX addr(%u): 0x%x\n", i, laddr); KASSERT((laddr & MSI_INTEL_ADDR_BASE) == MSI_INTEL_ADDR_BASE, ("local MSIX addr 0x%x not in MSI base 0x%x", laddr, MSI_INTEL_ADDR_BASE)); ntb->msix_data[i].nmd_ofs = laddr; data = bus_read_4(msix->msix_table_res, offset + PCI_MSIX_ENTRY_DATA); intel_ntb_printf(2, "local MSIX data(%u): 0x%x\n", i, data); ntb->msix_data[i].nmd_data = data; } } static struct ntb_hw_info * intel_ntb_get_device_info(uint32_t device_id) { - struct ntb_hw_info *ep = pci_ids; + struct ntb_hw_info *ep; - while (ep->device_id) { + for (ep = pci_ids; ep < &pci_ids[nitems(pci_ids)]; ep++) { if (ep->device_id == device_id) return (ep); - ++ep; } return (NULL); } static void intel_ntb_teardown_xeon(struct ntb_softc *ntb) { if (ntb->reg != NULL) intel_ntb_link_disable(ntb->device); } static void intel_ntb_detect_max_mw(struct ntb_softc *ntb) { if (ntb->type == NTB_ATOM) { ntb->mw_count = ATOM_MW_COUNT; return; } if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) ntb->mw_count = XEON_HSX_SPLIT_MW_COUNT; else ntb->mw_count = XEON_SNB_MW_COUNT; } static int intel_ntb_detect_xeon(struct ntb_softc *ntb) { uint8_t ppd, conn_type; ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 1); ntb->ppd = ppd; if ((ppd & XEON_PPD_DEV_TYPE) != 0) ntb->dev_type = NTB_DEV_DSD; else ntb->dev_type = NTB_DEV_USD; if ((ppd 
& XEON_PPD_SPLIT_BAR) != 0) ntb->features |= NTB_SPLIT_BAR; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP) && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { device_printf(ntb->device, "Can not apply SB01BASE_LOCKUP workaround " "with split BARs disabled!\n"); device_printf(ntb->device, "Expect system hangs under heavy NTB traffic!\n"); ntb->features &= ~NTB_SB01BASE_LOCKUP; } /* * SDOORBELL errata workaround gets in the way of SB01BASE_LOCKUP * errata workaround; only do one at a time. */ if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) ntb->features &= ~NTB_SDOORBELL_LOCKUP; conn_type = ppd & XEON_PPD_CONN_TYPE; switch (conn_type) { case NTB_CONN_B2B: ntb->conn_type = conn_type; break; case NTB_CONN_RP: case NTB_CONN_TRANSPARENT: default: device_printf(ntb->device, "Unsupported connection type: %u\n", (unsigned)conn_type); return (ENXIO); } return (0); } static int intel_ntb_detect_atom(struct ntb_softc *ntb) { uint32_t ppd, conn_type; ppd = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4); ntb->ppd = ppd; if ((ppd & ATOM_PPD_DEV_TYPE) != 0) ntb->dev_type = NTB_DEV_DSD; else ntb->dev_type = NTB_DEV_USD; conn_type = (ppd & ATOM_PPD_CONN_TYPE) >> 8; switch (conn_type) { case NTB_CONN_B2B: ntb->conn_type = conn_type; break; default: device_printf(ntb->device, "Unsupported NTB configuration\n"); return (ENXIO); } return (0); } static int intel_ntb_xeon_init_dev(struct ntb_softc *ntb) { int rc; ntb->spad_count = XEON_SPAD_COUNT; ntb->db_count = XEON_DB_COUNT; ntb->db_link_mask = XEON_DB_LINK_BIT; ntb->db_vec_count = XEON_DB_MSIX_VECTOR_COUNT; ntb->db_vec_shift = XEON_DB_MSIX_VECTOR_SHIFT; if (ntb->conn_type != NTB_CONN_B2B) { device_printf(ntb->device, "Connection type %d not supported\n", ntb->conn_type); return (ENXIO); } ntb->reg = &xeon_reg; ntb->self_reg = &xeon_pri_reg; ntb->peer_reg = &xeon_b2b_reg; ntb->xlat_reg = &xeon_sec_xlat; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { ntb->force_db = ntb->fake_db = 0; ntb->msix_mw_idx = (ntb->mw_count + g_ntb_msix_idx) % ntb->mw_count; intel_ntb_printf(2, "Setting up MSIX mw idx %d means %u\n", g_ntb_msix_idx, ntb->msix_mw_idx); rc = intel_ntb_mw_set_wc_internal(ntb, ntb->msix_mw_idx, VM_MEMATTR_UNCACHEABLE); KASSERT(rc == 0, ("shouldn't fail")); } else if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) { /* * There is a Xeon hardware errata related to writes to SDOORBELL or * B2BDOORBELL in conjunction with inbound access to NTB MMIO space, * which may hang the system. To workaround this, use a memory * window to access the interrupt and scratch pad registers on the * remote system. */ ntb->b2b_mw_idx = (ntb->mw_count + g_ntb_mw_idx) % ntb->mw_count; intel_ntb_printf(2, "Setting up b2b mw idx %d means %u\n", g_ntb_mw_idx, ntb->b2b_mw_idx); rc = intel_ntb_mw_set_wc_internal(ntb, ntb->b2b_mw_idx, VM_MEMATTR_UNCACHEABLE); KASSERT(rc == 0, ("shouldn't fail")); } else if (HAS_FEATURE(ntb, NTB_B2BDOORBELL_BIT14)) /* * HW Errata on bit 14 of b2bdoorbell register. Writes will not be * mirrored to the remote system. Shrink the number of bits by one, * since bit 14 is the last bit. * * On REGS_THRU_MW errata mode, we don't use the b2bdoorbell register * anyway. Nor for non-B2B connection types. 
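 * * The workaround below is simply ntb->db_count = XEON_DB_COUNT - 1: with
 * bit 14 beyond db_count, db_valid_mask never includes it and the driver
 * never writes it.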
*/ ntb->db_count = XEON_DB_COUNT - 1; ntb->db_valid_mask = (1ull << ntb->db_count) - 1; if (ntb->dev_type == NTB_DEV_USD) rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_dsd_addr, &xeon_b2b_usd_addr); else rc = xeon_setup_b2b_mw(ntb, &xeon_b2b_usd_addr, &xeon_b2b_dsd_addr); if (rc != 0) return (rc); /* Enable Bus Master and Memory Space on the secondary side */ intel_ntb_reg_write(2, XEON_SPCICMD_OFFSET, PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); /* * Mask all doorbell interrupts. */ DB_MASK_LOCK(ntb); ntb->db_mask = ntb->db_valid_mask; db_iowrite(ntb, ntb->self_reg->db_mask, ntb->db_mask); DB_MASK_UNLOCK(ntb); rc = intel_ntb_init_isr(ntb); return (rc); } static int intel_ntb_atom_init_dev(struct ntb_softc *ntb) { int error; KASSERT(ntb->conn_type == NTB_CONN_B2B, ("Unsupported NTB configuration (%d)\n", ntb->conn_type)); ntb->spad_count = ATOM_SPAD_COUNT; ntb->db_count = ATOM_DB_COUNT; ntb->db_vec_count = ATOM_DB_MSIX_VECTOR_COUNT; ntb->db_vec_shift = ATOM_DB_MSIX_VECTOR_SHIFT; ntb->db_valid_mask = (1ull << ntb->db_count) - 1; ntb->reg = &atom_reg; ntb->self_reg = &atom_pri_reg; ntb->peer_reg = &atom_b2b_reg; ntb->xlat_reg = &atom_sec_xlat; /* * FIXME - MSI-X bug on early Atom HW, remove once internal issue is * resolved. Mask transaction layer internal parity errors. */ pci_write_config(ntb->device, 0xFC, 0x4, 4); configure_atom_secondary_side_bars(ntb); /* Enable Bus Master and Memory Space on the secondary side */ intel_ntb_reg_write(2, ATOM_SPCICMD_OFFSET, PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); error = intel_ntb_init_isr(ntb); if (error != 0) return (error); /* Initiate PCI-E link training */ intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); callout_reset(&ntb->heartbeat_timer, 0, atom_link_hb, ntb); return (0); } /* XXX: Linux driver doesn't seem to do any of this for Atom. */ static void configure_atom_secondary_side_bars(struct ntb_softc *ntb) { if (ntb->dev_type == NTB_DEV_USD) { intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET, XEON_B2B_BAR4_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64); } else { intel_ntb_reg_write(8, ATOM_PBAR2XLAT_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_PBAR4XLAT_OFFSET, XEON_B2B_BAR4_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR23_OFFSET, XEON_B2B_BAR2_ADDR64); intel_ntb_reg_write(8, ATOM_MBAR45_OFFSET, XEON_B2B_BAR4_ADDR64); } } /* * When working around Xeon SDOORBELL errata by remapping remote registers in a * MW, limit the B2B MW to half a MW. By sharing a MW, half the shared MW * remains for use by a higher layer. * * Will only be used if working around SDOORBELL errata and the BIOS-configured * MW size is sufficiently large. */ static unsigned int ntb_b2b_mw_share; SYSCTL_UINT(_hw_ntb, OID_AUTO, b2b_mw_share, CTLFLAG_RDTUN, &ntb_b2b_mw_share, 0, "If enabled (non-zero), prefer to share half of the B2B peer register " "MW with higher level consumers. 
Both sides of the NTB MUST set the same " "value here."); static void xeon_reset_sbar_size(struct ntb_softc *ntb, enum ntb_bar idx, enum ntb_bar regbar) { struct ntb_pci_bar_info *bar; uint8_t bar_sz; if (!HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_3) return; bar = &ntb->bar_info[idx]; bar_sz = pci_read_config(ntb->device, bar->psz_off, 1); if (idx == regbar) { if (ntb->b2b_off != 0) bar_sz--; else bar_sz = 0; } pci_write_config(ntb->device, bar->ssz_off, bar_sz, 1); bar_sz = pci_read_config(ntb->device, bar->ssz_off, 1); (void)bar_sz; } static void xeon_set_sbar_base_and_limit(struct ntb_softc *ntb, uint64_t bar_addr, enum ntb_bar idx, enum ntb_bar regbar) { uint64_t reg_val; uint32_t base_reg, lmt_reg; bar_get_xlat_params(ntb, idx, &base_reg, NULL, &lmt_reg); if (idx == regbar) { if (ntb->b2b_off) bar_addr += ntb->b2b_off; else bar_addr = 0; } if (!bar_is_64bit(ntb, idx)) { intel_ntb_reg_write(4, base_reg, bar_addr); reg_val = intel_ntb_reg_read(4, base_reg); (void)reg_val; intel_ntb_reg_write(4, lmt_reg, bar_addr); reg_val = intel_ntb_reg_read(4, lmt_reg); (void)reg_val; } else { intel_ntb_reg_write(8, base_reg, bar_addr); reg_val = intel_ntb_reg_read(8, base_reg); (void)reg_val; intel_ntb_reg_write(8, lmt_reg, bar_addr); reg_val = intel_ntb_reg_read(8, lmt_reg); (void)reg_val; } } static void xeon_set_pbar_xlat(struct ntb_softc *ntb, uint64_t base_addr, enum ntb_bar idx) { struct ntb_pci_bar_info *bar; bar = &ntb->bar_info[idx]; if (HAS_FEATURE(ntb, NTB_SPLIT_BAR) && idx >= NTB_B2B_BAR_2) { intel_ntb_reg_write(4, bar->pbarxlat_off, base_addr); base_addr = intel_ntb_reg_read(4, bar->pbarxlat_off); } else { intel_ntb_reg_write(8, bar->pbarxlat_off, base_addr); base_addr = intel_ntb_reg_read(8, bar->pbarxlat_off); } (void)base_addr; } static int xeon_setup_b2b_mw(struct ntb_softc *ntb, const struct ntb_b2b_addr *addr, const struct ntb_b2b_addr *peer_addr) { struct ntb_pci_bar_info *b2b_bar; vm_size_t bar_size; uint64_t bar_addr; enum ntb_bar b2b_bar_num, i; if (ntb->b2b_mw_idx == B2B_MW_DISABLED) { b2b_bar = NULL; b2b_bar_num = NTB_CONFIG_BAR; ntb->b2b_off = 0; } else { b2b_bar_num = intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx); KASSERT(b2b_bar_num > 0 && b2b_bar_num < NTB_MAX_BARS, ("invalid b2b mw bar")); b2b_bar = &ntb->bar_info[b2b_bar_num]; bar_size = b2b_bar->size; if (ntb_b2b_mw_share != 0 && (bar_size >> 1) >= XEON_B2B_MIN_SIZE) ntb->b2b_off = bar_size >> 1; else if (bar_size >= XEON_B2B_MIN_SIZE) { ntb->b2b_off = 0; } else { device_printf(ntb->device, "B2B bar size is too small!\n"); return (EIO); } } /* * Reset the secondary bar sizes to match the primary bar sizes. * (Except, disable or halve the size of the B2B secondary bar.) */ for (i = NTB_B2B_BAR_1; i < NTB_MAX_BARS; i++) xeon_reset_sbar_size(ntb, i, b2b_bar_num); bar_addr = 0; if (b2b_bar_num == NTB_CONFIG_BAR) bar_addr = addr->bar0_addr; else if (b2b_bar_num == NTB_B2B_BAR_1) bar_addr = addr->bar2_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) bar_addr = addr->bar4_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2) bar_addr = addr->bar4_addr32; else if (b2b_bar_num == NTB_B2B_BAR_3) bar_addr = addr->bar5_addr32; else KASSERT(false, ("invalid bar")); intel_ntb_reg_write(8, XEON_SBAR0BASE_OFFSET, bar_addr); /* * Other SBARs are normally hit by the PBAR xlat, except for the b2b * register BAR. The B2B BAR is either disabled above or configured * half-size. It starts at PBAR xlat + offset. * * Also set up incoming BAR limits == base (zero length window). 
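 * * With limit == base the window has zero length, so nothing is translated
 * inbound until intel_ntb_mw_set_trans() later programs a real translation
 * and limit.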
*/ xeon_set_sbar_base_and_limit(ntb, addr->bar2_addr64, NTB_B2B_BAR_1, b2b_bar_num); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr32, NTB_B2B_BAR_2, b2b_bar_num); xeon_set_sbar_base_and_limit(ntb, addr->bar5_addr32, NTB_B2B_BAR_3, b2b_bar_num); } else xeon_set_sbar_base_and_limit(ntb, addr->bar4_addr64, NTB_B2B_BAR_2, b2b_bar_num); /* Zero incoming translation addrs */ intel_ntb_reg_write(8, XEON_SBAR2XLAT_OFFSET, 0); intel_ntb_reg_write(8, XEON_SBAR4XLAT_OFFSET, 0); if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { uint32_t xlat_reg, lmt_reg; enum ntb_bar bar_num; /* * We point the chosen MSIX MW BAR xlat to remote LAPIC for * workaround */ bar_num = intel_ntb_mw_to_bar(ntb, ntb->msix_mw_idx); bar_get_xlat_params(ntb, bar_num, NULL, &xlat_reg, &lmt_reg); if (bar_is_64bit(ntb, bar_num)) { intel_ntb_reg_write(8, xlat_reg, MSI_INTEL_ADDR_BASE); ntb->msix_xlat = intel_ntb_reg_read(8, xlat_reg); intel_ntb_reg_write(8, lmt_reg, 0); } else { intel_ntb_reg_write(4, xlat_reg, MSI_INTEL_ADDR_BASE); ntb->msix_xlat = intel_ntb_reg_read(4, xlat_reg); intel_ntb_reg_write(4, lmt_reg, 0); } ntb->peer_lapic_bar = &ntb->bar_info[bar_num]; } (void)intel_ntb_reg_read(8, XEON_SBAR2XLAT_OFFSET); (void)intel_ntb_reg_read(8, XEON_SBAR4XLAT_OFFSET); /* Zero outgoing translation limits (whole bar size windows) */ intel_ntb_reg_write(8, XEON_PBAR2LMT_OFFSET, 0); intel_ntb_reg_write(8, XEON_PBAR4LMT_OFFSET, 0); /* Set outgoing translation offsets */ xeon_set_pbar_xlat(ntb, peer_addr->bar2_addr64, NTB_B2B_BAR_1); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr32, NTB_B2B_BAR_2); xeon_set_pbar_xlat(ntb, peer_addr->bar5_addr32, NTB_B2B_BAR_3); } else xeon_set_pbar_xlat(ntb, peer_addr->bar4_addr64, NTB_B2B_BAR_2); /* Set the translation offset for B2B registers */ bar_addr = 0; if (b2b_bar_num == NTB_CONFIG_BAR) bar_addr = peer_addr->bar0_addr; else if (b2b_bar_num == NTB_B2B_BAR_1) bar_addr = peer_addr->bar2_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2 && !HAS_FEATURE(ntb, NTB_SPLIT_BAR)) bar_addr = peer_addr->bar4_addr64; else if (b2b_bar_num == NTB_B2B_BAR_2) bar_addr = peer_addr->bar4_addr32; else if (b2b_bar_num == NTB_B2B_BAR_3) bar_addr = peer_addr->bar5_addr32; else KASSERT(false, ("invalid bar")); /* * B2B_XLAT_OFFSET is a 64-bit register but can only be written 32 bits * at a time. 
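 * * Hence the two 4-byte writes that follow: the low half
 * (bar_addr & 0xffffffff) goes to XEON_B2B_XLAT_OFFSETL and the high half
 * (bar_addr >> 32) to XEON_B2B_XLAT_OFFSETU.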
*/ intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETL, bar_addr & 0xffffffff); intel_ntb_reg_write(4, XEON_B2B_XLAT_OFFSETU, bar_addr >> 32); return (0); } static inline bool _xeon_link_is_up(struct ntb_softc *ntb) { if (ntb->conn_type == NTB_CONN_TRANSPARENT) return (true); return ((ntb->lnk_sta & NTB_LINK_STATUS_ACTIVE) != 0); } static inline bool link_is_up(struct ntb_softc *ntb) { if (ntb->type == NTB_XEON) return (_xeon_link_is_up(ntb) && (ntb->peer_msix_good || !HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP))); KASSERT(ntb->type == NTB_ATOM, ("ntb type")); return ((ntb->ntb_ctl & ATOM_CNTL_LINK_DOWN) == 0); } static inline bool atom_link_is_err(struct ntb_softc *ntb) { uint32_t status; KASSERT(ntb->type == NTB_ATOM, ("ntb type")); status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET); if ((status & ATOM_LTSSMSTATEJMP_FORCEDETECT) != 0) return (true); status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET); return ((status & ATOM_IBIST_ERR_OFLOW) != 0); } /* Atom does not have link status interrupt, poll on that platform */ static void atom_link_hb(void *arg) { struct ntb_softc *ntb = arg; sbintime_t timo, poll_ts; timo = NTB_HB_TIMEOUT * hz; poll_ts = ntb->last_ts + timo; /* * Delay polling the link status if an interrupt was received, unless * the cached link status says the link is down. */ if ((sbintime_t)ticks - poll_ts < 0 && link_is_up(ntb)) { timo = poll_ts - ticks; goto out; } if (intel_ntb_poll_link(ntb)) ntb_link_event(ntb->device); if (!link_is_up(ntb) && atom_link_is_err(ntb)) { /* Link is down with error, proceed with recovery */ callout_reset(&ntb->lr_timer, 0, recover_atom_link, ntb); return; } out: callout_reset(&ntb->heartbeat_timer, timo, atom_link_hb, ntb); } static void atom_perform_link_restart(struct ntb_softc *ntb) { uint32_t status; /* Driver resets the NTB ModPhy lanes - magic! 
*/ intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0xe0); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x40); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG4, 0x60); intel_ntb_reg_write(1, ATOM_MODPHY_PCSREG6, 0x60); /* Driver waits 100ms to allow the NTB ModPhy to settle */ pause("ModPhy", hz / 10); /* Clear AER Errors, write to clear */ status = intel_ntb_reg_read(4, ATOM_ERRCORSTS_OFFSET); status &= PCIM_AER_COR_REPLAY_ROLLOVER; intel_ntb_reg_write(4, ATOM_ERRCORSTS_OFFSET, status); /* Clear unexpected electrical idle event in LTSSM, write to clear */ status = intel_ntb_reg_read(4, ATOM_LTSSMERRSTS0_OFFSET); status |= ATOM_LTSSMERRSTS0_UNEXPECTEDEI; intel_ntb_reg_write(4, ATOM_LTSSMERRSTS0_OFFSET, status); /* Clear DeSkew Buffer error, write to clear */ status = intel_ntb_reg_read(4, ATOM_DESKEWSTS_OFFSET); status |= ATOM_DESKEWSTS_DBERR; intel_ntb_reg_write(4, ATOM_DESKEWSTS_OFFSET, status); status = intel_ntb_reg_read(4, ATOM_IBSTERRRCRVSTS0_OFFSET); status &= ATOM_IBIST_ERR_OFLOW; intel_ntb_reg_write(4, ATOM_IBSTERRRCRVSTS0_OFFSET, status); /* Releases the NTB state machine to allow the link to retrain */ status = intel_ntb_reg_read(4, ATOM_LTSSMSTATEJMP_OFFSET); status &= ~ATOM_LTSSMSTATEJMP_FORCEDETECT; intel_ntb_reg_write(4, ATOM_LTSSMSTATEJMP_OFFSET, status); } static int intel_ntb_link_enable(device_t dev, enum ntb_speed speed __unused, enum ntb_width width __unused) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; intel_ntb_printf(2, "%s\n", __func__); if (ntb->type == NTB_ATOM) { pci_write_config(ntb->device, NTB_PPD_OFFSET, ntb->ppd | ATOM_PPD_INIT_LINK, 4); return (0); } if (ntb->conn_type == NTB_CONN_TRANSPARENT) { ntb_link_event(dev); return (0); } cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); cntl &= ~(NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK); cntl |= NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP; cntl |= NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP; if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) cntl |= NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP; intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl); return (0); } static int intel_ntb_link_disable(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; intel_ntb_printf(2, "%s\n", __func__); if (ntb->conn_type == NTB_CONN_TRANSPARENT) { ntb_link_event(dev); return (0); } cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); cntl &= ~(NTB_CNTL_P2S_BAR23_SNOOP | NTB_CNTL_S2P_BAR23_SNOOP); cntl &= ~(NTB_CNTL_P2S_BAR4_SNOOP | NTB_CNTL_S2P_BAR4_SNOOP); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) cntl &= ~(NTB_CNTL_P2S_BAR5_SNOOP | NTB_CNTL_S2P_BAR5_SNOOP); cntl |= NTB_CNTL_LINK_DISABLE | NTB_CNTL_CFG_LOCK; intel_ntb_reg_write(4, ntb->reg->ntb_ctl, cntl); return (0); } static bool intel_ntb_link_enabled(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint32_t cntl; if (ntb->type == NTB_ATOM) { cntl = pci_read_config(ntb->device, NTB_PPD_OFFSET, 4); return ((cntl & ATOM_PPD_INIT_LINK) != 0); } if (ntb->conn_type == NTB_CONN_TRANSPARENT) return (true); cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); return ((cntl & NTB_CNTL_LINK_DISABLE) == 0); } static void recover_atom_link(void *arg) { struct ntb_softc *ntb = arg; unsigned speed, width, oldspeed, oldwidth; uint32_t status32; atom_perform_link_restart(ntb); /* * There is a potential race between the 2 NTB devices recovering at * the same time. If the times are the same, the link will not recover * and the driver will be stuck in this loop forever. Add a random * interval to the recovery time to prevent this race. 
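 * * The code below sleeps between ATOM_LINK_RECOVERY_TIME and
 * 2 * ATOM_LINK_RECOVERY_TIME milliseconds, the extra delay drawn
 * uniformly at random from arc4random(), which desynchronizes the two
 * sides' retry attempts.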
*/ status32 = arc4random() % ATOM_LINK_RECOVERY_TIME; pause("Link", (ATOM_LINK_RECOVERY_TIME + status32) * hz / 1000); if (atom_link_is_err(ntb)) goto retry; status32 = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); if ((status32 & ATOM_CNTL_LINK_DOWN) != 0) goto out; status32 = intel_ntb_reg_read(4, ntb->reg->lnk_sta); width = NTB_LNK_STA_WIDTH(status32); speed = status32 & NTB_LINK_SPEED_MASK; oldwidth = NTB_LNK_STA_WIDTH(ntb->lnk_sta); oldspeed = ntb->lnk_sta & NTB_LINK_SPEED_MASK; if (oldwidth != width || oldspeed != speed) goto retry; out: callout_reset(&ntb->heartbeat_timer, NTB_HB_TIMEOUT * hz, atom_link_hb, ntb); return; retry: callout_reset(&ntb->lr_timer, NTB_HB_TIMEOUT * hz, recover_atom_link, ntb); } /* * Polls the HW link status register(s); returns true if something has changed. */ static bool intel_ntb_poll_link(struct ntb_softc *ntb) { uint32_t ntb_cntl; uint16_t reg_val; if (ntb->type == NTB_ATOM) { ntb_cntl = intel_ntb_reg_read(4, ntb->reg->ntb_ctl); if (ntb_cntl == ntb->ntb_ctl) return (false); ntb->ntb_ctl = ntb_cntl; ntb->lnk_sta = intel_ntb_reg_read(4, ntb->reg->lnk_sta); } else { db_iowrite_raw(ntb, ntb->self_reg->db_bell, ntb->db_link_mask); reg_val = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2); if (reg_val == ntb->lnk_sta) return (false); ntb->lnk_sta = reg_val; if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) { if (_xeon_link_is_up(ntb)) { if (!ntb->peer_msix_good) { callout_reset(&ntb->peer_msix_work, 0, intel_ntb_exchange_msix, ntb); return (false); } } else { ntb->peer_msix_good = false; ntb->peer_msix_done = false; } } } return (true); } static inline enum ntb_speed intel_ntb_link_sta_speed(struct ntb_softc *ntb) { if (!link_is_up(ntb)) return (NTB_SPEED_NONE); return (ntb->lnk_sta & NTB_LINK_SPEED_MASK); } static inline enum ntb_width intel_ntb_link_sta_width(struct ntb_softc *ntb) { if (!link_is_up(ntb)) return (NTB_WIDTH_NONE); return (NTB_LNK_STA_WIDTH(ntb->lnk_sta)); } SYSCTL_NODE(_hw_ntb, OID_AUTO, debug_info, CTLFLAG_RW, 0, "Driver state, statistics, and HW registers"); #define NTB_REGSZ_MASK (3ul << 30) #define NTB_REG_64 (1ul << 30) #define NTB_REG_32 (2ul << 30) #define NTB_REG_16 (3ul << 30) #define NTB_REG_8 (0ul << 30) #define NTB_DB_READ (1ul << 29) #define NTB_PCI_REG (1ul << 28) #define NTB_REGFLAGS_MASK (NTB_REGSZ_MASK | NTB_DB_READ | NTB_PCI_REG) static void intel_ntb_sysctl_init(struct ntb_softc *ntb) { struct sysctl_oid_list *globals, *tree_par, *regpar, *statpar, *errpar; struct sysctl_ctx_list *ctx; struct sysctl_oid *tree, *tmptree; ctx = device_get_sysctl_ctx(ntb->device); globals = SYSCTL_CHILDREN(device_get_sysctl_tree(ntb->device)); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "link_status", CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_link_status_human, "A", "Link status (human readable)"); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "active", CTLFLAG_RD | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_status, "IU", "Link status (1=active, 0=inactive)"); SYSCTL_ADD_PROC(ctx, globals, OID_AUTO, "admin_up", CTLFLAG_RW | CTLTYPE_UINT, ntb, 0, sysctl_handle_link_admin, "IU", "Set/get interface status (1=UP, 0=DOWN)"); tree = SYSCTL_ADD_NODE(ctx, globals, OID_AUTO, "debug_info", CTLFLAG_RD, NULL, "Driver state, statistics, and HW registers"); tree_par = SYSCTL_CHILDREN(tree); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "conn_type", CTLFLAG_RD, &ntb->conn_type, 0, "0 - Transparent; 1 - B2B; 2 - Root Port"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "dev_type", CTLFLAG_RD, &ntb->dev_type, 0, "0 - USD; 1 - DSD"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, 
"ppd", CTLFLAG_RD, &ntb->ppd, 0, "Raw PPD register (cached)"); if (ntb->b2b_mw_idx != B2B_MW_DISABLED) { SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "b2b_idx", CTLFLAG_RD, &ntb->b2b_mw_idx, 0, "Index of the MW used for B2B remote register access"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "b2b_off", CTLFLAG_RD, &ntb->b2b_off, "If non-zero, offset of B2B register region in shared MW"); } SYSCTL_ADD_PROC(ctx, tree_par, OID_AUTO, "features", CTLFLAG_RD | CTLTYPE_STRING, ntb, 0, sysctl_handle_features, "A", "Features/errata of this NTB device"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "ntb_ctl", CTLFLAG_RD, __DEVOLATILE(uint32_t *, &ntb->ntb_ctl), 0, "NTB CTL register (cached)"); SYSCTL_ADD_UINT(ctx, tree_par, OID_AUTO, "lnk_sta", CTLFLAG_RD, __DEVOLATILE(uint32_t *, &ntb->lnk_sta), 0, "LNK STA register (cached)"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "mw_count", CTLFLAG_RD, &ntb->mw_count, 0, "MW count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "spad_count", CTLFLAG_RD, &ntb->spad_count, 0, "Scratchpad count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_count", CTLFLAG_RD, &ntb->db_count, 0, "Doorbell count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_count", CTLFLAG_RD, &ntb->db_vec_count, 0, "Doorbell vector count"); SYSCTL_ADD_U8(ctx, tree_par, OID_AUTO, "db_vec_shift", CTLFLAG_RD, &ntb->db_vec_shift, 0, "Doorbell vector shift"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_valid_mask", CTLFLAG_RD, &ntb->db_valid_mask, "Doorbell valid mask"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_link_mask", CTLFLAG_RD, &ntb->db_link_mask, "Doorbell link mask"); SYSCTL_ADD_UQUAD(ctx, tree_par, OID_AUTO, "db_mask", CTLFLAG_RD, &ntb->db_mask, "Doorbell mask (cached)"); tmptree = SYSCTL_ADD_NODE(ctx, tree_par, OID_AUTO, "registers", CTLFLAG_RD, NULL, "Raw HW registers (big-endian)"); regpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ntbcntl", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->reg->ntb_ctl, sysctl_handle_register, "IU", "NTB Control register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcap", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | 0x19c, sysctl_handle_register, "IU", "NTB Link Capabilities"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnkcon", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | 0x1a0, sysctl_handle_register, "IU", "NTB Link Control register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_mask", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_mask, sysctl_handle_register, "QU", "Doorbell mask register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "db_bell", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | NTB_DB_READ | ntb->self_reg->db_bell, sysctl_handle_register, "QU", "Doorbell register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_xlat, sysctl_handle_register, "QU", "Incoming XLAT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_xlat, sysctl_handle_register, "IU", "Incoming XLAT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_xlat, sysctl_handle_register, "IU", "Incoming XLAT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_xlat45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_xlat, sysctl_handle_register, "QU", "Incoming XLAT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, 
OID_AUTO, "incoming_lmt23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_limit, sysctl_handle_register, "QU", "Incoming LMT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_limit, sysctl_handle_register, "IU", "Incoming LMT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_limit, sysctl_handle_register, "IU", "Incoming LMT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "incoming_lmt45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_limit, sysctl_handle_register, "QU", "Incoming LMT45 register"); } if (ntb->type == NTB_ATOM) return; tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_stats", CTLFLAG_RD, NULL, "Xeon HW statistics"); statpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, statpar, OID_AUTO, "upstream_mem_miss", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | XEON_USMEMMISS_OFFSET, sysctl_handle_register, "SU", "Upstream Memory Miss"); tmptree = SYSCTL_ADD_NODE(ctx, regpar, OID_AUTO, "xeon_hw_err", CTLFLAG_RD, NULL, "Xeon HW errors"); errpar = SYSCTL_CHILDREN(tmptree); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "ppd", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | NTB_PPD_OFFSET, sysctl_handle_register, "CU", "PPD"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar23_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR23SZ_OFFSET, sysctl_handle_register, "CU", "PBAR23 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar4_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR4SZ_OFFSET, sysctl_handle_register, "CU", "PBAR4 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "pbar5_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_PBAR5SZ_OFFSET, sysctl_handle_register, "CU", "PBAR5 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR23SZ_OFFSET, sysctl_handle_register, "CU", "SBAR23 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR4SZ_OFFSET, sysctl_handle_register, "CU", "SBAR4 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_sz", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_8 | NTB_PCI_REG | XEON_SBAR5SZ_OFFSET, sysctl_handle_register, "CU", "SBAR5 SZ (log2)"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "devsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_DEVSTS_OFFSET, sysctl_handle_register, "SU", "DEVSTS"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "lnksts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_LINK_STATUS_OFFSET, sysctl_handle_register, "SU", "LNKSTS"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "slnksts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_16 | NTB_PCI_REG | XEON_SLINK_STATUS_OFFSET, sysctl_handle_register, "SU", "SLNKSTS"); SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "uncerrsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | NTB_PCI_REG | XEON_UNCERRSTS_OFFSET, sysctl_handle_register, "IU", "UNCERRSTS"); SYSCTL_ADD_PROC(ctx, errpar, OID_AUTO, "corerrsts", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | NTB_PCI_REG | XEON_CORERRSTS_OFFSET, sysctl_handle_register, "IU", "CORERRSTS"); if (ntb->conn_type != NTB_CONN_B2B) return; SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, 
NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_1].pbarxlat_off, sysctl_handle_register, "QU", "Outgoing XLAT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off, sysctl_handle_register, "IU", "Outgoing XLAT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->bar_info[NTB_B2B_BAR_3].pbarxlat_off, sysctl_handle_register, "IU", "Outgoing XLAT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_xlat45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->bar_info[NTB_B2B_BAR_2].pbarxlat_off, sysctl_handle_register, "QU", "Outgoing XLAT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt23", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | XEON_PBAR2LMT_OFFSET, sysctl_handle_register, "QU", "Outgoing LMT23 register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt4", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | XEON_PBAR4LMT_OFFSET, sysctl_handle_register, "IU", "Outgoing LMT4 register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt5", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | XEON_PBAR5LMT_OFFSET, sysctl_handle_register, "IU", "Outgoing LMT5 register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "outgoing_lmt45", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | XEON_PBAR4LMT_OFFSET, sysctl_handle_register, "QU", "Outgoing LMT45 register"); } SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar01_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar0_base, sysctl_handle_register, "QU", "Secondary BAR01 base register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar23_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar2_base, sysctl_handle_register, "QU", "Secondary BAR23 base register"); if (HAS_FEATURE(ntb, NTB_SPLIT_BAR)) { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar4_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar4_base, sysctl_handle_register, "IU", "Secondary BAR4 base register"); SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar5_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_32 | ntb->xlat_reg->bar5_base, sysctl_handle_register, "IU", "Secondary BAR5 base register"); } else { SYSCTL_ADD_PROC(ctx, regpar, OID_AUTO, "sbar45_base", CTLFLAG_RD | CTLTYPE_OPAQUE, ntb, NTB_REG_64 | ntb->xlat_reg->bar4_base, sysctl_handle_register, "QU", "Secondary BAR45 base register"); } } static int sysctl_handle_features(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; struct sbuf sb; int error; sbuf_new_for_sysctl(&sb, NULL, 256, req); sbuf_printf(&sb, "%b", ntb->features, NTB_FEATURES_STR); error = sbuf_finish(&sb); sbuf_delete(&sb); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_link_admin(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; unsigned old, new; int error; old = intel_ntb_link_enabled(ntb->device); error = SYSCTL_OUT(req, &old, sizeof(old)); if (error != 0 || req->newptr == NULL) return (error); error = SYSCTL_IN(req, &new, sizeof(new)); if (error != 0) return (error); intel_ntb_printf(0, "Admin set interface state to '%sabled'\n", (new != 0)? 
"en" : "dis"); if (new != 0) error = intel_ntb_link_enable(ntb->device, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); else error = intel_ntb_link_disable(ntb->device); return (error); } static int sysctl_handle_link_status_human(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; struct sbuf sb; enum ntb_speed speed; enum ntb_width width; int error; sbuf_new_for_sysctl(&sb, NULL, 32, req); if (intel_ntb_link_is_up(ntb->device, &speed, &width)) sbuf_printf(&sb, "up / PCIe Gen %u / Width x%u", (unsigned)speed, (unsigned)width); else sbuf_printf(&sb, "down"); error = sbuf_finish(&sb); sbuf_delete(&sb); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_link_status(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb = arg1; unsigned res; int error; res = intel_ntb_link_is_up(ntb->device, NULL, NULL); error = SYSCTL_OUT(req, &res, sizeof(res)); if (error || !req->newptr) return (error); return (EINVAL); } static int sysctl_handle_register(SYSCTL_HANDLER_ARGS) { struct ntb_softc *ntb; const void *outp; uintptr_t sz; uint64_t umv; char be[sizeof(umv)]; size_t outsz; uint32_t reg; bool db, pci; int error; ntb = arg1; reg = arg2 & ~NTB_REGFLAGS_MASK; sz = arg2 & NTB_REGSZ_MASK; db = (arg2 & NTB_DB_READ) != 0; pci = (arg2 & NTB_PCI_REG) != 0; KASSERT(!(db && pci), ("bogus")); if (db) { KASSERT(sz == NTB_REG_64, ("bogus")); umv = db_ioread(ntb, reg); outsz = sizeof(uint64_t); } else { switch (sz) { case NTB_REG_64: if (pci) umv = pci_read_config(ntb->device, reg, 8); else umv = intel_ntb_reg_read(8, reg); outsz = sizeof(uint64_t); break; case NTB_REG_32: if (pci) umv = pci_read_config(ntb->device, reg, 4); else umv = intel_ntb_reg_read(4, reg); outsz = sizeof(uint32_t); break; case NTB_REG_16: if (pci) umv = pci_read_config(ntb->device, reg, 2); else umv = intel_ntb_reg_read(2, reg); outsz = sizeof(uint16_t); break; case NTB_REG_8: if (pci) umv = pci_read_config(ntb->device, reg, 1); else umv = intel_ntb_reg_read(1, reg); outsz = sizeof(uint8_t); break; default: panic("bogus"); break; } } /* Encode bigendian so that sysctl -x is legible. */ be64enc(be, umv); outp = ((char *)be) + sizeof(umv) - outsz; error = SYSCTL_OUT(req, outp, outsz); if (error || !req->newptr) return (error); return (EINVAL); } static unsigned intel_ntb_user_mw_to_idx(struct ntb_softc *ntb, unsigned uidx) { if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 && uidx >= ntb->b2b_mw_idx) || (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx)) uidx++; if ((ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0 && uidx >= ntb->b2b_mw_idx) && (ntb->msix_mw_idx != B2B_MW_DISABLED && uidx >= ntb->msix_mw_idx)) uidx++; return (uidx); } #ifndef EARLY_AP_STARTUP static int msix_ready; static void intel_ntb_msix_ready(void *arg __unused) { msix_ready = 1; } SYSINIT(intel_ntb_msix_ready, SI_SUB_SMP, SI_ORDER_ANY, intel_ntb_msix_ready, NULL); #endif static void intel_ntb_exchange_msix(void *ctx) { struct ntb_softc *ntb; uint32_t val; unsigned i; ntb = ctx; if (ntb->peer_msix_good) goto msix_good; if (ntb->peer_msix_done) goto msix_done; #ifndef EARLY_AP_STARTUP /* Block MSIX negotiation until SMP started and IRQ reshuffled. 
*/ if (!msix_ready) goto reschedule; #endif intel_ntb_get_msix_info(ntb); for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DATA0 + i, ntb->msix_data[i].nmd_data); intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_OFS0 + i, ntb->msix_data[i].nmd_ofs - ntb->msix_xlat); } intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_GUARD, NTB_MSIX_VER_GUARD); intel_ntb_spad_read(ntb->device, NTB_MSIX_GUARD, &val); if (val != NTB_MSIX_VER_GUARD) goto reschedule; for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) { intel_ntb_spad_read(ntb->device, NTB_MSIX_DATA0 + i, &val); intel_ntb_printf(2, "remote MSIX data(%u): 0x%x\n", i, val); ntb->peer_msix_data[i].nmd_data = val; intel_ntb_spad_read(ntb->device, NTB_MSIX_OFS0 + i, &val); intel_ntb_printf(2, "remote MSIX addr(%u): 0x%x\n", i, val); ntb->peer_msix_data[i].nmd_ofs = val; } ntb->peer_msix_done = true; msix_done: intel_ntb_peer_spad_write(ntb->device, NTB_MSIX_DONE, NTB_MSIX_RECEIVED); intel_ntb_spad_read(ntb->device, NTB_MSIX_DONE, &val); if (val != NTB_MSIX_RECEIVED) goto reschedule; intel_ntb_spad_clear(ntb->device); ntb->peer_msix_good = true; /* Give peer time to see our NTB_MSIX_RECEIVED. */ goto reschedule; msix_good: intel_ntb_poll_link(ntb); ntb_link_event(ntb->device); return; reschedule: ntb->lnk_sta = pci_read_config(ntb->device, ntb->reg->lnk_sta, 2); if (_xeon_link_is_up(ntb)) { callout_reset(&ntb->peer_msix_work, hz * (ntb->peer_msix_good ? 2 : 1) / 10, intel_ntb_exchange_msix, ntb); } else intel_ntb_spad_clear(ntb->device); } /* * Public API to the rest of the OS */ static uint8_t intel_ntb_spad_count(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); return (ntb->spad_count); } static uint8_t intel_ntb_mw_count(device_t dev) { struct ntb_softc *ntb = device_get_softc(dev); uint8_t res; res = ntb->mw_count; if (ntb->b2b_mw_idx != B2B_MW_DISABLED && ntb->b2b_off == 0) res--; if (ntb->msix_mw_idx != B2B_MW_DISABLED) res--; return (res); } static int intel_ntb_spad_write(device_t dev, unsigned int idx, uint32_t val) { struct ntb_softc *ntb = device_get_softc(dev); if (idx >= ntb->spad_count) return (EINVAL); intel_ntb_reg_write(4, ntb->self_reg->spad + idx * 4, val); return (0); } /* * Zeros the local scratchpad. 
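 * (A loop of intel_ntb_spad_write(dev, i, 0) over all spad_count
 * registers.)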
 */

static void
intel_ntb_spad_clear(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	unsigned i;

	for (i = 0; i < ntb->spad_count; i++)
		intel_ntb_spad_write(dev, i, 0);
}

static int
intel_ntb_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	*val = intel_ntb_reg_read(4, ntb->self_reg->spad + idx * 4);

	return (0);
}

static int
intel_ntb_peer_spad_write(device_t dev, unsigned int idx, uint32_t val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
		intel_ntb_mw_write(4, XEON_SPAD_OFFSET + idx * 4, val);
	else
		intel_ntb_reg_write(4, ntb->peer_reg->spad + idx * 4, val);

	return (0);
}

static int
intel_ntb_peer_spad_read(device_t dev, unsigned int idx, uint32_t *val)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= ntb->spad_count)
		return (EINVAL);

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP))
		*val = intel_ntb_mw_read(4, XEON_SPAD_OFFSET + idx * 4);
	else
		*val = intel_ntb_reg_read(4, ntb->peer_reg->spad + idx * 4);

	return (0);
}

static int
intel_ntb_mw_get_range(device_t dev, unsigned mw_idx, vm_paddr_t *base,
    caddr_t *vbase, size_t *size, size_t *align, size_t *align_size,
    bus_addr_t *plimit)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	bus_addr_t limit;
	size_t bar_b2b_off;
	enum ntb_bar bar_num;

	if (mw_idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	mw_idx = intel_ntb_user_mw_to_idx(ntb, mw_idx);

	bar_num = intel_ntb_mw_to_bar(ntb, mw_idx);
	bar = &ntb->bar_info[bar_num];
	bar_b2b_off = 0;
	if (mw_idx == ntb->b2b_mw_idx) {
		KASSERT(ntb->b2b_off != 0,
		    ("user shouldn't get non-shared b2b mw"));
		bar_b2b_off = ntb->b2b_off;
	}

	if (bar_is_64bit(ntb, bar_num))
		limit = BUS_SPACE_MAXADDR;
	else
		limit = BUS_SPACE_MAXADDR_32BIT;

	if (base != NULL)
		*base = bar->pbase + bar_b2b_off;
	if (vbase != NULL)
		*vbase = bar->vbase + bar_b2b_off;
	if (size != NULL)
		*size = bar->size - bar_b2b_off;
	if (align != NULL)
		*align = bar->size;
	if (align_size != NULL)
		*align_size = 1;
	if (plimit != NULL)
		*plimit = limit;
	return (0);
}

static int
intel_ntb_mw_set_trans(device_t dev, unsigned idx, bus_addr_t addr,
    size_t size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t base, limit, reg_val;
	size_t bar_size, mw_size;
	uint32_t base_reg, xlat_reg, limit_reg;
	enum ntb_bar bar_num;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar_num = intel_ntb_mw_to_bar(ntb, idx);
	bar = &ntb->bar_info[bar_num];

	bar_size = bar->size;
	if (idx == ntb->b2b_mw_idx)
		mw_size = bar_size - ntb->b2b_off;
	else
		mw_size = bar_size;

	/* Hardware requires that addr is aligned to bar size */
	if ((addr & (bar_size - 1)) != 0)
		return (EINVAL);

	if (size > mw_size)
		return (EINVAL);

	bar_get_xlat_params(ntb, bar_num, &base_reg, &xlat_reg, &limit_reg);

	limit = 0;
	if (bar_is_64bit(ntb, bar_num)) {
		base = intel_ntb_reg_read(8, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(8, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(8, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(8, limit_reg, limit);
		reg_val = intel_ntb_reg_read(8, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			intel_ntb_reg_write(8, limit_reg, base);
			intel_ntb_reg_write(8, xlat_reg, 0);
			return (EIO);
		}
	} else {
		/* Configure 32-bit (split) BAR MW */
		if ((addr & UINT32_MAX) != addr)
			return (ERANGE);
		if (((addr + size) & UINT32_MAX) != (addr + size))
			return (ERANGE);

		base = intel_ntb_reg_read(4, base_reg) & BAR_HIGH_MASK;

		if (limit_reg != 0 && size != mw_size)
			limit = base + size;

		/* Set and verify translation address */
		intel_ntb_reg_write(4, xlat_reg, addr);
		reg_val = intel_ntb_reg_read(4, xlat_reg) & BAR_HIGH_MASK;
		if (reg_val != addr) {
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}

		/* Set and verify the limit */
		intel_ntb_reg_write(4, limit_reg, limit);
		reg_val = intel_ntb_reg_read(4, limit_reg) & BAR_HIGH_MASK;
		if (reg_val != limit) {
			intel_ntb_reg_write(4, limit_reg, base);
			intel_ntb_reg_write(4, xlat_reg, 0);
			return (EIO);
		}
	}
	return (0);
}

static int
intel_ntb_mw_clear_trans(device_t dev, unsigned mw_idx)
{

	return (intel_ntb_mw_set_trans(dev, mw_idx, 0, 0));
}

static int
intel_ntb_mw_get_wc(device_t dev, unsigned idx, vm_memattr_t *mode)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);
	idx = intel_ntb_user_mw_to_idx(ntb, idx);

	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
	*mode = bar->map_mode;
	return (0);
}

static int
intel_ntb_mw_set_wc(device_t dev, unsigned idx, vm_memattr_t mode)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (idx >= intel_ntb_mw_count(dev))
		return (EINVAL);

	idx = intel_ntb_user_mw_to_idx(ntb, idx);
	return (intel_ntb_mw_set_wc_internal(ntb, idx, mode));
}

static int
intel_ntb_mw_set_wc_internal(struct ntb_softc *ntb, unsigned idx,
    vm_memattr_t mode)
{
	struct ntb_pci_bar_info *bar;
	int rc;

	bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, idx)];
	if (bar->map_mode == mode)
		return (0);

	rc = pmap_change_attr((vm_offset_t)bar->vbase, bar->size, mode);
	if (rc == 0)
		bar->map_mode = mode;

	return (rc);
}

static void
intel_ntb_peer_db_set(device_t dev, uint64_t bit)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (HAS_FEATURE(ntb, NTB_SB01BASE_LOCKUP)) {
		struct ntb_pci_bar_info *lapic;
		unsigned i;

		lapic = ntb->peer_lapic_bar;

		for (i = 0; i < XEON_NONLINK_DB_MSIX_BITS; i++) {
			if ((bit & intel_ntb_db_vector_mask(dev, i)) != 0)
				bus_space_write_4(lapic->pci_bus_tag,
				    lapic->pci_bus_handle,
				    ntb->peer_msix_data[i].nmd_ofs,
				    ntb->peer_msix_data[i].nmd_data);
		}
		return;
	}

	if (HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		intel_ntb_mw_write(2, XEON_PDOORBELL_OFFSET, bit);
		return;
	}

	db_iowrite(ntb, ntb->peer_reg->db_bell, bit);
}

static int
intel_ntb_peer_db_addr(device_t dev, bus_addr_t *db_addr, vm_size_t *db_size)
{
	struct ntb_softc *ntb = device_get_softc(dev);
	struct ntb_pci_bar_info *bar;
	uint64_t regoff;

	KASSERT((db_addr != NULL && db_size != NULL), ("must be non-NULL"));

	if (!HAS_FEATURE(ntb, NTB_SDOORBELL_LOCKUP)) {
		bar = &ntb->bar_info[NTB_CONFIG_BAR];
		regoff = ntb->peer_reg->db_bell;
	} else {
		KASSERT(ntb->b2b_mw_idx != B2B_MW_DISABLED,
		    ("invalid b2b idx"));

		bar = &ntb->bar_info[intel_ntb_mw_to_bar(ntb, ntb->b2b_mw_idx)];
		regoff = XEON_PDOORBELL_OFFSET;
	}
	KASSERT(bar->pci_bus_tag != X86_BUS_SPACE_IO, ("uh oh"));

	/* HACK: Specific to current x86 bus implementation. */
	*db_addr = ((uint64_t)bar->pci_bus_handle + regoff);
	*db_size = ntb->reg->db_size;
	return (0);
}

static uint64_t
intel_ntb_db_valid_mask(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->db_valid_mask);
}

static int
intel_ntb_db_vector_count(device_t dev)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	return (ntb->db_vec_count);
}

static uint64_t
intel_ntb_db_vector_mask(device_t dev, uint32_t vector)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (vector > ntb->db_vec_count)
		return (0);
	return (ntb->db_valid_mask & intel_ntb_vec_mask(ntb, vector));
}

static bool
intel_ntb_link_is_up(device_t dev, enum ntb_speed *speed,
    enum ntb_width *width)
{
	struct ntb_softc *ntb = device_get_softc(dev);

	if (speed != NULL)
		*speed = intel_ntb_link_sta_speed(ntb);
	if (width != NULL)
		*width = intel_ntb_link_sta_width(ntb);
	return (link_is_up(ntb));
}

static void
save_bar_parameters(struct ntb_pci_bar_info *bar)
{

	bar->pci_bus_tag = rman_get_bustag(bar->pci_resource);
	bar->pci_bus_handle = rman_get_bushandle(bar->pci_resource);
	bar->pbase = rman_get_start(bar->pci_resource);
	bar->size = rman_get_size(bar->pci_resource);
	bar->vbase = rman_get_virtual(bar->pci_resource);
}

static device_method_t ntb_intel_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		intel_ntb_probe),
	DEVMETHOD(device_attach,	intel_ntb_attach),
	DEVMETHOD(device_detach,	intel_ntb_detach),
	/* Bus interface */
	DEVMETHOD(bus_child_location_str, ntb_child_location_str),
	DEVMETHOD(bus_print_child,	ntb_print_child),
	/* NTB interface */
	DEVMETHOD(ntb_link_is_up,	intel_ntb_link_is_up),
	DEVMETHOD(ntb_link_enable,	intel_ntb_link_enable),
	DEVMETHOD(ntb_link_disable,	intel_ntb_link_disable),
	DEVMETHOD(ntb_link_enabled,	intel_ntb_link_enabled),
	DEVMETHOD(ntb_mw_count,		intel_ntb_mw_count),
	DEVMETHOD(ntb_mw_get_range,	intel_ntb_mw_get_range),
	DEVMETHOD(ntb_mw_set_trans,	intel_ntb_mw_set_trans),
	DEVMETHOD(ntb_mw_clear_trans,	intel_ntb_mw_clear_trans),
	DEVMETHOD(ntb_mw_get_wc,	intel_ntb_mw_get_wc),
	DEVMETHOD(ntb_mw_set_wc,	intel_ntb_mw_set_wc),
	DEVMETHOD(ntb_spad_count,	intel_ntb_spad_count),
	DEVMETHOD(ntb_spad_clear,	intel_ntb_spad_clear),
	DEVMETHOD(ntb_spad_write,	intel_ntb_spad_write),
	DEVMETHOD(ntb_spad_read,	intel_ntb_spad_read),
	DEVMETHOD(ntb_peer_spad_write,	intel_ntb_peer_spad_write),
	DEVMETHOD(ntb_peer_spad_read,	intel_ntb_peer_spad_read),
	DEVMETHOD(ntb_db_valid_mask,	intel_ntb_db_valid_mask),
	DEVMETHOD(ntb_db_vector_count,	intel_ntb_db_vector_count),
	DEVMETHOD(ntb_db_vector_mask,	intel_ntb_db_vector_mask),
	DEVMETHOD(ntb_db_clear,		intel_ntb_db_clear),
	DEVMETHOD(ntb_db_clear_mask,	intel_ntb_db_clear_mask),
	DEVMETHOD(ntb_db_read,		intel_ntb_db_read),
	DEVMETHOD(ntb_db_set_mask,	intel_ntb_db_set_mask),
	DEVMETHOD(ntb_peer_db_addr,	intel_ntb_peer_db_addr),
	DEVMETHOD(ntb_peer_db_set,	intel_ntb_peer_db_set),
	DEVMETHOD_END
};

static DEFINE_CLASS_0(ntb_hw, ntb_intel_driver, ntb_intel_methods,
    sizeof(struct ntb_softc));
DRIVER_MODULE(ntb_hw_intel, pci, ntb_intel_driver, ntb_hw_devclass, NULL, NULL);
MODULE_DEPEND(ntb_hw_intel, ntb, 1, 1, 1);
MODULE_VERSION(ntb_hw_intel, 1);
+MODULE_PNP_INFO("W32:vendor/device;D:human", pci, ntb_hw_intel, pci_ids,
+    sizeof(pci_ids[0]), nitems(pci_ids));
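
For context, the mechanism these added lines use: MODULE_PNP_INFO records the
layout of a driver's device-ID table in the module's linker hints, so a
userland tool such as devmatch(8) can pick the right module for an unclaimed
device without loading every driver. Below is a minimal hedged sketch, not
part of this commit, showing the same pattern for a hypothetical "example"
PCI driver; the names example_pciid, example_ids, example_driver, and
example_devclass are illustrative only, and the exact semantics of the
descriptor fields should be checked against sys/sys/module.h.

/*
 * Hypothetical sketch (not from this change): exposing a PCI ID table
 * via MODULE_PNP_INFO.  The descriptor "W32:vendor/device" is assumed
 * to describe one 32-bit word combining the PCI vendor ID (low 16
 * bits) and device ID (high 16 bits); "D:human" a pointer to a
 * human-readable description string, matching the two leading fields
 * of each table entry in order.
 */
static const struct example_pciid {
	uint32_t	devid;	/* vendor in low word, device in high word */
	const char	*human;	/* description string for matching tools */
} example_ids[] = {
	/* 0x8086 = Intel vendor ID; device ID here is made up. */
	{ 0x12348086, "Example PCI device" },
};

DRIVER_MODULE(example, pci, example_driver, example_devclass, NULL, NULL);
MODULE_PNP_INFO("W32:vendor/device;D:human", pci, example, example_ids,
    sizeof(example_ids[0]), nitems(example_ids));

Note that the descriptor string, the entry size, and the entry count are all
the metadata the tool gets: the table itself is emitted verbatim into the
module, which is why the string must mirror the struct layout field for field.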