diff --git a/sys/ddb/db_expr.c b/sys/ddb/db_expr.c index b3198611a04c..340951089248 100644 --- a/sys/ddb/db_expr.c +++ b/sys/ddb/db_expr.c @@ -1,379 +1,380 @@ /*- * SPDX-License-Identifier: MIT-CMU * * Mach Operating System * Copyright (c) 1991,1990 Carnegie Mellon University * All Rights Reserved. * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Author: David B. 
Golub, Carnegie Mellon University * Date: 7/90 */ #include __FBSDID("$FreeBSD$"); #include +#include #include #include #include #include static bool db_add_expr(db_expr_t *valuep); static bool db_mult_expr(db_expr_t *valuep); static bool db_shift_expr(db_expr_t *valuep); static bool db_term(db_expr_t *valuep); static bool db_unary(db_expr_t *valuep); static bool db_logical_or_expr(db_expr_t *valuep); static bool db_logical_and_expr(db_expr_t *valuep); static bool db_logical_relation_expr(db_expr_t *valuep); static bool db_term(db_expr_t *valuep) { int t; t = db_read_token(); if (t == tIDENT) { if (!db_value_of_name(db_tok_string, valuep) && !db_value_of_name_pcpu(db_tok_string, valuep) && !db_value_of_name_vnet(db_tok_string, valuep)) { db_printf("Symbol '%s' not found\n", db_tok_string); db_error(NULL); /*NOTREACHED*/ } return (true); } if (t == tNUMBER) { *valuep = (db_expr_t)db_tok_number; return (true); } if (t == tDOT) { *valuep = (db_expr_t)db_dot; return (true); } if (t == tDOTDOT) { *valuep = (db_expr_t)db_prev; return (true); } if (t == tPLUS) { *valuep = (db_expr_t) db_next; return (true); } if (t == tDITTO) { *valuep = (db_expr_t)db_last_addr; return (true); } if (t == tDOLLAR) { if (!db_get_variable(valuep)) return (false); return (true); } if (t == tLPAREN) { if (!db_expression(valuep)) { db_printf("Expression syntax error after '%c'\n", '('); db_error(NULL); /*NOTREACHED*/ } t = db_read_token(); if (t != tRPAREN) { db_printf("Expression syntax error -- expected '%c'\n", ')'); db_error(NULL); /*NOTREACHED*/ } return (true); } db_unread_token(t); return (false); } static bool db_unary(db_expr_t *valuep) { int t; t = db_read_token(); if (t == tMINUS) { if (!db_unary(valuep)) { db_printf("Expression syntax error after '%c'\n", '-'); db_error(NULL); /*NOTREACHED*/ } *valuep = -*valuep; return (true); } if (t == tEXCL) { if(!db_unary(valuep)) { db_printf("Expression syntax error after '%c'\n", '!'); db_error(NULL); /* NOTREACHED */ } *valuep = (!(*valuep)); 
return (true); } if (t == tBIT_NOT) { if(!db_unary(valuep)) { db_printf("Expression syntax error after '%c'\n", '~'); db_error(NULL); /* NOTREACHED */ } *valuep = (~(*valuep)); return (true); } if (t == tSTAR) { /* indirection */ if (!db_unary(valuep)) { db_printf("Expression syntax error after '%c'\n", '*'); db_error(NULL); /*NOTREACHED*/ } *valuep = db_get_value((db_addr_t)*valuep, sizeof(void *), false); return (true); } db_unread_token(t); return (db_term(valuep)); } static bool db_mult_expr(db_expr_t *valuep) { db_expr_t lhs, rhs; int t; if (!db_unary(&lhs)) return (false); t = db_read_token(); while (t == tSTAR || t == tSLASH || t == tPCT || t == tHASH || t == tBIT_AND ) { if (!db_term(&rhs)) { db_printf("Expression syntax error after '%c'\n", t == tSTAR ? '*' : t == tSLASH ? '/' : t == tPCT ? '%' : t == tHASH ? '#' : '&'); db_error(NULL); /*NOTREACHED*/ } switch(t) { case tSTAR: lhs *= rhs; break; case tBIT_AND: lhs &= rhs; break; default: if (rhs == 0) { db_error("Division by 0\n"); /*NOTREACHED*/ } if (t == tSLASH) lhs /= rhs; else if (t == tPCT) lhs %= rhs; else lhs = roundup(lhs, rhs); } t = db_read_token(); } db_unread_token(t); *valuep = lhs; return (true); } static bool db_add_expr(db_expr_t *valuep) { db_expr_t lhs, rhs; int t; if (!db_mult_expr(&lhs)) return (false); t = db_read_token(); while (t == tPLUS || t == tMINUS || t == tBIT_OR) { if (!db_mult_expr(&rhs)) { db_printf("Expression syntax error after '%c'\n", t == tPLUS ? '+' : t == tMINUS ? 
'-' : '|'); db_error(NULL); /*NOTREACHED*/ } switch (t) { case tPLUS: lhs += rhs; break; case tMINUS: lhs -= rhs; break; case tBIT_OR: lhs |= rhs; break; default: - __unreachable(); + __assert_unreachable(); } t = db_read_token(); } db_unread_token(t); *valuep = lhs; return (true); } static bool db_shift_expr(db_expr_t *valuep) { db_expr_t lhs, rhs; int t; if (!db_add_expr(&lhs)) return (false); t = db_read_token(); while (t == tSHIFT_L || t == tSHIFT_R) { if (!db_add_expr(&rhs)) { db_printf("Expression syntax error after '%s'\n", t == tSHIFT_L ? "<<" : ">>"); db_error(NULL); /*NOTREACHED*/ } if (rhs < 0) { db_printf("Negative shift amount %jd\n", (intmax_t)rhs); db_error(NULL); /*NOTREACHED*/ } if (t == tSHIFT_L) lhs <<= rhs; else { /* Shift right is unsigned */ lhs = (db_addr_t)lhs >> rhs; } t = db_read_token(); } db_unread_token(t); *valuep = lhs; return (true); } static bool db_logical_relation_expr( db_expr_t *valuep) { db_expr_t lhs, rhs; int t; if (!db_shift_expr(&lhs)) return (false); t = db_read_token(); while (t == tLOG_EQ || t == tLOG_NOT_EQ || t == tGREATER || t == tGREATER_EQ || t == tLESS || t == tLESS_EQ) { if (!db_shift_expr(&rhs)) { db_printf("Expression syntax error after '%s'\n", t == tLOG_EQ ? "==" : t == tLOG_NOT_EQ ? "!=" : t == tGREATER ? ">" : t == tGREATER_EQ ? ">=" : t == tLESS ? 
"<" : "<="); db_error(NULL); /*NOTREACHED*/ } switch(t) { case tLOG_EQ: lhs = (lhs == rhs); break; case tLOG_NOT_EQ: lhs = (lhs != rhs); break; case tGREATER: lhs = (lhs > rhs); break; case tGREATER_EQ: lhs = (lhs >= rhs); break; case tLESS: lhs = (lhs < rhs); break; case tLESS_EQ: lhs = (lhs <= rhs); break; default: - __unreachable(); + __assert_unreachable(); } t = db_read_token(); } db_unread_token(t); *valuep = lhs; return (true); } static bool db_logical_and_expr( db_expr_t *valuep) { db_expr_t lhs, rhs; int t; if (!db_logical_relation_expr(&lhs)) return (false); t = db_read_token(); while (t == tLOG_AND) { if (!db_logical_relation_expr(&rhs)) { db_printf("Expression syntax error after '%s'\n", "&&"); db_error(NULL); /*NOTREACHED*/ } lhs = (lhs && rhs); t = db_read_token(); } db_unread_token(t); *valuep = lhs; return (true); } static bool db_logical_or_expr( db_expr_t *valuep) { db_expr_t lhs, rhs; int t; if (!db_logical_and_expr(&lhs)) return(false); t = db_read_token(); while (t == tLOG_OR) { if (!db_logical_and_expr(&rhs)) { db_printf("Expression syntax error after '%s'\n", "||"); db_error(NULL); /*NOTREACHED*/ } lhs = (lhs || rhs); t = db_read_token(); } db_unread_token(t); *valuep = lhs; return (true); } int db_expression(db_expr_t *valuep) { return (db_logical_or_expr(valuep)); } diff --git a/sys/dev/amdtemp/amdtemp.c b/sys/dev/amdtemp/amdtemp.c index 597737c98c8c..80c0e3c538ef 100644 --- a/sys/dev/amdtemp/amdtemp.c +++ b/sys/dev/amdtemp/amdtemp.c @@ -1,814 +1,814 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2008, 2009 Rui Paulo * Copyright (c) 2009 Norikatsu Shigemura * Copyright (c) 2009-2012 Jung-uk Kim * All rights reserved. * Copyright (c) 2017-2020 Conrad Meyer . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Driver for the AMD CPU on-die thermal sensors.
 * Initially based on the k8temp Linux driver.
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Sensor selectors.  A value of this type is passed to the sc_gettemp
 * callbacks and is stored as arg2 of every temperature sysctl.
 *
 * COREn_SENSORm name an individual sensor.  CORE0 and CORE1 are aggregate
 * selectors: the sysctl handler reads both sensors of that core and reports
 * the higher of the two.  CCD1..CCD8 select the per-CCD (Tccd) readings that
 * the family 17h getter serves; CCD_BASE, CCD_MAX and NUM_CCDS describe that
 * contiguous range.
 */
typedef enum {
	CORE0_SENSOR0,
	CORE0_SENSOR1,
	CORE1_SENSOR0,
	CORE1_SENSOR1,
	CORE0,
	CORE1,
	CCD1,
	CCD_BASE = CCD1,
	CCD2,
	CCD3,
	CCD4,
	CCD5,
	CCD6,
	CCD7,
	CCD8,
	CCD_MAX = CCD8,
	NUM_CCDS = CCD_MAX - CCD_BASE + 1,
} amdsensor_t;

/*
 * Per-device driver state, filled in by amdtemp_attach().
 */
struct amdtemp_softc {
	int		sc_ncores;	/* Cores per package. */
	int		sc_ntemps;	/* Sensors per core (1 or 2). */
	int		sc_flags;	/* AMDTEMP_FLAG_* bits below. */
#define	AMDTEMP_FLAG_CS_SWAP	0x01	/* ThermSenseCoreSel is inverted. */
#define	AMDTEMP_FLAG_CT_10BIT	0x02	/* CurTmp is 10-bit wide. */
#define	AMDTEMP_FLAG_ALT_OFFSET	0x04	/* CurTmp starts at -28C. */
	/*
	 * Adjustable offset, exposed as the "sensor_offset" tunable and
	 * sysctl.  The getters multiply it by 10 before adding it to a
	 * reading.
	 */
	int32_t		sc_offset;
	/* Family-specific temperature reader selected in attach. */
	int32_t		(*sc_gettemp)(device_t, amdsensor_t);
	/*
	 * "temperature" OIDs that amdtemp_intrhook() adds under the cpu
	 * devices, indexed by core; amdtemp_detach() removes them.
	 */
	struct sysctl_oid *sc_sysctl_cpu[MAXCPU];
	/* Hook registered with config_intrhook_establish() in attach. */
	struct intr_config_hook sc_ich;
	/*
	 * "amdsmn" child of our parent, used for amdsmn_read(); only looked
	 * up for the families that read temperatures over SMN.
	 */
	device_t	sc_smn;
};

/*
 * N.B.
The numbers in macro names below are significant and represent CPU
 * family and model numbers.  Do not make up fictitious family or model numbers
 * when adding support for new devices.
 */
#define	VENDORID_AMD		0x1022
#define	DEVICEID_AMD_MISC0F	0x1103
#define	DEVICEID_AMD_MISC10	0x1203
#define	DEVICEID_AMD_MISC11	0x1303
#define	DEVICEID_AMD_MISC14	0x1703
#define	DEVICEID_AMD_MISC15	0x1603
#define	DEVICEID_AMD_MISC15_M10H	0x1403
#define	DEVICEID_AMD_MISC15_M30H	0x141d
#define	DEVICEID_AMD_MISC15_M60H_ROOT	0x1576
#define	DEVICEID_AMD_MISC16	0x1533
#define	DEVICEID_AMD_MISC16_M30H	0x1583
#define	DEVICEID_AMD_HOSTB17H_ROOT	0x1450
#define	DEVICEID_AMD_HOSTB17H_M10H_ROOT	0x15d0
#define	DEVICEID_AMD_HOSTB17H_M30H_ROOT	0x1480	/* Also M70h. */

/*
 * PCI IDs of the parent devices this driver attaches under.
 * amdtemp_match() compares each entry against the vendor and device IDs of
 * the candidate parent; the same table is exported through MODULE_PNP_INFO.
 */
static const struct amdtemp_product {
	uint16_t	amdtemp_vendorid;
	uint16_t	amdtemp_deviceid;
	/*
	 * 0xFC register is only valid on the D18F3 PCI device; SMN temp
	 * drivers do not attach to that device.
	 *
	 * When true, attach may re-read the CPU family/model from the
	 * parent's AMDTEMP_CPUID config register instead of trusting the
	 * local cpu_id.
	 */
	bool		amdtemp_has_cpuid;
} amdtemp_products[] = {
	{ VENDORID_AMD,	DEVICEID_AMD_MISC0F, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC10, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC11, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC14, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M10H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M30H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M60H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC16, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC16_M30H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M10H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M30H_ROOT, false },
};

/*
 * Reported Temperature Control Register, family 0Fh-15h (some models), 16h.
*/
#define	AMDTEMP_REPTMP_CTRL	0xa4

/*
 * Field layout of the register above: CurTmp is the 11-bit field at bits
 * 31-21 and CurTmpTjSel is the 2-bit field at bits 17-16.  Each mask is
 * applied after shifting the raw value right by the matching shift.
 */
#define	AMDTEMP_REPTMP10H_CURTMP_MASK	0x7ff
#define	AMDTEMP_REPTMP10H_CURTMP_SHIFT	21
#define	AMDTEMP_REPTMP10H_TJSEL_MASK	0x3
#define	AMDTEMP_REPTMP10H_TJSEL_SHIFT	16

/*
 * Reported Temperature, Family 15h, M60+
 *
 * Same register bit definitions as other Family 15h CPUs, but access is
 * indirect via SMN, like Family 17h.
 */
#define	AMDTEMP_15H_M60H_REPTMP_CTRL	0xd8200ca4

/*
 * Reported Temperature, Family 17h
 *
 * According to AMD OSRR for 17H, section 4.2.1, bits 31-21 of this register
 * provide the current temp.  Bit 19, when clear, means the temp is reported in
 * a range 0.."225C" (probable typo for 255C), and when set changes the range
 * to -49..206C.
 */
#define	AMDTEMP_17H_CUR_TMP		0x59800
#define	AMDTEMP_17H_CUR_TMP_RANGE_SEL	(1u << 19)

/*
 * The following register set was discovered experimentally by Ondrej Čerman
 * and collaborators, but is not (yet) documented in a PPR/OSRR (other than
 * the M70H PPR SMN memory map showing [0x59800, +0x314] as allocated to
 * SMU::THM).  It seems plausible and the Linux sensor folks have adopted it.
 *
 * The driver reads one 32-bit register per CCD, starting at the base address
 * below, and only exposes a CCD whose register has the VALID bit set.
 */
#define	AMDTEMP_17H_CCD_TMP_BASE	0x59954
#define	AMDTEMP_17H_CCD_TMP_VALID	(1u << 11)

/*
 * AMD temperature range adjustment, in deciKelvins (i.e., 49.0 Celsius).
 */
#define	AMDTEMP_CURTMP_RANGE_ADJUST	490

/*
 * Thermaltrip Status Register (Family 0Fh only)
 *
 * SELCORE and SELSENSOR are the core and sensor selector bits that
 * amdtemp_gettemp0f() writes before it reads the temperature back.
 */
#define	AMDTEMP_THERMTP_STAT	0xe4
#define	AMDTEMP_TTSR_SELCORE	0x04
#define	AMDTEMP_TTSR_SELSENSOR	0x40

/*
 * DRAM Configuration High Register
 *
 * Attach tests the DDR3 mode bit as part of the family 10h erratum 319
 * check.
 */
#define	AMDTEMP_DRAM_CONF_HIGH	0x94	/* Function 2 */
#define	AMDTEMP_DRAM_MODE_DDR3	0x0100

/*
 * CPU Family/Model Register
 *
 * Read from the parent PCI device in attach, for products whose table
 * entry sets amdtemp_has_cpuid.
 */
#define	AMDTEMP_CPUID		0xfc

/*
 * Device methods.
*/ static void amdtemp_identify(driver_t *driver, device_t parent); static int amdtemp_probe(device_t dev); static int amdtemp_attach(device_t dev); static void amdtemp_intrhook(void *arg); static int amdtemp_detach(device_t dev); static int32_t amdtemp_gettemp0f(device_t dev, amdsensor_t sensor); static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor); static int32_t amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor); static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor); static void amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model); static int amdtemp_sysctl(SYSCTL_HANDLER_ARGS); static device_method_t amdtemp_methods[] = { /* Device interface */ DEVMETHOD(device_identify, amdtemp_identify), DEVMETHOD(device_probe, amdtemp_probe), DEVMETHOD(device_attach, amdtemp_attach), DEVMETHOD(device_detach, amdtemp_detach), DEVMETHOD_END }; static driver_t amdtemp_driver = { "amdtemp", amdtemp_methods, sizeof(struct amdtemp_softc), }; static devclass_t amdtemp_devclass; DRIVER_MODULE(amdtemp, hostb, amdtemp_driver, amdtemp_devclass, NULL, NULL); MODULE_VERSION(amdtemp, 1); MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1); MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdtemp, amdtemp_products, nitems(amdtemp_products)); static bool amdtemp_match(device_t dev, const struct amdtemp_product **product_out) { int i; uint16_t vendor, devid; vendor = pci_get_vendor(dev); devid = pci_get_device(dev); for (i = 0; i < nitems(amdtemp_products); i++) { if (vendor == amdtemp_products[i].amdtemp_vendorid && devid == amdtemp_products[i].amdtemp_deviceid) { if (product_out != NULL) *product_out = &amdtemp_products[i]; return (true); } } return (false); } static void amdtemp_identify(driver_t *driver, device_t parent) { device_t child; /* Make sure we're not being doubly invoked. 
*/ if (device_find_child(parent, "amdtemp", -1) != NULL) return; if (amdtemp_match(parent, NULL)) { child = device_add_child(parent, "amdtemp", -1); if (child == NULL) device_printf(parent, "add amdtemp child failed\n"); } } static int amdtemp_probe(device_t dev) { uint32_t family, model; if (resource_disabled("amdtemp", 0)) return (ENXIO); if (!amdtemp_match(device_get_parent(dev), NULL)) return (ENXIO); family = CPUID_TO_FAMILY(cpu_id); model = CPUID_TO_MODEL(cpu_id); switch (family) { case 0x0f: if ((model == 0x04 && (cpu_id & CPUID_STEPPING) == 0) || (model == 0x05 && (cpu_id & CPUID_STEPPING) <= 1)) return (ENXIO); break; case 0x10: case 0x11: case 0x12: case 0x14: case 0x15: case 0x16: case 0x17: break; default: return (ENXIO); } device_set_desc(dev, "AMD CPU On-Die Thermal Sensors"); return (BUS_PROBE_GENERIC); } static int amdtemp_attach(device_t dev) { char tn[32]; u_int regs[4]; const struct amdtemp_product *product; struct amdtemp_softc *sc; struct sysctl_ctx_list *sysctlctx; struct sysctl_oid *sysctlnode; uint32_t cpuid, family, model; u_int bid; int erratum319, unit; bool needsmn; sc = device_get_softc(dev); erratum319 = 0; needsmn = false; if (!amdtemp_match(device_get_parent(dev), &product)) return (ENXIO); cpuid = cpu_id; family = CPUID_TO_FAMILY(cpuid); model = CPUID_TO_MODEL(cpuid); /* * This checks for the byzantine condition of running a heterogenous * revision multi-socket system where the attach thread is potentially * probing a remote socket's PCI device. * * Currently, such scenarios are unsupported on models using the SMN * (because on those models, amdtemp(4) attaches to a different PCI * device than the one that contains AMDTEMP_CPUID). * * The ancient 0x0F family of devices only supports this register from * models 40h+. 
*/ if (product->amdtemp_has_cpuid && (family > 0x0f || (family == 0x0f && model >= 0x40))) { cpuid = pci_read_config(device_get_parent(dev), AMDTEMP_CPUID, 4); family = CPUID_TO_FAMILY(cpuid); model = CPUID_TO_MODEL(cpuid); } switch (family) { case 0x0f: /* * Thermaltrip Status Register * * - ThermSenseCoreSel * * Revision F & G: 0 - Core1, 1 - Core0 * Other: 0 - Core0, 1 - Core1 * * - CurTmp * * Revision G: bits 23-14 * Other: bits 23-16 * * XXX According to the BKDG, CurTmp, ThermSenseSel and * ThermSenseCoreSel bits were introduced in Revision F * but CurTmp seems working fine as early as Revision C. * However, it is not clear whether ThermSenseSel and/or * ThermSenseCoreSel work in undocumented cases as well. * In fact, the Linux driver suggests it may not work but * we just assume it does until we find otherwise. * * XXX According to Linux, CurTmp starts at -28C on * Socket AM2 Revision G processors, which is not * documented anywhere. */ if (model >= 0x40) sc->sc_flags |= AMDTEMP_FLAG_CS_SWAP; if (model >= 0x60 && model != 0xc1) { do_cpuid(0x80000001, regs); bid = (regs[1] >> 9) & 0x1f; switch (model) { case 0x68: /* Socket S1g1 */ case 0x6c: case 0x7c: break; case 0x6b: /* Socket AM2 and ASB1 (2 cores) */ if (bid != 0x0b && bid != 0x0c) sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET; break; case 0x6f: /* Socket AM2 and ASB1 (1 core) */ case 0x7f: if (bid != 0x07 && bid != 0x09 && bid != 0x0c) sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET; break; default: sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET; } sc->sc_flags |= AMDTEMP_FLAG_CT_10BIT; } /* * There are two sensors per core. 
*/ sc->sc_ntemps = 2; sc->sc_gettemp = amdtemp_gettemp0f; break; case 0x10: /* * Erratum 319 Inaccurate Temperature Measurement * * http://support.amd.com/us/Processor_TechDocs/41322.pdf */ do_cpuid(0x80000001, regs); switch ((regs[1] >> 28) & 0xf) { case 0: /* Socket F */ erratum319 = 1; break; case 1: /* Socket AM2+ or AM3 */ if ((pci_cfgregread(pci_get_bus(dev), pci_get_slot(dev), 2, AMDTEMP_DRAM_CONF_HIGH, 2) & AMDTEMP_DRAM_MODE_DDR3) != 0 || model > 0x04 || (model == 0x04 && (cpuid & CPUID_STEPPING) >= 3)) break; /* XXX 00100F42h (RB-C2) exists in both formats. */ erratum319 = 1; break; } /* FALLTHROUGH */ case 0x11: case 0x12: case 0x14: case 0x15: case 0x16: sc->sc_ntemps = 1; /* * Some later (60h+) models of family 15h use a similar SMN * network as family 17h. (However, the register index differs * from 17h and the decoding matches other 10h-15h models, * which differ from 17h.) */ if (family == 0x15 && model >= 0x60) { sc->sc_gettemp = amdtemp_gettemp15hm60h; needsmn = true; } else sc->sc_gettemp = amdtemp_gettemp; break; case 0x17: sc->sc_ntemps = 1; sc->sc_gettemp = amdtemp_gettemp17h; needsmn = true; break; default: device_printf(dev, "Bogus family 0x%x\n", family); return (ENXIO); } if (needsmn) { sc->sc_smn = device_find_child( device_get_parent(dev), "amdsmn", -1); if (sc->sc_smn == NULL) { if (bootverbose) device_printf(dev, "No SMN device found\n"); return (ENXIO); } } /* Find number of cores per package. */ sc->sc_ncores = (amd_feature2 & AMDID2_CMP) != 0 ? (cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1; if (sc->sc_ncores > MAXCPU) return (ENXIO); if (erratum319) device_printf(dev, "Erratum 319: temperature measurement may be inaccurate\n"); if (bootverbose) device_printf(dev, "Found %d cores and %d sensors.\n", sc->sc_ncores, sc->sc_ntemps > 1 ? sc->sc_ntemps * sc->sc_ncores : 1); /* * dev.amdtemp.N tree. 
*/ unit = device_get_unit(dev); snprintf(tn, sizeof(tn), "dev.amdtemp.%d.sensor_offset", unit); TUNABLE_INT_FETCH(tn, &sc->sc_offset); sysctlctx = device_get_sysctl_ctx(dev); SYSCTL_ADD_INT(sysctlctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "sensor_offset", CTLFLAG_RW, &sc->sc_offset, 0, "Temperature sensor offset"); sysctlnode = SYSCTL_ADD_NODE(sysctlctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "core0", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Core 0"); SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(sysctlnode), OID_AUTO, "sensor0", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, dev, CORE0_SENSOR0, amdtemp_sysctl, "IK", "Core 0 / Sensor 0 temperature"); if (family == 0x17) amdtemp_probe_ccd_sensors17h(dev, model); else if (sc->sc_ntemps > 1) { SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(sysctlnode), OID_AUTO, "sensor1", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, dev, CORE0_SENSOR1, amdtemp_sysctl, "IK", "Core 0 / Sensor 1 temperature"); if (sc->sc_ncores > 1) { sysctlnode = SYSCTL_ADD_NODE(sysctlctx, SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "core1", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Core 1"); SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(sysctlnode), OID_AUTO, "sensor0", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, dev, CORE1_SENSOR0, amdtemp_sysctl, "IK", "Core 1 / Sensor 0 temperature"); SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(sysctlnode), OID_AUTO, "sensor1", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, dev, CORE1_SENSOR1, amdtemp_sysctl, "IK", "Core 1 / Sensor 1 temperature"); } } /* * Try to create dev.cpu sysctl entries and setup intrhook function. * This is needed because the cpu driver may be loaded late on boot, * after us. 
*/ amdtemp_intrhook(dev); sc->sc_ich.ich_func = amdtemp_intrhook; sc->sc_ich.ich_arg = dev; if (config_intrhook_establish(&sc->sc_ich) != 0) { device_printf(dev, "config_intrhook_establish failed!\n"); return (ENXIO); } return (0); } void amdtemp_intrhook(void *arg) { struct amdtemp_softc *sc; struct sysctl_ctx_list *sysctlctx; device_t dev = (device_t)arg; device_t acpi, cpu, nexus; amdsensor_t sensor; int i; sc = device_get_softc(dev); /* * dev.cpu.N.temperature. */ nexus = device_find_child(root_bus, "nexus", 0); acpi = device_find_child(nexus, "acpi", 0); for (i = 0; i < sc->sc_ncores; i++) { if (sc->sc_sysctl_cpu[i] != NULL) continue; cpu = device_find_child(acpi, "cpu", device_get_unit(dev) * sc->sc_ncores + i); if (cpu != NULL) { sysctlctx = device_get_sysctl_ctx(cpu); sensor = sc->sc_ntemps > 1 ? (i == 0 ? CORE0 : CORE1) : CORE0_SENSOR0; sc->sc_sysctl_cpu[i] = SYSCTL_ADD_PROC(sysctlctx, SYSCTL_CHILDREN(device_get_sysctl_tree(cpu)), OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_NEEDGIANT, dev, sensor, amdtemp_sysctl, "IK", "Current temparature"); } } if (sc->sc_ich.ich_arg != NULL) config_intrhook_disestablish(&sc->sc_ich); } int amdtemp_detach(device_t dev) { struct amdtemp_softc *sc = device_get_softc(dev); int i; for (i = 0; i < sc->sc_ncores; i++) if (sc->sc_sysctl_cpu[i] != NULL) sysctl_remove_oid(sc->sc_sysctl_cpu[i], 1, 0); /* NewBus removes the dev.amdtemp.N tree by itself. 
*/ return (0); } static int amdtemp_sysctl(SYSCTL_HANDLER_ARGS) { device_t dev = (device_t)arg1; struct amdtemp_softc *sc = device_get_softc(dev); amdsensor_t sensor = (amdsensor_t)arg2; int32_t auxtemp[2], temp; int error; switch (sensor) { case CORE0: auxtemp[0] = sc->sc_gettemp(dev, CORE0_SENSOR0); auxtemp[1] = sc->sc_gettemp(dev, CORE0_SENSOR1); temp = imax(auxtemp[0], auxtemp[1]); break; case CORE1: auxtemp[0] = sc->sc_gettemp(dev, CORE1_SENSOR0); auxtemp[1] = sc->sc_gettemp(dev, CORE1_SENSOR1); temp = imax(auxtemp[0], auxtemp[1]); break; default: temp = sc->sc_gettemp(dev, sensor); break; } error = sysctl_handle_int(oidp, &temp, 0, req); return (error); } #define AMDTEMP_ZERO_C_TO_K 2731 static int32_t amdtemp_gettemp0f(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); uint32_t mask, offset, temp; /* Set Sensor/Core selector. */ temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 1); temp &= ~(AMDTEMP_TTSR_SELCORE | AMDTEMP_TTSR_SELSENSOR); switch (sensor) { case CORE0_SENSOR1: temp |= AMDTEMP_TTSR_SELSENSOR; /* FALLTHROUGH */ case CORE0_SENSOR0: case CORE0: if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) != 0) temp |= AMDTEMP_TTSR_SELCORE; break; case CORE1_SENSOR1: temp |= AMDTEMP_TTSR_SELSENSOR; /* FALLTHROUGH */ case CORE1_SENSOR0: case CORE1: if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) == 0) temp |= AMDTEMP_TTSR_SELCORE; break; default: - __unreachable(); + __assert_unreachable(); } pci_write_config(dev, AMDTEMP_THERMTP_STAT, temp, 1); mask = (sc->sc_flags & AMDTEMP_FLAG_CT_10BIT) != 0 ? 0x3ff : 0x3fc; offset = (sc->sc_flags & AMDTEMP_FLAG_ALT_OFFSET) != 0 ? 28 : 49; temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 4); temp = ((temp >> 14) & mask) * 5 / 2; temp += AMDTEMP_ZERO_C_TO_K + (sc->sc_offset - offset) * 10; return (temp); } static uint32_t amdtemp_decode_fam10h_to_17h(int32_t sc_offset, uint32_t val, bool minus49) { uint32_t temp; /* Convert raw register subfield units (0.125C) to units of 0.1C. 
*/ temp = (val & AMDTEMP_REPTMP10H_CURTMP_MASK) * 5 / 4; if (minus49) temp -= AMDTEMP_CURTMP_RANGE_ADJUST; temp += AMDTEMP_ZERO_C_TO_K + sc_offset * 10; return (temp); } static uint32_t amdtemp_decode_fam10h_to_16h(int32_t sc_offset, uint32_t val) { bool minus49; /* * On Family 15h and higher, if CurTmpTjSel is 11b, the range is * adjusted down by 49.0 degrees Celsius. (This adjustment is not * documented in BKDGs prior to family 15h model 00h.) */ minus49 = (CPUID_TO_FAMILY(cpu_id) >= 0x15 && ((val >> AMDTEMP_REPTMP10H_TJSEL_SHIFT) & AMDTEMP_REPTMP10H_TJSEL_MASK) == 0x3); return (amdtemp_decode_fam10h_to_17h(sc_offset, val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49)); } static uint32_t amdtemp_decode_fam17h_tctl(int32_t sc_offset, uint32_t val) { bool minus49; minus49 = ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0); return (amdtemp_decode_fam10h_to_17h(sc_offset, val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49)); } static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); uint32_t temp; temp = pci_read_config(dev, AMDTEMP_REPTMP_CTRL, 4); return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, temp)); } static int32_t amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); uint32_t val; int error; error = amdsmn_read(sc->sc_smn, AMDTEMP_15H_M60H_REPTMP_CTRL, &val); KASSERT(error == 0, ("amdsmn_read")); return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, val)); } static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor) { struct amdtemp_softc *sc = device_get_softc(dev); uint32_t val; int error; switch (sensor) { case CORE0_SENSOR0: /* Tctl */ error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &val); KASSERT(error == 0, ("amdsmn_read")); return (amdtemp_decode_fam17h_tctl(sc->sc_offset, val)); case CCD_BASE ... 
CCD_MAX: /* Tccd */ error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CCD_TMP_BASE + (((int)sensor - CCD_BASE) * sizeof(val)), &val); KASSERT(error == 0, ("amdsmn_read2")); KASSERT((val & AMDTEMP_17H_CCD_TMP_VALID) != 0, ("sensor %d: not valid", (int)sensor)); return (amdtemp_decode_fam10h_to_17h(sc->sc_offset, val, true)); default: - __unreachable(); + __assert_unreachable(); } } static void amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model) { char sensor_name[16], sensor_descr[32]; struct amdtemp_softc *sc; uint32_t maxreg, i, val; int error; switch (model) { case 0x00 ... 0x1f: /* Zen1, Zen+ */ maxreg = 4; break; case 0x30 ... 0x3f: /* Zen2 TR/Epyc */ case 0x70 ... 0x7f: /* Zen2 Ryzen */ maxreg = 8; _Static_assert((int)NUM_CCDS >= 8, ""); break; default: device_printf(dev, "Unrecognized Family 17h Model: %02xh\n", model); return; } sc = device_get_softc(dev); for (i = 0; i < maxreg; i++) { error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CCD_TMP_BASE + (i * sizeof(val)), &val); if (error != 0) continue; if ((val & AMDTEMP_17H_CCD_TMP_VALID) == 0) continue; snprintf(sensor_name, sizeof(sensor_name), "ccd%u", i); snprintf(sensor_descr, sizeof(sensor_descr), "CCD %u temperature (Tccd%u)", i, i); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, sensor_name, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, dev, CCD_BASE + i, amdtemp_sysctl, "IK", sensor_descr); } } diff --git a/sys/dev/nvdimm/nvdimm.c b/sys/dev/nvdimm/nvdimm.c index 4f3696fefd50..66638636ef0e 100644 --- a/sys/dev/nvdimm/nvdimm.c +++ b/sys/dev/nvdimm/nvdimm.c @@ -1,412 +1,413 @@ /*- * Copyright (c) 2017 The FreeBSD Foundation * All rights reserved. * Copyright (c) 2018, 2019 Intel Corporation * * This software was developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include "opt_ddb.h" #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #define _COMPONENT ACPI_OEM ACPI_MODULE_NAME("NVDIMM") static struct uuid intel_nvdimm_dsm_uuid = {0x4309AC30,0x0D11,0x11E4,0x91,0x91,{0x08,0x00,0x20,0x0C,0x9A,0x66}}; #define INTEL_NVDIMM_DSM_REV 1 #define INTEL_NVDIMM_DSM_GET_LABEL_SIZE 4 #define INTEL_NVDIMM_DSM_GET_LABEL_DATA 5 static devclass_t nvdimm_devclass; MALLOC_DEFINE(M_NVDIMM, "nvdimm", "NVDIMM driver memory"); static int read_label_area_size(struct nvdimm_dev *nv) { ACPI_OBJECT *result_buffer; ACPI_HANDLE handle; ACPI_STATUS status; ACPI_BUFFER result; uint32_t *out; int error; handle = nvdimm_root_get_acpi_handle(nv->nv_dev); if (handle == NULL) return (ENODEV); result.Length = ACPI_ALLOCATE_BUFFER; result.Pointer = NULL; status = acpi_EvaluateDSM(handle, (uint8_t *)&intel_nvdimm_dsm_uuid, INTEL_NVDIMM_DSM_REV, INTEL_NVDIMM_DSM_GET_LABEL_SIZE, NULL, &result); error = ENXIO; if (ACPI_SUCCESS(status) && result.Pointer != NULL && result.Length >= sizeof(ACPI_OBJECT)) { result_buffer = result.Pointer; if (result_buffer->Type == ACPI_TYPE_BUFFER && result_buffer->Buffer.Length >= 12) { out = (uint32_t *)result_buffer->Buffer.Pointer; nv->label_area_size = out[1]; nv->max_label_xfer = out[2]; error = 0; } } if (result.Pointer != NULL) AcpiOsFree(result.Pointer); return (error); } static int read_label_area(struct nvdimm_dev *nv, uint8_t *dest, off_t offset, off_t length) { ACPI_BUFFER result; ACPI_HANDLE handle; ACPI_OBJECT params_pkg, params_buf, *result_buf; ACPI_STATUS status; uint32_t params[2]; off_t to_read; int error; error = 0; handle = nvdimm_root_get_acpi_handle(nv->nv_dev); if (offset < 0 || length <= 0 || offset + length > nv->label_area_size || handle == NULL) return (ENODEV); params_pkg.Type = ACPI_TYPE_PACKAGE; params_pkg.Package.Count = 1; params_pkg.Package.Elements = ¶ms_buf; 
params_buf.Type = ACPI_TYPE_BUFFER; params_buf.Buffer.Length = sizeof(params); params_buf.Buffer.Pointer = (UINT8 *)params; while (length > 0) { to_read = MIN(length, nv->max_label_xfer); params[0] = offset; params[1] = to_read; result.Length = ACPI_ALLOCATE_BUFFER; result.Pointer = NULL; status = acpi_EvaluateDSM(handle, (uint8_t *)&intel_nvdimm_dsm_uuid, INTEL_NVDIMM_DSM_REV, INTEL_NVDIMM_DSM_GET_LABEL_DATA, ¶ms_pkg, &result); if (ACPI_FAILURE(status) || result.Length < sizeof(ACPI_OBJECT) || result.Pointer == NULL) { error = ENXIO; break; } result_buf = (ACPI_OBJECT *)result.Pointer; if (result_buf->Type != ACPI_TYPE_BUFFER || result_buf->Buffer.Pointer == NULL || result_buf->Buffer.Length != 4 + to_read || ((uint16_t *)result_buf->Buffer.Pointer)[0] != 0) { error = ENXIO; break; } bcopy(result_buf->Buffer.Pointer + 4, dest, to_read); dest += to_read; offset += to_read; length -= to_read; if (result.Pointer != NULL) { AcpiOsFree(result.Pointer); result.Pointer = NULL; } } if (result.Pointer != NULL) AcpiOsFree(result.Pointer); return (error); } static uint64_t fletcher64(const void *data, size_t length) { size_t i; uint32_t a, b; const uint32_t *d; a = 0; b = 0; d = (const uint32_t *)data; length = length / sizeof(uint32_t); for (i = 0; i < length; i++) { a += d[i]; b += a; } return ((uint64_t)b << 32 | a); } static bool label_index_is_valid(struct nvdimm_label_index *index, uint32_t max_labels, size_t size, size_t offset) { uint64_t checksum; index = (struct nvdimm_label_index *)((uint8_t *)index + size * offset); if (strcmp(index->signature, NVDIMM_INDEX_BLOCK_SIGNATURE) != 0) return false; checksum = index->checksum; index->checksum = 0; if (checksum != fletcher64(index, size) || index->this_offset != size * offset || index->this_size != size || index->other_offset != size * (offset == 0 ? 
1 : 0) || index->seq == 0 || index->seq > 3 || index->slot_cnt > max_labels || index->label_size != 1) return false; return true; } static int read_label(struct nvdimm_dev *nv, int num) { struct nvdimm_label_entry *entry, *i, *next; uint64_t checksum; off_t offset; int error; offset = nv->label_index->label_offset + num * (128 << nv->label_index->label_size); entry = malloc(sizeof(*entry), M_NVDIMM, M_WAITOK); error = read_label_area(nv, (uint8_t *)&entry->label, offset, sizeof(struct nvdimm_label)); if (error != 0) { free(entry, M_NVDIMM); return (error); } checksum = entry->label.checksum; entry->label.checksum = 0; if (checksum != fletcher64(&entry->label, sizeof(entry->label)) || entry->label.slot != num) { free(entry, M_NVDIMM); return (ENXIO); } /* Insertion ordered by dimm_phys_addr */ if (SLIST_EMPTY(&nv->labels) || entry->label.dimm_phys_addr <= SLIST_FIRST(&nv->labels)->label.dimm_phys_addr) { SLIST_INSERT_HEAD(&nv->labels, entry, link); return (0); } SLIST_FOREACH_SAFE(i, &nv->labels, link, next) { if (next == NULL || entry->label.dimm_phys_addr <= next->label.dimm_phys_addr) { SLIST_INSERT_AFTER(i, entry, link); return (0); } } - __unreachable(); + __assert_unreachable(); } static int read_labels(struct nvdimm_dev *nv) { struct nvdimm_label_index *indices, *index1; size_t bitfield_size, index_size, num_labels; int error, n; bool index_0_valid, index_1_valid; for (index_size = 256; ; index_size += 256) { num_labels = 8 * (index_size - sizeof(struct nvdimm_label_index)); if (index_size + num_labels * sizeof(struct nvdimm_label) >= nv->label_area_size) break; } num_labels = (nv->label_area_size - index_size) / sizeof(struct nvdimm_label); bitfield_size = roundup2(num_labels, 8) / 8; indices = malloc(2 * index_size, M_NVDIMM, M_WAITOK); index1 = (void *)((uint8_t *)indices + index_size); error = read_label_area(nv, (void *)indices, 0, 2 * index_size); if (error != 0) { free(indices, M_NVDIMM); return (error); } index_0_valid = label_index_is_valid(indices, 
num_labels, index_size, 0); index_1_valid = label_index_is_valid(indices, num_labels, index_size, 1); if (!index_0_valid && !index_1_valid) { free(indices, M_NVDIMM); return (ENXIO); } if (index_0_valid && index_1_valid) { if (((int)indices->seq - (int)index1->seq + 3) % 3 == 1) { /* index 0 was more recently updated */ index_1_valid = false; } else { /* * either index 1 was more recently updated, * or the sequence numbers are equal, in which * case the specification says the block with * the higher offset is to be treated as valid */ index_0_valid = false; } } nv->label_index = malloc(index_size, M_NVDIMM, M_WAITOK); bcopy(index_0_valid ? indices : index1, nv->label_index, index_size); free(indices, M_NVDIMM); bit_ffc_at((bitstr_t *)nv->label_index->free, 0, nv->label_index->slot_cnt, &n); while (n >= 0) { read_label(nv, n); bit_ffc_at((bitstr_t *)nv->label_index->free, n + 1, nv->label_index->slot_cnt, &n); } return (0); } struct nvdimm_dev * nvdimm_find_by_handle(nfit_handle_t nv_handle) { struct nvdimm_dev *res; device_t *dimms; int i, error, num_dimms; res = NULL; error = devclass_get_devices(nvdimm_devclass, &dimms, &num_dimms); if (error != 0) return (NULL); for (i = 0; i < num_dimms; i++) { if (nvdimm_root_get_device_handle(dimms[i]) == nv_handle) { res = device_get_softc(dimms[i]); break; } } free(dimms, M_TEMP); return (res); } static int nvdimm_probe(device_t dev) { return (BUS_PROBE_NOWILDCARD); } static int nvdimm_attach(device_t dev) { struct nvdimm_dev *nv; ACPI_TABLE_NFIT *nfitbl; ACPI_HANDLE handle; ACPI_STATUS status; int error; nv = device_get_softc(dev); handle = nvdimm_root_get_acpi_handle(dev); MPASS(handle != NULL); nv->nv_dev = dev; nv->nv_handle = nvdimm_root_get_device_handle(dev); status = AcpiGetTable(ACPI_SIG_NFIT, 1, (ACPI_TABLE_HEADER **)&nfitbl); if (ACPI_FAILURE(status)) { if (bootverbose) device_printf(dev, "cannot get NFIT\n"); return (ENXIO); } acpi_nfit_get_flush_addrs(nfitbl, nv->nv_handle, &nv->nv_flush_addr, 
&nv->nv_flush_addr_cnt); AcpiPutTable(&nfitbl->Header); error = read_label_area_size(nv); if (error == 0) { /* * Ignoring errors reading labels. Not all NVDIMMs * support labels and namespaces. */ read_labels(nv); } return (0); } static int nvdimm_detach(device_t dev) { struct nvdimm_dev *nv; struct nvdimm_label_entry *label, *next; nv = device_get_softc(dev); free(nv->nv_flush_addr, M_NVDIMM); free(nv->label_index, M_NVDIMM); SLIST_FOREACH_SAFE(label, &nv->labels, link, next) { SLIST_REMOVE_HEAD(&nv->labels, link); free(label, M_NVDIMM); } return (0); } static int nvdimm_suspend(device_t dev) { return (0); } static int nvdimm_resume(device_t dev) { return (0); } static device_method_t nvdimm_methods[] = { DEVMETHOD(device_probe, nvdimm_probe), DEVMETHOD(device_attach, nvdimm_attach), DEVMETHOD(device_detach, nvdimm_detach), DEVMETHOD(device_suspend, nvdimm_suspend), DEVMETHOD(device_resume, nvdimm_resume), DEVMETHOD_END }; static driver_t nvdimm_driver = { "nvdimm", nvdimm_methods, sizeof(struct nvdimm_dev), }; DRIVER_MODULE(nvdimm, nvdimm_acpi_root, nvdimm_driver, nvdimm_devclass, NULL, NULL); MODULE_DEPEND(nvdimm, acpi, 1, 1, 1); diff --git a/sys/dev/ow/ow.c b/sys/dev/ow/ow.c index eda33ea986d4..a6583accaa59 100644 --- a/sys/dev/ow/ow.c +++ b/sys/dev/ow/ow.c @@ -1,746 +1,746 @@ /*- * Copyright (c) 2015 M. Warner Losh * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * lldev - link level device * ndev - network / transport device (this module) * pdev - presentation device (children of this module) */ typedef int ow_enum_fn(device_t, device_t); typedef int ow_found_fn(device_t, romid_t); struct ow_softc { device_t dev; /* Newbus driver back pointer */ struct mtx mtx; /* bus mutex */ device_t owner; /* bus owner, if != NULL */ }; struct ow_devinfo { romid_t romid; }; static int ow_acquire_bus(device_t ndev, device_t pdev, int how); static void ow_release_bus(device_t ndev, device_t pdev); #define OW_LOCK(_sc) mtx_lock(&(_sc)->mtx) #define OW_UNLOCK(_sc) mtx_unlock(&(_sc)->mtx) #define OW_LOCK_DESTROY(_sc) mtx_destroy(&_sc->mtx) #define OW_ASSERT_LOCKED(_sc) mtx_assert(&_sc->mtx, MA_OWNED) #define OW_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->mtx, MA_NOTOWNED) static MALLOC_DEFINE(M_OW, "ow", "House keeping data for 1wire bus"); static const struct ow_timing timing_regular_min = { .t_slot = 60, .t_low0 = 60, .t_low1 = 1, .t_release = 0, .t_rec = 1, .t_rdv = 15, /* fixed */ .t_rstl = 480, .t_rsth = 480, .t_pdl = 60, .t_pdh = 15, .t_lowr = 1, }; static const struct 
ow_timing timing_regular_max = { .t_slot = 120, .t_low0 = 120, .t_low1 = 15, .t_release = 45, .t_rec = 960, /* infinity */ .t_rdv = 15, /* fixed */ .t_rstl = 960, /* infinity */ .t_rsth = 960, /* infinity */ .t_pdl = 240, /* 60us to 240us */ .t_pdh = 60, /* 15us to 60us */ .t_lowr = 15, /* 1us */ }; static struct ow_timing timing_regular = { .t_slot = 60, /* 60 <= t < 120 */ .t_low0 = 60, /* 60 <= t < t_slot < 120 */ .t_low1 = 1, /* 1 <= t < 15 */ .t_release = 45, /* 0 <= t < 45 */ .t_rec = 15, /* 1 <= t < inf */ .t_rdv = 15, /* t == 15 */ .t_rstl = 480, /* 480 <= t < inf */ .t_rsth = 480, /* 480 <= t < inf */ .t_pdl = 60, /* 60 <= t < 240 */ .t_pdh = 60, /* 15 <= t < 60 */ .t_lowr = 1, /* 1 <= t < 15 */ }; /* NB: Untested */ static const struct ow_timing timing_overdrive_min = { .t_slot = 6, .t_low0 = 6, .t_low1 = 1, .t_release = 0, .t_rec = 1, .t_rdv = 2, /* fixed */ .t_rstl = 48, .t_rsth = 48, .t_pdl = 8, .t_pdh = 2, .t_lowr = 1, }; static const struct ow_timing timing_overdrive_max = { .t_slot = 16, .t_low0 = 16, .t_low1 = 2, .t_release = 4, .t_rec = 960, /* infinity */ .t_rdv = 2, /* fixed */ .t_rstl = 80, .t_rsth = 960, /* infinity */ .t_pdl = 24, .t_pdh = 6, .t_lowr = 2, }; static struct ow_timing timing_overdrive = { .t_slot = 11, /* 6 <= t < 16 */ .t_low0 = 6, /* 6 <= t < t_slot < 16 */ .t_low1 = 1, /* 1 <= t < 2 */ .t_release = 4, /* 0 <= t < 4 */ .t_rec = 1, /* 1 <= t < inf */ .t_rdv = 2, /* t == 2 */ .t_rstl = 48, /* 48 <= t < 80 */ .t_rsth = 48, /* 48 <= t < inf */ .t_pdl = 8, /* 8 <= t < 24 */ .t_pdh = 2, /* 2 <= t < 6 */ .t_lowr = 1, /* 1 <= t < 2 */ }; SYSCTL_NODE(_hw, OID_AUTO, ow, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "1-Wire protocol"); SYSCTL_NODE(_hw_ow, OID_AUTO, regular, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Regular mode timings"); SYSCTL_NODE(_hw_ow, OID_AUTO, overdrive, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "Overdrive mode timings"); #define _OW_TIMING_SYSCTL(mode, param) \ static int \ sysctl_ow_timing_ ## mode ## _ ## param(SYSCTL_HANDLER_ARGS) \ { \ int 
val = timing_ ## mode.param;					\
	int err;							\
	err = sysctl_handle_int(oidp, &val, 0, req);			\
	if (err != 0 || req->newptr == NULL)				\
		return (err);						\
	if (val < timing_ ## mode ## _min.param)			\
		return (EINVAL);					\
	else if (val >= timing_ ## mode ## _max.param)			\
		return (EINVAL);					\
	timing_ ## mode.param = val;					\
	return (0);							\
}									\
SYSCTL_PROC(_hw_ow_ ## mode, OID_AUTO, param,				\
    CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, 0, sizeof(int),	\
    sysctl_ow_timing_ ## mode ## _ ## param, "I",			\
    "1-Wire timing parameter in microseconds (-1 resets to default)")

/* Instantiate the handler + sysctl for both the regular and overdrive tables. */
#define OW_TIMING_SYSCTL(param)						\
    _OW_TIMING_SYSCTL(regular, param);					\
    _OW_TIMING_SYSCTL(overdrive, param)

OW_TIMING_SYSCTL(t_slot);
OW_TIMING_SYSCTL(t_low0);
OW_TIMING_SYSCTL(t_low1);
OW_TIMING_SYSCTL(t_release);
OW_TIMING_SYSCTL(t_rec);
OW_TIMING_SYSCTL(t_rdv);
OW_TIMING_SYSCTL(t_rstl);
OW_TIMING_SYSCTL(t_rsth);
OW_TIMING_SYSCTL(t_pdl);
OW_TIMING_SYSCTL(t_pdh);
OW_TIMING_SYSCTL(t_lowr);

#undef _OW_TIMING_SYSCTL
#undef OW_TIMING_SYSCTL

/*
 * Write one byte to the link-level device, least significant bit first,
 * using the timings in `t'.
 */
static void
ow_send_byte(device_t lldev, struct ow_timing *t, uint8_t byte)
{
	int i;

	for (i = 0; i < 8; i++)
		if (byte & (1 << i))
			OWLL_WRITE_ONE(lldev, t);
		else
			OWLL_WRITE_ZERO(lldev, t);
}

/*
 * Read eight bits from the link-level device into *bytep; the first bit
 * read lands in bit 0.
 */
static void
ow_read_byte(device_t lldev, struct ow_timing *t, uint8_t *bytep)
{
	int i;
	uint8_t byte = 0;
	int bit;

	for (i = 0; i < 8; i++) {
		OWLL_READ_DATA(lldev, t, &bit);
		byte |= bit << i;
	}
	*bytep = byte;
}

/*
 * Run one complete 1-Wire transaction described by `cmd': reset the bus
 * (retrying while no presence is reported), send the ROM command bytes,
 * read back the ROM reply bytes, then -- if a transport phase is requested
 * -- send the transport command and read its reply either bit-wise
 * (OW_FLAG_READ_BIT) or byte-wise.
 *
 * Returns 0 on success or ENOENT when the reset reports no device or a
 * miswired bus.
 */
static int
ow_send_command(device_t ndev, device_t pdev, struct ow_cmd *cmd)
{
	int present, i, bit, tries;
	device_t lldev;
	struct ow_timing *t;

	lldev = device_get_parent(ndev);

	/*
	 * Retry the reset a couple of times before giving up.
	 */
	tries = 4;
	do {
		OWLL_RESET_AND_PRESENCE(lldev, &timing_regular, &present);
		if (present == 1)
			device_printf(ndev, "Reset said no device on bus?.\n");
	} while (present == 1 && tries-- > 0);
	if (present == 1) {
		device_printf(ndev, "Reset said the device wasn't there.\n");
		return ENOENT;		/* No devices acked the RESET */
	}
	if (present == -1) {
		device_printf(ndev, "Reset discovered bus wired wrong.\n");
		return ENOENT;
	}

	for (i = 0; i < cmd->rom_len; i++)
		ow_send_byte(lldev, &timing_regular, cmd->rom_cmd[i]);
	for (i = 0; i < cmd->rom_read_len; i++)
		ow_read_byte(lldev, &timing_regular, cmd->rom_read + i);
	if (cmd->xpt_len) {
		/*
		 * Per AN937, the reset pulse and ROM level are always
		 * done with the regular timings. Certain ROM commands
		 * put the device into overdrive mode for the remainder
		 * of the data transfer, which is why we have to pass the
		 * timings here. Commands that need to be handled like this
		 * are expected to be flagged by the client.
		 */
		t = (cmd->flags & OW_FLAG_OVERDRIVE) ?
		    &timing_overdrive : &timing_regular;
		for (i = 0; i < cmd->xpt_len; i++)
			ow_send_byte(lldev, t, cmd->xpt_cmd[i]);
		if (cmd->flags & OW_FLAG_READ_BIT) {
			/* xpt_read_len counts bits here; pack them LSB first. */
			memset(cmd->xpt_read, 0, (cmd->xpt_read_len + 7) / 8);
			for (i = 0; i < cmd->xpt_read_len; i++) {
				OWLL_READ_DATA(lldev, t, &bit);
				cmd->xpt_read[i / 8] |= bit << (i % 8);
			}
		} else {
			for (i = 0; i < cmd->xpt_read_len; i++)
				ow_read_byte(lldev, t, cmd->xpt_read + i);
		}
	}
	return 0;
}

/*
 * Enumeration starter: issue a reset followed by the SEARCH_ROM command.
 * The arguments are forwarded unchanged to ow_send_command().
 */
static int
ow_search_rom(device_t lldev, device_t dev)
{
	struct ow_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.rom_cmd[0] = SEARCH_ROM;
	cmd.rom_len = 1;
	return ow_send_command(lldev, dev, &cmd);
}

#if 0
static int
ow_alarm_search(device_t lldev, device_t dev)
{
	struct ow_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.rom_cmd[0] = ALARM_SEARCH;
	cmd.rom_len = 1;
	return ow_send_command(lldev, dev, &cmd);
}
#endif

/*
 * Create a child device for `romid' and attach a freshly allocated
 * ow_devinfo to it as ivars.  Returns 0 or ENOMEM if the child cannot be
 * added (the devinfo is freed in that case).
 */
static int
ow_add_child(device_t dev, romid_t romid)
{
	struct ow_devinfo *di;
	device_t child;

	di = malloc(sizeof(*di), M_OW, M_WAITOK);
	di->romid = romid;
	child = device_add_child(dev, NULL, -1);
	if (child == NULL) {
		free(di, M_OW);
		return ENOMEM;
	}
	device_set_ivars(child, di);
	return (0);
}

/*
 * Return the existing child of `dev' whose ivars carry `romid', or NULL if
 * there is none or the child list cannot be obtained.
 */
static device_t
ow_child_by_romid(device_t dev, romid_t romid)
{
	device_t *children, retval, child;
	int nkid, i;
	struct ow_devinfo *di;

	if (device_get_children(dev, &children, &nkid) != 0)
		return (NULL);
	retval = NULL;
	for (i = 0; i < nkid; i++) {
		child = children[i];
		di = device_get_ivars(child);
		if (di->romid == romid) {
			retval = child;
			break;
		}
	}
	free(children, M_TEMP);

	return (retval);
}

/*
 * CRC generator table -- taken from AN937 DOW CRC LOOKUP FUNCTION Table 2
 */
const uint8_t ow_crc_table[] = {
	0, 94, 188, 226, 97, 63, 221, 131, 194, 156, 126, 32, 163, 253, 31, 65,
	157, 195, 33, 127, 252, 162, 64, 30, 95, 1, 227, 189, 62, 96, 130, 220,
	35, 125, 159, 193, 66, 28, 254, 160, 225, 191, 93, 3, 128, 222, 60, 98,
	190, 224, 2, 92, 223, 129, 99, 61, 124, 34, 192, 158, 29, 67, 161, 255,
	70, 24, 250, 164, 39, 121, 155, 197, 132, 218, 56, 102, 229, 187, 89, 7,
	219, 133,103, 57, 186, 228, 6, 88, 25, 71, 165, 251, 120, 38, 196, 154,
	101, 59, 217, 135, 4, 90, 184, 230, 167, 249, 27, 69, 198, 152, 122, 36,
	248, 166, 68, 26, 153, 199, 37, 123, 58, 100, 134, 216, 91, 5, 231, 185,
	140,210, 48, 110, 237, 179, 81, 15, 78, 16, 242, 172, 47, 113,147, 205,
	17, 79, 173, 243, 112, 46, 204, 146, 211,141, 111, 49, 178, 236, 14, 80,
	175, 241, 19, 77, 206, 144, 114, 44, 109, 51, 209, 143, 12, 82,176, 238,
	50, 108, 142, 208, 83, 13, 239, 177, 240, 174, 76, 18, 145, 207, 45, 115,
	202, 148, 118, 40, 171, 245, 23, 73, 8, 86, 180, 234, 105, 55, 213, 139,
	87, 9, 235, 181, 54, 104, 138, 212, 149, 203, 41, 119, 244, 170, 72, 22,
	233, 183, 85, 11, 136, 214, 52, 106, 43, 117, 151, 201, 74, 20, 246, 168,
	116, 42, 200, 150, 21, 75, 169, 247, 182, 232, 10, 84, 215, 137, 107, 53
};

/*
 * Converted from DO_CRC page 131 AN937
 */
static uint8_t
ow_crc(device_t ndev, device_t pdev, uint8_t *buffer, size_t len)
{
	uint8_t crc = 0;
	int i;

	for (i = 0; i < len; i++)
		crc =
ow_crc_table[crc ^ buffer[i]]; return crc; } static int ow_check_crc(romid_t romid) { return ow_crc(NULL, NULL, (uint8_t *)&romid, sizeof(romid)) == 0; } static int ow_device_found(device_t dev, romid_t romid) { /* XXX Move this up into enumerate? */ /* * All valid ROM IDs have a valid CRC. Check that first. */ if (!ow_check_crc(romid)) { device_printf(dev, "Device romid %8D failed CRC.\n", &romid, ":"); return EINVAL; } /* * If we've seen this child before, don't add a new one for it. */ if (ow_child_by_romid(dev, romid) != NULL) return 0; return ow_add_child(dev, romid); } static int ow_enumerate(device_t dev, ow_enum_fn *enumfp, ow_found_fn *foundfp) { device_t lldev = device_get_parent(dev); int first, second, i, dir, prior, last, err, retries; uint64_t probed, last_mask; int sanity = 10; prior = -1; last_mask = 0; retries = 0; last = -2; err = ow_acquire_bus(dev, dev, OWN_DONTWAIT); if (err != 0) return err; while (last != -1) { if (sanity-- < 0) { printf("Reached the sanity limit\n"); return EIO; } again: probed = 0; last = -1; /* * See AN397 section 5.II.C.3 for the algorithm (though a bit * poorly stated). The search command forces each device to * send ROM ID bits one at a time (first the bit, then the * complement) the master (us) sends back a bit. If the * device's bit doesn't match what we send back, that device * stops sending bits back. So each time through we remember * where we made the last decision (always 0). If there's a * conflict there this time (and there will be in the absence * of a hardware failure) we go with 1. This way, we prune the * devices on the bus and wind up with a unique ROM. We know * we're done when we detect no new conflicts. The same * algorithm is used for devices in alarm state as well. * * In addition, experience has shown that sometimes devices * stop responding in the middle of enumeration, so try this * step again a few times when that happens. 
It is unclear if * this is due to a nosiy electrical environment or some odd * timing issue. */ /* * The enumeration command should be successfully sent, if not, * we have big issues on the bus so punt. Lower layers report * any unusual errors, so we don't need to here. */ err = enumfp(dev, dev); if (err != 0) return (err); for (i = 0; i < 64; i++) { OWLL_READ_DATA(lldev, &timing_regular, &first); OWLL_READ_DATA(lldev, &timing_regular, &second); switch (first | second << 1) { case 0: /* Conflict */ if (i < prior) dir = (last_mask >> i) & 1; else dir = i == prior; if (dir == 0) last = i; break; case 1: /* 1 then 0 -> 1 for all */ dir = 1; break; case 2: /* 0 then 1 -> 0 for all */ dir = 0; break; case 3: /* * No device responded. This is unexpected, but * experience has shown that on some platforms * we miss a timing window, or otherwise have * an issue. Start this step over. Since we've * not updated prior yet, we can just jump to * the top of the loop for a re-do of this step. */ printf("oops, starting over\n"); if (++retries > 5) return (EIO); goto again; default: /* NOTREACHED */ - __unreachable(); + __assert_unreachable(); } if (dir) { OWLL_WRITE_ONE(lldev, &timing_regular); probed |= 1ull << i; } else { OWLL_WRITE_ZERO(lldev, &timing_regular); } } retries = 0; foundfp(dev, probed); last_mask = probed; prior = last; } ow_release_bus(dev, dev); return (0); } static int ow_probe(device_t dev) { device_set_desc(dev, "1 Wire Bus"); return (BUS_PROBE_GENERIC); } static int ow_attach(device_t ndev) { struct ow_softc *sc; /* * Find all the devices on the bus. We don't probe / attach them in the * enumeration phase. We do this because we want to allow the probe / * attach routines of the child drivers to have as full an access to the * bus as possible. While we reset things before the next step of the * search (so it would likely be OK to allow access by the clients to * the bus), it is more conservative to find them all, then to do the * attach of the devices. 
This also allows the child devices to have
	 * more knowledge of the bus. We also ignore errors from the enumeration
	 * because they might happen after we've found a few devices.
	 */
	sc = device_get_softc(ndev);
	sc->dev = ndev;
	mtx_init(&sc->mtx, device_get_nameunit(sc->dev), "ow", MTX_DEF);
	ow_enumerate(ndev, ow_search_rom, ow_device_found);
	return bus_generic_attach(ndev);
}

/*
 * Detach method: detach and delete every child, free each child's
 * ow_devinfo ivars, then destroy the bus mutex.  Returns ENOMEM if the
 * child list cannot be obtained, 0 otherwise.
 */
static int
ow_detach(device_t ndev)
{
	device_t *children, child;
	int nkid, i;
	struct ow_devinfo *di;
	struct ow_softc *sc;

	sc = device_get_softc(ndev);
	/*
	 * detach all the children first. This is blocking until any threads
	 * have stopped, etc.
	 */
	bus_generic_detach(ndev);

	/*
	 * We delete all the children, and free up the ivars
	 */
	if (device_get_children(ndev, &children, &nkid) != 0)
		return ENOMEM;
	for (i = 0; i < nkid; i++) {
		child = children[i];
		di = device_get_ivars(child);
		free(di, M_OW);
		device_delete_child(ndev, child);
	}
	free(children, M_TEMP);

	OW_LOCK_DESTROY(sc);
	return 0;
}

/*
 * Not sure this is really needed. I'm having trouble figuring out what
 * location means in the context of the one wire bus.
 */
static int
ow_child_location_str(device_t dev, device_t child, char *buf,
    size_t buflen)
{

	/* Always reports an empty location string. */
	*buf = '\0';
	return (0);
}

/* Bus method: format the child's PNP info as "romid=<8 bytes, ':'-separated>". */
static int
ow_child_pnpinfo_str(device_t dev, device_t child, char *buf,
    size_t buflen)
{
	struct ow_devinfo *di;

	di = device_get_ivars(child);
	snprintf(buf, buflen, "romid=%8D", &di->romid, ":");
	return (0);
}

/*
 * Bus method: read an instance variable of `child'.  OW_IVAR_FAMILY yields
 * the low byte of the ROM ID; OW_IVAR_ROMID stores a pointer to the ROM ID
 * itself in *result.  Any other index returns EINVAL.
 */
static int
ow_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct ow_devinfo *di;
	romid_t **ptr;

	di = device_get_ivars(child);
	switch (which) {
	case OW_IVAR_FAMILY:
		*result = di->romid & 0xff;
		break;
	case OW_IVAR_ROMID:
		ptr = (romid_t **)result;
		*ptr = &di->romid;
		break;
	default:
		return EINVAL;
	}

	return 0;
}

/* Bus method: no instance variable is writable; always returns EINVAL. */
static int
ow_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{

	return EINVAL;
}

/*
 * Bus method: print the standard child header, the child's ROM ID and the
 * standard footer; returns the number of characters printed.
 */
static int
ow_print_child(device_t ndev, device_t pdev)
{
	int retval = 0;
	struct ow_devinfo *di;

	di = device_get_ivars(pdev);

	retval += bus_print_child_header(ndev, pdev);
	retval += printf(" romid %8D", &di->romid, ":");
	retval += bus_print_child_footer(ndev, pdev);

	return retval;
}

/* Bus method: report a child for which no driver matched. */
static void
ow_probe_nomatch(device_t ndev, device_t pdev)
{
	struct ow_devinfo *di;

	di = device_get_ivars(pdev);
	device_printf(ndev, "romid %8D: no driver\n", &di->romid, ":");
}

/*
 * Make `pdev' the owner of the bus.  If the bus is already owned, either
 * fail with EWOULDBLOCK (how == OWN_DONTWAIT) or sleep on the softc until
 * the owner releases it.  Re-acquiring a bus that `pdev' already owns
 * panics.  Returns 0 once ownership has been taken.
 */
static int
ow_acquire_bus(device_t ndev, device_t pdev, int how)
{
	struct ow_softc *sc;

	sc = device_get_softc(ndev);
	OW_ASSERT_UNLOCKED(sc);
	OW_LOCK(sc);
	if (sc->owner != NULL) {
		if (sc->owner == pdev)
			panic("%s: %s recursively acquiring the bus.\n",
			    device_get_nameunit(ndev),
			    device_get_nameunit(pdev));
		if (how == OWN_DONTWAIT) {
			OW_UNLOCK(sc);
			return EWOULDBLOCK;
		}
		/* Woken by the wakeup(sc) in ow_release_bus(). */
		while (sc->owner != NULL)
			mtx_sleep(sc, &sc->mtx, 0, "owbuswait", 0);
	}
	sc->owner = pdev;
	OW_UNLOCK(sc);

	return 0;
}

/*
 * Give up ownership of the bus and wake any thread sleeping in
 * ow_acquire_bus().  Panics if the bus is unowned or owned by a device
 * other than `pdev'.
 */
static void
ow_release_bus(device_t ndev, device_t pdev)
{
	struct ow_softc *sc;

	sc = device_get_softc(ndev);
	OW_ASSERT_UNLOCKED(sc);
	OW_LOCK(sc);
	if (sc->owner == NULL)
		panic("%s: %s releasing unowned bus.",
		    device_get_nameunit(ndev),
		    device_get_nameunit(pdev));
	if (sc->owner != pdev)
		panic("%s: %s don't own the bus. %s does. game over.",
		    device_get_nameunit(ndev),
		    device_get_nameunit(pdev),
		    device_get_nameunit(sc->owner));
	sc->owner = NULL;
	wakeup(sc);
	OW_UNLOCK(sc);
}

devclass_t ow_devclass;

static device_method_t ow_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, ow_probe),
	DEVMETHOD(device_attach, ow_attach),
	DEVMETHOD(device_detach, ow_detach),

	/* Bus interface */
	DEVMETHOD(bus_child_pnpinfo_str, ow_child_pnpinfo_str),
	DEVMETHOD(bus_child_location_str, ow_child_location_str),
	DEVMETHOD(bus_read_ivar, ow_read_ivar),
	DEVMETHOD(bus_write_ivar, ow_write_ivar),
	DEVMETHOD(bus_print_child, ow_print_child),
	DEVMETHOD(bus_probe_nomatch, ow_probe_nomatch),

	/* One Wire Network/Transport layer interface */
	DEVMETHOD(own_send_command, ow_send_command),
	DEVMETHOD(own_acquire_bus, ow_acquire_bus),
	DEVMETHOD(own_release_bus, ow_release_bus),
	DEVMETHOD(own_crc, ow_crc),
	{ 0, 0 }
};

static driver_t ow_driver = {
	"ow",
	ow_methods,
	sizeof(struct ow_softc),
};

DRIVER_MODULE(ow, owc, ow_driver, ow_devclass, 0, 0);
MODULE_VERSION(ow, 1);
diff --git a/sys/net/mppcc.c b/sys/net/mppcc.c index 8ec21ad7c652..c1bf566c790c 100644 --- a/sys/net/mppcc.c +++ b/sys/net/mppcc.c @@ -1,300 +1,300 @@ /*- * Copyright (c) 2002-2004 Jan Dubiec * Copyright (c) 2007 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution.
*
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * MPPC compression library.
 * Version 1.0
 *
 * Note that Hi/Fn (later acquired by Exar Corporation) held US patents
 * on some implementation-critical aspects of MPPC compression.
 * These patents lapsed due to non-payment of fees in 2007 and by 2015
 * expired altogether.
 */

#include
#include
#include

#define MPPE_HIST_LEN 8192

/* Hash of the three bytes at x, reduced to a 13-bit index into hash[]. */
#define HASH(x) (((40543*(((((x)[0]<<4)^(x)[1])<<4)^(x)[2]))>>4) & 0x1fff)

/*
 * Compressor state: a double-length history buffer, the write position
 * within it, and a table mapping HASH() values to history positions.
 */
struct MPPC_comp_state {
    uint8_t	hist[2*MPPE_HIST_LEN];
    uint16_t	histptr;
    uint16_t	hash[MPPE_HIST_LEN];
};

/*
 * Inserts 1 to 8 bits into the output buffer.
 *
 * buf - start of the output buffer
 * val - value whose low n bits are appended
 * n   - number of bits to append
 * i   - in/out: index of the output byte currently being filled
 * l   - in/out: number of still-unused bits in that byte
 */
static void __inline
putbits8(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l)
{
    buf += *i;
    if (*l >= n) {
	/* The bits fit into the current byte. */
	*l = (*l) - n;
	val <<= *l;
	*buf = *buf | (val & 0xff);
	if (*l == 0) {
	    /* Byte is full: advance and clear the next one. */
	    *l = 8;
	    (*i)++;
	    *(++buf) = 0;
	}
    } else {
	/* The bits straddle the current byte and the next one. */
	(*i)++;
	*l = 8 - n + (*l);
	val <<= *l;
	*buf = *buf | ((val >> 8) & 0xff);
	*(++buf) = val & 0xff;
    }
}

/* Inserts 9 to 16 bits into the output buffer.
*/ static void __inline putbits16(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l) { buf += *i; if (*l >= n - 8) { (*i)++; *l = 8 - n + (*l); val <<= *l; *buf = *buf | ((val >> 8) & 0xff); *(++buf) = val & 0xff; if (*l == 0) { *l = 8; (*i)++; *(++buf) = 0; } } else { (*i)++; (*i)++; *l = 16 - n + (*l); val <<= *l; *buf = *buf | ((val >> 16) & 0xff); *(++buf) = (val >> 8) & 0xff; *(++buf) = val & 0xff; } } /* Inserts 17 to 24 bits into the output buffer. */ static void __inline putbits24(uint8_t *buf, uint32_t val, const uint32_t n, uint32_t *i, uint32_t *l) { buf += *i; if (*l >= n - 16) { (*i)++; (*i)++; *l = 16 - n + (*l); val <<= *l; *buf = *buf | ((val >> 16) & 0xff); *(++buf) = (val >> 8) & 0xff; *(++buf) = val & 0xff; if (*l == 0) { *l = 8; (*i)++; *(++buf) = 0; } } else { (*i)++; (*i)++; (*i)++; *l = 24 - n + (*l); val <<= *l; *buf = *buf | ((val >> 24) & 0xff); *(++buf) = (val >> 16) & 0xff; *(++buf) = (val >> 8) & 0xff; *(++buf) = val & 0xff; } } size_t MPPC_SizeOfCompressionHistory(void) { return (sizeof(struct MPPC_comp_state)); } void MPPC_InitCompressionHistory(char *history) { struct MPPC_comp_state *state = (struct MPPC_comp_state*)history; bzero(history, sizeof(struct MPPC_comp_state)); state->histptr = MPPE_HIST_LEN; } int MPPC_Compress(u_char **src, u_char **dst, u_long *srcCnt, u_long *dstCnt, char *history, int flags, int undef) { struct MPPC_comp_state *state = (struct MPPC_comp_state*)history; uint32_t olen, off, len, idx, i, l; uint8_t *hist, *sbuf, *p, *q, *r, *s; int rtn = MPPC_OK; /* * At this point, to avoid possible buffer overflow caused by packet * expansion during/after compression, we should make sure we have * space for the worst case. * Maximum MPPC packet expansion is 12.5%. This is the worst case when * all octets in the input buffer are >= 0x80 and we cannot find any * repeated tokens. 
*/ if (*dstCnt < (*srcCnt * 9 / 8 + 2)) { rtn &= ~MPPC_OK; return (rtn); } /* We can't compress more then MPPE_HIST_LEN bytes in a call. */ if (*srcCnt > MPPE_HIST_LEN) { rtn &= ~MPPC_OK; return (rtn); } hist = state->hist + MPPE_HIST_LEN; /* check if there is enough room at the end of the history */ if (state->histptr + *srcCnt >= 2*MPPE_HIST_LEN) { rtn |= MPPC_RESTART_HISTORY; state->histptr = MPPE_HIST_LEN; memcpy(state->hist, hist, MPPE_HIST_LEN); } /* Add packet to the history. */ sbuf = state->hist + state->histptr; memcpy(sbuf, *src, *srcCnt); state->histptr += *srcCnt; /* compress data */ r = sbuf + *srcCnt; **dst = olen = i = 0; l = 8; while (i < *srcCnt - 2) { s = q = sbuf + i; /* Prognose matching position using hash function. */ idx = HASH(s); p = hist + state->hash[idx]; state->hash[idx] = (uint16_t) (s - hist); if (p > s) /* It was before MPPC_RESTART_HISTORY. */ p -= MPPE_HIST_LEN; /* Try previous history buffer. */ off = s - p; /* Check our prognosis. */ if (off > MPPE_HIST_LEN - 1 || off < 1 || *p++ != *s++ || *p++ != *s++ || *p++ != *s++) { /* No match found; encode literal byte. */ if ((*src)[i] < 0x80) { /* literal byte < 0x80 */ putbits8(*dst, (uint32_t) (*src)[i], 8, &olen, &l); } else { /* literal byte >= 0x80 */ putbits16(*dst, (uint32_t) (0x100|((*src)[i]&0x7f)), 9, &olen, &l); } ++i; continue; } /* Find length of the matching fragment */ #if defined(__amd64__) || defined(__i386__) /* Optimization for CPUs without strict data aligning requirements */ while ((*((uint32_t*)p) == *((uint32_t*)s)) && (s < (r - 3))) { p+=4; s+=4; } #endif while((*p++ == *s++) && (s <= r)); len = s - q - 1; i += len; /* At least 3 character match found; code data. */ /* Encode offset. 
*/ if (off < 64) { /* 10-bit offset; 0 <= offset < 64 */ putbits16(*dst, 0x3c0|off, 10, &olen, &l); } else if (off < 320) { /* 12-bit offset; 64 <= offset < 320 */ putbits16(*dst, 0xe00|(off-64), 12, &olen, &l); } else if (off < 8192) { /* 16-bit offset; 320 <= offset < 8192 */ putbits16(*dst, 0xc000|(off-320), 16, &olen, &l); } else { /* NOTREACHED */ - __unreachable(); + __assert_unreachable(); rtn &= ~MPPC_OK; return (rtn); } /* Encode length of match. */ if (len < 4) { /* length = 3 */ putbits8(*dst, 0, 1, &olen, &l); } else if (len < 8) { /* 4 <= length < 8 */ putbits8(*dst, 0x08|(len&0x03), 4, &olen, &l); } else if (len < 16) { /* 8 <= length < 16 */ putbits8(*dst, 0x30|(len&0x07), 6, &olen, &l); } else if (len < 32) { /* 16 <= length < 32 */ putbits8(*dst, 0xe0|(len&0x0f), 8, &olen, &l); } else if (len < 64) { /* 32 <= length < 64 */ putbits16(*dst, 0x3c0|(len&0x1f), 10, &olen, &l); } else if (len < 128) { /* 64 <= length < 128 */ putbits16(*dst, 0xf80|(len&0x3f), 12, &olen, &l); } else if (len < 256) { /* 128 <= length < 256 */ putbits16(*dst, 0x3f00|(len&0x7f), 14, &olen, &l); } else if (len < 512) { /* 256 <= length < 512 */ putbits16(*dst, 0xfe00|(len&0xff), 16, &olen, &l); } else if (len < 1024) { /* 512 <= length < 1024 */ putbits24(*dst, 0x3fc00|(len&0x1ff), 18, &olen, &l); } else if (len < 2048) { /* 1024 <= length < 2048 */ putbits24(*dst, 0xff800|(len&0x3ff), 20, &olen, &l); } else if (len < 4096) { /* 2048 <= length < 4096 */ putbits24(*dst, 0x3ff000|(len&0x7ff), 22, &olen, &l); } else if (len < 8192) { /* 4096 <= length < 8192 */ putbits24(*dst, 0xffe000|(len&0xfff), 24, &olen, &l); } else { /* NOTREACHED */ rtn &= ~MPPC_OK; return (rtn); } } /* Add remaining octets to the output. 
*/ while(*srcCnt - i > 0) { if ((*src)[i] < 0x80) { /* literal byte < 0x80 */ putbits8(*dst, (uint32_t) (*src)[i++], 8, &olen, &l); } else { /* literal byte >= 0x80 */ putbits16(*dst, (uint32_t) (0x100|((*src)[i++]&0x7f)), 9, &olen, &l); } } /* Reset unused bits of the last output octet. */ if ((l != 0) && (l != 8)) { putbits8(*dst, 0, l, &olen, &l); } /* If result is bigger then original, set flag and flush history. */ if ((*srcCnt < olen) || ((flags & MPPC_SAVE_HISTORY) == 0)) { if (*srcCnt < olen) rtn |= MPPC_EXPANDED; bzero(history, sizeof(struct MPPC_comp_state)); state->histptr = MPPE_HIST_LEN; } *src += *srcCnt; *srcCnt = 0; *dst += olen; *dstCnt -= olen; return (rtn); } diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 628dd4e0f5b1..b105a27a6f73 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -1,615 +1,619 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)systm.h 8.7 (Berkeley) 3/29/95 * $FreeBSD$ */ #ifndef _SYS_SYSTM_H_ #define _SYS_SYSTM_H_ #include #include #include #include #include #include /* for people using printf mainly */ __NULLABILITY_PRAGMA_PUSH extern int cold; /* nonzero if we are doing a cold boot */ extern int suspend_blocked; /* block suspend due to pending shutdown */ extern int rebooting; /* kern_reboot() has been called. */ extern const char *panicstr; /* panic message */ extern bool panicked; #define KERNEL_PANICKED() __predict_false(panicked) extern char version[]; /* system version */ extern char compiler_version[]; /* compiler version */ extern char copyright[]; /* system copyright */ extern int kstack_pages; /* number of kernel stack pages */ extern u_long pagesizes[]; /* supported page sizes */ extern long physmem; /* physical memory */ extern long realmem; /* 'real' memory */ extern char *rootdevnames[2]; /* names of possible root devices */ extern int boothowto; /* reboot flags, from console subsystem */ extern int bootverbose; /* nonzero to print verbose messages */ extern int maxusers; /* system tune hint */ extern int ngroups_max; /* max # of supplemental groups */ extern int vm_guest; /* Running as virtual machine guest? 
*/ /* * Detected virtual machine guest types. The intention is to expand * and/or add to the VM_GUEST_VM type if specific VM functionality is * ever implemented (e.g. vendor-specific paravirtualization features). * Keep in sync with vm_guest_sysctl_names[]. */ enum VM_GUEST { VM_GUEST_NO = 0, VM_GUEST_VM, VM_GUEST_XEN, VM_GUEST_HV, VM_GUEST_VMWARE, VM_GUEST_KVM, VM_GUEST_BHYVE, VM_GUEST_VBOX, VM_GUEST_PARALLELS, VM_LAST }; /* * These functions need to be declared before the KASSERT macro is invoked in * !KASSERT_PANIC_OPTIONAL builds, so their declarations are sort of out of * place compared to other function definitions in this header. On the other * hand, this header is a bit disorganized anyway. */ void panic(const char *, ...) __dead2 __printflike(1, 2); void vpanic(const char *, __va_list) __dead2 __printflike(1, 0); #if defined(WITNESS) || defined(INVARIANT_SUPPORT) #ifdef KASSERT_PANIC_OPTIONAL void kassert_panic(const char *fmt, ...) __printflike(1, 2); #else #define kassert_panic panic #endif #endif #ifdef INVARIANTS /* The option is always available */ #define KASSERT(exp,msg) do { \ if (__predict_false(!(exp))) \ kassert_panic msg; \ } while (0) #define VNASSERT(exp, vp, msg) do { \ if (__predict_false(!(exp))) { \ vn_printf(vp, "VNASSERT failed: %s not true at %s:%d (%s)\n",\ #exp, __FILE__, __LINE__, __func__); \ kassert_panic msg; \ } \ } while (0) #define VNPASS(exp, vp) do { \ const char *_exp = #exp; \ VNASSERT(exp, vp, ("condition %s not met at %s:%d (%s)", \ _exp, __FILE__, __LINE__, __func__)); \ } while (0) +#define __assert_unreachable() \ + panic("executing segment marked as unreachable at %s:%d (%s)\n", \ + __FILE__, __LINE__, __func__) #else #define KASSERT(exp,msg) do { \ } while (0) #define VNASSERT(exp, vp, msg) do { \ } while (0) #define VNPASS(exp, vp) do { \ } while (0) +#define __assert_unreachable() __unreachable() #endif #ifndef CTASSERT /* Allow lint to override */ #define CTASSERT(x) _Static_assert(x, "compile-time assertion 
failed") #endif #if defined(_KERNEL) #include /* MAXCPU */ #include /* curthread */ #include #endif /* * Assert that a pointer can be loaded from memory atomically. * * This assertion enforces stronger alignment than necessary. For example, * on some architectures, atomicity for unaligned loads will depend on * whether or not the load spans multiple cache lines. */ #define ASSERT_ATOMIC_LOAD_PTR(var, msg) \ KASSERT(sizeof(var) == sizeof(void *) && \ ((uintptr_t)&(var) & (sizeof(void *) - 1)) == 0, msg) /* * Assert that a thread is in critical(9) section. */ #define CRITICAL_ASSERT(td) \ KASSERT((td)->td_critnest >= 1, ("Not in critical section")); /* * If we have already panic'd and this is the thread that called * panic(), then don't block on any mutexes but silently succeed. * Otherwise, the kernel will deadlock since the scheduler isn't * going to run the thread that holds any lock we need. */ #define SCHEDULER_STOPPED_TD(td) ({ \ MPASS((td) == curthread); \ __predict_false((td)->td_stopsched); \ }) #define SCHEDULER_STOPPED() SCHEDULER_STOPPED_TD(curthread) /* * Align variables. */ #define __read_mostly __section(".data.read_mostly") #define __read_frequently __section(".data.read_frequently") #define __exclusive_cache_line __aligned(CACHE_LINE_SIZE) \ __section(".data.exclusive_cache_line") /* * XXX the hints declarations are even more misplaced than most declarations * in this file, since they are needed in one file (per arch) and only used * in two files. * XXX most of these variables should be const. 
*/ extern int osreldate; extern bool dynamic_kenv; extern struct mtx kenv_lock; extern char *kern_envp; extern char *md_envp; extern char static_env[]; extern char static_hints[]; /* by config for now */ extern char **kenvp; extern const void *zero_region; /* address space maps to a zeroed page */ extern int unmapped_buf_allowed; #ifdef __LP64__ #define IOSIZE_MAX iosize_max() #define DEVFS_IOSIZE_MAX devfs_iosize_max() #else #define IOSIZE_MAX SSIZE_MAX #define DEVFS_IOSIZE_MAX SSIZE_MAX #endif /* * General function declarations. */ struct inpcb; struct lock_object; struct malloc_type; struct mtx; struct proc; struct socket; struct thread; struct tty; struct ucred; struct uio; struct _jmp_buf; struct trapframe; struct eventtimer; int setjmp(struct _jmp_buf *) __returns_twice; void longjmp(struct _jmp_buf *, int) __dead2; int dumpstatus(vm_offset_t addr, off_t count); int nullop(void); int eopnotsupp(void); int ureadc(int, struct uio *); void hashdestroy(void *, struct malloc_type *, u_long); void *hashinit(int count, struct malloc_type *type, u_long *hashmask); void *hashinit_flags(int count, struct malloc_type *type, u_long *hashmask, int flags); #define HASH_NOWAIT 0x00000001 #define HASH_WAITOK 0x00000002 void *phashinit(int count, struct malloc_type *type, u_long *nentries); void *phashinit_flags(int count, struct malloc_type *type, u_long *nentries, int flags); void g_waitidle(void); void cpu_boot(int); void cpu_flush_dcache(void *, size_t); void cpu_rootconf(void); void critical_enter_KBI(void); void critical_exit_KBI(void); void critical_exit_preempt(void); void init_param1(void); void init_param2(long physpages); void init_static_kenv(char *, size_t); void tablefull(const char *); /* * Allocate per-thread "current" state in the linuxkpi */ extern int (*lkpi_alloc_current)(struct thread *, int); int linux_alloc_current_noop(struct thread *, int); #if defined(KLD_MODULE) || defined(KTR_CRITICAL) || !defined(_KERNEL) || defined(GENOFFSET) #define 
critical_enter()	critical_enter_KBI()
#define critical_exit()	critical_exit_KBI()
#else
/*
 * Inline variant of critical_enter(): bump the nesting counter
 * td_critnest of the current thread, then issue a compiler barrier.
 *
 * NOTE(review): struct thread_lite is presumably a reduced view of
 * struct thread that exposes only the fields used here -- confirm against
 * its declaration, which is not visible in this chunk.
 */
static __inline void
critical_enter(void)
{
	struct thread_lite *td;

	td = (struct thread_lite *)curthread;
	td->td_critnest++;
	/* Compiler-only barrier placed after the increment. */
	__compiler_membar();
}

/*
 * Inline variant of critical_exit(): drop one level of td_critnest and,
 * if td_owepreempt is set afterwards, call critical_exit_preempt().
 *
 * The decrement is bracketed by compiler barriers on both sides, and the
 * td_owepreempt test comes only after the second barrier.  Under INVARIANTS
 * the KASSERT fires if the counter is already zero on entry.
 */
static __inline void
critical_exit(void)
{
	struct thread_lite *td;

	td = (struct thread_lite *)curthread;
	KASSERT(td->td_critnest != 0,
	    ("critical_exit: td_critnest == 0"));
	__compiler_membar();
	td->td_critnest--;
	__compiler_membar();
	/* Expected to be the rare case, hence __predict_false. */
	if (__predict_false(td->td_owepreempt))
		critical_exit_preempt();

}
#endif

#ifdef  EARLY_PRINTF
typedef void early_putc_t(int ch);
extern early_putc_t *early_putc;
#endif
int	kvprintf(char const *, void (*)(int, void*), void *, int,
	    __va_list) __printflike(1, 0);
void	log(int, const char *, ...) __printflike(2, 3);
void	log_console(struct uio *);
void	vlog(int, const char *, __va_list) __printflike(2, 0);
int	asprintf(char **ret, struct malloc_type *mtp, const char *format,
	    ...) __printflike(3, 4);
int	printf(const char *, ...) __printflike(1, 2);
int	snprintf(char *, size_t, const char *, ...) __printflike(3, 4);
int	sprintf(char *buf, const char *, ...) __printflike(2, 3);
int	uprintf(const char *, ...) __printflike(1, 2);
int	vprintf(const char *, __va_list) __printflike(1, 0);
int	vasprintf(char **ret, struct malloc_type *mtp, const char *format,
	    __va_list ap) __printflike(3, 0);
int	vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0);
int	vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0);
int	vsprintf(char *buf, const char *, __va_list) __printflike(2, 0);
int	sscanf(const char *, char const * _Nonnull, ...) __scanflike(2, 3);
int	vsscanf(const char * _Nonnull, char const * _Nonnull, __va_list)  __scanflike(2, 0);
long	strtol(const char *, char **, int);
u_long	strtoul(const char *, char **, int);
quad_t	strtoq(const char *, char **, int);
u_quad_t strtouq(const char *, char **, int);
void	tprintf(struct proc *p, int pri, const char *, ...)
__printflike(3, 4); void vtprintf(struct proc *, int, const char *, __va_list) __printflike(3, 0); void hexdump(const void *ptr, int length, const char *hdr, int flags); #define HD_COLUMN_MASK 0xff #define HD_DELIM_MASK 0xff00 #define HD_OMIT_COUNT (1 << 16) #define HD_OMIT_HEX (1 << 17) #define HD_OMIT_CHARS (1 << 18) #define ovbcopy(f, t, l) bcopy((f), (t), (l)) void bcopy(const void * _Nonnull from, void * _Nonnull to, size_t len); void bzero(void * _Nonnull buf, size_t len); void explicit_bzero(void * _Nonnull, size_t); int bcmp(const void *b1, const void *b2, size_t len); void *memset(void * _Nonnull buf, int c, size_t len); void *memcpy(void * _Nonnull to, const void * _Nonnull from, size_t len); void *memmove(void * _Nonnull dest, const void * _Nonnull src, size_t n); int memcmp(const void *b1, const void *b2, size_t len); #ifdef KCSAN void *kcsan_memset(void *, int, size_t); void *kcsan_memcpy(void *, const void *, size_t); void *kcsan_memmove(void *, const void *, size_t); int kcsan_memcmp(const void *, const void *, size_t); #define bcopy(from, to, len) kcsan_memmove((to), (from), (len)) #define bzero(buf, len) kcsan_memset((buf), 0, (len)) #define bcmp(b1, b2, len) kcsan_memcmp((b1), (b2), (len)) #define memset(buf, c, len) kcsan_memset((buf), (c), (len)) #define memcpy(to, from, len) kcsan_memcpy((to), (from), (len)) #define memmove(dest, src, n) kcsan_memmove((dest), (src), (n)) #define memcmp(b1, b2, len) kcsan_memcmp((b1), (b2), (len)) #else #define bcopy(from, to, len) __builtin_memmove((to), (from), (len)) #define bzero(buf, len) __builtin_memset((buf), 0, (len)) #define bcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len)) #define memset(buf, c, len) __builtin_memset((buf), (c), (len)) #define memcpy(to, from, len) __builtin_memcpy((to), (from), (len)) #define memmove(dest, src, n) __builtin_memmove((dest), (src), (n)) #define memcmp(b1, b2, len) __builtin_memcmp((b1), (b2), (len)) #endif void *memset_early(void * _Nonnull buf, int c, size_t len); 
#define bzero_early(buf, len) memset_early((buf), 0, (len)) void *memcpy_early(void * _Nonnull to, const void * _Nonnull from, size_t len); void *memmove_early(void * _Nonnull dest, const void * _Nonnull src, size_t n); #define bcopy_early(from, to, len) memmove_early((to), (from), (len)) int copystr(const void * _Nonnull __restrict kfaddr, void * _Nonnull __restrict kdaddr, size_t len, size_t * __restrict lencopied); int copyinstr(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len, size_t * __restrict lencopied); int copyin(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len); int copyin_nofault(const void * __restrict udaddr, void * _Nonnull __restrict kaddr, size_t len); int copyout(const void * _Nonnull __restrict kaddr, void * __restrict udaddr, size_t len); int copyout_nofault(const void * _Nonnull __restrict kaddr, void * __restrict udaddr, size_t len); #ifdef KCSAN int kcsan_copystr(const void *, void *, size_t, size_t *); int kcsan_copyin(const void *, void *, size_t); int kcsan_copyinstr(const void *, void *, size_t, size_t *); int kcsan_copyout(const void *, void *, size_t); #define copystr(kf, k, l, lc) kcsan_copystr((kf), (k), (l), (lc)) #define copyin(u, k, l) kcsan_copyin((u), (k), (l)) #define copyinstr(u, k, l, lc) kcsan_copyinstr((u), (k), (l), (lc)) #define copyout(k, u, l) kcsan_copyout((k), (u), (l)) #endif int fubyte(volatile const void *base); long fuword(volatile const void *base); int fuword16(volatile const void *base); int32_t fuword32(volatile const void *base); int64_t fuword64(volatile const void *base); int fueword(volatile const void *base, long *val); int fueword32(volatile const void *base, int32_t *val); int fueword64(volatile const void *base, int64_t *val); int subyte(volatile void *base, int byte); int suword(volatile void *base, long word); int suword16(volatile void *base, int word); int suword32(volatile void *base, int32_t word); int suword64(volatile void *base, int64_t 
word); uint32_t casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval); u_long casuword(volatile u_long *p, u_long oldval, u_long newval); int casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp, uint32_t newval); int casueword(volatile u_long *p, u_long oldval, u_long *oldvalp, u_long newval); void realitexpire(void *); int sysbeep(int hertz, int period); void hardclock(int cnt, int usermode); void hardclock_sync(int cpu); void softclock(void *); void statclock(int cnt, int usermode); void profclock(int cnt, int usermode, uintfptr_t pc); int hardclockintr(void); void startprofclock(struct proc *); void stopprofclock(struct proc *); void cpu_startprofclock(void); void cpu_stopprofclock(void); void suspendclock(void); void resumeclock(void); sbintime_t cpu_idleclock(void); void cpu_activeclock(void); void cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt); void cpu_et_frequency(struct eventtimer *et, uint64_t newfreq); extern int cpu_disable_c2_sleep; extern int cpu_disable_c3_sleep; char *kern_getenv(const char *name); void freeenv(char *env); int getenv_int(const char *name, int *data); int getenv_uint(const char *name, unsigned int *data); int getenv_long(const char *name, long *data); int getenv_ulong(const char *name, unsigned long *data); int getenv_string(const char *name, char *data, int size); int getenv_int64(const char *name, int64_t *data); int getenv_uint64(const char *name, uint64_t *data); int getenv_quad(const char *name, quad_t *data); int kern_setenv(const char *name, const char *value); int kern_unsetenv(const char *name); int testenv(const char *name); int getenv_array(const char *name, void *data, int size, int *psize, int type_size, bool allow_signed); #define GETENV_UNSIGNED false /* negative numbers not allowed */ #define GETENV_SIGNED true /* negative numbers allowed */ typedef uint64_t (cpu_tick_f)(void); void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var); extern cpu_tick_f 
*cpu_ticks; uint64_t cpu_tickrate(void); uint64_t cputick2usec(uint64_t tick); #ifdef APM_FIXUP_CALLTODO struct timeval; void adjust_timeout_calltodo(struct timeval *time_change); #endif /* APM_FIXUP_CALLTODO */ #include /* Initialize the world */ void consinit(void); void cpu_initclocks(void); void cpu_initclocks_bsp(void); void cpu_initclocks_ap(void); void usrinfoinit(void); /* Finalize the world */ void kern_reboot(int) __dead2; void shutdown_nice(int); /* Stubs for obsolete functions that used to be for interrupt management */ static __inline intrmask_t splbio(void) { return 0; } static __inline intrmask_t splcam(void) { return 0; } static __inline intrmask_t splclock(void) { return 0; } static __inline intrmask_t splhigh(void) { return 0; } static __inline intrmask_t splimp(void) { return 0; } static __inline intrmask_t splnet(void) { return 0; } static __inline intrmask_t spltty(void) { return 0; } static __inline void splx(intrmask_t ipl __unused) { return; } /* * Common `proc' functions are declared here so that proc.h can be included * less often. 
*/ int _sleep(const void * _Nonnull chan, struct lock_object *lock, int pri, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define msleep(chan, mtx, pri, wmesg, timo) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), \ tick_sbt * (timo), 0, C_HARDCLOCK) #define msleep_sbt(chan, mtx, pri, wmesg, bt, pr, flags) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (bt), (pr), \ (flags)) int msleep_spin_sbt(const void * _Nonnull chan, struct mtx *mtx, const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define msleep_spin(chan, mtx, wmesg, timo) \ msleep_spin_sbt((chan), (mtx), (wmesg), tick_sbt * (timo), \ 0, C_HARDCLOCK) int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags); #define pause(wmesg, timo) \ pause_sbt((wmesg), tick_sbt * (timo), 0, C_HARDCLOCK) #define pause_sig(wmesg, timo) \ pause_sbt((wmesg), tick_sbt * (timo), 0, C_HARDCLOCK | C_CATCH) #define tsleep(chan, pri, wmesg, timo) \ _sleep((chan), NULL, (pri), (wmesg), tick_sbt * (timo), \ 0, C_HARDCLOCK) #define tsleep_sbt(chan, pri, wmesg, bt, pr, flags) \ _sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags)) void wakeup(const void *chan); void wakeup_one(const void *chan); void wakeup_any(const void *chan); /* * Common `struct cdev *' stuff are declared here to avoid #include poisoning */ struct cdev; dev_t dev2udev(struct cdev *x); const char *devtoname(struct cdev *cdev); #ifdef __LP64__ size_t devfs_iosize_max(void); size_t iosize_max(void); #endif int poll_no_poll(int events); /* XXX: Should be void nanodelay(u_int nsec); */ void DELAY(int usec); /* Root mount holdback API */ struct root_hold_token { int flags; const char *who; TAILQ_ENTRY(root_hold_token) list; }; struct root_hold_token *root_mount_hold(const char *identifier); void root_mount_hold_token(const char *identifier, struct root_hold_token *h); void root_mount_rel(struct root_hold_token *h); int root_mounted(void); /* * Unit number allocation API. 
(kern/subr_unit.c) */ struct unrhdr; struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex); void init_unrhdr(struct unrhdr *uh, int low, int high, struct mtx *mutex); void delete_unrhdr(struct unrhdr *uh); void clear_unrhdr(struct unrhdr *uh); void clean_unrhdr(struct unrhdr *uh); void clean_unrhdrl(struct unrhdr *uh); int alloc_unr(struct unrhdr *uh); int alloc_unr_specific(struct unrhdr *uh, u_int item); int alloc_unrl(struct unrhdr *uh); void free_unr(struct unrhdr *uh, u_int item); #ifndef __LP64__ #define UNR64_LOCKED #endif struct unrhdr64 { uint64_t counter; }; static __inline void new_unrhdr64(struct unrhdr64 *unr64, uint64_t low) { unr64->counter = low; } #ifdef UNR64_LOCKED uint64_t alloc_unr64(struct unrhdr64 *); #else static __inline uint64_t alloc_unr64(struct unrhdr64 *unr64) { return (atomic_fetchadd_64(&unr64->counter, 1)); } #endif void intr_prof_stack_use(struct thread *td, struct trapframe *frame); void counted_warning(unsigned *counter, const char *msg); /* * APIs to manage deprecation and obsolescence. */ struct device; void _gone_in(int major, const char *msg); void _gone_in_dev(struct device *dev, int major, const char *msg); #ifdef NO_OBSOLETE_CODE #define __gone_ok(m, msg) \ _Static_assert(m < P_OSREL_MAJOR(__FreeBSD_version)), \ "Obsolete code: " msg); #else #define __gone_ok(m, msg) #endif #define gone_in(major, msg) __gone_ok(major, msg) _gone_in(major, msg) #define gone_in_dev(dev, major, msg) __gone_ok(major, msg) _gone_in_dev(dev, major, msg) __NULLABILITY_PRAGMA_POP #endif /* !_SYS_SYSTM_H_ */ diff --git a/sys/vm/vm_radix.c b/sys/vm/vm_radix.c index ce9edad13c1e..31666f0053c3 100644 --- a/sys/vm/vm_radix.c +++ b/sys/vm/vm_radix.c @@ -1,911 +1,911 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2013 EMC Corp. * Copyright (c) 2011 Jeffrey Roberson * Copyright (c) 2008 Mayur Shardul * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ /* * Path-compressed radix trie implementation. * The following code is not generalized into a general purpose library * because there are way too many parameters embedded that should really * be decided by the library consumers. At the same time, consumers * of this code must achieve highest possible performance. * * The implementation takes into account the following rationale: * - Size of the nodes should be as small as possible but still big enough * to avoid a large maximum depth for the trie. This is a balance * between the necessity to not wire too much physical memory for the nodes * and the necessity to avoid too much cache pollution during the trie * operations. 
* - There is not a huge bias toward the number of lookup operations over * the number of insert and remove operations. This basically implies * that optimizations supposedly helping one operation but hurting the * other might be carefully evaluated. * - On average not many nodes are expected to be fully populated, hence * level compression may just complicate things. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* * These widths should allow the pointers to a node's children to fit within * a single cache line. The extra levels from a narrow width should not be * a problem thanks to path compression. */ #ifdef __LP64__ #define VM_RADIX_WIDTH 4 #else #define VM_RADIX_WIDTH 3 #endif #define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH) #define VM_RADIX_MASK (VM_RADIX_COUNT - 1) #define VM_RADIX_LIMIT \ (howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1) /* Flag bits stored in node pointers. */ #define VM_RADIX_ISLEAF 0x1 #define VM_RADIX_FLAGS 0x1 #define VM_RADIX_PAD VM_RADIX_FLAGS /* Returns one unit associated with specified level. */ #define VM_RADIX_UNITLEVEL(lev) \ ((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH)) enum vm_radix_access { SMR, LOCKED, UNSERIALIZED }; struct vm_radix_node; typedef SMR_POINTER(struct vm_radix_node *) smrnode_t; struct vm_radix_node { vm_pindex_t rn_owner; /* Owner of record. */ uint16_t rn_count; /* Valid children. */ uint8_t rn_clev; /* Current level. */ int8_t rn_last; /* zero last ptr. */ smrnode_t rn_child[VM_RADIX_COUNT]; /* Child nodes. */ }; static uma_zone_t vm_radix_node_zone; static smr_t vm_radix_smr; static void vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v, enum vm_radix_access access); /* * Allocate a radix node. 
*/ static struct vm_radix_node * vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel) { struct vm_radix_node *rnode; rnode = uma_zalloc_smr(vm_radix_node_zone, M_NOWAIT); if (rnode == NULL) return (NULL); /* * We want to clear the last child pointer after the final section * has exited so lookup can not return false negatives. It is done * here because it will be cache-cold in the dtor callback. */ if (rnode->rn_last != 0) { vm_radix_node_store(&rnode->rn_child[rnode->rn_last - 1], NULL, UNSERIALIZED); rnode->rn_last = 0; } rnode->rn_owner = owner; rnode->rn_count = count; rnode->rn_clev = clevel; return (rnode); } /* * Free radix node. */ static __inline void vm_radix_node_put(struct vm_radix_node *rnode, int8_t last) { #ifdef INVARIANTS int slot; KASSERT(rnode->rn_count == 0, ("vm_radix_node_put: rnode %p has %d children", rnode, rnode->rn_count)); for (slot = 0; slot < VM_RADIX_COUNT; slot++) { if (slot == last) continue; KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) == NULL, ("vm_radix_node_put: rnode %p has a child", rnode)); } #endif /* Off by one so a freshly zero'd node is not assigned to. */ rnode->rn_last = last + 1; uma_zfree_smr(vm_radix_node_zone, rnode); } /* * Return the position in the array for a given level. */ static __inline int vm_radix_slot(vm_pindex_t index, uint16_t level) { return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK); } /* Trims the key after the specified level. */ static __inline vm_pindex_t vm_radix_trimkey(vm_pindex_t index, uint16_t level) { vm_pindex_t ret; ret = index; if (level > 0) { ret >>= level * VM_RADIX_WIDTH; ret <<= level * VM_RADIX_WIDTH; } return (ret); } /* * Fetch a node pointer from a slot in another node. 
*/ static __inline struct vm_radix_node * vm_radix_node_load(smrnode_t *p, enum vm_radix_access access) { switch (access) { case UNSERIALIZED: return (smr_unserialized_load(p, true)); case LOCKED: return (smr_serialized_load(p, true)); case SMR: return (smr_entered_load(p, vm_radix_smr)); } - __unreachable(); + __assert_unreachable(); } static __inline void vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v, enum vm_radix_access access) { switch (access) { case UNSERIALIZED: smr_unserialized_store(p, v, true); break; case LOCKED: smr_serialized_store(p, v, true); break; case SMR: panic("vm_radix_node_store: Not supported in smr section."); } } /* * Get the root node for a radix tree. */ static __inline struct vm_radix_node * vm_radix_root_load(struct vm_radix *rtree, enum vm_radix_access access) { return (vm_radix_node_load((smrnode_t *)&rtree->rt_root, access)); } /* * Set the root node for a radix tree. */ static __inline void vm_radix_root_store(struct vm_radix *rtree, struct vm_radix_node *rnode, enum vm_radix_access access) { vm_radix_node_store((smrnode_t *)&rtree->rt_root, rnode, access); } /* * Returns TRUE if the specified radix node is a leaf and FALSE otherwise. */ static __inline boolean_t vm_radix_isleaf(struct vm_radix_node *rnode) { return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0); } /* * Returns the associated page extracted from rnode. */ static __inline vm_page_t vm_radix_topage(struct vm_radix_node *rnode) { return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS)); } /* * Adds the page as a child of the provided node. */ static __inline void vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev, vm_page_t page, enum vm_radix_access access) { int slot; slot = vm_radix_slot(index, clev); vm_radix_node_store(&rnode->rn_child[slot], (struct vm_radix_node *)((uintptr_t)page | VM_RADIX_ISLEAF), access); } /* * Returns the slot where two keys differ. * It cannot accept 2 equal keys. 
 */
static __inline uint16_t
vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
{
	uint16_t clev;

	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
	    __func__, (uintmax_t)index1));

	/*
	 * XOR leaves only the differing bits.  Scan downward from the top
	 * level and return the first level whose slot is nonzero.  The loop
	 * has no lower bound of its own; it ends only because the keys
	 * differ, which is checked solely by the KASSERT above.
	 */
	index1 ^= index2;
	for (clev = VM_RADIX_LIMIT;; clev--)
		if (vm_radix_slot(index1, clev) != 0)
			return (clev);
}

/*
 * Returns TRUE if it can be determined that key does not belong to the
 * specified rnode.  Otherwise, returns FALSE.
 *
 * The test trims idx above the level of the node and compares the result
 * with rn_owner.  A node whose level is VM_RADIX_LIMIT is never reported
 * as a barrier.
 */
static __inline boolean_t
vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
{

	if (rnode->rn_clev < VM_RADIX_LIMIT) {
		idx = vm_radix_trimkey(idx, rnode->rn_clev + 1);
		return (idx != rnode->rn_owner);
	}
	return (FALSE);
}

/*
 * Internal helper for vm_radix_reclaim_allnodes().
 * This function is recursive.
 *
 * Child slots are visited in ascending order until rn_count reaches zero.
 * Interior children are reclaimed recursively; a leaf child only has its
 * slot cleared.  The emptied node is then released with last == -1.
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
	struct vm_radix_node *child;
	int slot;

	KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
	    ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
	for (slot = 0; rnode->rn_count != 0; slot++) {
		child = vm_radix_node_load(&rnode->rn_child[slot], UNSERIALIZED);
		if (child == NULL)
			continue;
		if (!vm_radix_isleaf(child))
			vm_radix_reclaim_allnodes_int(child);
		vm_radix_node_store(&rnode->rn_child[slot], NULL, UNSERIALIZED);
		rnode->rn_count--;
	}
	vm_radix_node_put(rnode, -1);
}

#ifndef UMA_MD_SMALL_ALLOC
void vm_radix_reserve_kva(void);
/*
 * Reserve the KVA necessary to satisfy the node allocation.
 * This is mandatory in architectures not supporting direct
 * mapping as they will need otherwise to carve into the kernel maps for
 * every node allocation, resulting into deadlocks for consumers already
 * working with kernel maps.
 *
 * Panics if the reservation fails.
 */
void
vm_radix_reserve_kva(void)
{

	/*
	 * Calculate the number of reserved nodes, discounting the pages that
	 * are needed to store them.
	 */
	if (!uma_zone_reserve_kva(vm_radix_node_zone,
	    ((vm_paddr_t)vm_cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE +
	    sizeof(struct vm_radix_node))))
		panic("%s: unable to reserve KVA", __func__);
}
#endif

/*
 * Initialize the UMA slab zone.
 *
 * Creates the node zone and caches its SMR handle in vm_radix_smr, which
 * the SMR access mode of vm_radix_node_load() passes to smr_entered_load().
 */
void
vm_radix_zinit(void)
{

	vm_radix_node_zone = uma_zcreate("RADIX NODE",
	    sizeof(struct vm_radix_node), NULL, NULL, NULL, NULL,
	    VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_SMR | UMA_ZONE_ZINIT);
	vm_radix_smr = uma_zone_get_smr(vm_radix_node_zone);
}

/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 *
 * Returns 0 on success, or ENOMEM when a new interior node is required and
 * vm_radix_node_get() returns NULL; the trie is not modified in that case.
 * Root and child pointers are loaded with LOCKED access.
 */
int
vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
{
	vm_pindex_t index, newind;
	struct vm_radix_node *rnode, *tmp;
	smrnode_t *parentp;
	vm_page_t m;
	int slot;
	uint16_t clev;

	index = page->pindex;

	/*
	 * The owner of record for root is not really important because it
	 * will never be used.
	 */
	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL) {
		/* Empty trie: the page itself becomes the (leaf) root. */
		rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF;
		return (0);
	}
	/* parentp tracks the slot that points at rnode during the descent. */
	parentp = (smrnode_t *)&rtree->rt_root;
	for (;;) {
		if (vm_radix_isleaf(rnode)) {
			/*
			 * Reached an existing leaf: replace it with a new
			 * two-child node holding both pages.
			 */
			m = vm_radix_topage(rnode);
			if (m->pindex == index)
				panic("%s: key %jx is already present",
				    __func__, (uintmax_t)index);
			clev = vm_radix_keydiff(m->pindex, index);
			tmp = vm_radix_node_get(vm_radix_trimkey(index,
			    clev + 1), 2, clev);
			if (tmp == NULL)
				return (ENOMEM);
			/* These writes are not yet visible due to ordering. */
			vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
			vm_radix_addpage(tmp, m->pindex, clev, m, UNSERIALIZED);
			/* Synchronize to make leaf visible. */
			vm_radix_node_store(parentp, tmp, LOCKED);
			return (0);
		} else if (vm_radix_keybarr(rnode, index))
			break;
		slot = vm_radix_slot(index, rnode->rn_clev);
		parentp = &rnode->rn_child[slot];
		tmp = vm_radix_node_load(parentp, LOCKED);
		if (tmp == NULL) {
			/* Free slot in an existing node: store the leaf. */
			rnode->rn_count++;
			vm_radix_addpage(rnode, index, rnode->rn_clev, page,
			    LOCKED);
			return (0);
		}
		rnode = tmp;
	}

	/*
	 * A new node is needed because the right insertion level is reached.
	 * Setup the new intermediate node and add the 2 children: the
	 * new object and the older edge.
	 */
	newind = rnode->rn_owner;
	clev = vm_radix_keydiff(newind, index);
	tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev);
	if (tmp == NULL)
		return (ENOMEM);
	slot = vm_radix_slot(newind, clev);
	/* These writes are not yet visible due to ordering. */
	vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
	vm_radix_node_store(&tmp->rn_child[slot], rnode, UNSERIALIZED);
	/* Serializing write to make the above visible. */
	vm_radix_node_store(parentp, tmp, LOCKED);

	return (0);
}

/*
 * Returns TRUE if the specified radix tree contains a single leaf and FALSE
 * otherwise.
 *
 * An empty tree (NULL root) yields FALSE.
 */
boolean_t
vm_radix_is_singleton(struct vm_radix *rtree)
{
	struct vm_radix_node *rnode;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		return (FALSE);
	return (vm_radix_isleaf(rnode));
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 *
 * Shared body of the lookup entry points; "access" is forwarded to every
 * root and child pointer load.  The descent ends at a leaf (match or
 * miss), at a node that vm_radix_keybarr() rejects, or at a NULL child.
 */
static __always_inline vm_page_t
_vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index,
    enum vm_radix_access access)
{
	struct vm_radix_node *rnode;
	vm_page_t m;
	int slot;

	rnode = vm_radix_root_load(rtree, access);
	while (rnode != NULL) {
		if (vm_radix_isleaf(rnode)) {
			m = vm_radix_topage(rnode);
			if (m->pindex == index)
				return (m);
			break;
		}
		if (vm_radix_keybarr(rnode, index))
			break;
		slot = vm_radix_slot(index, rnode->rn_clev);
		rnode = vm_radix_node_load(&rnode->rn_child[slot], access);
	}
	return (NULL);
}

/*
 * Returns the value stored at the index assuming there is an external lock.
 *
 * If the index is not present, NULL is returned.
 */
vm_page_t
vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
{

	return _vm_radix_lookup(rtree, index, LOCKED);
}

/*
 * Returns the value stored at the index without requiring an external lock.
 *
 * If the index is not present, NULL is returned.
*/
vm_page_t
vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
{
	vm_page_t m;

	/*
	 * The whole descent runs between smr_enter() and smr_exit() on
	 * vm_radix_smr, using the SMR access mode for every load.
	 */
	smr_enter(vm_radix_smr);
	m = _vm_radix_lookup(rtree, index, SMR);
	smr_exit(vm_radix_smr);

	return (m);
}

/*
 * Look up the nearest entry at a position greater than or equal to index.
 *
 * Returns the page found, or NULL if the trie is empty or holds no page
 * whose pindex is >= index.  All loads use the LOCKED access mode.
 *
 * The search is iterative: stack[] records the ancestors pushed at the
 * "descend" label so that the "ascend" label can back up to them, and index
 * is rewritten along the way to the smallest key still worth considering.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *stack[VM_RADIX_LIMIT];
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
	/* Only referenced from a KASSERT() below. */
	int loops = 0;
#endif
	int slot, tos;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		return (NULL);
	else if (vm_radix_isleaf(rnode)) {
		/* Single-page trie: the root is the only candidate. */
		m = vm_radix_topage(rnode);
		if (m->pindex >= index)
			return (m);
		else
			return (NULL);
	}
	tos = 0;
	for (;;) {
		/*
		 * If the keys differ before the current bisection node,
		 * then the search key might rollback to the earliest
		 * available bisection node or to the smallest key
		 * in the current node (if the owner is greater than the
		 * search key).
		 */
		if (vm_radix_keybarr(rnode, index)) {
			if (index > rnode->rn_owner) {
ascend:
				/* The counter is bumped inside the assertion. */
				KASSERT(++loops < 1000,
				    ("vm_radix_lookup_ge: too many loops"));

				/*
				 * Pop nodes from the stack until either the
				 * stack is empty or a node that could have a
				 * matching descendant is found.
				 */
				do {
					if (tos == 0)
						return (NULL);
					rnode = stack[--tos];
				} while (vm_radix_slot(index,
				    rnode->rn_clev) == (VM_RADIX_COUNT - 1));

				/*
				 * The following computation cannot overflow
				 * because index's slot at the current level
				 * is less than VM_RADIX_COUNT - 1.
				 */
				index = vm_radix_trimkey(index,
				    rnode->rn_clev);
				index += VM_RADIX_UNITLEVEL(rnode->rn_clev);
			} else
				index = rnode->rn_owner;
			KASSERT(!vm_radix_keybarr(rnode, index),
			    ("vm_radix_lookup_ge: keybarr failed"));
		}
		/* Examine the slot that index selects in the current node. */
		slot = vm_radix_slot(index, rnode->rn_clev);
		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(child)) {
			m = vm_radix_topage(child);
			if (m->pindex >= index)
				return (m);
		} else if (child != NULL)
			goto descend;

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 *
		 * index is first trimmed at this level and then advanced by
		 * one unit per slot, in step with slot.
		 */
		if (slot < (VM_RADIX_COUNT - 1)) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index = vm_radix_trimkey(index, rnode->rn_clev);
			do {
				index += inc;
				slot++;
				child = vm_radix_node_load(&rnode->rn_child[slot],
				    LOCKED);
				if (vm_radix_isleaf(child)) {
					m = vm_radix_topage(child);
					if (m->pindex >= index)
						return (m);
				} else if (child != NULL)
					goto descend;
			} while (slot < (VM_RADIX_COUNT - 1));
		}
		KASSERT(child == NULL || vm_radix_isleaf(child),
		    ("vm_radix_lookup_ge: child is radix node"));

		/*
		 * If a page or edge greater than the search slot is not found
		 * in the current node, ascend to the next higher-level node.
		 */
		goto ascend;
descend:
		/* Remember the current node for a later ascend, then go down. */
		KASSERT(rnode->rn_clev > 0,
		    ("vm_radix_lookup_ge: pushing leaf's parent"));
		KASSERT(tos < VM_RADIX_LIMIT,
		    ("vm_radix_lookup_ge: stack overflow"));
		stack[tos++] = rnode;
		rnode = child;
	}
}

/*
 * Look up the nearest entry at a position less than or equal to index.
*/
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
	/*
	 * Returns the page found, or NULL if the trie is empty or holds no
	 * page whose pindex is <= index.  stack[] records the ancestors
	 * pushed at the "descend" label so that "ascend" can back up to them.
	 */
	struct vm_radix_node *stack[VM_RADIX_LIMIT];
	vm_pindex_t inc;
	vm_page_t m;
	struct vm_radix_node *child, *rnode;
#ifdef INVARIANTS
	/* Only referenced from a KASSERT() below. */
	int loops = 0;
#endif
	int slot, tos;

	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		return (NULL);
	else if (vm_radix_isleaf(rnode)) {
		/* Single-page trie: the root is the only candidate. */
		m = vm_radix_topage(rnode);
		if (m->pindex <= index)
			return (m);
		else
			return (NULL);
	}
	tos = 0;
	for (;;) {
		/*
		 * If the keys differ before the current bisection node,
		 * then the search key might rollback to the earliest
		 * available bisection node or to the largest key
		 * in the current node (if the owner is smaller than the
		 * search key).
		 */
		if (vm_radix_keybarr(rnode, index)) {
			if (index > rnode->rn_owner) {
				/*
				 * Together with the decrement below this
				 * yields owner + COUNT * UNITLEVEL - 1.
				 * NOTE(review): presumably the largest key
				 * covered by rnode -- confirm.
				 */
				index = rnode->rn_owner + VM_RADIX_COUNT *
				    VM_RADIX_UNITLEVEL(rnode->rn_clev);
			} else {
ascend:
				/* The counter is bumped inside the assertion. */
				KASSERT(++loops < 1000,
				    ("vm_radix_lookup_le: too many loops"));

				/*
				 * Pop nodes from the stack until either the
				 * stack is empty or a node that could have a
				 * matching descendant is found.
				 */
				do {
					if (tos == 0)
						return (NULL);
					rnode = stack[--tos];
				} while (vm_radix_slot(index,
				    rnode->rn_clev) == 0);

				/*
				 * The following computation cannot overflow
				 * because index's slot at the current level
				 * is greater than 0.
				 */
				index = vm_radix_trimkey(index,
				    rnode->rn_clev);
			}
			/* Shared by both branches above: step back one key. */
			index--;
			KASSERT(!vm_radix_keybarr(rnode, index),
			    ("vm_radix_lookup_le: keybarr failed"));
		}
		/* Examine the slot that index selects in the current node. */
		slot = vm_radix_slot(index, rnode->rn_clev);
		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(child)) {
			m = vm_radix_topage(child);
			if (m->pindex <= index)
				return (m);
		} else if (child != NULL)
			goto descend;

		/*
		 * Look for an available edge or page within the current
		 * bisection node.
		 *
		 * The low bits of index below this level are first set to
		 * ones, then index is moved back by one unit per slot, in
		 * step with slot.
		 */
		if (slot > 0) {
			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
			index |= inc - 1;
			do {
				index -= inc;
				slot--;
				child = vm_radix_node_load(&rnode->rn_child[slot],
				    LOCKED);
				if (vm_radix_isleaf(child)) {
					m = vm_radix_topage(child);
					if (m->pindex <= index)
						return (m);
				} else if (child != NULL)
					goto descend;
			} while (slot > 0);
		}
		KASSERT(child == NULL || vm_radix_isleaf(child),
		    ("vm_radix_lookup_le: child is radix node"));

		/*
		 * If a page or edge smaller than the search slot is not found
		 * in the current node, ascend to the next higher-level node.
		 */
		goto ascend;
descend:
		/* Remember the current node for a later ascend, then go down. */
		KASSERT(rnode->rn_clev > 0,
		    ("vm_radix_lookup_le: pushing leaf's parent"));
		KASSERT(tos < VM_RADIX_LIMIT,
		    ("vm_radix_lookup_le: stack overflow"));
		stack[tos++] = rnode;
		rnode = child;
	}
}

/*
 * Remove the specified index from the trie, and return the value stored at
 * that index.  If the index is not present, return NULL.
 *
 * When the removal leaves a node with a single remaining child, that child
 * is linked directly into the node's parent (or the root) and the node is
 * handed to vm_radix_node_put().
 */
vm_page_t
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
	struct vm_radix_node *rnode, *parent, *tmp;
	vm_page_t m;
	int i, slot;

	/*
	 * NOTE(review): vm_radix_isleaf() is applied before rnode is checked
	 * against NULL; presumably it is safe on a NULL pointer -- confirm.
	 */
	rnode = vm_radix_root_load(rtree, LOCKED);
	if (vm_radix_isleaf(rnode)) {
		/* Single-page trie: removing the page empties the root. */
		m = vm_radix_topage(rnode);
		if (m->pindex != index)
			return (NULL);
		vm_radix_root_store(rtree, NULL, LOCKED);
		return (m);
	}
	parent = NULL;
	for (;;) {
		if (rnode == NULL)
			return (NULL);
		slot = vm_radix_slot(index, rnode->rn_clev);
		tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(tmp)) {
			m = vm_radix_topage(tmp);
			if (m->pindex != index)
				return (NULL);
			vm_radix_node_store(&rnode->rn_child[slot], NULL,
			    LOCKED);
			rnode->rn_count--;
			/* More than one child is left: keep the node. */
			if (rnode->rn_count > 1)
				return (m);
			/* Locate the remaining non-NULL child. */
			for (i = 0; i < VM_RADIX_COUNT; i++)
				if (vm_radix_node_load(&rnode->rn_child[i],
				    LOCKED) != NULL)
					break;
			KASSERT(i != VM_RADIX_COUNT,
			    ("%s: invalid node configuration", __func__));
			tmp = vm_radix_node_load(&rnode->rn_child[i], LOCKED);
			/* Link that child where rnode used to be referenced. */
			if (parent == NULL)
				vm_radix_root_store(rtree, tmp, LOCKED);
			else {
				slot = vm_radix_slot(index, parent->rn_clev);
				KASSERT(vm_radix_node_load(
				    &parent->rn_child[slot], LOCKED) == rnode,
				    ("%s: invalid child value", __func__));
				vm_radix_node_store(&parent->rn_child[slot],
				    tmp, LOCKED);
			}
			/*
			 * The child is still valid and we can not zero the
			 * pointer until all smr references are gone.
			 *
			 * The count is dropped for the moved child even though
			 * its slot is left populated.  NOTE(review): i is
			 * presumably passed so vm_radix_node_put() knows which
			 * slot still holds a pointer -- confirm.
			 */
			rnode->rn_count--;
			vm_radix_node_put(rnode, i);
			return (m);
		}
		/* Internal node (or NULL, caught at the loop top): descend. */
		parent = rnode;
		rnode = tmp;
	}
}

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive but there is a tight control on it as the
 * maximum depth of the tree is fixed.
 *
 * The root pointer is cleared first; the detached subtree is then reclaimed
 * only if the old root was an internal node rather than a leaf.
 */
void
vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
	struct vm_radix_node *root;

	root = vm_radix_root_load(rtree, LOCKED);
	if (root == NULL)
		return;
	vm_radix_root_store(rtree, NULL, UNSERIALIZED);
	if (!vm_radix_isleaf(root))
		vm_radix_reclaim_allnodes_int(root);
}

/*
 * Replace an existing page in the trie with another one.
 * Panics if there is not an old page in the trie at the new page's index.
 *
 * The key is newpage->pindex.  Returns the page that was previously stored
 * at that index.
 */
vm_page_t
vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage)
{
	struct vm_radix_node *rnode, *tmp;
	vm_page_t m;
	vm_pindex_t index;
	int slot;

	index = newpage->pindex;
	rnode = vm_radix_root_load(rtree, LOCKED);
	if (rnode == NULL)
		panic("%s: replacing page on an empty trie", __func__);
	if (vm_radix_isleaf(rnode)) {
		/* Single-page trie: swap the tagged leaf held in the root. */
		m = vm_radix_topage(rnode);
		if (m->pindex != index)
			panic("%s: original replacing root key not found",
			    __func__);
		rtree->rt_root = (uintptr_t)newpage | VM_RADIX_ISLEAF;
		return (m);
	}
	for (;;) {
		slot = vm_radix_slot(index, rnode->rn_clev);
		tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
		if (vm_radix_isleaf(tmp)) {
			m = vm_radix_topage(tmp);
			if (m->pindex == index) {
				/* Overwrite the slot with the new tagged leaf. */
				vm_radix_node_store(&rnode->rn_child[slot],
				    (struct vm_radix_node *)((uintptr_t)newpage |
				    VM_RADIX_ISLEAF), LOCKED);
				return (m);
			} else
				break;
		} else if (tmp == NULL || vm_radix_keybarr(tmp, index))
			break;
		rnode = tmp;
	}
	/* Every break above means the old page was not found. */
	panic("%s: original replacing page not found", __func__);
}

/*
 * Calls uma_zwait() on the radix node zone.
 * NOTE(review): presumably used by callers to wait for node memory after
 * an ENOMEM from vm_radix_insert() -- confirm against callers.
 */
void
vm_radix_wait(void)
{
	uma_zwait(vm_radix_node_zone);
}

#ifdef DDB
/*
 * Show details about the given radix node.
 *
 * Prints the node's address, owner, child count and level, followed by one
 * line for every non-NULL child slot.  Does nothing when no address was
 * supplied.  NOTE(review): have_addr and addr are presumably provided by
 * the DB_SHOW_COMMAND() macro -- confirm.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
	struct vm_radix_node *rnode, *tmp;
	int i;

        if (!have_addr)
                return;
	rnode = (struct vm_radix_node *)addr;
	db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
	    rnode->rn_clev);
	for (i = 0; i < VM_RADIX_COUNT; i++) {
		tmp = vm_radix_node_load(&rnode->rn_child[i], UNSERIALIZED);
		/* The printed clev is that of rnode, not of the child. */
		if (tmp != NULL)
			db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
			    i, (void *)tmp,
			    vm_radix_isleaf(tmp) ?  vm_radix_topage(tmp) : NULL,
			    rnode->rn_clev);
	}
}
#endif /* DDB */