Page MenuHomeFreeBSD

D54882.id177065.diff
No OneTemporary

D54882.id177065.diff

diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile
--- a/share/man/man4/Makefile
+++ b/share/man/man4/Makefile
@@ -254,6 +254,7 @@
${_imcsmb.4} \
inet.4 \
inet6.4 \
+ intel_thermal.4 \
intpm.4 \
intro.4 \
${_io.4} \
diff --git a/share/man/man4/intel_thermal.4 b/share/man/man4/intel_thermal.4
new file mode 100644
--- /dev/null
+++ b/share/man/man4/intel_thermal.4
@@ -0,0 +1,97 @@
+.\"
+.\" Copyright (c) 2026 Abdelkader Boudih <freebsd@seuros.com>
+.\"
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.Dd May 3, 2026
+.Dt INTEL_THERMAL 4
+.Os
+.Sh NAME
+.Nm intel_thermal
+.Nd Intel Processor Thermal Device driver
+.Sh SYNOPSIS
+To compile this driver into the kernel,
+place the following lines in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device intel_thermal"
+.Ed
+.Pp
+Alternatively, to load the driver as a
+module at boot time, place the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+intel_thermal_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver exposes Running Average Power Limit (RAPL) power limit and
+Thermal Design Power (TDP) information for Intel processors via
+.Xr sysctl 8 .
+.Pp
+The Processor Thermal device (B0D4F0) provides a PCI BAR0 that maps
+directly into the Memory Controller Hub Base Address Register (MCHBAR)
+address space.
+RAPL registers are accessed at fixed offsets within that window,
+mirroring the layout of the corresponding Model Specific Registers (MSRs).
+.Pp
+The power unit is read from the
+.Dv RAPL_UNIT
+register at attach time.
+If the register reads back as all zeros or all ones, the value is
+treated as invalid and the driver falls back to
+.Dv MSR_RAPL_POWER_UNIT
+(MSR 0x606).
+The Skylake default of 1/8 W steps is used if the MSR read also fails.
+.Pp
+If firmware has locked the power limit registers, the driver reports
+this at attach time.
+All power values are read-only; this driver does not modify any limits.
+.Sh HARDWARE
+The
+.Nm
+driver supports the following devices:
+.Pp
+.Bl -bullet -compact
+.It
+Intel Skylake Processor Thermal Device (PCI ID 0x8086:0x1903)
+.El
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables are available:
+.Bl -tag -width indent
+.It Va dev.intel_thermal.%d.pl1
+Package power limit 1 (sustained), in milliwatts.
+This is the long-term average power limit enforced by the hardware.
+.It Va dev.intel_thermal.%d.pl2
+Package power limit 2 (short-term), in milliwatts.
+This is the burst power limit allowed for short durations.
+.It Va dev.intel_thermal.%d.tdp
+Thermal Design Power, in milliwatts.
+This is the maximum sustained power the cooling solution must dissipate.
+.It Va dev.intel_thermal.%d.locked
+Set to 1 if firmware has locked the power limit registers against
+software modification.
+The lock state is sampled once, when the driver attaches.
+.El
+.Sh SEE ALSO
+.Xr pchtherm 4 ,
+.Xr pci 4 ,
+.Xr sysctl 8
+.Pp
+Intel 64 and IA-32 Architectures Software Developer's Manual,
+Volume 4: Model-Specific Registers, MSR 0x610
+.Pq Dv MSR_PKG_POWER_LIMIT
+and MSR 0x614
+.Pq Dv MSR_PKG_POWER_INFO .
+.Sh HISTORY
+The
+.Nm
+driver first appeared in
+.Fx 16.0 .
+.Sh AUTHORS
+.An Abdelkader Boudih Aq Mt freebsd@seuros.com
+.Sh CAVEATS
+Only the Skylake Processor Thermal device (0x1903) has been tested.
+Other generations expose the same device at B0D4F0 with different PCI
+device IDs and may require per-generation validation before being added.
diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -63,6 +63,7 @@
dev/agp/agp_i810.c optional agp
dev/agp/agp_via.c optional agp
dev/amdsmu/amdsmu.c optional amdsmu pci
+dev/intel/intel_thermal.c optional intel_thermal pci
dev/amdsbwd/amdsbwd.c optional amdsbwd
dev/amdsmn/amdsmn.c optional amdsmn | amdtemp
dev/amdtemp/amdtemp.c optional amdtemp
diff --git a/sys/dev/intel/intel_thermal.c b/sys/dev/intel/intel_thermal.c
new file mode 100644
--- /dev/null
+++ b/sys/dev/intel/intel_thermal.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2026 Abdelkader Boudih <freebsd@seuros.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Intel Processor Thermal Device driver.
+ *
+ * The Processor Thermal device (B0D4F0) provides a PCI BAR0 that maps
+ * directly into the MCHBAR address space. RAPL power limit and power
+ * info registers are accessed at fixed offsets within that window.
+ *
+ * Register offsets verified against:
+ * - 11th Gen Intel Core Processors Datasheet Vol 2a (doc 631122)
+ * - Intel 64 and IA-32 Architectures SDM Vol 4 (MSRs 0x606/0x610/0x614)
+ * - Linux drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
+ * (for offset 0x5930, absent from public datasheets)
+ */
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+
+#include <machine/bus.h>
+#include <machine/cpufunc.h>
+#include <machine/resource.h>
+#include <machine/specialreg.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+/*
+ * RAPL MMIO register offsets relative to BAR0 (= MCHBAR base).
+ *
+ * ITH_RAPL_UNIT (0x5938): PACKAGE_POWER_SKU_UNIT_0_0_0_MCHBAR_PCU
+ * 11th Gen Intel Core Processors Datasheet Vol 2a (doc 631122) §3.3.22.
+ * MMIO mirror of MSR_RAPL_POWER_UNIT (MSR 0x606, SDM Vol 4).
+ *
+ * ITH_PKG_POWER_LIMIT (0x59a0): PACKAGE_RAPL_LIMIT_0_0_0_MCHBAR_PCU
+ * 11th Gen Intel Core Processors Datasheet Vol 2a (doc 631122) §3.3.31.
+ * 64-bit MMIO mirror of MSR_PKG_POWER_LIMIT (MSR 0x610, SDM Vol 4).
+ *
+ * ITH_PKG_POWER_INFO (0x5930): not present in public Intel datasheets
+ * (verified against docs 332688, 615212, 631122, 655259).
+ * Offset confirmed via Linux processor_thermal_rapl.c rapl_mmio_default[].
+ * MMIO mirror of MSR_PKG_POWER_INFO (MSR 0x614, SDM Vol 4).
+ */
+#define ITH_RAPL_UNIT 0x5938 /* power/energy/time unit multipliers; read as 32 bits */
+#define ITH_PKG_POWER_LIMIT 0x59a0 /* PL1/PL2 power limits (64-bit); highest offset accessed */
+#define ITH_PKG_POWER_INFO 0x5930 /* TDP, min, max power (64-bit) */
+
+/* PKG_POWER_LIMIT bit fields (SDM Vol 4, MSR 0x610 layout) */
+#define ITH_PL1_MASK 0x7fffULL /* bits 14:0 - PL1 raw value, in power units */
+#define ITH_PL1_ENABLE (1ULL << 15) /* bit 15 - not consulted by this driver */
+#define ITH_PL2_SHIFT 32 /* PL2 fields start in the upper dword */
+#define ITH_PL2_MASK 0x7fffULL /* bits 46:32 - PL2 raw value; apply after >> ITH_PL2_SHIFT */
+#define ITH_PL2_ENABLE (1ULL << 47) /* bit 47 - not consulted by this driver */
+#define ITH_PL_LOCK (1ULL << 63) /* bit 63 - reported via the "locked" sysctl */
+
+/* PKG_POWER_INFO bit fields (SDM Vol 4, MSR 0x614 layout) */
+#define ITH_TDP_MASK 0x7fffULL /* bits 14:0 - TDP raw value, in power units */
+
+/* RAPL_UNIT bits 3:0: power unit = 1W / 2^PU. Skylake default PU=3. */
+#define ITH_POWER_UNIT_MASK 0xf /* extracts PU from the unit register or MSR */
+#define ITH_POWER_UNIT_DEFAULT 3 /* 1/8 W steps; used when no unit source is usable */
+
+struct intel_thermal_softc { /* per-device driver state */
+ int rid; /* resource ID of BAR0; needed to release it */
+ struct resource *res; /* BAR0 memory resource; NULL once released */
+ bus_space_tag_t bst; /* bus space tag taken from res */
+ bus_space_handle_t bsh; /* bus space handle taken from res */
+ bool locked; /* ITH_PL_LOCK state sampled at attach */
+ uint32_t power_unit; /* 1W / 2^power_unit */
+};
+
+static const struct pci_device_table intel_thermal_devices[] = { /* PCI IDs matched in probe */
+ { PCI_DEV(0x8086, 0x1903), /* vendor 0x8086, device 0x1903 */
+ PCI_DESCR("Intel Processor Thermal Device") },
+};
+
+/* Fetch the 32-bit register located `off` bytes into the mapped BAR0. */
+static inline uint32_t
+ith_read4(struct intel_thermal_softc *sc, uint32_t off)
+{
+	uint32_t val;
+
+	val = bus_space_read_4(sc->bst, sc->bsh, off);
+	return (val);
+}
+
+/*
+ * Fetch a 64-bit register as two 32-bit accesses, low dword first and
+ * high dword second, and combine them.  The two halves are not read
+ * atomically.
+ */
+static inline uint64_t
+ith_read8(struct intel_thermal_softc *sc, uint32_t off)
+{
+	uint32_t low, high;
+
+	low = bus_space_read_4(sc->bst, sc->bsh, off);
+	high = bus_space_read_4(sc->bst, sc->bsh, off + 4);
+	return (((uint64_t)high << 32) | low);
+}
+
+/*
+ * Scale a raw RAPL power field to milliwatts.  One raw count is worth
+ * 1/2^power_unit watts, so the result is raw * 1000 / 2^power_unit,
+ * rounded down and truncated to 32 bits.
+ */
+static uint32_t
+ith_raw_to_mw(struct intel_thermal_softc *sc, uint64_t raw)
+{
+	uint64_t scaled;
+
+	scaled = raw * 1000ULL;
+	scaled >>= sc->power_unit;
+	return ((uint32_t)scaled);
+}
+
+/*
+ * Sysctl handler for "pl1": re-read the package power limit register
+ * and report the PL1 field in milliwatts.
+ */
+static int
+intel_thermal_pl1_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct intel_thermal_softc *sc;
+	uint64_t limits;
+	uint32_t pl1_mw;
+
+	sc = oidp->oid_arg1;
+	limits = ith_read8(sc, ITH_PKG_POWER_LIMIT);
+	pl1_mw = ith_raw_to_mw(sc, limits & ITH_PL1_MASK);
+	return (sysctl_handle_32(oidp, &pl1_mw, 0, req));
+}
+
+/*
+ * Sysctl handler for "pl2": re-read the package power limit register
+ * and report the PL2 field (upper dword) in milliwatts.
+ */
+static int
+intel_thermal_pl2_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct intel_thermal_softc *sc;
+	uint64_t limits, pl2_raw;
+	uint32_t pl2_mw;
+
+	sc = oidp->oid_arg1;
+	limits = ith_read8(sc, ITH_PKG_POWER_LIMIT);
+	pl2_raw = (limits >> ITH_PL2_SHIFT) & ITH_PL2_MASK;
+	pl2_mw = ith_raw_to_mw(sc, pl2_raw);
+	return (sysctl_handle_32(oidp, &pl2_mw, 0, req));
+}
+
+/*
+ * Sysctl handler for "tdp": re-read the package power info register
+ * and report the TDP field in milliwatts.
+ */
+static int
+intel_thermal_tdp_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct intel_thermal_softc *sc;
+	uint64_t info;
+	uint32_t tdp_mw;
+
+	sc = oidp->oid_arg1;
+	info = ith_read8(sc, ITH_PKG_POWER_INFO);
+	tdp_mw = ith_raw_to_mw(sc, info & ITH_TDP_MASK);
+	return (sysctl_handle_32(oidp, &tdp_mw, 0, req));
+}
+
+/*
+ * Probe: claim the device only when its PCI ID is listed in
+ * intel_thermal_devices and it sits at bus 0, slot 4, function 0.
+ * Returns BUS_PROBE_DEFAULT on a match and ENXIO otherwise.
+ */
+static int
+intel_thermal_probe(device_t dev)
+{
+	const struct pci_device_table *match;
+	bool at_b0d4f0;
+
+	match = PCI_MATCH(dev, intel_thermal_devices);
+	if (match == NULL)
+		return (ENXIO);
+
+	/*
+	 * The Processor Thermal device always lives at B0D4F0.  Insist
+	 * on the complete BDF so that a thermal device which shares the
+	 * PCI device ID but sits elsewhere is not claimed.
+	 */
+	at_b0d4f0 = pci_get_bus(dev) == 0 && pci_get_slot(dev) == 4 &&
+	    pci_get_function(dev) == 0;
+	if (!at_b0d4f0)
+		return (ENXIO);
+
+	device_set_desc(dev, match->descr);
+	return (BUS_PROBE_DEFAULT);
+}
+
+/*
+ * Determine the RAPL power unit exponent (PU); one raw power count is
+ * worth 1/2^PU watts.  The MMIO unit register (ITH_RAPL_UNIT) is
+ * preferred.  An all-zeros or all-ones readback is treated as invalid,
+ * in which case MSR_RAPL_POWER_UNIT (0x606) is tried instead.  If the
+ * MSR read fails too, ITH_POWER_UNIT_DEFAULT (0.125 W steps) is used.
+ */
+static uint32_t
+intel_thermal_power_unit(struct intel_thermal_softc *sc)
+{
+	uint64_t msr_units;
+	uint32_t unit_reg;
+
+	unit_reg = ith_read4(sc, ITH_RAPL_UNIT);
+	if (unit_reg != 0 && unit_reg != 0xffffffff)
+		return (unit_reg & ITH_POWER_UNIT_MASK);
+	if (rdmsr_safe(MSR_RAPL_POWER_UNIT, &msr_units) == 0)
+		return (msr_units & ITH_POWER_UNIT_MASK);
+	return (ITH_POWER_UNIT_DEFAULT);
+}
+
+/*
+ * Attach: map BAR0, work out the power unit, log the current limits
+ * and publish the pl1/pl2/tdp/locked sysctl nodes under the device's
+ * sysctl tree.
+ *
+ * Returns 0 on success, ENOMEM if BAR0 cannot be allocated, or ENXIO
+ * if BAR0 is too small to contain the registers this driver reads.
+ * BAR0 is released again on the ENXIO path.
+ */
+static int
+intel_thermal_attach(device_t dev)
+{
+	struct intel_thermal_softc *sc = device_get_softc(dev);
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid_list *children;
+	rman_res_t bar_size;
+	uint64_t info, pl;
+
+	sc->rid = PCIR_BAR(0);
+	sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+	    &sc->rid, RF_ACTIVE);
+	if (sc->res == NULL) {
+		device_printf(dev, "cannot allocate BAR0\n");
+		return (ENOMEM);
+	}
+	sc->bst = rman_get_bustag(sc->res);
+	sc->bsh = rman_get_bushandle(sc->res);
+
+	/*
+	 * ITH_PKG_POWER_LIMIT is the highest register accessed and is
+	 * 64 bits wide, so BAR0 must reach at least to its last byte.
+	 * Deriving the bound from the macro keeps the check in step
+	 * with the register definitions.
+	 */
+	bar_size = rman_get_size(sc->res);
+	if (bar_size < ITH_PKG_POWER_LIMIT + sizeof(uint64_t)) {
+		device_printf(dev, "BAR0 too small (0x%jx)\n",
+		    (uintmax_t)bar_size);
+		bus_release_resource(dev, SYS_RES_MEMORY, sc->rid, sc->res);
+		sc->res = NULL;
+		return (ENXIO);
+	}
+
+	sc->power_unit = intel_thermal_power_unit(sc);
+
+	/* The lock bit is sampled once here and cached for the sysctl. */
+	pl = ith_read8(sc, ITH_PKG_POWER_LIMIT);
+	info = ith_read8(sc, ITH_PKG_POWER_INFO);
+	sc->locked = (pl & ITH_PL_LOCK) != 0;
+
+	device_printf(dev, "PL1 %u mW, PL2 %u mW, TDP %u mW%s\n",
+	    ith_raw_to_mw(sc, pl & ITH_PL1_MASK),
+	    ith_raw_to_mw(sc, (pl >> ITH_PL2_SHIFT) & ITH_PL2_MASK),
+	    ith_raw_to_mw(sc, info & ITH_TDP_MASK),
+	    sc->locked ? ", locked" : "");
+
+	ctx = device_get_sysctl_ctx(dev);
+	children = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+	/* The power nodes re-read the hardware on every query. */
+	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+	    "pl1", CTLTYPE_U32 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	    sc, 0, intel_thermal_pl1_sysctl, "IU",
+	    "PL1 sustained power limit (mW)");
+
+	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+	    "pl2", CTLTYPE_U32 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	    sc, 0, intel_thermal_pl2_sysctl, "IU",
+	    "PL2 short-term power limit (mW)");
+
+	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
+	    "tdp", CTLTYPE_U32 | CTLFLAG_RD | CTLFLAG_MPSAFE,
+	    sc, 0, intel_thermal_tdp_sysctl, "IU",
+	    "Thermal Design Power (mW)");
+
+	SYSCTL_ADD_BOOL(ctx, children, OID_AUTO,
+	    "locked", CTLFLAG_RD, &sc->locked, 0,
+	    "Power limits locked by firmware");
+
+	return (0);
+}
+
+/* Detach: hand BAR0 back to the bus if it is still held.  Returns 0. */
+static int
+intel_thermal_detach(device_t dev)
+{
+	struct intel_thermal_softc *sc;
+
+	sc = device_get_softc(dev);
+	if (sc->res == NULL)
+		return (0);
+
+	bus_release_resource(dev, SYS_RES_MEMORY, sc->rid, sc->res);
+	sc->res = NULL;
+	return (0);
+}
+
+static device_method_t intel_thermal_methods[] = { /* Device interface */
+ DEVMETHOD(device_probe, intel_thermal_probe), /* PCI ID + B0D4F0 match */
+ DEVMETHOD(device_attach, intel_thermal_attach), /* map BAR0, add sysctls */
+ DEVMETHOD(device_detach, intel_thermal_detach), /* release BAR0 */
+ DEVMETHOD_END
+};
+
+static driver_t intel_thermal_driver = {
+ "intel_thermal", /* driver name */
+ intel_thermal_methods, /* method table defined above */
+ sizeof(struct intel_thermal_softc) /* size of the per-device softc */
+};
+
+DRIVER_MODULE(intel_thermal, pci, intel_thermal_driver, 0, 0); /* register the driver on the pci bus */
+PCI_PNP_INFO(intel_thermal_devices); /* PNP info taken from the same table probe uses */
+MODULE_VERSION(intel_thermal, 1); /* module version 1 */
+MODULE_DEPEND(intel_thermal, pci, 1, 1, 1); /* depends on the pci module, version 1 */
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -176,6 +176,7 @@
imgact_binmisc \
${_imx} \
${_intelspi} \
+ ${_intel_thermal} \
${_io} \
${_ioat} \
${_ipoib} \
@@ -822,6 +823,7 @@
_hyperv= hyperv
_ichwd= ichwd
_ida= ida
+_intel_thermal= intel_thermal
_intelspi= intelspi
_ips= ips
_isci= isci
diff --git a/sys/modules/intel_thermal/Makefile b/sys/modules/intel_thermal/Makefile
new file mode 100644
--- /dev/null
+++ b/sys/modules/intel_thermal/Makefile
@@ -0,0 +1,6 @@
+.PATH: ${SRCTOP}/sys/dev/intel
+KMOD= intel_thermal
+SRCS= intel_thermal.c
+SRCS+= device_if.h bus_if.h pci_if.h
+
+.include <bsd.kmod.mk>

File Metadata

Mime Type
text/plain
Expires
Thu, May 28, 8:28 PM (11 h, 13 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33592002
Default Alt Text
D54882.id177065.diff (12 KB)

Event Timeline