Page MenuHomeFreeBSD

D54882.id.diff
No OneTemporary

D54882.id.diff

diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile
--- a/share/man/man4/Makefile
+++ b/share/man/man4/Makefile
@@ -254,6 +254,7 @@
${_imcsmb.4} \
inet.4 \
inet6.4 \
+ intel_thermal.4 \
intpm.4 \
intro.4 \
${_io.4} \
diff --git a/share/man/man4/intel_thermal.4 b/share/man/man4/intel_thermal.4
new file mode 100644
--- /dev/null
+++ b/share/man/man4/intel_thermal.4
@@ -0,0 +1,97 @@
+.\"
+.\" Copyright (c) 2026 Abdelkader Boudih <freebsd@seuros.com>
+.\"
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.Dd April 12, 2026
+.Dt INTEL_THERMAL 4
+.Os
+.Sh NAME
+.Nm intel_thermal
+.Nd Intel Processor Thermal Device driver
+.Sh SYNOPSIS
+To compile this driver into the kernel,
+place the following line in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device intel_thermal"
+.Ed
+.Pp
+Alternatively, to load the driver as a
+module at boot time, place the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+intel_thermal_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver provides read-only access to RAPL (Running Average Power Limit)
+registers on Intel Skylake processors via
+.Xr sysctl 8 .
+.Pp
+The driver attaches to the Processor Thermal Device (PCI device B0D4)
+and exposes long-term (PL1) and short-term (PL2) power limits as well
+as the Thermal Design Power (TDP).
+Power values are reported in milliwatts.
+.Pp
+The RAPL power unit is read from MSR 0x606 at attach time.
+If the MSR read fails, a default unit of 1/8\ W is used.
+.Pp
+If firmware has locked access to the MMIO registers, the driver
+detects this condition at attach time and reports it via the
+.Va access_denied
+sysctl.
+When access is denied, all power readings and the
+.Va pl1_enabled
+and
+.Va pl2_enabled
+flags read as zero, and
+.Va locked
+reads as 1.
+.Sh HARDWARE
+The
+.Nm
+driver supports the following device:
+.Pp
+.Bl -bullet -compact
+.It
+Intel Skylake Processor Thermal Device (PCI ID 0x8086:0x1903)
+.El
+.Pp
+Other Intel processor generations may use different MMIO register
+layouts and are not currently supported.
+.Sh SYSCTLS
+The following
+.Xr sysctl 8
+variables are available:
+.Bl -tag -width indent
+.It Va dev.intel_thermal.%d.access_denied
+Set to 1 if firmware has denied access to RAPL registers.
+.It Va dev.intel_thermal.%d.pl1
+Long-term (PL1) power limit in milliwatts.
+.It Va dev.intel_thermal.%d.pl2
+Short-term (PL2) power limit in milliwatts.
+.It Va dev.intel_thermal.%d.pl1_enabled
+Set to 1 if the PL1 power limit is enabled.
+.It Va dev.intel_thermal.%d.pl2_enabled
+Set to 1 if the PL2 power limit is enabled.
+.It Va dev.intel_thermal.%d.locked
+Set to 1 if power limits are locked by firmware.
+.It Va dev.intel_thermal.%d.tdp
+Thermal Design Power in milliwatts.
+.It Va dev.intel_thermal.%d.power_unit_div
+Power unit divisor (2 raised to the power unit shift) derived from MSR 0x606.
+Raw register values are divided by this value to obtain watts.
+.It Va dev.intel_thermal.%d.power_unit_shift
+Power unit shift (bits 3:0 of MSR 0x606).
+.El
+.Sh SEE ALSO
+.Xr pci 4 ,
+.Xr sysctl 8
+.Pp
+Intel 10th Gen Core Processor Datasheet, Volume 2.
+.Sh HISTORY
+The
+.Nm
+driver first appeared in
+.Fx 16.0 .
+.Sh AUTHORS
+.An Abdelkader Boudih Aq Mt freebsd@seuros.com
+.Sh CAVEATS
+Only the Skylake Processor Thermal Device (PCI device ID 0x1903) is supported.
+Other processor generations have different register layouts and
+require per-generation validation.
diff --git a/sys/conf/files.x86 b/sys/conf/files.x86
--- a/sys/conf/files.x86
+++ b/sys/conf/files.x86
@@ -63,6 +63,7 @@
dev/agp/agp_i810.c optional agp
dev/agp/agp_via.c optional agp
dev/amdsmu/amdsmu.c optional amdsmu pci
+dev/intel/intel_thermal.c optional intel_thermal pci
dev/amdsbwd/amdsbwd.c optional amdsbwd
dev/amdsmn/amdsmn.c optional amdsmn | amdtemp
dev/amdtemp/amdtemp.c optional amdtemp
diff --git a/sys/dev/intel/intel_thermal.c b/sys/dev/intel/intel_thermal.c
new file mode 100644
--- /dev/null
+++ b/sys/dev/intel/intel_thermal.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright (c) 2026 Abdelkader Boudih <freebsd@seuros.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+/*
+ * intel_thermal - Intel Processor Thermal Device driver
+ *
+ * Exposes RAPL power limits (PL1/PL2) and TDP via sysctl for the
+ * Skylake Processor Thermal Device (B0D4); only PCI ID 0x8086:0x1903 is matched.
+ *
+ * Register offsets from Intel 10th Gen Core Processor Datasheet Vol 2.
+ */
+
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/sysctl.h>
+
+#include <machine/bus.h>
+#include <machine/cpufunc.h>
+#include <machine/resource.h>
+#include <machine/specialreg.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+/*
+ * RAPL MMIO Register Offsets (relative to BAR0)
+ *
+ * RAPL_PKG_POWER_LIMIT is read as two 32-bit halves (low word at the
+ * offset, high word at offset + 4); RAPL_PKG_POWER_INFO is read as a
+ * single 32-bit word.
+ */
+#define RAPL_PKG_POWER_LIMIT 0x59A0
+#define RAPL_PKG_POWER_INFO 0x5994
+
+/*
+ * Power limit register bits
+ *
+ * These apply to the combined 64-bit RAPL_PKG_POWER_LIMIT value: PL1 is
+ * taken from the low 32 bits and PL2 from the high 32 bits.  Each limit
+ * is the 15-bit POWER_LIMIT_MASK field, expressed in RAPL power units.
+ * POWER_LIMIT_MASK is also applied to RAPL_PKG_POWER_INFO to get the TDP.
+ */
+#define POWER_LIMIT_ENABLE (1ULL << 15) /* PL1 enable bit */
+#define POWER_LIMIT2_ENABLE (1ULL << 47) /* PL2 enable bit */
+#define POWER_LIMIT_LOCK (1ULL << 63) /* limits locked bit */
+#define POWER_LIMIT_MASK 0x7FFF
+#define POWER_LIMIT2_SHIFT 32 /* shift that brings PL2 down to bit 0 */
+
+/* Default power unit divisor (1/8 W) if MSR read fails */
+#define POWER_UNIT_DEFAULT 8
+/* Selects bits 3:0 of MSR_RAPL_POWER_UNIT, the power unit exponent. */
+#define RAPL_POWER_UNIT_MASK 0x0F
+/*
+ * Largest exponent accepted before computing 1U << exponent.
+ * NOTE(review): the 4-bit mask above already caps the exponent at 15,
+ * so this bound can never be exceeded.
+ */
+#define RAPL_POWER_UNIT_MAX_SHIFT 30
+#define MW_PER_WATT 1000ULL /* milliwatts per watt */
+/* Value every probed register reads as when MMIO access is denied. */
+#define MMIO_ACCESS_DENIED UINT32_MAX
+
+/*
+ * Per-device driver state, filled in by intel_thermal_attach().
+ */
+struct intel_thermal_softc {
+ int rid; /* resource ID of BAR0, PCIR_BAR(0) */
+ struct resource *res; /* BAR0 memory resource; released in detach */
+ bus_space_tag_t bst; /* bus space tag taken from res */
+ bus_space_handle_t bsh; /* bus space handle taken from res */
+ /* 1 when the attach-time probe read all-ones from the RAPL registers */
+ int access_denied;
+ /* divisor converting raw RAPL units to watts: 1 << power_unit_shift */
+ uint32_t power_unit_div;
+ /* power unit exponent from MSR_RAPL_POWER_UNIT bits 3:0 (default 3) */
+ uint8_t power_unit_shift;
+};
+
+/*
+ * PCI IDs matched by intel_thermal_probe() and passed to PCI_PNP_INFO().
+ * The description string becomes the device description on a match.
+ */
+static const struct pci_device_table intel_thermal_devices[] = {
+ /* Only Skylake validated - other generations may differ */
+ { PCI_DEV(0x8086, 0x1903), PCI_DESCR("Skylake Processor Thermal")},
+};
+
+/*
+ * Read one 32-bit register at byte offset 'offset' through the bus space
+ * tag and handle stored in the softc.
+ */
+static inline uint32_t
+intel_thermal_read32(struct intel_thermal_softc *sc, uint32_t offset)
+{
+	uint32_t value;
+
+	value = bus_space_read_4(sc->bst, sc->bsh, offset);
+	return (value);
+}
+
+/*
+ * Read a 64-bit register as two 32-bit accesses: the low word at 'offset'
+ * first, then the high word at 'offset + 4'.  The two accesses are issued
+ * separately, so the halves are not sampled at the same instant.
+ */
+static inline uint64_t
+intel_thermal_read64(struct intel_thermal_softc *sc, uint32_t offset)
+{
+	uint64_t value, upper;
+
+	value = bus_space_read_4(sc->bst, sc->bsh, offset);
+	upper = bus_space_read_4(sc->bst, sc->bsh, offset + 4);
+	value |= upper << 32;
+	return (value);
+}
+
+/*
+ * Sysctl handler for the PL1 (long-term) power limit, in milliwatts.
+ * Reports 0 without touching the hardware when access was denied.
+ */
+static int
+intel_thermal_pl1_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct intel_thermal_softc *sc = oidp->oid_arg1;
+	uint64_t raw, units;
+	uint32_t mw = 0;
+
+	if (!sc->access_denied) {
+		raw = intel_thermal_read64(sc, RAPL_PKG_POWER_LIMIT);
+		/* PL1 is the low 15-bit field, in RAPL power units. */
+		units = raw & POWER_LIMIT_MASK;
+		mw = (units * MW_PER_WATT) / sc->power_unit_div;
+	}
+	return (sysctl_handle_32(oidp, &mw, 0, req));
+}
+
+/*
+ * Sysctl handler for the PL2 (short-term) power limit, in milliwatts.
+ *
+ * PL2 is the 15-bit field found after shifting the 64-bit power limit
+ * register right by POWER_LIMIT2_SHIFT.  The raw value is in RAPL power
+ * units and is converted with MW_PER_WATT and the softc's divisor, the
+ * same way the PL1 and TDP handlers do it.
+ * Reports 0 without touching the hardware when access was denied.
+ */
+static int
+intel_thermal_pl2_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct intel_thermal_softc *sc = oidp->oid_arg1;
+	uint64_t pl;
+	uint32_t watts_mw;
+
+	if (sc->access_denied) {
+		watts_mw = 0;
+	} else {
+		pl = intel_thermal_read64(sc, RAPL_PKG_POWER_LIMIT);
+		watts_mw = (((pl >> POWER_LIMIT2_SHIFT) & POWER_LIMIT_MASK) *
+		    MW_PER_WATT) / sc->power_unit_div;
+	}
+	return (sysctl_handle_32(oidp, &watts_mw, 0, req));
+}
+
+/*
+ * Sysctl handler reporting whether the PL1 limit is enabled (1) or not (0).
+ * Reports 0 without touching the hardware when access was denied.
+ */
+static int
+intel_thermal_pl1_enable_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct intel_thermal_softc *sc = oidp->oid_arg1;
+	uint64_t raw;
+	int on = 0;
+
+	if (!sc->access_denied) {
+		raw = intel_thermal_read64(sc, RAPL_PKG_POWER_LIMIT);
+		on = (raw & POWER_LIMIT_ENABLE) != 0;
+	}
+	return (sysctl_handle_int(oidp, &on, 0, req));
+}
+
+/*
+ * Sysctl handler reporting whether the PL2 limit is enabled (1) or not (0).
+ * Reports 0 without touching the hardware when access was denied.
+ */
+static int
+intel_thermal_pl2_enable_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct intel_thermal_softc *sc = oidp->oid_arg1;
+	uint64_t raw;
+	int on = 0;
+
+	if (!sc->access_denied) {
+		raw = intel_thermal_read64(sc, RAPL_PKG_POWER_LIMIT);
+		on = (raw & POWER_LIMIT2_ENABLE) != 0;
+	}
+	return (sysctl_handle_int(oidp, &on, 0, req));
+}
+
+/*
+ * Sysctl handler reporting the lock bit of the power limit register.
+ * When access was denied the hardware is not read and 1 is reported.
+ */
+static int
+intel_thermal_locked_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct intel_thermal_softc *sc = oidp->oid_arg1;
+	uint64_t raw;
+	int is_locked = 1;
+
+	if (!sc->access_denied) {
+		raw = intel_thermal_read64(sc, RAPL_PKG_POWER_LIMIT);
+		is_locked = (raw & POWER_LIMIT_LOCK) != 0;
+	}
+	return (sysctl_handle_int(oidp, &is_locked, 0, req));
+}
+
+/*
+ * Sysctl handler for the Thermal Design Power, in milliwatts.
+ * Reports 0 without touching the hardware when access was denied.
+ */
+static int
+intel_thermal_tdp_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct intel_thermal_softc *sc = oidp->oid_arg1;
+	uint32_t raw, units;
+	uint32_t mw = 0;
+
+	if (!sc->access_denied) {
+		raw = intel_thermal_read32(sc, RAPL_PKG_POWER_INFO);
+		/* TDP is the low 15-bit field, in RAPL power units. */
+		units = raw & POWER_LIMIT_MASK;
+		mw = (units * MW_PER_WATT) / sc->power_unit_div;
+	}
+	return (sysctl_handle_32(oidp, &mw, 0, req));
+}
+
+/*
+ * Probe: claim the device if it appears in intel_thermal_devices and set
+ * its description from the matching entry; otherwise return ENXIO.
+ */
+static int
+intel_thermal_probe(device_t dev)
+{
+	const struct pci_device_table *match;
+
+	match = PCI_MATCH(dev, intel_thermal_devices);
+	if (match != NULL) {
+		device_set_desc(dev, match->descr);
+		return (BUS_PROBE_DEFAULT);
+	}
+	return (ENXIO);
+}
+
+/*
+ * Attach the device: map BAR0, work out the RAPL power unit, probe whether
+ * the RAPL MMIO registers are readable, and publish the sysctl nodes.
+ *
+ * Returns 0 on success, or ENOMEM when BAR0 cannot be allocated.
+ */
+static int
+intel_thermal_attach(device_t dev)
+{
+	struct intel_thermal_softc *sc = device_get_softc(dev);
+	struct sysctl_ctx_list *ctx;
+	struct sysctl_oid_list *children;
+	uint64_t msr;
+	uint32_t exponent, info, limit_hi, limit_lo;
+
+	sc->rid = PCIR_BAR(0);
+	sc->res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->rid,
+	    RF_ACTIVE);
+	if (sc->res == NULL) {
+		device_printf(dev, "cannot allocate BAR0\n");
+		return (ENOMEM);
+	}
+	sc->bst = rman_get_bustag(sc->res);
+	sc->bsh = rman_get_bushandle(sc->res);
+
+	/*
+	 * Start from the fallback unit of 1/8 W (exponent 3) and replace it
+	 * with the exponent from bits 3:0 of MSR 0x606 when the MSR is
+	 * readable.  One RAPL unit is 1/2^exponent W.
+	 */
+	sc->power_unit_shift = 3;
+	sc->power_unit_div = POWER_UNIT_DEFAULT;
+	if (rdmsr_safe(MSR_RAPL_POWER_UNIT, &msr) == 0) {
+		exponent = msr & RAPL_POWER_UNIT_MASK;
+		if (exponent <= RAPL_POWER_UNIT_MAX_SHIFT) {
+			sc->power_unit_shift = exponent;
+			sc->power_unit_div = 1U << exponent;
+		}
+	} else {
+		device_printf(dev,
+		    "MSR_RAPL_POWER_UNIT read failed, using default\n");
+	}
+
+	/*
+	 * Firmware lock detection: access is treated as denied only when
+	 * both halves of the power limit register and the power info
+	 * register all read back as 0xFFFFFFFF.
+	 */
+	limit_lo = intel_thermal_read32(sc, RAPL_PKG_POWER_LIMIT);
+	limit_hi = intel_thermal_read32(sc, RAPL_PKG_POWER_LIMIT + 4);
+	info = intel_thermal_read32(sc, RAPL_PKG_POWER_INFO);
+	sc->access_denied = (limit_lo == MMIO_ACCESS_DENIED &&
+	    limit_hi == MMIO_ACCESS_DENIED && info == MMIO_ACCESS_DENIED);
+	if (sc->access_denied)
+		device_printf(dev, "MMIO access denied by firmware\n");
+
+	/* All nodes hang directly off the device's own sysctl tree. */
+	ctx = device_get_sysctl_ctx(dev);
+	children = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+	/* Values fixed at attach time are exported straight from the softc. */
+	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "access_denied",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, &sc->access_denied, 0,
+	    "Firmware denied access to RAPL registers");
+	SYSCTL_ADD_U32(ctx, children, OID_AUTO, "power_unit_div",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, &sc->power_unit_div, 0,
+	    "Power unit divisor (from MSR 0x606)");
+	SYSCTL_ADD_U8(ctx, children, OID_AUTO, "power_unit_shift",
+	    CTLFLAG_RD | CTLFLAG_MPSAFE, &sc->power_unit_shift, 0,
+	    "Power unit shift (bits 3:0 of MSR 0x606)");
+
+	/* Values re-read from the hardware on every query use handlers. */
+	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pl1",
+	    CTLTYPE_U32 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    intel_thermal_pl1_sysctl, "IU",
+	    "PL1 (long-term) power limit in milliwatts");
+	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pl2",
+	    CTLTYPE_U32 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    intel_thermal_pl2_sysctl, "IU",
+	    "PL2 (short-term) power limit in milliwatts");
+	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pl1_enabled",
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    intel_thermal_pl1_enable_sysctl, "I",
+	    "PL1 power limit enabled");
+	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pl2_enabled",
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    intel_thermal_pl2_enable_sysctl, "I",
+	    "PL2 power limit enabled");
+	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "locked",
+	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    intel_thermal_locked_sysctl, "I",
+	    "Power limits locked by firmware");
+	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tdp",
+	    CTLTYPE_U32 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+	    intel_thermal_tdp_sysctl, "IU",
+	    "Thermal Design Power in milliwatts");
+
+	return (0);
+}
+
+/*
+ * Detach: give back the BAR0 memory resource if one was allocated.
+ * Always returns 0.
+ */
+static int
+intel_thermal_detach(device_t dev)
+{
+	struct intel_thermal_softc *sc;
+
+	sc = device_get_softc(dev);
+	if (sc->res == NULL)
+		return (0);
+	bus_release_resource(dev, SYS_RES_MEMORY, sc->rid, sc->res);
+	return (0);
+}
+
+/* Device interface methods implemented by this driver. */
+static device_method_t intel_thermal_methods[] = {
+ DEVMETHOD(device_probe, intel_thermal_probe),
+ DEVMETHOD(device_attach, intel_thermal_attach),
+ DEVMETHOD(device_detach, intel_thermal_detach),
+ DEVMETHOD_END
+};
+
+/* Driver description: name, method table and per-device softc size. */
+static driver_t intel_thermal_driver = {
+ "intel_thermal",
+ intel_thermal_methods,
+ sizeof(struct intel_thermal_softc)
+};
+
+/* Register the driver on the pci bus. */
+DRIVER_MODULE(intel_thermal, pci, intel_thermal_driver, 0, 0);
+/* Export the PCI ID table; presumably used for module auto-loading - confirm. */
+PCI_PNP_INFO(intel_thermal_devices);
+MODULE_VERSION(intel_thermal, 1);
+/* Requires the pci module, version 1 (minimum, preferred and maximum). */
+MODULE_DEPEND(intel_thermal, pci, 1, 1, 1);
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -176,6 +176,7 @@
imgact_binmisc \
${_imx} \
${_intelspi} \
+ ${_intel_thermal} \
${_io} \
${_ioat} \
${_ipoib} \
@@ -822,6 +823,7 @@
_hyperv= hyperv
_ichwd= ichwd
_ida= ida
+_intel_thermal= intel_thermal
_intelspi= intelspi
_ips= ips
_isci= isci
diff --git a/sys/modules/intel_thermal/Makefile b/sys/modules/intel_thermal/Makefile
new file mode 100644
--- /dev/null
+++ b/sys/modules/intel_thermal/Makefile
@@ -0,0 +1,6 @@
+# Kernel module build for the intel_thermal driver.
+.PATH: ${SRCTOP}/sys/dev/intel
+KMOD= intel_thermal
+SRCS= intel_thermal.c
+# Interface headers listed as sources; presumably generated at build time
+# for the device, bus and pci methods the driver uses - confirm.
+SRCS+= device_if.h bus_if.h pci_if.h
+
+.include <bsd.kmod.mk>

File Metadata

Mime Type
text/plain
Expires
Fri, May 1, 3:40 AM (4 h, 31 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32552765
Default Alt Text
D54882.id.diff (13 KB)

Event Timeline