Page MenuHomeFreeBSD

D56546.id175967.diff
No OneTemporary

D56546.id175967.diff

diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -110,6 +110,7 @@
# Make an SMP-capable kernel by default
options SMP # Symmetric MultiProcessor Kernel
+options HMP
# CPU frequency control
device cpufreq
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -220,6 +220,15 @@
# Mandatory:
options SMP # Symmetric MultiProcessor Kernel
+#####################################################################
+# HMP OPTIONS:
+#
+# HMP enables building of Heterogeneous MultiProcessor Kernel.
+# This depends on SMP and SCHED_ULE.
+
+# Mandatory:
+options HMP # Heterogeneous MultiProcessor Kernel
+
# EARLY_AP_STARTUP releases the Application Processors earlier in the
# kernel startup process (before devices are probed) rather than at the
# end. This is a temporary option for use during the transition from
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3968,6 +3968,7 @@
kern/subr_gtaskqueue.c standard
kern/subr_hash.c standard
kern/subr_hints.c standard
+kern/subr_hmp.c optional hmp
kern/subr_kdb.c standard
kern/subr_kobj.c standard
kern/subr_lock.c standard
diff --git a/sys/conf/options b/sys/conf/options
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -635,6 +635,7 @@
# Standard SMP options
EARLY_AP_STARTUP opt_global.h
SMP opt_global.h
+HMP opt_global.h
NUMA opt_global.h
# Size of the kernel message buffer
diff --git a/sys/kern/subr_hmp.c b/sys/kern/subr_hmp.c
new file mode 100644
--- /dev/null
+++ b/sys/kern/subr_hmp.c
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2026 FreeBSD Foundation
+ *
+ * This software was developed by Minsoo Choo under sponsorship from the
+ * FreeBSD Foundation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/systm.h>
+#include <sys/hmp.h>
+#include <sys/kernel.h>
+#include <sys/pcpu.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+/* System-wide CPU capability information */
+struct hmp hmp_state;
+
+/* Per-CPU HMP state */
+DPCPU_DEFINE(struct hmp_pcpu, hmp_pcpu);
+
+/*
+ * Initialization
+ */
+
+/*
+ * Scan all CPUs and record in hmp_state.has_scores whether any CPU
+ * advertises dynamic score updates.  Read the flags through the
+ * hmp_flag() accessor so this follows the same atomic access
+ * discipline documented for struct hmp_pcpu in sys/sys/hmp.h.
+ */
+static void
+hmp_detect_scores(void)
+{
+	struct hmp_pcpu *hp;
+	int cpu;
+
+	CPU_FOREACH(cpu) {
+		hp = DPCPU_ID_PTR(cpu, hmp_pcpu);
+		if (hmp_flag(hp, HMP_FLAG_DYNAMIC)) {
+			hmp_state.has_scores = true;
+			return;
+		}
+	}
+
+	hmp_state.has_scores = false;
+}
+
+/*
+ * Precompute the sum of all per-CPU capacities for the scheduler.
+ *
+ * Each capacity is at most HMP_CAPACITY_SCALE (1024), so on systems
+ * with 64 or more CPUs the sum can exceed UINT16_MAX.  Accumulate in
+ * 32 bits and saturate rather than silently wrapping; the
+ * total_capacity field itself should eventually be widened.
+ */
+static void
+hmp_set_total_capacity(void)
+{
+	struct hmp_pcpu *hp;
+	uint32_t total;
+	int cpu;
+
+	total = 0;
+
+	CPU_FOREACH(cpu) {
+		hp = DPCPU_ID_PTR(cpu, hmp_pcpu);
+		total += hp->capacity;
+	}
+
+	hmp_state.total_capacity = (total > UINT16_MAX) ? UINT16_MAX : total;
+}
+
+/*
+ * One-shot HMP bring-up.  Runs at SI_SUB_SMP + 1 so all application
+ * processors are already online when capacities are summed.
+ */
+static void
+hmp_init(void *arg __unused)
+{
+	/* Choose and call provider */
+
+	hmp_detect_scores();
+	hmp_set_total_capacity();
+}
+SYSINIT(hmp, SI_SUB_SMP + 1, SI_ORDER_ANY, hmp_init, NULL);
+
+/*
+ * Helper functions for scheduler
+ */
+
+/*
+ * Fallback for hmp_best_cpu() when the processor does not provide
+ * dynamic score updates: return the CPU with the largest static
+ * capacity, restricted to `mask' when one is given.  Linear in the
+ * number of CPUs; returns -1 if no eligible CPU has capacity > 0.
+ *
+ * TODO: Precalculate cpu with highest capacity on boot after initialization.
+ */
+int
+hmp_highest_capacity_cpu(const cpuset_t *mask)
+{
+	struct hmp_pcpu *pcpu;
+	hmp_score_t max_cap;
+	int winner, i;
+
+	winner = -1;
+	max_cap = 0;
+	CPU_FOREACH(i) {
+		if (mask != NULL && !CPU_ISSET(i, mask))
+			continue;
+		pcpu = DPCPU_ID_PTR(i, hmp_pcpu);
+		if (pcpu->capacity <= max_cap)
+			continue;
+		max_cap = pcpu->capacity;
+		winner = i;
+	}
+
+	return (winner);
+}
+
+/*
+ * Find the CPU with the best score of the given type for thread
+ * placement.  Falls back to static capacity if scores are not
+ * provided.  Linear in the number of CPUs; returns -1 if no eligible
+ * CPU has a score > 0.
+ *
+ * Scores are read through the atomic hmp_score() accessor, and each
+ * CPU's score is loaded exactly once so the comparison and the
+ * recorded best value cannot disagree under concurrent provider
+ * updates.  A provider may still update a score after it was read
+ * here, but the effect is negligible, and holding a mutex for a
+ * global snapshot would be too expensive on this path.
+ *
+ * TODO: If this brings severe performance degradation, score providers should
+ * maintain and update index everytime new information is fed and
+ * the scheduler should use the index which takes O(1).
+ */
+int
+hmp_best_cpu(const cpuset_t *mask, enum score_type st)
+{
+	struct hmp_pcpu *hp;
+	hmp_score_t score, best_score;
+	int best_cpu, cpu;
+
+	if (!hmp_state.has_scores)
+		return (hmp_highest_capacity_cpu(mask));
+
+	best_cpu = -1;
+	best_score = 0;
+
+	CPU_FOREACH(cpu) {
+		if (mask != NULL && !CPU_ISSET(cpu, mask))
+			continue;
+		hp = DPCPU_ID_PTR(cpu, hmp_pcpu);
+		score = hmp_score(hp, st);
+		if (score > best_score) {
+			best_score = score;
+			best_cpu = cpu;
+		}
+	}
+
+	return (best_cpu);
+}
+
+/*
+ * Sysctls
+ */
+
+/* Report one CPU's static capacity; arg2 carries the CPU id. */
+static int
+hmp_sysctl_capacity(SYSCTL_HANDLER_ARGS)
+{
+	unsigned int val;
+
+	/* Capacity is set once at boot, so no atomic accessor is needed. */
+	val = DPCPU_ID_PTR(arg2, hmp_pcpu)->capacity;
+	return (sysctl_handle_int(oidp, &val, 0, req));
+}
+
+/* Report one CPU's static capacity as a percentage; arg2 is the CPU id. */
+static int
+hmp_sysctl_capacity_percent(SYSCTL_HANDLER_ARGS)
+{
+	unsigned int pct;
+
+	pct = HMP_CAPACITY_NORMAL_TO_PERCENT(
+	    DPCPU_ID_PTR(arg2, hmp_pcpu)->capacity);
+	return (sysctl_handle_int(oidp, &pct, 0, req));
+}
+
+/*
+ * Report one dynamic score for one CPU.  arg2 packs the CPU id into
+ * the low 16 bits and the enum score_type into the next 16 bits, as
+ * encoded by the SYSCTL_ADD_PROC calls in hmp_sysctl_init().
+ */
+static int
+hmp_sysctl_score(SYSCTL_HANDLER_ARGS)
+{
+	struct hmp_pcpu *hp;
+	int cpu = arg2 & 0xffff;
+	enum score_type st = (arg2 >> 16) & 0xffff;
+	unsigned int v;
+
+	hp = DPCPU_ID_PTR(cpu, hmp_pcpu);
+	v = hmp_score(hp, st);
+	return (sysctl_handle_int(oidp, &v, 0, req));
+}
+
+/* Report whether a CPU has dynamic scores (0/1); arg2 is the CPU id. */
+static int
+hmp_sysctl_dynamic(SYSCTL_HANDLER_ARGS)
+{
+	struct hmp_pcpu *hp;
+	int val;
+
+	hp = DPCPU_ID_PTR(arg2, hmp_pcpu);
+	val = 0;
+	if (hmp_flag(hp, HMP_FLAG_DYNAMIC))
+		val = 1;
+	return (sysctl_handle_int(oidp, &val, 0, req));
+}
+
+/* Report whether a CPU is currently throttled (0/1); arg2 is the CPU id. */
+static int
+hmp_sysctl_throttled(SYSCTL_HANDLER_ARGS)
+{
+	struct hmp_pcpu *hp;
+	int val;
+
+	hp = DPCPU_ID_PTR(arg2, hmp_pcpu);
+	val = 0;
+	if (hmp_flag(hp, HMP_FLAG_THROTTLED))
+		val = 1;
+	return (sysctl_handle_int(oidp, &val, 0, req));
+}
+
+static SYSCTL_NODE(_kern, OID_AUTO, hmp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "Heterogeneous multi-processing state");
+
+SYSCTL_U16(_kern_hmp, OID_AUTO, total_capacity, CTLFLAG_RD,
+ &hmp_state.total_capacity, 0,
+ "Sum of per-CPU capacities (normalized to HMP_CAPACITY_SCALE)");
+
+SYSCTL_BOOL(_kern_hmp, OID_AUTO, has_scores, CTLFLAG_RD,
+ &hmp_state.has_scores, 0,
+ "True if at least one CPU provides dynamic perf/eff scores");
+
+/*
+ * Register the per-CPU sysctl tree:
+ * kern.hmp.cpu.<N>.{capacity,capacity_percent,perf_score,eff_score,
+ * dynamic,throttled}.  Runs at SI_SUB_SMP + 2, after hmp_init(), so
+ * the reported values are already initialized.
+ */
+static void
+hmp_sysctl_init(void *arg __unused)
+{
+	struct sysctl_oid *cpu_root, *cpu_node;
+	struct sysctl_oid_list *cpu_root_children, *cpu_children;
+	char name[8];
+	int cpu;
+
+	cpu_root = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_hmp),
+	    OID_AUTO, "cpu", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
+	    "Per-CPU HMP state");
+	if (cpu_root == NULL)
+		return;
+	cpu_root_children = SYSCTL_CHILDREN(cpu_root);
+
+	CPU_FOREACH(cpu) {
+		snprintf(name, sizeof(name), "%d", cpu);
+		cpu_node = SYSCTL_ADD_NODE(NULL, cpu_root_children, OID_AUTO,
+		    name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
+		    "Per-CPU HMP state");
+		if (cpu_node == NULL)
+			continue;
+		cpu_children = SYSCTL_CHILDREN(cpu_node);
+
+		SYSCTL_ADD_PROC(NULL, cpu_children, OID_AUTO, "capacity",
+		    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, cpu,
+		    hmp_sysctl_capacity, "IU",
+		    "Static capacity (0-HMP_CAPACITY_SCALE)");
+
+		SYSCTL_ADD_PROC(NULL, cpu_children, OID_AUTO,
+		    "capacity_percent",
+		    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, cpu,
+		    hmp_sysctl_capacity_percent, "IU",
+		    "Static capacity as a percentage of the scale");
+
+		/* arg2 packs the CPU id (low 16 bits) with the score type. */
+		SYSCTL_ADD_PROC(NULL, cpu_children, OID_AUTO, "perf_score",
+		    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
+		    cpu | (HMP_SCORE_PERF << 16), hmp_sysctl_score, "IU",
+		    "Current performance score (dynamic)");
+
+		SYSCTL_ADD_PROC(NULL, cpu_children, OID_AUTO, "eff_score",
+		    CTLTYPE_UINT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
+		    cpu | (HMP_SCORE_EFF << 16), hmp_sysctl_score, "IU",
+		    "Current efficiency score (dynamic)");
+
+		/*
+		 * The handler reports only the HMP_FLAG_DYNAMIC bit, so
+		 * name the node "dynamic" rather than "flags"; it returns
+		 * a plain int, matching CTLTYPE_INT and format "I".
+		 */
+		SYSCTL_ADD_PROC(NULL, cpu_children, OID_AUTO, "dynamic",
+		    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, cpu,
+		    hmp_sysctl_dynamic, "I",
+		    "CPU has dynamic scores");
+
+		SYSCTL_ADD_PROC(NULL, cpu_children, OID_AUTO, "throttled",
+		    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, cpu,
+		    hmp_sysctl_throttled, "I",
+		    "CPU currently throttled (thermal/power)");
+	}
+}
+SYSINIT(hmp_sysctl, SI_SUB_SMP + 2, SI_ORDER_ANY, hmp_sysctl_init, NULL);
diff --git a/sys/sys/hmp.h b/sys/sys/hmp.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/hmp.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2026 FreeBSD Foundation
+ *
+ * This software was developed by Minsoo Choo under sponsorship from the
+ * FreeBSD Foundation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#ifndef _SYS_HMP_H_
+#define _SYS_HMP_H_
+
+#ifdef _KERNEL
+#ifndef LOCORE
+
+#include <sys/types.h>
+#include <sys/pcpu.h>
+
+#include <machine/atomic.h>
+
+#ifdef HMP
+
+#define HMP_CAPACITY_SCALE 1024
+#define HMP_CAPACITY_MAX HMP_CAPACITY_SCALE
+#define HMP_CAPACITY_DEFAULT HMP_CAPACITY_MAX
+
+/* Capacity normalization macro functions */
+#define HMP_CAPACITY_NORMAL_FROM(x, y) (((x) * HMP_CAPACITY_SCALE) / (y))
+#define HMP_CAPACITY_NORMAL_FROM_255(x) HMP_CAPACITY_NORMAL_FROM((x), 255)
+#define HMP_CAPACITY_NORMAL_FROM_1024(x) HMP_CAPACITY_NORMAL_FROM((x), 1024)
+
+#define HMP_CAPACITY_NORMAL_TO(x, y) (((x) * (y)) / HMP_CAPACITY_SCALE)
+#define HMP_CAPACITY_NORMAL_TO_PERCENT(x) HMP_CAPACITY_NORMAL_TO((x), 100)
+
+/*
+ * Score type
+ */
+enum score_type {
+ HMP_SCORE_PERF,
+ HMP_SCORE_EFF,
+};
+
+/*
+ * Flags (stored in hmp_flags)
+ */
+#define HMP_FLAG_DYNAMIC (1u << 0) /* Scores update at runtime */
+#define HMP_FLAG_THROTTLED (1u << 1) /* CPU currently throttled */
+
+/*
+ * CPU capacity type.
+ * This value should be normalized to 0-1024.
+ *
+ * ACPI CPPC : 0-255
+ * Intel HFI : 0-255
+ * Arm capacity : 0-1024
+ *
+ * Whenever a new capability score scheme's highest score exceeds
+ * 1024, HMP_CAPACITY_SCALE should be bumped to the new highest score for
+ * fine-grained score management on a new architecture.
+ */
+typedef uint16_t hmp_capacity_t;
+
+/*
+ * CPU score type.
+ * This doesn't need to be normalized.
+ */
+typedef uint16_t hmp_score_t;
+
+/*
+ * System-wide CPU capability state - initialized on boot
+ *
+ * NOTE(review): total_capacity is hmp_score_t (uint16_t); with each
+ * CPU capped at HMP_CAPACITY_SCALE (1024), 64 or more CPUs can
+ * overflow it -- consider widening to uint32_t.
+ */
+struct hmp {
+	hmp_score_t total_capacity;	/* Precalculated for scheduler */
+	bool has_scores;		/* Runtime updates available */
+};
+extern struct hmp hmp_state;
+
+/*
+ * Per-CPU HMP state.
+ *
+ * capacity:	Static relative throughput, set at boot from
+ *		microarchitecture and max frequency.  Used by the
+ *		load balancer to distribute work proportionally.
+ *		Typed hmp_capacity_t (same width as hmp_score_t) to
+ *		match the capacity normalization rules above.
+ *
+ * scores[]:	Current dynamic capability, indexed by enum score_type.
+ *		scores[HMP_SCORE_PERF] (higher = faster) is read by
+ *		the scheduler for thread placement of
+ *		interactive/realtime threads;
+ *		scores[HMP_SCORE_EFF] (higher = less power per unit
+ *		of work) is read for placement of background and
+ *		idle-priority threads.  Both are updated at runtime
+ *		by the active score provider (HFI, SCMI, CPPC).
+ *
+ * flags:	HMP_FLAG_* bits.  HMP_FLAG_THROTTLED is set by the
+ *		provider when performance is constrained (thermal,
+ *		power budget).  The scheduler uses it to steer
+ *		PRI_TIMESHARE threads away from throttled CPUs.
+ *
+ * All fields are written by a single provider and read locklessly
+ * by the scheduler; see the synchronization note in subr_hmp.c.
+ *
+ * Capacity is initialized once during initialization, so it doesn't need
+ * synchronization.  Other fields need to be accessed atomically through
+ * accessors and setters below.
+ */
+struct hmp_pcpu {
+	hmp_capacity_t capacity;
+	hmp_score_t scores[2];
+	uint8_t flags;
+};
+DPCPU_DECLARE(struct hmp_pcpu, hmp_pcpu);
+
+/*
+ * Accessors
+ */
+/*
+ * Atomically read one dynamic score; the acquire load pairs with the
+ * release store in hmp_set_score().  Return value parenthesized per
+ * style(9), matching the rest of this file.
+ */
+static inline hmp_score_t
+hmp_score(struct hmp_pcpu *hp, enum score_type st)
+{
+	return (atomic_load_acq_16(&hp->scores[st]));
+}
+
+/*
+ * Atomically test one HMP_FLAG_* bit; pairs with the atomic bit
+ * operations in hmp_set_flags()/hmp_clear_flags().
+ */
+static inline bool
+hmp_flag(struct hmp_pcpu *hp, uint8_t flag)
+{
+	return ((atomic_load_acq_8(&hp->flags) & flag) != 0);
+}
+
+/*
+ * Setters
+ */
+/* Atomically publish a new score; release pairs with hmp_score(). */
+static inline void
+hmp_set_score(struct hmp_pcpu *hp, enum score_type st, hmp_score_t score)
+{
+	atomic_store_rel_16(&hp->scores[st], score);
+}
+
+/* Atomically set (OR in) the given HMP_FLAG_* bits. */
+static inline void
+hmp_set_flags(struct hmp_pcpu *hp, uint8_t flags)
+{
+	atomic_set_8(&hp->flags, flags);
+}
+
+/* Atomically clear the given HMP_FLAG_* bits. */
+static inline void
+hmp_clear_flags(struct hmp_pcpu *hp, uint8_t flags)
+{
+	atomic_clear_8(&hp->flags, flags);
+}
+
+/*
+ * Helper functions for scheduler
+ */
+int hmp_highest_capacity_cpu(const cpuset_t *mask);
+int hmp_best_cpu(const cpuset_t *mask, enum score_type st);
+
+#endif /* HMP */
+#endif /* !LOCORE */
+#endif /* _KERNEL */
+#endif /* _SYS_HMP_H_ */

File Metadata

Mime Type
text/plain
Expires
Sun, Apr 26, 6:53 AM (16 h, 26 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32171322
Default Alt Text
D56546.id175967.diff (12 KB)

Event Timeline