Page MenuHomeFreeBSD

D54674.id169733.diff
No OneTemporary

D54674.id169733.diff

diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h
--- a/sys/amd64/include/atomic.h
+++ b/sys/amd64/include/atomic.h
@@ -64,7 +64,7 @@
* avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
* An assertion in amd64/vm_machdep.c ensures that the value is correct.
*/
-#define OFFSETOF_MONITORBUF 0x100
+#define OFFSETOF_MONITORBUF 0x200
#endif
#if defined(SAN_NEEDS_INTERCEPTORS) && !defined(SAN_RUNTIME)
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -101,7 +101,8 @@
u_int pc_small_core; \
u_int pc_pcid_invlpg_workaround; \
struct pmap_pcid pc_kpmap_store; \
- char __pad[2900] /* pad to UMA_PCPU_ALLOC_SIZE */
+ uint8_t pc_hfi_index; /* HFI table index */ \
+ char __pad[2644] /* pad to UMA_PCPU_ALLOC_SIZE */
#define PC_DBREG_CMD_NONE 0
#define PC_DBREG_CMD_LOAD 1
diff --git a/sys/arm/include/pcpu.h b/sys/arm/include/pcpu.h
--- a/sys/arm/include/pcpu.h
+++ b/sys/arm/include/pcpu.h
@@ -63,7 +63,7 @@
uint32_t pc_original_actlr; \
uint64_t pc_clock; \
uint32_t pc_mpidr; \
- char __pad[135]
+ char __pad[391]
#ifdef _KERNEL
diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h
--- a/sys/arm64/include/pcpu.h
+++ b/sys/arm64/include/pcpu.h
@@ -51,7 +51,7 @@
uint64_t pc_mpidr; \
u_int pc_bcast_tlbi_workaround; \
uint64_t pc_release_addr; \
- char __pad[189]
+ char __pad[384]
#ifdef _KERNEL
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -211,7 +211,7 @@
options SCHED_4BSD
options SCHED_STATS
#options SCHED_ULE
-
+
#####################################################################
# SMP OPTIONS:
#
@@ -220,6 +220,15 @@
# Mandatory:
options SMP # Symmetric MultiProcessor Kernel
+#####################################################################
+# HMP OPTIONS:
+#
+# HMP enables building of Heterogeneous MultiProcessor Kernel.
+# This depends on SMP and SCHED_ULE.
+
+# Mandatory:
+options HMP # Heterogeneous MultiProcessor Kernel
+
# EARLY_AP_STARTUP releases the Application Processors earlier in the
# kernel startup process (before devices are probed) rather than at the
# end. This is a temporary option for use during the transition from
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1270,7 +1270,7 @@
dev/bhnd/cores/chipc/bhnd_sprom_chipc.c optional bhnd
dev/bhnd/cores/chipc/bhnd_pmu_chipc.c optional bhnd
dev/bhnd/cores/chipc/chipc.c optional bhnd
-dev/bhnd/cores/chipc/chipc_cfi.c optional bhnd cfi
+dev/bhnd/cores/chipc/chipc_cfi.c optional bhnd cfi
dev/bhnd/cores/chipc/chipc_gpio.c optional bhnd gpio
dev/bhnd/cores/chipc/chipc_slicer.c optional bhnd cfi | bhnd spibus
dev/bhnd/cores/chipc/chipc_spi.c optional bhnd spibus
@@ -3955,6 +3955,7 @@
kern/subr_gtaskqueue.c standard
kern/subr_hash.c standard
kern/subr_hints.c standard
+kern/subr_hmp.c standard
kern/subr_kdb.c standard
kern/subr_kobj.c standard
kern/subr_lock.c standard
diff --git a/sys/conf/options b/sys/conf/options
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -639,6 +639,7 @@
EARLY_AP_STARTUP opt_global.h
SMP opt_global.h
NUMA opt_global.h
+HMP opt_global.h
# Size of the kernel message buffer
MSGBUF_SIZE opt_msgbuf.h
diff --git a/sys/kern/subr_hmp.c b/sys/kern/subr_hmp.c
new file mode 100644
--- /dev/null
+++ b/sys/kern/subr_hmp.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2026 The FreeBSD Foundation
+ *
+ * This software was developed by Minsoo Choo under sponsorship from the
+ * FreeBSD Foundation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/hmp.h>
+#include <sys/kernel.h>
+#include <sys/pcpu.h>
+#include <sys/smp.h>
+
+#ifdef HMP
+
+/* System-wide CPU capability information; see struct cpucap_t in sys/hmp.h. */
+struct cpucap_t cpucap;
+
+/*
+ * Detect whether any CPU advertises dynamically updated scores and record
+ * the result in cpucap.has_scores.
+ */
+static void
+cpucap_detect_scores(void)
+{
+	struct pcpu *pc;
+
+	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
+		if (pc->pc_cap_flags & CPUCAP_FLAG_DYNAMIC) {
+			/* Use 'true', not '1', for the bool field. */
+			cpucap.has_scores = true;
+			return;
+		}
+	}
+
+	cpucap.has_scores = false;
+}
+
+/*
+ * Initialize machine-independent cpucap state.  Runs once after SMP startup
+ * (SI_SUB_SMP + 1); MD code then fills in the real per-CPU values.
+ */
+static void
+cpucap_init(void *arg __unused)
+{
+	struct pcpu *pc;
+
+	/* Default system state. */
+	cpucap.total_capacity = 0;
+	cpucap.class_count = 1;
+	cpucap.cap_count = 0;
+	cpucap.cap_bitmap = 0;
+	cpucap.has_scores = false;
+
+	/* Initialize pcpu fields with defaults. */
+	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
+		pc->pc_cap_capacity = CPUCAP_SCORE_DEFAULT;
+		pc->pc_cap_flags = 0;
+	}
+
+	/* MD init populates actual values. */
+	cpucap_md_init();
+
+	cpucap_detect_scores();
+
+	/* Log what we found; the format string previously had a stray ')'. */
+	if (cpucap.has_scores) {
+		printf("cpucap: scores=%d, classes=%d, caps=%d\n",
+		    cpucap.has_scores, cpucap.class_count,
+		    cpucap.cap_count);
+	}
+}
+SYSINIT(cpucap, SI_SUB_SMP + 1, SI_ORDER_ANY, cpucap_init, NULL);
+
+/*
+ * MI functions called by machine-dependent code to set values during boot.
+ */
+void
+cpucap_set_class_count(int count)
+{
+	int n;
+
+	/* Clamp to [1, CPUCAP_CLASS_MAX]. */
+	n = count;
+	if (n < 1)
+		n = 1;
+	else if (n > CPUCAP_CLASS_MAX)
+		n = CPUCAP_CLASS_MAX;
+	cpucap.class_count = n;
+}
+
+/*
+ * Record the number of valid capabilities and the enabled-capability bitmap.
+ * Clamp to [0, CPUCAP_CAP_MAX]: a negative count would wrap when stored in
+ * the uint8_t cap_count and defeat the bounds checks in cpucap_score() and
+ * cpucap_best_cpu().
+ */
+void
+cpucap_set_cap_count(int count, uint8_t bitmap)
+{
+	if (count < 0)
+		count = 0;
+	if (count > CPUCAP_CAP_MAX)
+		count = CPUCAP_CAP_MAX;
+	cpucap.cap_count = count;
+	cpucap.cap_bitmap = bitmap;
+}
+
+/*
+ * Machine-independent functions for scheduler
+ */
+
+/*
+ * Fallback for cpucap_best_cpu() when the processor does not support
+ * dynamic score updates.  Linear scan over all CPUs, O(n).
+ *
+ * TODO: Precalculate the CPU with the highest capacity at boot, after
+ * initialization.
+ */
+int
+cpucap_highest_capacity_cpu(const cpuset_t *mask)
+{
+	struct pcpu *pc;
+	cpucap_score_t max_cap;
+	int winner;
+
+	winner = -1;
+	max_cap = 0;
+	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
+		if (mask != NULL && !CPU_ISSET(pc->pc_cpuid, mask))
+			continue;
+		if (pc->pc_cap_capacity > max_cap) {
+			max_cap = pc->pc_cap_capacity;
+			winner = pc->pc_cpuid;
+		}
+	}
+	return (winner);
+}
+
+/*
+ * Find the CPU with the best score for the given class and capability for
+ * thread placement.  Falls back to static capacity if dynamic scores are
+ * not provided.  Linear scan, O(n).
+ *
+ * TODO: If this brings severe performance degradation, score providers
+ * should maintain and update an index every time new information is fed,
+ * so the scheduler can consult it in O(1).
+ */
+int
+cpucap_best_cpu(const cpuset_t *mask, int class, int cap)
+{
+	struct pcpu *pc;
+	int best_cpu;
+	cpucap_score_t best_score;
+
+	/*
+	 * If no dynamic scores, fall back to static capacity.
+	 */
+	if (!cpucap.has_scores)
+		return (cpucap_highest_capacity_cpu(mask));
+
+	/*
+	 * Reject negative indices as well as too-large ones: both would
+	 * index out of bounds into pc_cap_scores.  An invalid class falls
+	 * back to class 0, an invalid capability to static capacity.
+	 */
+	if (class < 0 || class >= cpucap.class_count)
+		class = 0;
+	if (cap < 0 || cap >= cpucap.cap_count)
+		return (cpucap_highest_capacity_cpu(mask));
+
+	best_cpu = -1;
+	best_score = 0;
+
+	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
+		if (mask != NULL && !CPU_ISSET(pc->pc_cpuid, mask))
+			continue;
+
+		if (pc->pc_cap_scores[class][cap] > best_score) {
+			best_score = pc->pc_cap_scores[class][cap];
+			best_cpu = pc->pc_cpuid;
+		}
+	}
+
+	return (best_cpu);
+}
+
+/*
+ * Weak MD functions - overridden by architecture
+ */
+
+/* Default MD initializer: leaves the boot-time defaults untouched. */
+__weak_symbol void
+cpucap_md_init(void)
+{
+}
+
+/* Default thread classifier: every thread belongs to class 0. */
+__weak_symbol int
+cpucap_md_get_class(struct thread *td __unused)
+{
+	return 0;
+}
+
+#endif /* HMP */
diff --git a/sys/powerpc/include/pcpu.h b/sys/powerpc/include/pcpu.h
--- a/sys/powerpc/include/pcpu.h
+++ b/sys/powerpc/include/pcpu.h
@@ -68,7 +68,7 @@
struct pvo_entry *qmap_pvo; \
struct mtx qmap_lock; \
uint64_t opal_hmi_flags; \
- char __pad[1337];
+ char __pad[1081];
#ifdef __powerpc64__
#define PCPU_MD_AIM_FIELDS PCPU_MD_AIM64_FIELDS
diff --git a/sys/riscv/include/pcpu.h b/sys/riscv/include/pcpu.h
--- a/sys/riscv/include/pcpu.h
+++ b/sys/riscv/include/pcpu.h
@@ -47,7 +47,7 @@
uint32_t pc_pending_ipis; /* IPIs pending to this CPU */ \
uint32_t pc_hart; /* Hart ID */ \
uint64_t pc_clock; \
- char __pad[48] /* Pad to factor of PAGE_SIZE */
+ char __pad[552] /* Pad to factor of PAGE_SIZE */
#ifdef _KERNEL
diff --git a/sys/sys/_hmp.h b/sys/sys/_hmp.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/_hmp.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2026 The FreeBSD Foundation
+ *
+ * This software was developed by Minsoo Choo under sponsorship from the
+ * FreeBSD Foundation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#ifndef _SYS__HMP_H_
+#define _SYS__HMP_H_
+
+#include <sys/types.h>
+
+/*
+ * CPU capability score type.
+ * This value should be normalized to 0-1024.
+ *
+ * ACPI CPPC : 0-255
+ * Intel HFI : 0-255
+ * Arm capacity : 0-1024
+ *
+ * Whenever there is a new capability score scheme where highest score excceds
+ * 1024, CPUCAP_SCORE_SCALE should be bumped to the new highest score for
+ * fine-grained score management on a new architecture.
+ */
+typedef uint16_t cpucap_score_t;
+
+/*
+ * Maximum supported classes and capabilities
+ *
+ * These are compile-time maximums for array sizing to avoid dynamic
+ * allocation. Actual counts are runtime values available from
+ * cpucap.class_count and cpucap.cap_count.
+ *
+ * Intel SDM reserves ECS[15:8] for class count (max 256)
+ * Intel SDM reserves EDX[7:0] for capability bitmap (max 8 capabilities)
+ *
+ * We use conservative maximums to avoid excessive memory usage.
+ */
+#define CPUCAP_CLASS_MAX 16
+#define CPUCAP_CAP_MAX 8
+
+#endif /* _SYS__HMP_H_ */
diff --git a/sys/sys/hmp.h b/sys/sys/hmp.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/hmp.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2026 The FreeBSD Foundation
+ *
+ * This software was developed by Minsoo Choo under sponsorship from the
+ * FreeBSD Foundation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#ifndef _SYS_HMP_H_
+#define _SYS_HMP_H_
+
+#ifdef _KERNEL
+#ifndef LOCORE
+
+#include <sys/types.h>
+#include <sys/_hmp.h>
+#include <sys/pcpu.h>
+
+#include <machine/atomic.h>
+
+#include "opt_global.h"
+
+#ifdef HMP
+
+#define CPUCAP_SCORE_SCALE 1024
+#define CPUCAP_SCORE_MAX CPUCAP_SCORE_SCALE
+#define CPUCAP_SCORE_DEFAULT CPUCAP_SCORE_MAX
+
+/* Score normalization macro functions */
+#define CPUCAP_SCORE_NORMAL_FROM(x, y) \
+ (((x) * CPUCAP_SCORE_SCALE) / (y))
+#define CPUCAP_SCORE_NORMAL_FROM_255(x) CPUCAP_SCORE_NORMAL_FROM((x), 255)
+#define CPUCAP_SCORE_NORMAL_FROM_1024(x) CPUCAP_SCORE_NORMAL_FROM((x), 1024)
+
+#define CPUCAP_SCORE_NORMAL_TO(x, y) \
+ (((x) * (y)) / CPUCAP_SCORE_SCALE)
+/*
+ * Users should be able to see their CPUCAP_SCORE through sysctl. CPUCAP_SCORE
+ * is displayed as percentage so they don't need to know CPUCAP_SCORE_MAX.
+ */
+#define CPUCAP_SCORE_NORMAL_TO_PERCENT(x) CPUCAP_SCORE_NORMAL_TO((x), 100)
+
+/*
+ * Capability indices from the Intel SDM
+ *
+ * These are bit positions in CPUID.06H:EDX[7:0].
+ * Bits 0 and 1 are always set together (either 00 or 11).
+ * Only valid if the corresponding bit is set in the capability bitmap.
+ *
+ * If not provided, CPUCAP_SCORE_DEFAULT will be used.
+ */
+#define CPUCAP_CAP_PERF 0 /* Performance capability */
+#define CPUCAP_CAP_EFF 1 /* Efficiency capability */
+/* Bit 2-7 are reserved */
+
+/*
+ * Flags (stored in pc_cap_flags)
+ */
+#define CPUCAP_FLAG_VALID (1u << 0) /* Capability data is valid */
+#define CPUCAP_FLAG_DYNAMIC (1u << 1) /* Scores update at runtime */
+#define CPUCAP_FLAG_THROTTLED (1u << 2) /* CPU currently throttled */
+
+/*
+ * System-wide CPU capability state
+ *
+ * Do not modify directly — use the accessors and setters below.
+ *
+ * capacity : processor's heterogeneity.  Static, initialized at boot time.
+ * score    : processor's real-time status.  Dynamic, updated by drivers.
+ *
+ * Both use cpucap_score_t and are normalized to 0-1024.
+ */
+struct cpucap_t {
+	cpucap_score_t total_capacity;	/* Precalculated for scheduler */
+	uint8_t class_count;	/* Number of valid classes (>= 1) */
+	uint8_t cap_count;	/* Number of valid capabilities */
+	uint8_t cap_bitmap;	/* Enabled capabilities, CPUCAP_CAP_* bits */
+	bool has_scores;	/* Runtime score updates available */
+};
+
+extern struct cpucap_t cpucap;
+
+/*
+ * Accessors - use atomic loads for runtime-updated fields
+ */
+
+/* Return the static capacity score of CPU pc (set at boot, not atomic). */
+static inline cpucap_score_t
+cpucap_capacity(struct pcpu *pc)
+{
+	return (pc->pc_cap_capacity);
+}
+
+/*
+ * Return the dynamic score of CPU pc for (class, cap), falling back to the
+ * static capacity when no dynamic scores exist and to CPUCAP_SCORE_DEFAULT
+ * for an unknown capability.  Negative indices must be rejected here too:
+ * they would read out of bounds from pc_cap_scores.
+ */
+static inline cpucap_score_t
+cpucap_score(struct pcpu *pc, int class, int cap)
+{
+	if (__predict_false(!cpucap.has_scores))
+		return (pc->pc_cap_capacity); /* Fall back to capacity */
+	if (__predict_false(class < 0 || class >= cpucap.class_count))
+		class = 0;
+	if (__predict_false(cap < 0 || cap >= cpucap.cap_count))
+		return (CPUCAP_SCORE_DEFAULT);
+
+	return (atomic_load_acq_16(&pc->pc_cap_scores[class][cap]));
+}
+
+/* Convenience wrapper: performance-capability score for class. */
+static inline cpucap_score_t
+cpucap_perf_score(struct pcpu *pc, int class)
+{
+	return cpucap_score(pc, class, CPUCAP_CAP_PERF);
+}
+
+/* Convenience wrapper: efficiency-capability score for class. */
+static inline cpucap_score_t
+cpucap_eff_score(struct pcpu *pc, int class)
+{
+	return cpucap_score(pc, class, CPUCAP_CAP_EFF);
+}
+
+/* Return non-zero if CPU pc is currently marked throttled. */
+static inline int
+cpucap_throttled(struct pcpu *pc)
+{
+	/* Wrapped to honor the style(9) 80-column limit. */
+	return ((atomic_load_acq_8(&pc->pc_cap_flags) &
+	    CPUCAP_FLAG_THROTTLED) != 0);
+}
+
+/* Return non-zero if CPU pc provides runtime-updated scores. */
+static inline int
+cpucap_is_dynamic(struct pcpu *pc)
+{
+	return ((pc->pc_cap_flags & CPUCAP_FLAG_DYNAMIC) != 0);
+}
+
+/*
+ * Setters - called by MD initializer and drivers
+ * Use atomic stores for fields that may be read concurrently
+ */
+
+/*
+ * Set the static capacity of CPU pc and mark the data valid.  The capacity
+ * store itself is not atomic — NOTE(review): confirm readers consult
+ * pc_cap_capacity only after observing CPUCAP_FLAG_VALID.
+ */
+static inline void
+cpucap_cpu_set_capacity(struct pcpu *pc, cpucap_score_t cap)
+{
+	pc->pc_cap_capacity = cap;
+	atomic_set_8(&pc->pc_cap_flags, CPUCAP_FLAG_VALID);
+}
+
+/*
+ * Publish a new score for (class, cap) on CPU pc.  Bounds-check both
+ * indices, including negatives: a negative class or cap would write out
+ * of bounds into pc_cap_scores.  Out-of-range updates are dropped.
+ */
+static inline void
+cpucap_cpu_set_score(struct pcpu *pc, int class, int cap, cpucap_score_t score)
+{
+	if (class >= 0 && class < CPUCAP_CLASS_MAX &&
+	    cap >= 0 && cap < CPUCAP_CAP_MAX)
+		atomic_store_rel_16(&pc->pc_cap_scores[class][cap], score);
+}
+
+/* Atomically OR flags (CPUCAP_FLAG_*) into pc_cap_flags. */
+static inline void
+cpucap_cpu_set_flags(struct pcpu *pc, uint8_t flags)
+{
+	atomic_set_8(&pc->pc_cap_flags, flags);
+}
+
+/* Atomically clear flags (CPUCAP_FLAG_*) in pc_cap_flags. */
+static inline void
+cpucap_cpu_clear_flags(struct pcpu *pc, uint8_t flags)
+{
+	atomic_clear_8(&pc->pc_cap_flags, flags);
+}
+
+/*
+ * Called by MD initializer to set values
+ */
+void cpucap_set_class_count(int count);
+void cpucap_set_cap_count(int count, uint8_t bitmap);
+void cpucap_set_capacity_varies(int varies);
+void cpucap_set_dynamic_scores(int dynamic);
+
+/*
+ * MI thread classification - wrapper that calls MD implementation
+ * Returns thread class (0 to class_count-1)
+ *
+ * On Intel: May use hardware ITD classification
+ * On others: Uses software heuristic (priority-based)
+ */
+int cpucap_get_class(struct thread *td);
+
+/*
+ * Called by scheduler
+ */
+int cpucap_highest_capacity_cpu(const cpuset_t *mask);
+int cpucap_best_cpu(const cpuset_t *mask, int class, int cap);
+
+/*
+ * Machine-dependent implementation.
+ * If the architecture does not provide one, the weak default is used.
+ */
+void cpucap_md_init(void);
+int cpucap_md_get_class(struct thread *td);
+
+/*
+ * ACPI CPPC interface - MI, implemented in kern/subr_cpucap_acpi.c
+ * Called during ACPI CPU device attach
+ */
+void cpucap_acpi_init(void);
+void cpucap_acpi_cpu_attach(device_t dev);
+
+#endif /* HMP */
+#endif /* !LOCORE */
+#endif /* _KERNEL */
+#endif /* _SYS_HMP_H_ */
diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h
--- a/sys/sys/pcpu.h
+++ b/sys/sys/pcpu.h
@@ -39,6 +39,7 @@
#include <sys/param.h>
#include <sys/_cpuset.h>
+#include <sys/_hmp.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/_sx.h>
@@ -194,6 +195,10 @@
uintptr_t pc_dynamic; /* Dynamic per-cpu data area */
uint64_t pc_early_dummy_counter; /* Startup time counter(9) */
uintptr_t pc_zpcpu_offset; /* Offset into zpcpu allocs */
+ cpucap_score_t pc_cap_capacity; /* Capacity for load balancing */
+ cpucap_score_t pc_cap_scores[CPUCAP_CLASS_MAX][CPUCAP_CAP_MAX];
+ /* Dynamic scores for thread placement */
+ uint8_t pc_cap_flags; /* capability flags, CPUCAP_FLAG_* */
/*
* Keep MD fields last, so that CPU-specific variations on a

File Metadata

Mime Type
text/plain
Expires
Wed, Jan 21, 9:37 AM (2 h, 9 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27799390
Default Alt Text
D54674.id169733.diff (15 KB)

Event Timeline