D2728: new x86 smp topology detection code
D2728.id5919.diff (31 KB)
Index: sys/kern/subr_smp.c
===================================================================
--- sys/kern/subr_smp.c
+++ sys/kern/subr_smp.c
@@ -39,6 +39,7 @@
#include <sys/proc.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/sched.h>
@@ -51,6 +52,10 @@
#include "opt_sched.h"
#ifdef SMP
+MALLOC_DEFINE(M_TOPO, "toponodes", "SMP topology data");
+#endif
+
+#ifdef SMP
volatile cpuset_t stopped_cpus;
volatile cpuset_t started_cpus;
volatile cpuset_t suspended_cpus;
@@ -538,7 +543,7 @@
smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg);
}
-static struct cpu_group group[MAXCPU];
+static struct cpu_group group[MAXCPU * MAX_CACHE_LEVELS + 1];
struct cpu_group *
smp_topo(void)
@@ -598,6 +603,17 @@
}
struct cpu_group *
+smp_topo_alloc(u_int count)
+{
+ static u_int index;
+ u_int curr;
+
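+	/* Bump-allocate from the static group[] array; entries are never freed. */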
+ curr = index;
+ index += count;
+ return (&group[curr]);
+}
+
+struct cpu_group *
smp_topo_none(void)
{
struct cpu_group *top;
@@ -843,3 +859,233 @@
return (error);
}
+
+#ifdef SMP
+void
+topo_init_node(struct topo_node *node)
+{
+
+ bzero(node, sizeof(*node));
+ TAILQ_INIT(&node->children);
+}
+
+void
+topo_init_root(struct topo_node *root)
+{
+
+ topo_init_node(root);
+ root->type = TOPO_TYPE_SYSTEM;
+}
+
+struct topo_node *
+topo_add_node_by_hwid(struct topo_node *parent, int hwid,
+ topo_node_type type, uintptr_t subtype)
+{
+ struct topo_node *node;
+
+ TAILQ_FOREACH_REVERSE(node, &parent->children,
+ topo_children, siblings) {
+ if (node->hwid == hwid
+ && node->type == type && node->subtype == subtype) {
+ return (node);
+ }
+ }
+
+ node = malloc(sizeof(*node), M_TOPO, M_WAITOK);
+ topo_init_node(node);
+ node->parent = parent;
+ node->hwid = hwid;
+ node->type = type;
+ node->subtype = subtype;
+ TAILQ_INSERT_TAIL(&parent->children, node, siblings);
+ parent->nchildren++;
+
+ return (node);
+}
+
+struct topo_node *
+topo_find_node_by_hwid(struct topo_node *parent, int hwid,
+ topo_node_type type, uintptr_t subtype)
+{
+
+ struct topo_node *node;
+
+ TAILQ_FOREACH(node, &parent->children, siblings) {
+ if (node->hwid == hwid
+ && node->type == type && node->subtype == subtype) {
+ return (node);
+ }
+ }
+
+ return (NULL);
+}
+
+void
+topo_promote_child(struct topo_node *child)
+{
+ struct topo_node *next;
+ struct topo_node *node;
+ struct topo_node *parent;
+
+ parent = child->parent;
+ next = TAILQ_NEXT(child, siblings);
+ TAILQ_REMOVE(&parent->children, child, siblings);
+ TAILQ_INSERT_HEAD(&parent->children, child, siblings);
+
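+	/*
+	 * Move the child's former successors along with it,
+	 * preserving their relative order.
+	 */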
+ while (next != NULL) {
+ node = next;
+ next = TAILQ_NEXT(node, siblings);
+ TAILQ_REMOVE(&parent->children, node, siblings);
+ TAILQ_INSERT_AFTER(&parent->children, child, node, siblings);
+ child = node;
+ }
+}
+
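+/*
+ * Pre-order depth-first traversal of the tree rooted at 'top': return the
+ * next node after 'node', or NULL when the walk is complete.
+ */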
+struct topo_node *
+topo_next_node(struct topo_node *top, struct topo_node *node)
+{
+ struct topo_node *next;
+
+ if ((next = TAILQ_FIRST(&node->children)) != NULL)
+ return (next);
+
+ if ((next = TAILQ_NEXT(node, siblings)) != NULL)
+ return (next);
+
+ while ((node = node->parent) != top)
+ if ((next = TAILQ_NEXT(node, siblings)) != NULL)
+ return (next);
+
+ return (NULL);
+}
+
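+/*
+ * Like topo_next_node(), but skip the subtree rooted at 'node'.
+ */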
+struct topo_node *
+topo_next_nonchild_node(struct topo_node *top, struct topo_node *node)
+{
+ struct topo_node *next;
+
+ if ((next = TAILQ_NEXT(node, siblings)) != NULL)
+ return (next);
+
+ while ((node = node->parent) != top)
+ if ((next = TAILQ_NEXT(node, siblings)) != NULL)
+ return (next);
+
+ return (NULL);
+}
+
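+/*
+ * Assign a logical CPU ID to a PU node and add the CPU to the cpusets
+ * of all of the node's ancestors.
+ */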
+void
+topo_set_pu_id(struct topo_node *node, cpuid_t id)
+{
+
+ KASSERT(node->type == TOPO_TYPE_PU,
+ ("topo_set_pu_id: wrong node type: %u", node->type));
+ KASSERT(CPU_EMPTY(&node->cpuset) && node->cpu_count == 0,
+ ("topo_set_pu_id: cpuset already not empty"));
+ node->id = id;
+ CPU_SET(id, &node->cpuset);
+ node->cpu_count = 1;
+ node->subtype = 1;
+
+ while ((node = node->parent) != NULL) {
+ if (CPU_ISSET(id, &node->cpuset))
+ break;
+ CPU_SET(id, &node->cpuset);
+ node->cpu_count++;
+ }
+}
+
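+/*
+ * Check if the topology is uniform, i.e., each package has the same number
+ * of cores in it and each core has the same number of hardware threads (PUs).
+ * With 'all' set, disabled CPUs are counted as well.
+ * Returns 1 and fills in the counts for a uniform topology, 0 otherwise.
+ */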
+int
+topo_analyze(struct topo_node *topo_root, int all,
+ int *pkg_count, int *cores_per_pkg, int *thrs_per_core)
+{
+ struct topo_node *pkg_node;
+ struct topo_node *core_node;
+ struct topo_node *pu_node;
+ int thrs_per_pkg;
+ int cpp_counter;
+ int tpc_counter;
+ int tpp_counter;
+
+ *pkg_count = 0;
+ *cores_per_pkg = -1;
+ *thrs_per_core = -1;
+ thrs_per_pkg = -1;
+ pkg_node = topo_root;
+ while (pkg_node != NULL) {
+ if (pkg_node->type != TOPO_TYPE_PKG) {
+ pkg_node = topo_next_node(topo_root, pkg_node);
+ continue;
+ }
+ if (!all && CPU_EMPTY(&pkg_node->cpuset)) {
+ pkg_node = topo_next_nonchild_node(topo_root, pkg_node);
+ continue;
+ }
+
+ (*pkg_count)++;
+
+ cpp_counter = 0;
+ tpp_counter = 0;
+ core_node = pkg_node;
+ while (core_node != NULL) {
+ if (core_node->type == TOPO_TYPE_CORE) {
+ if (!all && CPU_EMPTY(&core_node->cpuset)) {
+ core_node =
+ topo_next_nonchild_node(pkg_node,
+ core_node);
+ continue;
+ }
+
+ cpp_counter++;
+
+ tpc_counter = 0;
+ pu_node = core_node;
+ while (pu_node != NULL) {
+ if (pu_node->type == TOPO_TYPE_PU &&
+ (all || !CPU_EMPTY(&pu_node->cpuset)))
+ tpc_counter++;
+ pu_node = topo_next_node(core_node,
+ pu_node);
+ }
+
+ if (*thrs_per_core == -1)
+ *thrs_per_core = tpc_counter;
+ else if (*thrs_per_core != tpc_counter)
+ return (0);
+
+ core_node = topo_next_nonchild_node(pkg_node,
+ core_node);
+ } else {
+ /* PU node directly under PKG. */
+ if (core_node->type == TOPO_TYPE_PU &&
+ (all || !CPU_EMPTY(&core_node->cpuset)))
+ tpp_counter++;
+ core_node = topo_next_node(pkg_node,
+ core_node);
+ }
+ }
+
+ if (*cores_per_pkg == -1)
+ *cores_per_pkg = cpp_counter;
+ else if (*cores_per_pkg != cpp_counter)
+ return (0);
+ if (thrs_per_pkg == -1)
+ thrs_per_pkg = tpp_counter;
+ else if (thrs_per_pkg != tpp_counter)
+ return (0);
+
+ pkg_node = topo_next_nonchild_node(topo_root, pkg_node);
+ }
+
+ KASSERT(*pkg_count > 0,
+ ("bug in topology or analysis"));
+ if (*cores_per_pkg == 0) {
+ KASSERT(*thrs_per_core == -1 && thrs_per_pkg > 0,
+ ("bug in topology or analysis"));
+ *thrs_per_core = thrs_per_pkg;
+ }
+
+ return (1);
+}
+#endif /* SMP */
+
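To illustrate the sibling reordering performed by the new topo_promote_child() above: promoting C in the list [A, B, C, D] yields [C, D, A, B]; the promoted node's former successors move with it and keep their relative order. The following standalone userland sketch (not part of the patch; it assumes a BSD-style <sys/queue.h>) reproduces just that loop:

#include <stdio.h>
#include <sys/queue.h>

struct n {
	char name;
	TAILQ_ENTRY(n) siblings;
};
TAILQ_HEAD(nlist, n);

/* Same reordering loop as topo_promote_child(), on a bare TAILQ. */
static void
promote(struct nlist *head, struct n *child)
{
	struct n *next, *node;

	next = TAILQ_NEXT(child, siblings);
	TAILQ_REMOVE(head, child, siblings);
	TAILQ_INSERT_HEAD(head, child, siblings);
	while (next != NULL) {
		node = next;
		next = TAILQ_NEXT(node, siblings);
		TAILQ_REMOVE(head, node, siblings);
		TAILQ_INSERT_AFTER(head, child, node, siblings);
		child = node;
	}
}

int
main(void)
{
	struct n nodes[] = {{ 'A' }, { 'B' }, { 'C' }, { 'D' }};
	struct nlist head = TAILQ_HEAD_INITIALIZER(head);
	struct n *np;
	unsigned int i;

	for (i = 0; i < sizeof(nodes) / sizeof(nodes[0]); i++)
		TAILQ_INSERT_TAIL(&head, &nodes[i], siblings);
	promote(&head, &nodes[2]);	/* promote 'C' */
	TAILQ_FOREACH(np, &head, siblings)
		putchar(np->name);	/* prints CDAB */
	putchar('\n');
	return (0);
}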
Index: sys/sys/smp.h
===================================================================
--- sys/sys/smp.h
+++ sys/sys/smp.h
@@ -17,9 +17,52 @@
#ifndef LOCORE
#include <sys/cpuset.h>
+#include <sys/queue.h>
/*
- * Topology of a NUMA or HTT system.
+ * Types of nodes in the topological tree.
+ */
+typedef enum {
+ /* No node has this type; can be used in topo API calls. */
+ TOPO_TYPE_DUMMY,
+ /* Processing unit aka computing unit aka logical CPU. */
+ TOPO_TYPE_PU,
+ /* Physical subdivision of a package. */
+ TOPO_TYPE_CORE,
+ /* CPU L1/L2/L3 cache. */
+ TOPO_TYPE_CACHE,
+ /* Package aka chip, equivalent to socket. */
+ TOPO_TYPE_PKG,
+ /* NUMA node. */
+ TOPO_TYPE_NODE,
+ /* Other logical or physical grouping of PUs. */
+ /* E.g. PUs on the same die, or PUs sharing an FPU. */
+ TOPO_TYPE_GROUP,
+ /* The whole system. */
+ TOPO_TYPE_SYSTEM
+} topo_node_type;
+
+/* Hardware identifier of a topology component. */
+typedef unsigned int hwid_t;
+/* Logical CPU identifier. */
+typedef int cpuid_t;
+
+/* A node in the topology. */
+struct topo_node {
+ struct topo_node *parent;
+ TAILQ_HEAD(topo_children, topo_node) children;
+ TAILQ_ENTRY(topo_node) siblings;
+ cpuset_t cpuset;
+ topo_node_type type;
+ uintptr_t subtype;
+ hwid_t hwid;
+ cpuid_t id;
+ int nchildren;
+ int cpu_count;
+};
+
+/*
+ * Scheduling topology of a NUMA or SMP system.
*
* The top level topology is an array of pointers to groups. Each group
* contains a bitmask of cpus in its group or subgroups. It may also
@@ -52,6 +95,8 @@
#define CG_SHARE_L2 2
#define CG_SHARE_L3 3
+#define MAX_CACHE_LEVELS CG_SHARE_L3
+
/*
* Behavior modifiers for load balancing and affinity.
*/
@@ -60,10 +105,29 @@
#define CG_FLAG_THREAD (CG_FLAG_HTT | CG_FLAG_SMT) /* Any threading. */
/*
- * Convenience routines for building topologies.
+ * Convenience routines for building and traversing topologies.
*/
#ifdef SMP
+void topo_init_node(struct topo_node *node);
+void topo_init_root(struct topo_node *root);
+struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid,
+ topo_node_type type, uintptr_t subtype);
+struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid,
+ topo_node_type type, uintptr_t subtype);
+void topo_promote_child(struct topo_node *child);
+struct topo_node * topo_next_node(struct topo_node *top,
+ struct topo_node *node);
+struct topo_node * topo_next_nonchild_node(struct topo_node *top,
+ struct topo_node *node);
+void topo_set_pu_id(struct topo_node *node, cpuid_t id);
+int topo_analyze(struct topo_node *topo_root, int all, int *pkg_count,
+ int *cores_per_pkg, int *thrs_per_core);
+
+#define TOPO_FOREACH(i, root) \
+ for (i = root; i != NULL; i = topo_next_node(root, i))
+
struct cpu_group *smp_topo(void);
+struct cpu_group *smp_topo_alloc(u_int count);
struct cpu_group *smp_topo_none(void);
struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
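A hypothetical consumer of the API declared above (not part of the patch): since TOPO_FOREACH walks the tree in pre-order, per-package statistics reduce to a single loop. The function name and the printf are illustrative only:

#ifdef SMP
/* Hypothetical example: print the number of logical CPUs per package. */
static void
example_report_pkgs(struct topo_node *root)
{
	struct topo_node *node;

	TOPO_FOREACH(node, root) {
		if (node->type == TOPO_TYPE_PKG)
			printf("package %u: %d logical CPUs\n",
			    node->hwid, node->cpu_count);
	}
}
#endif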
Index: sys/x86/x86/mp_x86.c
===================================================================
--- sys/x86/x86/mp_x86.c
+++ sys/x86/x86/mp_x86.c
@@ -133,19 +133,28 @@
* the APs.
*/
struct cpu_info cpu_info[MAX_APIC_ID + 1];
-int cpu_apic_ids[MAXCPU];
int apic_cpuids[MAX_APIC_ID + 1];
+int cpu_apic_ids[MAXCPU];
/* Holds pending bitmap based IPIs per CPU */
volatile u_int cpu_ipi_pending[MAXCPU];
-int cpu_logical; /* logical cpus per core */
-int cpu_cores; /* cores per package */
-
static void release_aps(void *dummy);
-static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */
static int hyperthreading_allowed = 1;
+SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_allowed, CTLFLAG_RDTUN,
+ &hyperthreading_allowed, 0, "Use Intel HTT logical CPUs");
+
+static struct topo_node topo_root;
+
+static int pkg_id_shift;
+static int core_id_shift;
+static int disabled_cpus;
+
+struct cache_info {
+ int id_shift;
+ int present;
+} static caches[MAX_CACHE_LEVELS];
void
mem_range_AP_init(void)
@@ -155,60 +164,125 @@
mem_range_softc.mr_op->initAP(&mem_range_softc);
}
-static void
-topo_probe_amd(void)
+/*
+ * Round up to the next power of two, if necessary, and then
+ * take log2.
+ * Returns -1 if argument is zero.
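+ * (Shifting a non-power-of-2 value left by one makes fls() round up.)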
+ */
+static __inline int
+mask_width(u_int x)
{
- int core_id_bits;
- int id;
- /* AMD processors do not support HTT. */
- cpu_logical = 1;
+ return (fls(x << (1 - powerof2(x))) - 1);
+}
- if ((amd_feature2 & AMDID2_CMP) == 0) {
- cpu_cores = 1;
- return;
- }
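+/*
+ * Record a data cache level reported by a deterministic cache CPUID
+ * leaf into the caches[] table.  Returns zero when the enumeration
+ * should stop, nonzero to continue with the next sub-leaf.
+ */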
+static int
+add_deterministic_cache(int type, int level, int share_count)
+{
- core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
- AMDID_COREID_SIZE_SHIFT;
- if (core_id_bits == 0) {
- cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1;
- return;
+ if (type == 0)
+ return (0);
+ if (type > 3) {
+ printf("unexpected cache type %d\n", type);
+ return (1);
+ }
+ if (type == 2) /* ignore instruction cache */
+ return (1);
+ if (level == 0 || level > MAX_CACHE_LEVELS) {
+ printf("unexpected cache level %d\n", type);
+ return (1);
}
- /* Fam 10h and newer should get here. */
- for (id = 0; id <= MAX_APIC_ID; id++) {
- /* Check logical CPU availability. */
- if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
- continue;
- /* Check if logical CPU has the same package ID. */
- if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits))
- continue;
- cpu_cores++;
+ if (caches[level - 1].present) {
+ printf("WARNING: multiple entries for L%u data cache\n", level);
+ printf("%u => %u\n", caches[level - 1].id_shift,
+ mask_width(share_count));
}
+ caches[level - 1].id_shift = mask_width(share_count);
+ caches[level - 1].present = 1;
+
+ if (caches[level - 1].id_shift > pkg_id_shift) {
+ printf("WARNING: L%u data cache covers more "
+ "APIC IDs than a package\n", level);
+ printf("%u > %u\n", caches[level - 1].id_shift, pkg_id_shift);
+ caches[level - 1].id_shift = pkg_id_shift;
+ }
+ if (caches[level - 1].id_shift < core_id_shift) {
+ printf("WARNING: L%u data cache covers less "
+ "APIC IDs than a core\n", level);
+ printf("%u < %u\n", caches[level - 1].id_shift, core_id_shift);
+ caches[level - 1].id_shift = core_id_shift;
+ }
+
+ return (1);
}
-/*
- * Round up to the next power of two, if necessary, and then
- * take log2.
- * Returns -1 if argument is zero.
- */
-static __inline int
-mask_width(u_int x)
+static void
+topo_probe_amd(void)
{
+ u_int p[4];
+ int level;
+ int share_count;
+ int type;
+ int i;
- return (fls(x << (1 - powerof2(x))) - 1);
+ /* No multi-core capability. */
+ if ((amd_feature2 & AMDID2_CMP) == 0)
+ return;
+
+ /* For families 10h and newer. */
+ pkg_id_shift = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
+ AMDID_COREID_SIZE_SHIFT;
+
+ /* For 0Fh family. */
+ if (pkg_id_shift == 0)
+ pkg_id_shift =
+ mask_width((cpu_procinfo2 & AMDID_CMP_CORES) + 1);
+
+ if ((amd_feature2 & AMDID2_TOPOLOGY) != 0) {
+ for (i = 0; ; i++) {
+ cpuid_count(0x8000001d, i, p);
+ type = p[0] & 0x1f;
+ level = (p[0] >> 5) & 0x7;
+ share_count = 1 + ((p[0] >> 14) & 0xfff);
+
+ if (!add_deterministic_cache(type, level, share_count))
+ break;
+ }
+ } else {
+ if (cpu_exthigh >= 0x80000005) {
+ cpuid_count(0x80000005, 0, p);
+ if (((p[2] >> 24) & 0xff) != 0) {
+ caches[0].id_shift = 0;
+ caches[0].present = 1;
+ }
+ }
+ if (cpu_exthigh >= 0x80000006) {
+ cpuid_count(0x80000006, 0, p);
+ if (((p[2] >> 16) & 0xffff) != 0) {
+ caches[1].id_shift = 0;
+ caches[1].present = 1;
+ }
+ if (((p[3] >> 18) & 0x3fff) != 0) {
+
+ /*
+ * TODO: Account for dual-node processors
+ * where each node within a package has its own
+ * L3 cache.
+ */
+ caches[2].id_shift = pkg_id_shift;
+ caches[2].present = 1;
+ }
+ }
+ }
}
static void
-topo_probe_0x4(void)
+topo_probe_intel_0x4(void)
{
u_int p[4];
- int pkg_id_bits;
- int core_id_bits;
int max_cores;
int max_logical;
- int id;
/* Both zero and one here mean one logical processor per package. */
max_logical = (cpu_feature & CPUID_HTT) != 0 ?
@@ -216,180 +290,432 @@
if (max_logical <= 1)
return;
- /*
- * Because of uniformity assumption we examine only
- * those logical processors that belong to the same
- * package as BSP. Further, we count number of
- * logical processors that belong to the same core
- * as BSP thus deducing number of threads per core.
- */
if (cpu_high >= 0x4) {
cpuid_count(0x04, 0, p);
max_cores = ((p[0] >> 26) & 0x3f) + 1;
} else
max_cores = 1;
- core_id_bits = mask_width(max_logical/max_cores);
- if (core_id_bits < 0)
- return;
- pkg_id_bits = core_id_bits + mask_width(max_cores);
-
- for (id = 0; id <= MAX_APIC_ID; id++) {
- /* Check logical CPU availability. */
- if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
- continue;
- /* Check if logical CPU has the same package ID. */
- if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits))
- continue;
- cpu_cores++;
- /* Check if logical CPU has the same package and core IDs. */
- if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits))
- cpu_logical++;
- }
-
- KASSERT(cpu_cores >= 1 && cpu_logical >= 1,
- ("topo_probe_0x4 couldn't find BSP"));
- cpu_cores /= cpu_logical;
- hyperthreading_cpus = cpu_logical;
+ core_id_shift = mask_width(max_logical/max_cores);
+ KASSERT(core_id_shift >= 0,
+ ("intel topo: max_cores > max_logical\n"));
+ pkg_id_shift = core_id_shift + mask_width(max_cores);
}
static void
-topo_probe_0xb(void)
+topo_probe_intel_0xb(void)
{
u_int p[4];
int bits;
- int cnt;
- int i;
- int logical;
int type;
- int x;
+ int i;
+
+ /* Fall back if CPU leaf 11 doesn't really exist. */
+ cpuid_count(0x0b, 0, p);
+ if (p[1] == 0) {
+ topo_probe_intel_0x4();
+ return;
+ }
/* We only support three levels for now. */
- for (i = 0; i < 3; i++) {
+ for (i = 0; ; i++) {
cpuid_count(0x0b, i, p);
- /* Fall back if CPU leaf 11 doesn't really exist. */
- if (i == 0 && p[1] == 0) {
- topo_probe_0x4();
- return;
- }
-
bits = p[0] & 0x1f;
- logical = p[1] &= 0xffff;
type = (p[2] >> 8) & 0xff;
- if (type == 0 || logical == 0)
+
+ if (type == 0)
break;
- /*
- * Because of uniformity assumption we examine only
- * those logical processors that belong to the same
- * package as BSP.
- */
- for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) {
- if (!cpu_info[x].cpu_present ||
- cpu_info[x].cpu_disabled)
- continue;
- if (x >> bits == boot_cpu_id >> bits)
- cnt++;
- }
+
+ /* TODO: check for duplicate (re-)assignment */
if (type == CPUID_TYPE_SMT)
- cpu_logical = cnt;
+ core_id_shift = bits;
else if (type == CPUID_TYPE_CORE)
- cpu_cores = cnt;
+ pkg_id_shift = bits;
+ else
+ printf("unknown CPU level type %d\n", type);
+ }
+
+ if (pkg_id_shift < core_id_shift) {
+ printf("WARNING: core covers more APIC IDs than a package\n");
+ core_id_shift = pkg_id_shift;
+ }
+}
+
+static void
+topo_probe_intel_caches(void)
+{
+ u_int p[4];
+ int level;
+ int share_count;
+ int type;
+ int i;
+
+ if (cpu_high < 0x4) {
+ /*
+ * Available cache levels and sizes can be determined
+ * via CPUID leaf 2, but that requires a huge table of hardcoded
+ * values, so for now just assume L1 and L2 caches potentially
+ * shared only by HTT processing units, if HTT is present.
+ */
+ caches[0].id_shift = pkg_id_shift;
+ caches[0].present = 1;
+ caches[1].id_shift = pkg_id_shift;
+ caches[1].present = 1;
+ return;
+ }
+
+ for (i = 0; ; i++) {
+ cpuid_count(0x4, i, p);
+ type = p[0] & 0x1f;
+ level = (p[0] >> 5) & 0x7;
+ share_count = 1 + ((p[0] >> 14) & 0xfff);
+
+ if (!add_deterministic_cache(type, level, share_count))
+ break;
}
- if (cpu_logical == 0)
- cpu_logical = 1;
- cpu_cores /= cpu_logical;
+}
+
+static void
+topo_probe_intel(void)
+{
+
+ /*
+ * See Intel(R) 64 Architecture Processor
+ * Topology Enumeration article for details.
+ *
+ * Note that 0x1 <= cpu_high < 4 case should be
+ * compatible with topo_probe_intel_0x4() logic when
+ * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
+ * or it should trigger the fallback otherwise.
+ */
+ if (cpu_high >= 0xb)
+ topo_probe_intel_0xb();
+ else if (cpu_high >= 0x1)
+ topo_probe_intel_0x4();
+
+ topo_probe_intel_caches();
}
/*
- * Both topology discovery code and code that consumes topology
- * information assume top-down uniformity of the topology.
- * That is, all physical packages must be identical and each
- * core in a package must have the same number of threads.
* Topology information is queried only on BSP, on which this
* code runs and for which it can query CPUID information.
- * Then topology is extrapolated on all packages using the
- * uniformity assumption.
+ * Then topology is extrapolated on all packages using the
+ * assumption that the APIC ID to hardware component ID mapping is
+ * homogeneous.
+ * That does not necessarily imply that the topology is uniform.
*/
void
topo_probe(void)
{
static int cpu_topo_probed = 0;
+ struct x86_topo_layer {
+ int type;
+ int subtype;
+ int id_shift;
+ } topo_layers[MAX_CACHE_LEVELS + 3];
+ struct topo_node *parent;
+ struct topo_node *node;
+ int layer;
+ int nlayers;
+ int node_id;
+ int i;
if (cpu_topo_probed)
return;
CPU_ZERO(&logical_cpus_mask);
+
if (mp_ncpus <= 1)
- cpu_cores = cpu_logical = 1;
+ ; /* nothing */
else if (cpu_vendor_id == CPU_VENDOR_AMD)
topo_probe_amd();
- else if (cpu_vendor_id == CPU_VENDOR_INTEL) {
- /*
- * See Intel(R) 64 Architecture Processor
- * Topology Enumeration article for details.
- *
- * Note that 0x1 <= cpu_high < 4 case should be
- * compatible with topo_probe_0x4() logic when
- * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1)
- * or it should trigger the fallback otherwise.
- */
- if (cpu_high >= 0xb)
- topo_probe_0xb();
- else if (cpu_high >= 0x1)
- topo_probe_0x4();
- }
+ else if (cpu_vendor_id == CPU_VENDOR_INTEL)
+ topo_probe_intel();
+
+ KASSERT(pkg_id_shift >= core_id_shift,
+ ("bug in APIC topology discovery"));
+
+ nlayers = 0;
+ bzero(topo_layers, sizeof(topo_layers));
+
+ topo_layers[nlayers].type = TOPO_TYPE_PKG;
+ topo_layers[nlayers].id_shift = pkg_id_shift;
+ if (bootverbose)
+ printf("Package ID shift: %u\n", topo_layers[nlayers].id_shift);
+ nlayers++;
/*
- * Fallback: assume each logical CPU is in separate
- * physical package. That is, no multi-core, no SMT.
+ * Consider all caches to be within a package/chip
+ * and "in front" of all sub-components like
+ * cores and hardware threads.
*/
- if (cpu_cores == 0 || cpu_logical == 0)
- cpu_cores = cpu_logical = 1;
+ for (i = MAX_CACHE_LEVELS - 1; i >= 0; --i) {
+ if (caches[i].present) {
+ KASSERT(caches[i].id_shift <= pkg_id_shift,
+ ("bug in APIC topology discovery"));
+ KASSERT(caches[i].id_shift >= core_id_shift,
+ ("bug in APIC topology discovery"));
+
+ topo_layers[nlayers].type = TOPO_TYPE_CACHE;
+ topo_layers[nlayers].subtype = i + 1;
+ topo_layers[nlayers].id_shift = caches[i].id_shift;
+ if (bootverbose)
+ printf("L%u cache ID shift: %u\n",
+ topo_layers[nlayers].subtype,
+ topo_layers[nlayers].id_shift);
+ nlayers++;
+ }
+ }
+
+ if (pkg_id_shift > core_id_shift) {
+ topo_layers[nlayers].type = TOPO_TYPE_CORE;
+ topo_layers[nlayers].id_shift = core_id_shift;
+ if (bootverbose)
+ printf("Core ID shift: %u\n",
+ topo_layers[nlayers].id_shift);
+ nlayers++;
+ }
+
+ topo_layers[nlayers].type = TOPO_TYPE_PU;
+ topo_layers[nlayers].id_shift = 0;
+ nlayers++;
+
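+ /*
+ * Build the topology tree: insert a path of nodes through all
+ * layers for each present APIC ID.
+ */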
+ topo_init_root(&topo_root);
+ for (i = 0; i <= MAX_APIC_ID; ++i) {
+ if (!cpu_info[i].cpu_present)
+ continue;
+
+ parent = &topo_root;
+ for (layer = 0; layer < nlayers; ++layer) {
+ node_id = i >> topo_layers[layer].id_shift;
+ parent = topo_add_node_by_hwid(parent, node_id,
+ topo_layers[layer].type,
+ topo_layers[layer].subtype);
+ }
+ }
+
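+ /*
+ * Promote the nodes on the path from the root to the BSP's PU,
+ * so that the BSP is always enumerated first.
+ */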
+ parent = &topo_root;
+ for (layer = 0; layer < nlayers; ++layer) {
+ node_id = boot_cpu_id >> topo_layers[layer].id_shift;
+ node = topo_find_node_by_hwid(parent, node_id,
+ topo_layers[layer].type,
+ topo_layers[layer].subtype);
+ topo_promote_child(node);
+ parent = node;
+ }
+
cpu_topo_probed = 1;
}
-struct cpu_group *
-cpu_topo(void)
+/*
+ * Assign logical CPU IDs to local APICs.
+ */
+void
+assign_cpu_ids(void)
{
- int cg_flags;
+ struct topo_node *node;
+ u_int smt_mask;
+
+ smt_mask = (1u << core_id_shift) - 1;
/*
- * Determine whether any threading flags are
- * necessry.
+ * Assign CPU IDs to local APIC IDs and disable any CPUs
+ * beyond MAXCPU. CPU 0 is always assigned to the BSP.
*/
- topo_probe();
- if (cpu_logical > 1 && hyperthreading_cpus)
- cg_flags = CG_FLAG_HTT;
- else if (cpu_logical > 1)
- cg_flags = CG_FLAG_SMT;
+ mp_ncpus = 0;
+ TOPO_FOREACH(node, &topo_root) {
+ if (node->type != TOPO_TYPE_PU)
+ continue;
+
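+ /*
+ * A PU whose index within its core differs from the BSP's
+ * is a hyperthread.
+ */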
+ if ((node->hwid & smt_mask) != (boot_cpu_id & smt_mask))
+ cpu_info[node->hwid].cpu_hyperthread = 1;
+
+ if (resource_disabled("lapic", node->hwid)) {
+ if (node->hwid != boot_cpu_id)
+ cpu_info[node->hwid].cpu_disabled = 1;
+ else
+ printf("Cannot disable BSP, APIC ID = %d\n",
+ node->hwid);
+ }
+
+ if (!hyperthreading_allowed &&
+ cpu_info[node->hwid].cpu_hyperthread)
+ cpu_info[node->hwid].cpu_disabled = 1;
+
+ if (mp_ncpus >= MAXCPU)
+ cpu_info[node->hwid].cpu_disabled = 1;
+
+ if (cpu_info[node->hwid].cpu_disabled) {
+ disabled_cpus++;
+ continue;
+ }
+
+ cpu_apic_ids[mp_ncpus] = node->hwid;
+ apic_cpuids[node->hwid] = mp_ncpus;
+ topo_set_pu_id(node, mp_ncpus);
+ mp_ncpus++;
+ }
+
+ KASSERT(mp_maxid >= mp_ncpus - 1,
+ ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
+ mp_ncpus));
+}
+
+/*
+ * Print various information about the SMP system hardware and setup.
+ */
+void
+cpu_mp_announce(void)
+{
+ struct topo_node *node;
+ const char *hyperthread;
+ int pkg_count;
+ int cores_per_pkg;
+ int thrs_per_core;
+
+ printf("FreeBSD/SMP: ");
+ if (topo_analyze(&topo_root, 1, &pkg_count,
+ &cores_per_pkg, &thrs_per_core)) {
+ printf("%d package(s)", pkg_count);
+ if (cores_per_pkg > 0)
+ printf(" x %d core(s)", cores_per_pkg);
+ if (thrs_per_core > 1)
+ printf(" x %d hardware threads", thrs_per_core);
+ } else {
+ printf("Non-uniform topology");
+ }
+ printf("\n");
+
+ if (disabled_cpus) {
+ printf("FreeBSD/SMP Online: ");
+ if (topo_analyze(&topo_root, 0, &pkg_count,
+ &cores_per_pkg, &thrs_per_core)) {
+ printf("%d package(s)", pkg_count);
+ if (cores_per_pkg > 0)
+ printf(" x %d core(s)", cores_per_pkg);
+ if (thrs_per_core > 1)
+ printf(" x %d hardware threads", thrs_per_core);
+ } else {
+ printf("Non-uniform topology");
+ }
+ printf("\n");
+ }
+
+ if (!bootverbose)
+ return;
+
+ TOPO_FOREACH(node, &topo_root) {
+ switch (node->type) {
+ case TOPO_TYPE_PKG:
+ printf("Package HW ID = %u (%#x)\n",
+ node->hwid, node->hwid);
+ break;
+ case TOPO_TYPE_CORE:
+ printf("\tCore HW ID = %u (%#x)\n",
+ node->hwid, node->hwid);
+ break;
+ case TOPO_TYPE_PU:
+ if (cpu_info[node->hwid].cpu_hyperthread)
+ hyperthread = "/HT";
+ else
+ hyperthread = "";
+
+ if (node->subtype == 0)
+ printf("\t\tCPU (AP%s): APIC ID: %u (%#x)"
+ "(disabled)\n", hyperthread, node->hwid,
+ node->hwid);
+ else if (node->id == 0)
+ printf("\t\tCPU0 (BSP): APIC ID: %u (%#x)\n",
+ node->hwid, node->hwid);
+ else
+ printf("\t\tCPU%u (AP%s): APIC ID: %u (%#x)\n",
+ node->id, hyperthread, node->hwid,
+ node->hwid);
+ break;
+ default:
+ /* ignored */
+ break;
+ }
+ }
+}
+
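+/*
+ * Recursively build the MI scheduling topology: mirror the part of the
+ * hardware topology tree rooted at 'root' into 'cg_root', creating one
+ * child group for each cache node found below it.
+ */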
+static void
+x86topo_add_sched_group(struct topo_node *root, struct cpu_group *cg_root)
+{
+ struct topo_node *node;
+ int nchildren;
+ int ncores;
+ int i;
+
+ KASSERT(root->type == TOPO_TYPE_SYSTEM || root->type == TOPO_TYPE_CACHE,
+ ("x86topo_add_sched_group: bad type: %u", root->type));
+ CPU_COPY(&root->cpuset, &cg_root->cg_mask);
+ cg_root->cg_count = root->cpu_count;
+ if (root->type == TOPO_TYPE_SYSTEM)
+ cg_root->cg_level = CG_SHARE_NONE;
else
- cg_flags = 0;
- if (mp_ncpus % (cpu_cores * cpu_logical) != 0) {
- printf("WARNING: Non-uniform processors.\n");
- printf("WARNING: Using suboptimal topology.\n");
- return (smp_topo_none());
+ cg_root->cg_level = root->subtype;
+
+ ncores = 0;
+ node = root;
+ while (node != NULL) {
+ if (node->type != TOPO_TYPE_CORE) {
+ node = topo_next_node(root, node);
+ continue;
+ }
+
+ ncores++;
+ node = topo_next_nonchild_node(root, node);
}
- /*
- * No multi-core or hyper-threaded.
- */
- if (cpu_logical * cpu_cores == 1)
+
+ if (cg_root->cg_level != CG_SHARE_NONE &&
+ root->cpu_count > 1 && ncores < 2)
+ cg_root->cg_flags = CG_FLAG_SMT;
+
+ nchildren = 0;
+ node = root;
+ while (node != NULL) {
+ if (node->type != TOPO_TYPE_CACHE ||
+ (root->type != TOPO_TYPE_SYSTEM &&
+ CPU_CMP(&node->cpuset, &root->cpuset) == 0)) {
+ node = topo_next_node(root, node);
+ continue;
+ }
+ nchildren++;
+ node = topo_next_nonchild_node(root, node);
+ }
+
+ cg_root->cg_child = smp_topo_alloc(nchildren);
+ cg_root->cg_children = nchildren;
+
+ node = root;
+ i = 0;
+ while (node != NULL) {
+ if (node->type != TOPO_TYPE_CACHE ||
+ (root->type != TOPO_TYPE_SYSTEM &&
+ CPU_CMP(&node->cpuset, &root->cpuset) == 0)) {
+ node = topo_next_node(root, node);
+ continue;
+ }
+ cg_root->cg_child[i].cg_parent = cg_root;
+ x86topo_add_sched_group(node, &cg_root->cg_child[i]);
+ i++;
+ node = topo_next_nonchild_node(root, node);
+ }
+}
+
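+/*
+ * Build the MI scheduling topology from the discovered hardware topology.
+ */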
+struct cpu_group *
+cpu_topo(void)
+{
+ struct cpu_group *cg_root;
+
+ if (mp_ncpus <= 1)
return (smp_topo_none());
- /*
- * Only HTT no multi-core.
- */
- if (cpu_logical > 1 && cpu_cores == 1)
- return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags));
- /*
- * Only multi-core no HTT.
- */
- if (cpu_cores > 1 && cpu_logical == 1)
- return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags));
- /*
- * Both HTT and multi-core.
- */
- return (smp_topo_2level(CG_SHARE_L2, cpu_cores,
- CG_SHARE_L1, cpu_logical, cg_flags));
+
+ cg_root = smp_topo_alloc(1);
+ x86topo_add_sched_group(&topo_root, cg_root);
+ return (cg_root);
}
@@ -473,47 +799,9 @@
}
/*
- * Print various information about the SMP system hardware and setup.
+ * AP CPUs call this to initialize themselves.
*/
void
-cpu_mp_announce(void)
-{
- const char *hyperthread;
- int i;
-
- printf("FreeBSD/SMP: %d package(s) x %d core(s)",
- mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
- if (hyperthreading_cpus > 1)
- printf(" x %d HTT threads", cpu_logical);
- else if (cpu_logical > 1)
- printf(" x %d SMT threads", cpu_logical);
- printf("\n");
-
- /* List active CPUs first. */
- printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
- for (i = 1; i < mp_ncpus; i++) {
- if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread)
- hyperthread = "/HT";
- else
- hyperthread = "";
- printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread,
- cpu_apic_ids[i]);
- }
-
- /* List disabled CPUs last. */
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled)
- continue;
- if (cpu_info[i].cpu_hyperthread)
- hyperthread = "/HT";
- else
- hyperthread = "";
- printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread,
- i);
- }
-}
-
-void
init_secondary_tail(void)
{
u_int cpuid;
@@ -574,8 +862,7 @@
printf("SMP: AP CPU #%d Launched!\n", cpuid);
/* Determine if we are a logical CPU. */
- /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
- if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
+ if (cpu_info[PCPU_GET(apic_id)].cpu_hyperthread)
CPU_SET(cpuid, &logical_cpus_mask);
if (bootverbose)
@@ -640,85 +927,13 @@
continue;
/* Don't let hyperthreads service interrupts. */
- if (cpu_logical > 1 &&
- apic_id % cpu_logical != 0)
+ if (cpu_info[apic_id].cpu_hyperthread)
continue;
intr_add_cpu(i);
}
}
-/*
- * Assign logical CPU IDs to local APICs.
- */
-void
-assign_cpu_ids(void)
-{
- u_int i;
-
- TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
- &hyperthreading_allowed);
-
- /* Check for explicitly disabled CPUs. */
- for (i = 0; i <= MAX_APIC_ID; i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
- continue;
-
- if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
- cpu_info[i].cpu_hyperthread = 1;
-
- /*
- * Don't use HT CPU if it has been disabled by a
- * tunable.
- */
- if (hyperthreading_allowed == 0) {
- cpu_info[i].cpu_disabled = 1;
- continue;
- }
- }
-
- /* Don't use this CPU if it has been disabled by a tunable. */
- if (resource_disabled("lapic", i)) {
- cpu_info[i].cpu_disabled = 1;
- continue;
- }
- }
-
- if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
- hyperthreading_cpus = 0;
- cpu_logical = 1;
- }
-
- /*
- * Assign CPU IDs to local APIC IDs and disable any CPUs
- * beyond MAXCPU. CPU 0 is always assigned to the BSP.
- *
- * To minimize confusion for userland, we attempt to number
- * CPUs such that all threads and cores in a package are
- * grouped together. For now we assume that the BSP is always
- * the first thread in a package and just start adding APs
- * starting with the BSP's APIC ID.
- */
- mp_ncpus = 1;
- cpu_apic_ids[0] = boot_cpu_id;
- apic_cpuids[boot_cpu_id] = 0;
- for (i = boot_cpu_id + 1; i != boot_cpu_id;
- i == MAX_APIC_ID ? i = 0 : i++) {
- if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
- cpu_info[i].cpu_disabled)
- continue;
-
- if (mp_ncpus < MAXCPU) {
- cpu_apic_ids[mp_ncpus] = i;
- apic_cpuids[i] = mp_ncpus;
- mp_ncpus++;
- } else
- cpu_info[i].cpu_disabled = 1;
- }
- KASSERT(mp_maxid >= mp_ncpus - 1,
- ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
- mp_ncpus));
-}
#ifdef COUNT_XINVLTLB_HITS
u_int xhits_gbl[MAXCPU];
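
For reference, a standalone userland sketch (not part of the patch) of the mask_width() helper that the probe code uses to derive APIC ID field widths; fls() is replaced by a portable equivalent and powerof2() is copied from <sys/param.h>:

#include <stdio.h>

/* fls(9) equivalent: 1-based index of the most significant set bit. */
static int
fls_(unsigned int x)
{
	return (x == 0 ? 0 : 32 - __builtin_clz(x));
}

#define powerof2(x) ((((x) - 1) & (x)) == 0)	/* as in <sys/param.h> */

/* Same expression as the patch: log2 of x rounded up; -1 for x == 0. */
static int
mask_width(unsigned int x)
{
	/* Doubling a non-power-of-2 value makes fls_() round the log2 up. */
	return (fls_(x << (1 - powerof2(x))) - 1);
}

int
main(void)
{
	/* A package with 6 cores consumes 3 APIC ID bits (rounded to 8). */
	printf("%d %d %d %d\n", mask_width(0), mask_width(1),
	    mask_width(6), mask_width(8));	/* -1 0 3 3 */
	return (0);
}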