Changeset View
Changeset View
Standalone View
Standalone View
head/sys/x86/x86/mp_x86.c
Show First 20 Lines • Show All 149 Lines • ▼ Show 20 Lines | |||||
static int hyperthreading_allowed = 1; | static int hyperthreading_allowed = 1; | ||||
SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_allowed, CTLFLAG_RDTUN, | SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_allowed, CTLFLAG_RDTUN, | ||||
&hyperthreading_allowed, 0, "Use Intel HTT logical CPUs"); | &hyperthreading_allowed, 0, "Use Intel HTT logical CPUs"); | ||||
static struct topo_node topo_root; | static struct topo_node topo_root; | ||||
static int pkg_id_shift; | static int pkg_id_shift; | ||||
static int node_id_shift; | |||||
static int core_id_shift; | static int core_id_shift; | ||||
static int disabled_cpus; | static int disabled_cpus; | ||||
struct cache_info { | struct cache_info { | ||||
int id_shift; | int id_shift; | ||||
int present; | int present; | ||||
} static caches[MAX_CACHE_LEVELS]; | } static caches[MAX_CACHE_LEVELS]; | ||||
▲ Show 20 Lines • Show All 103 Lines • ▼ Show 20 Lines | topo_probe_amd(void) | ||||
* Later families define the value as threads per compute unit, | * Later families define the value as threads per compute unit, | ||||
* so we are following AMD's nomenclature here. | * so we are following AMD's nomenclature here. | ||||
*/ | */ | ||||
if ((amd_feature2 & AMDID2_TOPOLOGY) != 0 && | if ((amd_feature2 & AMDID2_TOPOLOGY) != 0 && | ||||
CPUID_TO_FAMILY(cpu_id) >= 0x16) { | CPUID_TO_FAMILY(cpu_id) >= 0x16) { | ||||
cpuid_count(0x8000001e, 0, p); | cpuid_count(0x8000001e, 0, p); | ||||
share_count = ((p[1] >> 8) & 0xff) + 1; | share_count = ((p[1] >> 8) & 0xff) + 1; | ||||
core_id_shift = mask_width(share_count); | core_id_shift = mask_width(share_count); | ||||
/* | |||||
* Gather Nodes per Processor. The enclosing check admits every | |||||
* family >= 16h, but this is intended for Zen (17h), where each | |||||
* node is a Zeppelin die; Threadripper and EPYC CPUs will have | |||||
* multiple dies per package. Communication latency between dies | |||||
* is higher than within them. | |||||
*/ | |||||
nodes_per_socket = ((p[2] >> 8) & 0x7) + 1; | |||||
node_id_shift = pkg_id_shift - mask_width(nodes_per_socket); | |||||
} | } | ||||
if ((amd_feature2 & AMDID2_TOPOLOGY) != 0) { | if ((amd_feature2 & AMDID2_TOPOLOGY) != 0) { | ||||
for (i = 0; ; i++) { | for (i = 0; ; i++) { | ||||
cpuid_count(0x8000001d, i, p); | cpuid_count(0x8000001d, i, p); | ||||
type = p[0] & 0x1f; | type = p[0] & 0x1f; | ||||
level = (p[0] >> 5) & 0x7; | level = (p[0] >> 5) & 0x7; | ||||
share_count = 1 + ((p[0] >> 14) & 0xfff); | share_count = 1 + ((p[0] >> 14) & 0xfff); | ||||
▲ Show 20 Lines • Show All 193 Lines • ▼ Show 20 Lines | |||||
void | void | ||||
topo_probe(void) | topo_probe(void) | ||||
{ | { | ||||
static int cpu_topo_probed = 0; | static int cpu_topo_probed = 0; | ||||
struct x86_topo_layer { | struct x86_topo_layer { | ||||
int type; | int type; | ||||
int subtype; | int subtype; | ||||
int id_shift; | int id_shift; | ||||
} topo_layers[MAX_CACHE_LEVELS + 3]; | } topo_layers[MAX_CACHE_LEVELS + 4]; | ||||
struct topo_node *parent; | struct topo_node *parent; | ||||
struct topo_node *node; | struct topo_node *node; | ||||
int layer; | int layer; | ||||
int nlayers; | int nlayers; | ||||
int node_id; | int node_id; | ||||
int i; | int i; | ||||
if (cpu_topo_probed) | if (cpu_topo_probed) | ||||
Show All 15 Lines | topo_probe(void) | ||||
bzero(topo_layers, sizeof(topo_layers)); | bzero(topo_layers, sizeof(topo_layers)); | ||||
topo_layers[nlayers].type = TOPO_TYPE_PKG; | topo_layers[nlayers].type = TOPO_TYPE_PKG; | ||||
topo_layers[nlayers].id_shift = pkg_id_shift; | topo_layers[nlayers].id_shift = pkg_id_shift; | ||||
if (bootverbose) | if (bootverbose) | ||||
printf("Package ID shift: %u\n", topo_layers[nlayers].id_shift); | printf("Package ID shift: %u\n", topo_layers[nlayers].id_shift); | ||||
nlayers++; | nlayers++; | ||||
if (pkg_id_shift > node_id_shift && node_id_shift != 0) { | |||||
topo_layers[nlayers].type = TOPO_TYPE_GROUP; | |||||
topo_layers[nlayers].id_shift = node_id_shift; | |||||
if (bootverbose) | |||||
printf("Node ID shift: %u\n", | |||||
topo_layers[nlayers].id_shift); | |||||
nlayers++; | |||||
} | |||||
/* | /* | ||||
* Consider all caches to be within a package/chip | * Consider all caches to be within a package/chip | ||||
* and "in front" of all sub-components like | * and "in front" of all sub-components like | ||||
* cores and hardware threads. | * cores and hardware threads. | ||||
*/ | */ | ||||
for (i = MAX_CACHE_LEVELS - 1; i >= 0; --i) { | for (i = MAX_CACHE_LEVELS - 1; i >= 0; --i) { | ||||
if (caches[i].present) { | if (caches[i].present) { | ||||
if (node_id_shift != 0) | |||||
KASSERT(caches[i].id_shift <= node_id_shift, | |||||
("bug in APIC topology discovery")); | |||||
KASSERT(caches[i].id_shift <= pkg_id_shift, | KASSERT(caches[i].id_shift <= pkg_id_shift, | ||||
("bug in APIC topology discovery")); | ("bug in APIC topology discovery")); | ||||
KASSERT(caches[i].id_shift >= core_id_shift, | KASSERT(caches[i].id_shift >= core_id_shift, | ||||
("bug in APIC topology discovery")); | ("bug in APIC topology discovery")); | ||||
topo_layers[nlayers].type = TOPO_TYPE_CACHE; | topo_layers[nlayers].type = TOPO_TYPE_CACHE; | ||||
topo_layers[nlayers].subtype = i + 1; | topo_layers[nlayers].subtype = i + 1; | ||||
topo_layers[nlayers].id_shift = caches[i].id_shift; | topo_layers[nlayers].id_shift = caches[i].id_shift; | ||||
▲ Show 20 Lines • Show All 182 Lines • ▼ Show 20 Lines | |||||
static void | static void | ||||
x86topo_add_sched_group(struct topo_node *root, struct cpu_group *cg_root) | x86topo_add_sched_group(struct topo_node *root, struct cpu_group *cg_root) | ||||
{ | { | ||||
struct topo_node *node; | struct topo_node *node; | ||||
int nchildren; | int nchildren; | ||||
int ncores; | int ncores; | ||||
int i; | int i; | ||||
KASSERT(root->type == TOPO_TYPE_SYSTEM || root->type == TOPO_TYPE_CACHE, | KASSERT(root->type == TOPO_TYPE_SYSTEM || root->type == TOPO_TYPE_CACHE || | ||||
root->type == TOPO_TYPE_GROUP, | |||||
("x86topo_add_sched_group: bad type: %u", root->type)); | ("x86topo_add_sched_group: bad type: %u", root->type)); | ||||
CPU_COPY(&root->cpuset, &cg_root->cg_mask); | CPU_COPY(&root->cpuset, &cg_root->cg_mask); | ||||
cg_root->cg_count = root->cpu_count; | cg_root->cg_count = root->cpu_count; | ||||
if (root->type == TOPO_TYPE_SYSTEM) | if (root->type == TOPO_TYPE_SYSTEM) | ||||
cg_root->cg_level = CG_SHARE_NONE; | cg_root->cg_level = CG_SHARE_NONE; | ||||
else | else | ||||
cg_root->cg_level = root->subtype; | cg_root->cg_level = root->subtype; | ||||
Show All 23 Lines | x86topo_add_sched_group(struct topo_node *root, struct cpu_group *cg_root) | ||||
* We ignore cache nodes that cover all the same processors as the | * We ignore cache nodes that cover all the same processors as the | ||||
* root node. Also, we do not descend below found cache nodes. | * root node. Also, we do not descend below found cache nodes. | ||||
* That is, we count top-level "non-redundant" caches under the root | * That is, we count top-level "non-redundant" caches under the root | ||||
* node. | * node. | ||||
*/ | */ | ||||
nchildren = 0; | nchildren = 0; | ||||
node = root; | node = root; | ||||
while (node != NULL) { | while (node != NULL) { | ||||
if (node->type != TOPO_TYPE_CACHE || | if ((node->type != TOPO_TYPE_GROUP && | ||||
node->type != TOPO_TYPE_CACHE) || | |||||
(root->type != TOPO_TYPE_SYSTEM && | (root->type != TOPO_TYPE_SYSTEM && | ||||
CPU_CMP(&node->cpuset, &root->cpuset) == 0)) { | CPU_CMP(&node->cpuset, &root->cpuset) == 0)) { | ||||
node = topo_next_node(root, node); | node = topo_next_node(root, node); | ||||
continue; | continue; | ||||
} | } | ||||
nchildren++; | nchildren++; | ||||
node = topo_next_nonchild_node(root, node); | node = topo_next_nonchild_node(root, node); | ||||
} | } | ||||
cg_root->cg_child = smp_topo_alloc(nchildren); | cg_root->cg_child = smp_topo_alloc(nchildren); | ||||
cg_root->cg_children = nchildren; | cg_root->cg_children = nchildren; | ||||
/* | /* | ||||
* Now find again the same cache nodes as above and recursively | * Now find again the same cache nodes as above and recursively | ||||
* build scheduling topologies for them. | * build scheduling topologies for them. | ||||
*/ | */ | ||||
node = root; | node = root; | ||||
i = 0; | i = 0; | ||||
while (node != NULL) { | while (node != NULL) { | ||||
if (node->type != TOPO_TYPE_CACHE || | if ((node->type != TOPO_TYPE_GROUP && | ||||
node->type != TOPO_TYPE_CACHE) || | |||||
(root->type != TOPO_TYPE_SYSTEM && | (root->type != TOPO_TYPE_SYSTEM && | ||||
CPU_CMP(&node->cpuset, &root->cpuset) == 0)) { | CPU_CMP(&node->cpuset, &root->cpuset) == 0)) { | ||||
node = topo_next_node(root, node); | node = topo_next_node(root, node); | ||||
continue; | continue; | ||||
} | } | ||||
cg_root->cg_child[i].cg_parent = cg_root; | cg_root->cg_child[i].cg_parent = cg_root; | ||||
x86topo_add_sched_group(node, &cg_root->cg_child[i]); | x86topo_add_sched_group(node, &cg_root->cg_child[i]); | ||||
i++; | i++; | ||||
▲ Show 20 Lines • Show All 842 Lines • Show Last 20 Lines |