Index: sys/x86/x86/mp_x86.c
===================================================================
--- sys/x86/x86/mp_x86.c
+++ sys/x86/x86/mp_x86.c
@@ -760,6 +760,7 @@
 	}
 }
 
+
 /*
  * Add a scheduling group, a group of logical processors sharing
  * a particular cache (and, thus having an affinity), to the scheduling
@@ -822,12 +823,25 @@
 			node = topo_next_node(root, node);
 			continue;
 		}
+
+		/*
+		 * Skip empty nodes: they always fail the test above
+		 * and would lead to disabled PUs.
+		 */
+		if ((root->type != TOPO_TYPE_SYSTEM) &&
+		    (CPU_COUNT(&node->cpuset) == 0)) {
+			node = topo_next_node(root, node);
+			continue;
+		}
+
 		nchildren++;
 		node = topo_next_nonchild_node(root, node);
 	}
-	cg_root->cg_child = smp_topo_alloc(nchildren);
 	cg_root->cg_children = nchildren;
+	if (nchildren == 0)
+		return;
+	cg_root->cg_child = smp_topo_alloc(nchildren);
 
 	/*
 	 * Now find again the same cache nodes as above and recursively
@@ -843,6 +857,11 @@
 			node = topo_next_node(root, node);
 			continue;
 		}
+		if ((root->type != TOPO_TYPE_SYSTEM) &&
+		    (CPU_COUNT(&node->cpuset) == 0)) {
+			node = topo_next_node(root, node);
+			continue;
+		}
 		cg_root->cg_child[i].cg_parent = cg_root;
 		x86topo_add_sched_group(node, &cg_root->cg_child[i]);
 		i++;
@@ -850,6 +869,50 @@
 	}
 }
 
+/*
+ * Evaluate the minimal and maximal depths of a cpu_group tree.
+ */
+static void
+x86topo_eval_depth(struct cpu_group *cg_root, uint32_t *depth_min, uint32_t *depth_max)
+{
+	*depth_max = 1;
+	if (cg_root->cg_children == 0)
+		*depth_min = 1;
+	else
+		*depth_min = (uint32_t)-1;
+
+	for (uint32_t i = 0; i < cg_root->cg_children; i++)
+	{
+		uint32_t node_min;
+		uint32_t node_max;
+		x86topo_eval_depth(cg_root->cg_child + i, &node_min, &node_max);
+		if (node_min + 1 < *depth_min)
+			*depth_min = node_min + 1;
+		if (node_max + 1 > *depth_max)
+			*depth_max = node_max + 1;
+	}
+}
+
+/*
+ * Flatten the tree to the given level by cutting off children.
+ * Children are currently allocated from a static array in
+ * smp_topo_alloc(), so no extra cleanup is required.
+ */
+static void
+x86topo_flatten(struct cpu_group *cg_root, uint32_t level)
+{
+	if (level <= 1)
+	{
+		cg_root->cg_children = 0;
+		cg_root->cg_child = NULL;
+	}
+	else
+	{
+		for (uint32_t i = 0; i < cg_root->cg_children; i++)
+			x86topo_flatten(cg_root->cg_child + i, level - 1);
+	}
+}
+
 /*
  * Build the MI scheduling topology from the discovered hardware topology.
  */
@@ -857,12 +920,23 @@
 cpu_topo(void)
 {
 	struct cpu_group *cg_root;
+	uint32_t depth_min, depth_max;
 
 	if (mp_ncpus <= 1)
 		return (smp_topo_none());
 
 	cg_root = smp_topo_alloc(1);
 	x86topo_add_sched_group(&topo_root, cg_root);
+
+	/*
+	 * Flatten the topology to a uniform depth to avoid
+	 * unbalanced trees.  The depth can vary if some LAPICs
+	 * are disabled by a tunable.
+	 */
+	x86topo_eval_depth(cg_root, &depth_min, &depth_max);
+	if ((depth_min >= 2) && (depth_max > depth_min))
+		x86topo_flatten(cg_root, depth_min);
+
 	return (cg_root);
 }
 
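
For reviewers who want to see the intended effect outside the kernel, below is a small standalone sketch that mirrors the depth evaluation and flattening logic of the patch on a simplified tree. It is illustrative only: the toy_group structure, the toy_* function names and the example tree are made up for this note and are not part of the patch or of the kernel's cpu_group API.

#include <stdint.h>
#include <stdio.h>

struct toy_group {
	uint32_t children;		/* number of child nodes */
	struct toy_group *child;	/* array of "children" elements */
};

/* Same idea as x86topo_eval_depth(): report minimal and maximal leaf depth. */
static void
toy_eval_depth(struct toy_group *g, uint32_t *dmin, uint32_t *dmax)
{
	uint32_t i, cmin, cmax;

	*dmax = 1;
	*dmin = (g->children == 0) ? 1 : UINT32_MAX;
	for (i = 0; i < g->children; i++) {
		toy_eval_depth(&g->child[i], &cmin, &cmax);
		if (cmin + 1 < *dmin)
			*dmin = cmin + 1;
		if (cmax + 1 > *dmax)
			*dmax = cmax + 1;
	}
}

/* Same idea as x86topo_flatten(): cut the tree off below the given level. */
static void
toy_flatten(struct toy_group *g, uint32_t level)
{
	uint32_t i;

	if (level <= 1) {
		g->children = 0;
		g->child = NULL;
		return;
	}
	for (i = 0; i < g->children; i++)
		toy_flatten(&g->child[i], level - 1);
}

int
main(void)
{
	/* Unbalanced tree: root -> { leaf, inner -> { leaf, leaf } }. */
	struct toy_group leaves[2] = { { 0, NULL }, { 0, NULL } };
	struct toy_group level1[2] = { { 0, NULL }, { 2, leaves } };
	struct toy_group root = { 2, level1 };
	uint32_t dmin, dmax;

	toy_eval_depth(&root, &dmin, &dmax);
	printf("before: min depth %u, max depth %u\n", dmin, dmax);
	if (dmin >= 2 && dmax > dmin)
		toy_flatten(&root, dmin);
	toy_eval_depth(&root, &dmin, &dmax);
	printf("after:  min depth %u, max depth %u\n", dmin, dmax);
	return (0);
}

Built with a plain "cc -Wall" and run, the sketch reports a minimal/maximal depth of 2/3 for the unbalanced example tree and 2/2 after cutting it down to the minimal depth, which is the same balancing effect cpu_topo() is meant to achieve when some logical CPUs have been disabled by a tunable.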