Index: head/sys/arm64/arm64/identcpu.c
===================================================================
--- head/sys/arm64/arm64/identcpu.c
+++ head/sys/arm64/arm64/identcpu.c
@@ -46,7 +46,6 @@
 #include
 #include
 
-static int ident_lock;
 static void print_cpu_features(u_int cpu);
 static u_long parse_cpu_features_hwcap(u_int cpu);
 
@@ -67,6 +66,8 @@
 SYSCTL_INT(_machdep_cache, OID_AUTO, allow_idc, CTLFLAG_RDTUN, &allow_idc, 0,
     "Allow optimizations based on the IDC cache bit");
 
+static void check_cpu_regs(u_int cpu);
+
 /*
  * The default implementation of I-cache sync assumes we have an
  * aliasing cache until we know otherwise.
@@ -1063,8 +1064,9 @@
 
 	dic = (allow_dic != 0);
 	idc = (allow_idc != 0);
+
 	CPU_FOREACH(cpu) {
-		print_cpu_features(cpu);
+		check_cpu_regs(cpu);
 		hwcap = parse_cpu_features_hwcap(cpu);
 		if (elf_hwcap == 0)
 			elf_hwcap = hwcap;
@@ -1096,8 +1098,18 @@
 	install_undef_handler(true, user_mrs_handler);
 }
 
-SYSINIT(identify_cpu, SI_SUB_SMP, SI_ORDER_ANY, identify_cpu_sysinit, NULL);
+SYSINIT(identify_cpu, SI_SUB_CPU, SI_ORDER_ANY, identify_cpu_sysinit, NULL);
 
+static void
+cpu_features_sysinit(void *dummy __unused)
+{
+	u_int cpu;
+
+	CPU_FOREACH(cpu)
+		print_cpu_features(cpu);
+}
+SYSINIT(cpu_features, SI_SUB_SMP, SI_ORDER_ANY, cpu_features_sysinit, NULL);
+
 static u_long
 parse_cpu_features_hwcap(u_int cpu)
 {
@@ -1468,7 +1480,8 @@
 		if (impl_id == cpu_implementers[i].impl_id ||
 		    cpu_implementers[i].impl_id == 0) {
 			cpu_desc[cpu].cpu_impl = impl_id;
-			cpu_desc[cpu].cpu_impl_name = cpu_implementers[i].impl_name;
+			cpu_desc[cpu].cpu_impl_name =
+			    cpu_implementers[i].impl_name;
 			cpu_partsp = cpu_implementers[i].cpu_parts;
 			break;
 		}
@@ -1505,77 +1518,68 @@
 	cpu_desc[cpu].id_aa64mmfr2 = READ_SPECIALREG(id_aa64mmfr2_el1);
 	cpu_desc[cpu].id_aa64pfr0 = READ_SPECIALREG(id_aa64pfr0_el1);
 	cpu_desc[cpu].id_aa64pfr1 = READ_SPECIALREG(id_aa64pfr1_el1);
+}
 
-	if (cpu != 0) {
-		/*
-		 * This code must run on one cpu at a time, but we are
-		 * not scheduling on the current core so implement a
-		 * simple spinlock.
-		 */
-		while (atomic_cmpset_acq_int(&ident_lock, 0, 1) == 0)
-			__asm __volatile("wfe" ::: "memory");
+static void
+check_cpu_regs(u_int cpu)
+{
 
-		switch (cpu_aff_levels) {
-		case 0:
-			if (CPU_AFF0(cpu_desc[cpu].mpidr) !=
-			    CPU_AFF0(cpu_desc[0].mpidr))
-				cpu_aff_levels = 1;
-			/* FALLTHROUGH */
-		case 1:
-			if (CPU_AFF1(cpu_desc[cpu].mpidr) !=
-			    CPU_AFF1(cpu_desc[0].mpidr))
-				cpu_aff_levels = 2;
-			/* FALLTHROUGH */
-		case 2:
-			if (CPU_AFF2(cpu_desc[cpu].mpidr) !=
-			    CPU_AFF2(cpu_desc[0].mpidr))
-				cpu_aff_levels = 3;
-			/* FALLTHROUGH */
-		case 3:
-			if (CPU_AFF3(cpu_desc[cpu].mpidr) !=
-			    CPU_AFF3(cpu_desc[0].mpidr))
-				cpu_aff_levels = 4;
-			break;
-		}
+	switch (cpu_aff_levels) {
+	case 0:
+		if (CPU_AFF0(cpu_desc[cpu].mpidr) !=
+		    CPU_AFF0(cpu_desc[0].mpidr))
+			cpu_aff_levels = 1;
+		/* FALLTHROUGH */
+	case 1:
+		if (CPU_AFF1(cpu_desc[cpu].mpidr) !=
+		    CPU_AFF1(cpu_desc[0].mpidr))
+			cpu_aff_levels = 2;
+		/* FALLTHROUGH */
+	case 2:
+		if (CPU_AFF2(cpu_desc[cpu].mpidr) !=
+		    CPU_AFF2(cpu_desc[0].mpidr))
+			cpu_aff_levels = 3;
+		/* FALLTHROUGH */
+	case 3:
+		if (CPU_AFF3(cpu_desc[cpu].mpidr) !=
+		    CPU_AFF3(cpu_desc[0].mpidr))
+			cpu_aff_levels = 4;
+		break;
+	}
 
-		if (cpu_desc[cpu].id_aa64afr0 != cpu_desc[0].id_aa64afr0)
-			cpu_print_regs |= PRINT_ID_AA64_AFR0;
-		if (cpu_desc[cpu].id_aa64afr1 != cpu_desc[0].id_aa64afr1)
-			cpu_print_regs |= PRINT_ID_AA64_AFR1;
+	if (cpu_desc[cpu].id_aa64afr0 != cpu_desc[0].id_aa64afr0)
+		cpu_print_regs |= PRINT_ID_AA64_AFR0;
+	if (cpu_desc[cpu].id_aa64afr1 != cpu_desc[0].id_aa64afr1)
+		cpu_print_regs |= PRINT_ID_AA64_AFR1;
 
-		if (cpu_desc[cpu].id_aa64dfr0 != cpu_desc[0].id_aa64dfr0)
-			cpu_print_regs |= PRINT_ID_AA64_DFR0;
-		if (cpu_desc[cpu].id_aa64dfr1 != cpu_desc[0].id_aa64dfr1)
-			cpu_print_regs |= PRINT_ID_AA64_DFR1;
+	if (cpu_desc[cpu].id_aa64dfr0 != cpu_desc[0].id_aa64dfr0)
+		cpu_print_regs |= PRINT_ID_AA64_DFR0;
+	if (cpu_desc[cpu].id_aa64dfr1 != cpu_desc[0].id_aa64dfr1)
+		cpu_print_regs |= PRINT_ID_AA64_DFR1;
 
-		if (cpu_desc[cpu].id_aa64isar0 != cpu_desc[0].id_aa64isar0)
-			cpu_print_regs |= PRINT_ID_AA64_ISAR0;
-		if (cpu_desc[cpu].id_aa64isar1 != cpu_desc[0].id_aa64isar1)
-			cpu_print_regs |= PRINT_ID_AA64_ISAR1;
+	if (cpu_desc[cpu].id_aa64isar0 != cpu_desc[0].id_aa64isar0)
+		cpu_print_regs |= PRINT_ID_AA64_ISAR0;
+	if (cpu_desc[cpu].id_aa64isar1 != cpu_desc[0].id_aa64isar1)
+		cpu_print_regs |= PRINT_ID_AA64_ISAR1;
 
-		if (cpu_desc[cpu].id_aa64mmfr0 != cpu_desc[0].id_aa64mmfr0)
-			cpu_print_regs |= PRINT_ID_AA64_MMFR0;
-		if (cpu_desc[cpu].id_aa64mmfr1 != cpu_desc[0].id_aa64mmfr1)
-			cpu_print_regs |= PRINT_ID_AA64_MMFR1;
-		if (cpu_desc[cpu].id_aa64mmfr2 != cpu_desc[0].id_aa64mmfr2)
-			cpu_print_regs |= PRINT_ID_AA64_MMFR2;
+	if (cpu_desc[cpu].id_aa64mmfr0 != cpu_desc[0].id_aa64mmfr0)
+		cpu_print_regs |= PRINT_ID_AA64_MMFR0;
+	if (cpu_desc[cpu].id_aa64mmfr1 != cpu_desc[0].id_aa64mmfr1)
+		cpu_print_regs |= PRINT_ID_AA64_MMFR1;
+	if (cpu_desc[cpu].id_aa64mmfr2 != cpu_desc[0].id_aa64mmfr2)
+		cpu_print_regs |= PRINT_ID_AA64_MMFR2;
 
-		if (cpu_desc[cpu].id_aa64pfr0 != cpu_desc[0].id_aa64pfr0)
-			cpu_print_regs |= PRINT_ID_AA64_PFR0;
-		if (cpu_desc[cpu].id_aa64pfr1 != cpu_desc[0].id_aa64pfr1)
-			cpu_print_regs |= PRINT_ID_AA64_PFR1;
+	if (cpu_desc[cpu].id_aa64pfr0 != cpu_desc[0].id_aa64pfr0)
+		cpu_print_regs |= PRINT_ID_AA64_PFR0;
+	if (cpu_desc[cpu].id_aa64pfr1 != cpu_desc[0].id_aa64pfr1)
+		cpu_print_regs |= PRINT_ID_AA64_PFR1;
 
-		if (cpu_desc[cpu].ctr != cpu_desc[0].ctr) {
-			/*
-			 * If the cache type register is different we may
-			 * have a different l1 cache type.
-			 */
-			identify_cache(cpu_desc[cpu].ctr);
-			cpu_print_regs |= PRINT_CTR_EL0;
-		}
-
-		/* Wake up the other CPUs */
-		atomic_store_rel_int(&ident_lock, 0);
-		__asm __volatile("sev" ::: "memory");
+	if (cpu_desc[cpu].ctr != cpu_desc[0].ctr) {
+		/*
+		 * If the cache type register is different we may
+		 * have a different l1 cache type.
+		 */
+		identify_cache(cpu_desc[cpu].ctr);
+		cpu_print_regs |= PRINT_CTR_EL0;
 	}
 }
Index: head/sys/arm64/arm64/mp_machdep.c
===================================================================
--- head/sys/arm64/arm64/mp_machdep.c
+++ head/sys/arm64/arm64/mp_machdep.c
@@ -212,6 +212,19 @@
 	    "mov x18, %0 \n"
 	    "msr tpidr_el1, %0" :: "r"(pcpup));
 
+	/*
+	 * Identify current CPU. This is necessary to setup
+	 * affinity registers and to provide support for
+	 * runtime chip identification.
+	 *
+	 * We need this before signalling the CPU is ready to
+	 * let the boot CPU use the results.
+	 */
+	identify_cpu();
+
+	/* Ensure the stores in identify_cpu have completed */
+	atomic_thread_fence_acq_rel();
+
 	/* Signal the BSP and spin until it has released all APs. */
 	atomic_add_int(&aps_started, 1);
 	while (!atomic_load_int(&aps_ready))
@@ -227,12 +240,6 @@
 	    ("pmap0 doesn't match cpu %ld's ttbr0", cpu));
 	pcpup->pc_curpmap = pmap0;
 
-	/*
-	 * Identify current CPU. This is necessary to setup
-	 * affinity registers and to provide support for
-	 * runtime chip identification.
-	 */
-	identify_cpu();
 	install_cpu_errata();
 
 	intr_pic_init_secondary();
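
A note on the SYSINIT split: the APs are spun up while the SI_SUB_CPU sysinits run (subr_smp.c registers mp_start() in that subsystem), so by the time identify_cpu_sysinit() executes at SI_ORDER_ANY of the same subsystem every AP has already run identify_cpu() and parked in its aps_ready loop. Checking the registers can therefore move to SI_SUB_CPU, while printing stays at SI_SUB_SMP, after the APs have been released. A minimal sketch of the same two-stage pattern, assuming hypothetical sysinits named example_check and example_report:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

/* Stage 1: runs at SI_SUB_CPU, once the APs have filled in their state. */
static void
example_check_sysinit(void *dummy __unused)
{

	printf("checking per-CPU state\n");
}
SYSINIT(example_check, SI_SUB_CPU, SI_ORDER_ANY, example_check_sysinit, NULL);

/* Stage 2: runs at SI_SUB_SMP, after the APs have been released. */
static void
example_report_sysinit(void *dummy __unused)
{

	printf("reporting per-CPU state\n");
}
SYSINIT(example_report, SI_SUB_SMP, SI_ORDER_ANY, example_report_sysinit, NULL);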
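
The deleted ident_lock was a hand-rolled spinlock built on the ARMv8 WFE/SEV event mechanism: an AP that lost the compare-and-set slept in WFE, and the holder released the lock with a store-release and issued SEV to wake the waiters. It is no longer needed because check_cpu_regs() now runs only on the boot CPU, one CPU at a time, from the sysinit loop. For reference, the idiom in isolation (demo_lock and the enter/exit names are illustrative):

#include <sys/types.h>
#include <machine/atomic.h>

static u_int demo_lock;		/* 0 = free, 1 = held */

static void
demo_lock_enter(void)
{

	/* On a failed acquire, sleep until the holder's SEV. */
	while (atomic_cmpset_acq_int(&demo_lock, 0, 1) == 0)
		__asm __volatile("wfe" ::: "memory");
}

static void
demo_lock_exit(void)
{

	/* Drop the lock with release semantics, then wake the waiters. */
	atomic_store_rel_int(&demo_lock, 0);
	__asm __volatile("sev" ::: "memory");
}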
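
On the mp_machdep.c side, identify_cpu() writes this CPU's cpu_desc[] entry with plain stores, and the new atomic_thread_fence_acq_rel() orders those stores before the aps_started increment that the boot CPU polls, so the boot CPU cannot observe the signal without also observing the descriptor. The same publish/observe shape in isolation (data, flag, and the function names are illustrative):

#include <sys/types.h>
#include <machine/atomic.h>
#include <machine/cpu.h>

static int data;	/* stands in for cpu_desc[] */
static u_int flag;	/* stands in for aps_started */

static void
publisher(void)
{

	data = 42;			/* plain store, as in identify_cpu() */
	atomic_thread_fence_acq_rel();	/* order it before the signal... */
	atomic_add_int(&flag, 1);	/* ...that the other side polls */
}

static void
observer(void)
{

	while (atomic_load_acq_int(&flag) == 0)
		cpu_spinwait();
	/* The fence/acquire pairing guarantees data reads 42 here. */
}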