Index: sys/amd64/include/pcpu.h =================================================================== --- sys/amd64/include/pcpu.h +++ sys/amd64/include/pcpu.h @@ -39,7 +39,8 @@ struct monitorbuf { int idle_state; /* Used by cpu_idle_mwait. */ - char padding[128 - (1 * sizeof(int))]; + int stop_state; /* Used by cpustop_handler. */ + char padding[128 - (2 * sizeof(int))]; }; _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line"); @@ -90,6 +91,9 @@ #ifdef _KERNEL +#define MONITOR_STOPSTATE_RUNNING 0 +#define MONITOR_STOPSTATE_STOPPED 1 + #if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) /* Index: sys/i386/include/pcpu.h =================================================================== --- sys/i386/include/pcpu.h +++ sys/i386/include/pcpu.h @@ -43,7 +43,8 @@ struct monitorbuf { int idle_state; /* Used by cpu_idle_mwait. */ - char padding[128 - (1 * sizeof(int))]; + int stop_state; /* Used by cpustop_handler. */ + char padding[128 - (2 * sizeof(int))]; }; _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line"); @@ -90,6 +91,9 @@ #ifdef _KERNEL +#define MONITOR_STOPSTATE_RUNNING 0 +#define MONITOR_STOPSTATE_STOPPED 1 + #if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) /* Index: sys/kern/subr_smp.c =================================================================== --- sys/kern/subr_smp.c +++ sys/kern/subr_smp.c @@ -351,42 +351,68 @@ #endif volatile cpuset_t *cpus; - KASSERT(type == IPI_STOP || type == IPI_STOP_HARD #if X86 - || type == IPI_SUSPEND -#endif - , ("%s: invalid stop type", __func__)); + KASSERT(type == IPI_STOP || type == IPI_STOP_HARD + || type == IPI_SUSPEND, ("%s: invalid stop type", __func__)); if (!smp_started) return (0); CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map)); -#if X86 if (type == IPI_SUSPEND) cpus = &resuming_cpus; else -#endif cpus = &stopped_cpus; /* signal other cpus to restart */ -#if X86 if (type == IPI_SUSPEND) CPU_COPY_STORE_REL(&map, &toresume_cpus); else -#endif 
CPU_COPY_STORE_REL(&map, &started_cpus); -#if X86 + /* + * Wake up any CPUs stopped with MWAIT. From MI code we can't tell if + * MONITOR/MWAIT is enabled, but the potentially redundant writes are + * relatively inexpensive. + */ + if (type == IPI_STOP) { + struct monitorbuf *mb; + u_int id; + + CPU_FOREACH(id) { + if (!CPU_ISSET(id, &map)) + continue; + + mb = &pcpu_find(id)->pc_monitorbuf; + atomic_store_int(&mb->stop_state, + MONITOR_STOPSTATE_RUNNING); + } + } + if (!nmi_is_broadcast || nmi_kdb_lock == 0) { -#endif + /* wait for each to clear its bit */ + while (CPU_OVERLAP(cpus, &map)) + cpu_spinwait(); + } +#else /* !X86 */ + KASSERT(type == IPI_STOP || type == IPI_STOP_HARD, + ("%s: invalid stop type", __func__)); + + if (!smp_started) + return (0); + + CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map)); + + cpus = &stopped_cpus; + + /* signal other cpus to restart */ + CPU_COPY_STORE_REL(&map, &started_cpus); + /* wait for each to clear its bit */ while (CPU_OVERLAP(cpus, &map)) cpu_spinwait(); -#if X86 - } #endif - return (1); } Index: sys/x86/include/x86_smp.h =================================================================== --- sys/x86/include/x86_smp.h +++ sys/x86/include/x86_smp.h @@ -61,6 +61,11 @@ }; extern struct cpu_info *cpu_info; +/* + * Set if MWAIT does not reliably wake when the MONITORed address is written. + */ +extern bool mwait_cpustop_broken; + #ifdef COUNT_IPIS extern u_long *ipi_invltlb_counts[MAXCPU]; extern u_long *ipi_invlrng_counts[MAXCPU]; Index: sys/x86/x86/cpu_machdep.c =================================================================== --- sys/x86/x86/cpu_machdep.c +++ sys/x86/x86/cpu_machdep.c @@ -110,6 +110,13 @@ static volatile u_int cpu_reset_proxy_active; #endif +/* + * Set automatically by cpu_idle_tun() below on CPUs with known MWAIT errata. 
+ */ +bool mwait_cpustop_broken = false; +SYSCTL_BOOL(_machdep, OID_AUTO, mwait_cpustop_broken, CTLFLAG_RDTUN, + &mwait_cpustop_broken, 0, + "Can not reliably wake MONITOR/MWAIT cpus without interrupts"); /* * Machine dependent boot() routine @@ -358,6 +365,7 @@ cpu_reset(void) { #ifdef SMP + struct monitorbuf *mb; cpuset_t map; u_int cnt; @@ -378,6 +386,9 @@ /* Restart CPU #0. */ CPU_SETOF(0, &started_cpus); + mb = &pcpu_find(0)->pc_monitorbuf; + atomic_store_int(&mb->stop_state, + MONITOR_STOPSTATE_RUNNING); wmb(); cnt = 0; @@ -716,6 +727,7 @@ /* Ryzen erratas 1057, 1109. */ cpu_idle_selector("hlt"); idle_mwait = 0; + mwait_cpustop_broken = true; } if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_id == 0x506c9) { @@ -727,6 +739,7 @@ * sleep states. */ cpu_idle_apl31_workaround = 1; + mwait_cpustop_broken = true; } TUNABLE_INT_FETCH("machdep.idle_apl31", &cpu_idle_apl31_workaround); } Index: sys/x86/x86/mp_x86.c =================================================================== --- sys/x86/x86/mp_x86.c +++ sys/x86/x86/mp_x86.c @@ -161,6 +161,10 @@ unsigned int boot_address; +static bool stop_mwait = false; +SYSCTL_BOOL(_machdep, OID_AUTO, stop_mwait, CTLFLAG_RWTUN, &stop_mwait, 0, + "Use MONITOR/MWAIT when stopping CPU, if available"); + #define MiB(v) (v ## ULL << 20) void @@ -1390,23 +1394,41 @@ } /* - * Handle an IPI_STOP by saving our current context and spinning until we - * are resumed. + * Handle an IPI_STOP by saving our current context and spinning (or mwaiting, + * if available) until we are resumed. 
*/ void cpustop_handler(void) { + struct monitorbuf *mb; u_int cpu; + bool use_mwait; cpu = PCPU_GET(cpuid); savectx(&stoppcbs[cpu]); + use_mwait = (stop_mwait && (cpu_feature2 & CPUID2_MON) != 0 && + !mwait_cpustop_broken); + if (use_mwait) { + mb = PCPU_PTR(monitorbuf); + atomic_store_int(&mb->stop_state, + MONITOR_STOPSTATE_STOPPED); + } + /* Indicate that we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); /* Wait for restart */ while (!CPU_ISSET(cpu, &started_cpus)) { + if (use_mwait) { + cpu_monitor(mb, 0, 0); + if (atomic_load_int(&mb->stop_state) == + MONITOR_STOPSTATE_STOPPED) + cpu_mwait(0, MWAIT_C1); + continue; + } + ia32_pause(); /*