Index: sys/kern/subr_smp.c
===================================================================
--- sys/kern/subr_smp.c
+++ sys/kern/subr_smp.c
@@ -378,6 +378,26 @@
 	CPU_COPY_STORE_REL(&map, &started_cpus);
 
 #if X86
+	/*
+	 * Wake up any CPUs stopped with MWAIT.  From MI code we can't tell if
+	 * MONITOR/MWAIT is enabled, but the potentially redundant writes are
+	 * relatively inexpensive.
+	 */
+	if (type == IPI_STOP) {
+		struct monitorbuf *mb;
+		u_int id;
+
+		CPU_FOREACH(id) {
+			if (!CPU_ISSET(id, &map))
+				continue;
+
+			mb = (void *)pcpu_find(id)->pc_monitorbuf;
+			atomic_store_int(&mb->stop_state,
+			    MONITOR_STOPSTATE_RUNNING);
+		}
+		wmb();
+	}
+
 	if (!nmi_is_broadcast || nmi_kdb_lock == 0) {
 #endif
 		/* wait for each to clear its bit */
Index: sys/x86/include/x86_smp.h
===================================================================
--- sys/x86/include/x86_smp.h
+++ sys/x86/include/x86_smp.h
@@ -61,6 +61,16 @@
 };
 extern struct cpu_info *cpu_info;
 
+struct monitorbuf {
+	/* Used by, e.g., cpu_idle_mwait. */
+	int idle_state;
+	/* Used by cpustop_handler. */
+	int stop_state;
+#define	MONITOR_STOPSTATE_RUNNING	0
+#define	MONITOR_STOPSTATE_STOPPED	1
+};
+extern bool mwait_wakeup_broken;
+
 #ifdef COUNT_IPIS
 extern u_long *ipi_invltlb_counts[MAXCPU];
 extern u_long *ipi_invlrng_counts[MAXCPU];
Index: sys/x86/x86/cpu_machdep.c
===================================================================
--- sys/x86/x86/cpu_machdep.c
+++ sys/x86/x86/cpu_machdep.c
@@ -110,6 +110,13 @@
 static volatile u_int cpu_reset_proxy_active;
 #endif
 
+/*
+ * Automatically initialized per CPU errata in cpu_idle_tun below.
+ */
+bool mwait_wakeup_broken = false;
+SYSCTL_BOOL(_machdep, OID_AUTO, mwait_wakeup_broken, CTLFLAG_RDTUN,
+    &mwait_wakeup_broken, 0,
+    "Can not reliably wake MONITOR/MWAIT cpus without interrupts");
 
 /*
  * Machine dependent boot() routine
@@ -164,7 +171,7 @@
 	 * but all Intel CPUs provide hardware coordination.
 	 */
 
-	state = (int *)PCPU_PTR(monitorbuf);
+	state = &((struct monitorbuf *)PCPU_PTR(monitorbuf))->idle_state;
 	KASSERT(atomic_load_int(state) == STATE_SLEEPING,
 	    ("cpu_mwait_cx: wrong monitorbuf state"));
 	atomic_store_int(state, STATE_MWAIT);
@@ -358,6 +365,7 @@
 cpu_reset(void)
 {
 #ifdef SMP
+	struct monitorbuf *mb;
 	cpuset_t map;
 	u_int cnt;
 
@@ -378,6 +386,9 @@
 
 		/* Restart CPU #0. */
 		CPU_SETOF(0, &started_cpus);
+		mb = (void *)pcpu_find(0)->pc_monitorbuf;
+		atomic_store_int(&mb->stop_state,
+		    MONITOR_STOPSTATE_RUNNING);
 		wmb();
 
 		cnt = 0;
@@ -422,7 +433,7 @@
 {
 	int *state;
 
-	state = (int *)PCPU_PTR(monitorbuf);
+	state = &((struct monitorbuf *)PCPU_PTR(monitorbuf))->idle_state;
 	atomic_store_int(state, STATE_SLEEPING);
 
 	/* See comments in cpu_idle_hlt(). */
@@ -441,7 +452,7 @@
 {
 	int *state;
 
-	state = (int *)PCPU_PTR(monitorbuf);
+	state = &((struct monitorbuf *)PCPU_PTR(monitorbuf))->idle_state;
 	atomic_store_int(state, STATE_SLEEPING);
 
 	/*
@@ -473,7 +484,7 @@
 {
 	int *state;
 
-	state = (int *)PCPU_PTR(monitorbuf);
+	state = &((struct monitorbuf *)PCPU_PTR(monitorbuf))->idle_state;
 	atomic_store_int(state, STATE_MWAIT);
 
 	/* See comments in cpu_idle_hlt(). */
@@ -498,7 +509,7 @@
 	int *state;
 	int i;
 
-	state = (int *)PCPU_PTR(monitorbuf);
+	state = &((struct monitorbuf *)PCPU_PTR(monitorbuf))->idle_state;
 	atomic_store_int(state, STATE_RUNNING);
 
 	/*
@@ -598,9 +609,11 @@
 int
 cpu_idle_wakeup(int cpu)
 {
+	struct monitorbuf *mb;
 	int *state;
 
-	state = (int *)pcpu_find(cpu)->pc_monitorbuf;
+	mb = (struct monitorbuf *)pcpu_find(cpu)->pc_monitorbuf;
+	state = &mb->idle_state;
 	switch (atomic_load_int(state)) {
 	case STATE_SLEEPING:
 		return (0);
@@ -714,6 +727,7 @@
 		/* Ryzen erratas 1057, 1109. */
 		cpu_idle_selector("hlt");
 		idle_mwait = 0;
+		mwait_wakeup_broken = true;
 	}
 
 	if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_id == 0x506c9) {
@@ -725,6 +739,7 @@
 		 * sleep states.
 		 */
 		cpu_idle_apl31_workaround = 1;
+		mwait_wakeup_broken = true;
 	}
 	TUNABLE_INT_FETCH("machdep.idle_apl31", &cpu_idle_apl31_workaround);
 }
Index: sys/x86/x86/mp_x86.c
===================================================================
--- sys/x86/x86/mp_x86.c
+++ sys/x86/x86/mp_x86.c
@@ -161,6 +161,10 @@
 
 unsigned int boot_address;
 
+static bool stop_mwait = false;
+SYSCTL_BOOL(_machdep, OID_AUTO, stop_mwait, CTLFLAG_RWTUN, &stop_mwait, 0,
+    "Use MONITOR/MWAIT when stopping CPU, if available");
+
 #define	MiB(v)	(v ## ULL << 20)
 
 void
@@ -1390,13 +1394,15 @@
 }
 
 /*
- * Handle an IPI_STOP by saving our current context and spinning until we
- * are resumed.
+ * Handle an IPI_STOP by saving our current context and spinning (or mwaiting,
+ * if available) until we are resumed.
  */
 void
 cpustop_handler(void)
 {
+	struct monitorbuf *mb;
 	u_int cpu;
+	bool use_mwait;
 
 	cpu = PCPU_GET(cpuid);
 
@@ -1405,8 +1411,23 @@
 	/* Indicate that we are stopped */
 	CPU_SET_ATOMIC(cpu, &stopped_cpus);
 
+	use_mwait = (stop_mwait && (cpu_feature2 & CPUID2_MON) &&
+	    !mwait_wakeup_broken);
+	if (use_mwait)
+		mb = (void *)PCPU_PTR(monitorbuf);
+
 	/* Wait for restart */
 	while (!CPU_ISSET(cpu, &started_cpus)) {
+		if (use_mwait) {
+			atomic_store_int(&mb->stop_state,
+			    MONITOR_STOPSTATE_STOPPED);
+			cpu_monitor(mb, 0, 0);
+			if (atomic_load_int(&mb->stop_state) ==
+			    MONITOR_STOPSTATE_STOPPED)
+				cpu_mwait(0, MWAIT_C1);
+			continue;
+		}
+
 		ia32_pause();
 
 		/*
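
Reviewer note (not part of the patch): the core of the change is the stop/wake
handshake between cpustop_handler() and the restart paths in subr_smp.c and
cpu_reset().  The stopped CPU publishes MONITOR_STOPSTATE_STOPPED, arms the
monitor on its own monitorbuf with cpu_monitor(), and executes cpu_mwait() only
after re-checking stop_state; the re-check catches a RUNNING store that landed
before MONITOR armed the address watch, while a store landing after MONITOR
aborts the MWAIT in hardware.  The waker therefore needs nothing more than a
plain store of MONITOR_STOPSTATE_RUNNING to the target's monitorbuf, with no
IPI.  Below is a minimal userspace model of that protocol in C11 atomics and
pthreads, a sketch only: mock_monitor(), mock_mwait(), stopped_cpu(), and the
bounded spin are invented stand-ins for the privileged instructions, not
kernel API.

/*
 * Userspace model of the MONITOR/MWAIT stop handshake in this patch.
 * mock_monitor()/mock_mwait() are hypothetical stand-ins for the
 * privileged cpu_monitor()/cpu_mwait().
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define	STOPSTATE_RUNNING	0
#define	STOPSTATE_STOPPED	1

static atomic_int stop_state;	/* models mb->stop_state */
static atomic_int started;	/* models this CPU's started_cpus bit */

static void
mock_monitor(atomic_int *p)
{
	/*
	 * Real MONITOR arms address-watch hardware: any store to *p after
	 * this point aborts a later MWAIT.  Nothing to do in the model;
	 * the caller's re-check of *p plays that role.
	 */
	(void)p;
}

static void
mock_mwait(atomic_int *p, int sleep_val)
{
	/*
	 * Real MWAIT sleeps until the monitored line is written, but may
	 * also wake early (e.g., on an interrupt); model that with a
	 * bounded spin.  Spurious wakeups are harmless because the caller
	 * loops and re-checks its own condition.
	 */
	for (int i = 0; i < 100000000 && atomic_load(p) == sleep_val; i++)
		;
}

/* Models cpustop_handler()'s wait loop. */
static void *
stopped_cpu(void *arg)
{
	(void)arg;
	while (atomic_load(&started) == 0) {
		/* Publish STOPPED, arm the monitor, then re-check. */
		atomic_store(&stop_state, STOPSTATE_STOPPED);
		mock_monitor(&stop_state);
		if (atomic_load(&stop_state) == STOPSTATE_STOPPED)
			mock_mwait(&stop_state, STOPSTATE_STOPPED);
	}
	return (NULL);
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, stopped_cpu, NULL);
	sleep(1);	/* let the "CPU" park itself in mwait */

	/*
	 * Models the restart path: set the started bit first, then write
	 * the monitored word.  The plain store is the entire wakeup.
	 */
	atomic_store(&started, 1);
	atomic_store(&stop_state, STOPSTATE_RUNNING);

	pthread_join(t, NULL);
	printf("stopped CPU resumed\n");
	return (0);
}

Compile with, e.g., "cc -pthread model.c" and run; it prints "stopped CPU
resumed".  The bounded spin in mock_mwait() also shows why the handler keeps
the while loop: MWAIT wakeups are not guaranteed to mean "restarted", so the
started_cpus check remains the authoritative exit condition, and the kernel
code likewise re-loops after every wakeup.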