Index: sys/kern/sched_ule.c
===================================================================
--- sys/kern/sched_ule.c
+++ sys/kern/sched_ule.c
@@ -733,7 +733,7 @@
 				}
 			}
 			if (match & CPU_SEARCH_HIGHEST)
-				if (tdq->tdq_load >= hgroup.cs_limit &&
+				if (tdq->tdq_load > hgroup.cs_limit &&
 				    tdq->tdq_transferable &&
 				    CPU_ISSET(cpu, &hgroup.cs_mask)) {
 					hgroup.cs_cpu = cpu;
@@ -990,15 +990,15 @@
 	struct tdq *steal;
 	cpuset_t mask;
 	int thresh;
-	int cpu;
+	int cpu, switchcnt;
 
-	if (smp_started == 0 || steal_idle == 0)
+	if (smp_started == 0 || steal_idle == 0 || tdq->tdq_cg == NULL)
 		return (1);
 	CPU_FILL(&mask);
 	CPU_CLR(PCPU_GET(cpuid), &mask);
-	/* We don't want to be preempted while we're iterating. */
-	spinlock_enter();
-	for (cg = tdq->tdq_cg; cg != NULL; ) {
+    restart:
+	switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
+	for (cg = tdq->tdq_cg; ; ) {
 		if ((cg->cg_flags & CG_FLAG_THREAD) == 0)
 			thresh = steal_thresh;
 		else
@@ -1006,34 +1006,70 @@
 		cpu = sched_highest(cg, mask, thresh);
 		if (cpu == -1) {
 			cg = cg->cg_parent;
+			if (cg == NULL)
+				return (1);
 			continue;
 		}
+		/*
+		 * We were assigned a thread but not preempted, so switch
+		 * to it here rather than going through the extra expense
+		 * of calling tdq_lock_pair() below and then immediately
+		 * dropping the second lock.  This catches about half the
+		 * occurrences of being assigned a thread while in this loop.
+		 */
+		if (tdq->tdq_load) {
+			thread_lock(curthread);
+			goto doswitch2;
+		}
 		steal = TDQ_CPU(cpu);
-		CPU_CLR(cpu, &mask);
 		tdq_lock_pair(tdq, steal);
-		if (steal->tdq_load < thresh || steal->tdq_transferable == 0) {
+		/*
+		 * We were assigned a thread while waiting for the locks.
+		 * Switch to it now instead of stealing a thread.
+		 */
+		if (tdq->tdq_load)
+			break;
+		/*
+		 * We were preempted and the CPU loading info may be out
+		 * of date, so restart the search.
+		 */
+		if (switchcnt != tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt) {
 			tdq_unlock_pair(tdq, steal);
-			continue;
+			goto restart;
 		}
 		/*
-		 * If a thread was added while interrupts were disabled don't
-		 * steal one here.  If we fail to acquire one due to affinity
-		 * restrictions loop again with this cpu removed from the
-		 * set.
+		 * The data returned by sched_highest() is stale and
+		 * the chosen CPU no longer has an eligible thread.
+		 *
+		 * Testing this ahead of tdq_lock_pair() only catches
+		 * this situation about 20% of the time on an 8 core
+		 * 16 thread Ryzen 7.
 		 */
-		if (tdq->tdq_load == 0 && tdq_move(steal, tdq) == 0) {
+		if (steal->tdq_load < thresh || steal->tdq_transferable == 0) {
 			tdq_unlock_pair(tdq, steal);
-			continue;
+			goto restart;
 		}
-		spinlock_exit();
-		TDQ_UNLOCK(steal);
-		mi_switch(SW_VOL | SWT_IDLE, NULL);
-		thread_unlock(curthread);
-
-		return (0);
+		/*
+		 * Steal the thread and switch to it.
+		 */
+		if (tdq_move(steal, tdq) != 0)
+			break;
+		/*
+		 * We failed to acquire a thread even though it looked
+		 * like one was available.  This could be due to affinity
+		 * restrictions or for other reasons.  Loop again after
+		 * removing this CPU from the set.  The restart logic
+		 * above does not restore this CPU to the set due to the
+		 * likelihood of failing here again.
+		 */
+		CPU_CLR(cpu, &mask);
+		tdq_unlock_pair(tdq, steal);
 	}
-	spinlock_exit();
-	return (1);
+	TDQ_UNLOCK(steal);
+    doswitch2:
+	mi_switch(SW_VOL | SWT_IDLE, NULL);
+	thread_unlock(curthread);
+	return (0);
 }
 
 /*
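
For reference, a minimal user-space sketch of the restart pattern the patch relies on: snapshot tdq_switchcnt + tdq_oldswitchcnt before the (unlocked) scan, prefer any work that has arrived locally, and restart the scan if the counter moved, since a preemption means the load data gathered so far may be stale.  This is not kernel code; the names runq and scan_for_work are illustrative only.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct runq {
		atomic_uint switchcnt;	/* bumped on every context switch */
		atomic_uint load;	/* runnable threads queued locally */
	};

	/* Returns true if the caller has work to run, false to go idle. */
	static bool
	scan_for_work(struct runq *rq)
	{
		unsigned int snap;

	restart:
		snap = atomic_load(&rq->switchcnt);

		/* ...expensive, unlocked scan of remote queues would go here... */

		if (atomic_load(&rq->load) != 0)
			return true;	/* work arrived locally; just run it */
		if (atomic_load(&rq->switchcnt) != snap)
			goto restart;	/* preempted: the scan results are stale */
		return false;
	}

	int
	main(void)
	{
		struct runq rq = { .switchcnt = 0, .load = 1 };

		printf("found work: %d\n", scan_for_work(&rq));
		return 0;
	}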