D15985.diff
Index: sched_ule.c
===================================================================
--- sched_ule.c
+++ sched_ule.c
@@ -205,6 +205,8 @@
* realstathz: stathz is sometimes 0 and run off of hz.
* sched_slice: Runtime of each thread before rescheduling.
* preempt_thresh: Priority threshold for preemption and remote IPIs.
+ * preempt_timeshare_delta: Preempt if the difference in timeshare
+ * priorities exceeds this threshold.
*/
static int sched_interact = SCHED_INTERACT_THRESH;
static int tickincr = 8 << SCHED_TICK_SHIFT;
@@ -214,10 +216,13 @@
#ifdef PREEMPTION
#ifdef FULL_PREEMPTION
static int preempt_thresh = PRI_MAX_IDLE;
+static int preempt_timeshare_delta = 0;
#else
static int preempt_thresh = PRI_MIN_KERN;
+static int preempt_timeshare_delta = (PRI_BATCH_RANGE - SCHED_PRI_NRESV) / 2;
#endif
#else
+static int preempt_timeshare_delta = INT_MAX;
static int preempt_thresh = 0;
#endif
static int static_boost = PRI_MIN_BATCH;
@@ -316,7 +321,9 @@
static void tdq_load_rem(struct tdq *, struct thread *);
static __inline void tdq_runq_add(struct tdq *, struct thread *, int);
static __inline void tdq_runq_rem(struct tdq *, struct thread *);
-static inline int sched_shouldpreempt(int, int, int);
+static __inline void tdq_runq_elevate(struct tdq *, struct thread *);
+static inline int sched_shouldpreempt(struct tdq *, struct thread *,
+ struct thread *);
void tdq_print(int cpu);
static void runq_print(struct runq *rq);
static void tdq_add(struct tdq *, struct thread *, int);
@@ -418,36 +425,85 @@
runq_print(&tdq->tdq_idle);
}
+/*
+ * Evaluate whether we should preempt a thread or simply set NEEDRESCHED.
+ */
static inline int
-sched_shouldpreempt(int pri, int cpri, int remote)
+sched_shouldpreempt(struct tdq *tdq, struct thread *td, struct thread *ctd)
{
+ int pri, cpri;
+ int remote, timeshare;
+ u_char ridx;
+
/*
* If the new priority is not better than the current priority there is
* nothing to do.
*/
+ pri = td->td_priority;
+ cpri = ctd->td_priority;
if (pri >= cpri)
return (0);
+
/*
* Always preempt idle.
*/
if (cpri >= PRI_MIN_IDLE)
return (1);
+
/*
+ * If the threads are not both on the timeshare queue,
+ * NEEDRESCHED is set unconditionally for the lower
+ * priority curthread. We will also preempt in most
+ * cases which will harmlessly clear the bit.
+ *
+ * The index determines run-order more strongly than
+ * priority for timeshare threads. We eliminate needless
+ * switches by filtering on run-queue order here.
+ */
+ timeshare = td_get_sched(td)->ts_runq == &tdq->tdq_timeshare &&
+ td_get_sched(ctd)->ts_runq == &tdq->tdq_timeshare;
+ ridx = tdq->tdq_ridx;
+ if (!timeshare ||
+ (u_char)(td->td_rqindex - ridx) <
+ (u_char)(ctd->td_rqindex - ridx)) {
+ if (ctd->td_lock == TDQ_LOCKPTR(tdq))
+ ctd->td_flags |= TDF_NEEDRESCHED;
+ }
+
+ /*
* If preemption is disabled don't preempt others.
*/
if (preempt_thresh == 0)
return (0);
+
/*
* Preempt if we exceed the threshold.
*/
if (pri <= preempt_thresh)
return (1);
+
/*
* If we're interactive or better and there is non-interactive
- * or worse running preempt only remote processors.
+ * or worse running, preempt remote processors. Local processors
+ * will honor this from NEEDRESCHED and avoid preemption. A future
+ * enhancement could do the same for remote.
*/
+ remote = tdq != TDQ_SELF();
if (remote && pri <= PRI_MAX_INTERACT && cpri > PRI_MAX_INTERACT)
return (1);
+
+ /*
+ * If the difference between the two timeshare threads' priorities
+ * exceeds the delta threshold we elevate the new thread on the
+ * timeshare queue and request a resched.
+ */
+ if (timeshare && cpri - pri > preempt_timeshare_delta) {
+ tdq_runq_elevate(tdq, td);
+ if (ctd->td_lock == TDQ_LOCKPTR(tdq))
+ ctd->td_flags |= TDF_NEEDRESCHED;
+ return (remote);
+ }
+
return (0);
}
@@ -502,6 +558,21 @@
runq_add(ts->ts_runq, td, flags);
}
+static void
+tdq_runq_elevate(struct tdq *tdq, struct thread *td)
+{
+ struct td_sched *ts;
+
+ TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+ THREAD_LOCK_ASSERT(td, MA_OWNED);
+
+ ts = td_get_sched(td);
+ if (ts->ts_runq == &tdq->tdq_timeshare) {
+ runq_remove_idx(ts->ts_runq, td, NULL);
+ runq_add_pri(ts->ts_runq, td, tdq->tdq_ridx, SRQ_PREEMPTED);
+ }
+}
+
/*
* Remove a thread from a run-queue. This typically happens when a thread
* is selected to run. Running threads are not on the queue and the
@@ -1075,15 +1146,14 @@
tdq_notify(struct tdq *tdq, struct thread *td)
{
struct thread *ctd;
- int pri;
int cpu;
if (tdq->tdq_ipipending)
return;
cpu = td_get_sched(td)->ts_cpu;
- pri = td->td_priority;
ctd = pcpu_find(cpu)->pc_curthread;
- if (!sched_shouldpreempt(pri, ctd->td_priority, 1))
+
+ if (!sched_shouldpreempt(tdq, td, ctd))
return;
/*
@@ -1567,8 +1637,8 @@
score = imax(0, sched_interact_score(td) + td->td_proc->p_nice);
if (score < sched_interact) {
pri = PRI_MIN_INTERACT;
- pri += ((PRI_MAX_INTERACT - PRI_MIN_INTERACT + 1) /
- sched_interact) * score;
+ pri += ((PRI_MAX_INTERACT - PRI_MIN_INTERACT) * score) /
+ sched_interact;
KASSERT(pri >= PRI_MIN_INTERACT && pri <= PRI_MAX_INTERACT,
("sched_priority: invalid interactive priority %d score %d",
pri, score));
@@ -2173,7 +2243,7 @@
return;
if (static_boost == 1 && prio)
sched_prio(td, prio);
- else if (static_boost && td->td_priority > static_boost)
+ else if (static_boost > 1 && td->td_priority > static_boost)
sched_prio(td, static_boost);
}
@@ -2330,6 +2400,7 @@
sched_preempt(struct thread *td)
{
struct tdq *tdq;
+ struct thread *ntd;
SDT_PROBE2(sched, , , surrender, td, td->td_proc);
@@ -2337,7 +2408,14 @@
tdq = TDQ_SELF();
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
tdq->tdq_ipipending = 0;
- if (td->td_priority > tdq->tdq_lowpri) {
+
+ /*
+ * The state could've changed since the remote processor signaled
+ * or it may have simply signaled to trigger NEEDRESCHED. We
+ * filter again here before preempting.
+ */
+ ntd = tdq_choose(tdq);
+ if (ntd != NULL && sched_shouldpreempt(tdq, ntd, td)) {
int flags;
flags = SW_INVOL | SW_PREEMPT;
@@ -2489,19 +2567,13 @@
sched_setpreempt(struct thread *td)
{
struct thread *ctd;
- int cpri;
- int pri;
THREAD_LOCK_ASSERT(curthread, MA_OWNED);
ctd = curthread;
- pri = td->td_priority;
- cpri = ctd->td_priority;
- if (pri < cpri)
- ctd->td_flags |= TDF_NEEDRESCHED;
- if (panicstr != NULL || pri >= cpri || cold || TD_IS_INHIBITED(ctd))
+ if (panicstr != NULL || cold || TD_IS_INHIBITED(ctd))
return;
- if (!sched_shouldpreempt(pri, cpri, 0))
+ if (!sched_shouldpreempt(TDQ_SELF(), td, ctd))
return;
ctd->td_owepreempt = 1;
}
@@ -3044,8 +3116,12 @@
SYSCTL_INT(_kern_sched, OID_AUTO, preempt_thresh, CTLFLAG_RW,
&preempt_thresh, 0,
"Maximal (lowest) priority for preemption");
+SYSCTL_INT(_kern_sched, OID_AUTO, preempt_timeshare_delta, CTLFLAG_RW,
+ &preempt_timeshare_delta, 0,
+ "Difference in timeshare priorities required for preemption");
SYSCTL_INT(_kern_sched, OID_AUTO, static_boost, CTLFLAG_RW, &static_boost, 0,
- "Assign static kernel priorities to sleeping threads");
+ "Elevate priorities of sleeping threads. "
+ "0 = disabled, 1 = kernel supplied value, >1 = specified priority.");
SYSCTL_INT(_kern_sched, OID_AUTO, idlespins, CTLFLAG_RW, &sched_idlespins, 0,
"Number of times idle thread will spin waiting for new work");
SYSCTL_INT(_kern_sched, OID_AUTO, idlespinthresh, CTLFLAG_RW,
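
For readers following the sched_shouldpreempt() rewrite above, the run-queue
index filter deserves a note. The comparison
(u_char)(td->td_rqindex - ridx) < (u_char)(ctd->td_rqindex - ridx) orders
threads by their forward distance from the calendar-queue slot currently being
drained, not by raw index value. A minimal userland sketch (the helper name
and demo values below are invented for illustration, not taken from the
patch):

#include <stdio.h>

/*
 * Sketch of the circular-index comparison: subtracting in unsigned
 * 8-bit arithmetic yields each thread's forward distance from the
 * draining slot (ridx), so threads are ordered by when they will
 * actually run rather than by raw index value.
 */
static int
runs_sooner(unsigned char ridx, unsigned char a, unsigned char b)
{
	return ((unsigned char)(a - ridx) < (unsigned char)(b - ridx));
}

int
main(void)
{
	/*
	 * With ridx at 250, slot 252 is 2 steps ahead while slot 3
	 * has wrapped and is 9 steps ahead: 252 runs sooner even
	 * though it is numerically larger.
	 */
	printf("%d\n", runs_sooner(250, 252, 3));	/* prints 1 */
	printf("%d\n", runs_sooner(0, 252, 3));		/* prints 0 */
	return (0);
}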
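The preempt_timeshare_delta branch is the behavioral core of the patch: a
sufficiently better timeshare thread is elevated to the draining slot via
tdq_runq_elevate() and the current thread is flagged with NEEDRESCHED, while a
hard preemption is only returned for remote CPUs. A toy re-statement under
assumed values (the constant and helper below are invented; the patch's
actual default is (PRI_BATCH_RANGE - SCHED_PRI_NRESV) / 2):

#include <stdbool.h>
#include <stdio.h>

#define PREEMPT_TIMESHARE_DELTA 20	/* invented stand-in value */

/*
 * Toy model of the new branch at the end of sched_shouldpreempt():
 * report whether the newcomer gets elevated and whether the caller
 * should hard-preempt (remote CPUs only).
 */
static bool
timeshare_delta_check(int pri, int cpri, bool remote, bool *elevate)
{
	/* Lower numeric priority is better in FreeBSD. */
	*elevate = (cpri - pri > PREEMPT_TIMESHARE_DELTA);
	return (*elevate && remote);
}

int
main(void)
{
	bool preempt, elevate;

	/* Local CPU: elevate and set NEEDRESCHED, no hard preemption. */
	preempt = timeshare_delta_check(100, 130, false, &elevate);
	printf("local:  preempt=%d elevate=%d\n", preempt, elevate);
	/* Remote CPU: the same delta also triggers an IPI preemption. */
	preempt = timeshare_delta_check(100, 130, true, &elevate);
	printf("remote: preempt=%d elevate=%d\n", preempt, elevate);
	return (0);
}

The threshold remains tunable at runtime through the
kern.sched.preempt_timeshare_delta sysctl added at the bottom of the diff.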
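The sched_priority() hunk also fixes an integer-arithmetic ordering bug: the
old form divided the interactive range by sched_interact before multiplying by
the score, so the quotient truncates to zero whenever the range is smaller
than the threshold, and the stray +1 could let the top score overshoot
PRI_MAX_INTERACT. A worked illustration with stand-in bounds (the real
constants come from the kernel headers, not the values used here):

#include <stdio.h>

#define PMIN	88	/* stand-in for PRI_MIN_INTERACT */
#define PMAX	115	/* stand-in for PRI_MAX_INTERACT */
#define THRESH	30	/* stand-in for sched_interact */

int
main(void)
{
	int score, oldpri, newpri;

	for (score = 0; score < THRESH; score += 10) {
		/* Old: ((range + 1) / THRESH) is 28 / 30 == 0, so
		 * every interactive thread collapses onto PMIN. */
		oldpri = PMIN + ((PMAX - PMIN + 1) / THRESH) * score;
		/* New: multiply first, divide last; spreads evenly
		 * and cannot overshoot PMAX at score == THRESH - 1. */
		newpri = PMIN + ((PMAX - PMIN) * score) / THRESH;
		printf("score %2d: old %d new %d\n", score, oldpri, newpri);
	}
	return (0);
}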
