D46566.id.diff

diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -137,20 +137,20 @@
* and end of the MIN to MAX timeshare range are only reachable with negative
* or positive nice respectively.
*
- * PRI_RANGE: Priority range for utilization dependent priorities.
- * PRI_NRESV: Number of nice values.
+ * PRI_NRESV: Number of priority levels reserved to account for nice values.
+ * 5/4 is to preserve historical nice effect on computation ratios.
+ * PRI_RANGE: Length of range for priorities computed from CPU use.
* PRI_TICKS: Compute a priority in PRI_RANGE from the ticks count and total.
* PRI_NICE: Determines the part of the priority inherited from nice.
*/
-#define SCHED_PRI_NRESV (PRIO_MAX - PRIO_MIN)
-#define SCHED_PRI_NHALF (SCHED_PRI_NRESV / 2)
-#define SCHED_PRI_MIN (PRI_MIN_BATCH + SCHED_PRI_NHALF)
-#define SCHED_PRI_MAX (PRI_MAX_BATCH - SCHED_PRI_NHALF)
+#define SCHED_PRI_NRESV ((PRIO_MAX - PRIO_MIN) * 5 / 4)
+#define SCHED_PRI_MIN (PRI_MIN_BATCH + (SCHED_PRI_NRESV / 2))
+#define SCHED_PRI_MAX (PRI_MAX_BATCH - (SCHED_PRI_NRESV / 2))
#define SCHED_PRI_RANGE (SCHED_PRI_MAX - SCHED_PRI_MIN + 1)
#define SCHED_PRI_TICKS(ts) \
(SCHED_TICK_HZ((ts)) / \
(roundup(SCHED_TICK_TOTAL((ts)), SCHED_PRI_RANGE) / SCHED_PRI_RANGE))
-#define SCHED_PRI_NICE(nice) (nice)
+#define SCHED_PRI_NICE(nice) ((nice) * 5 / 4)
/*
* Runqueue indices for the implemented scheduling policies' priority bounds.
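
To see what the 5/4 scaling in this hunk does to the numbers, here is a standalone sketch (not part of the patch) that evaluates the old and new macros. It assumes PRIO_MIN == -20 and PRIO_MAX == 20 as defined in sys/sys/resource.h; PRI_MIN_BATCH and PRI_MAX_BATCH are left out since only the nice-related terms change.

#include <stdio.h>

#define PRIO_MIN	(-20)
#define PRIO_MAX	20

/* Old definitions: one priority level per nice value. */
#define OLD_PRI_NRESV		(PRIO_MAX - PRIO_MIN)		/* 40 */
#define OLD_PRI_NICE(nice)	(nice)				/* -20 .. 20 */

/* New definitions: 5/4 scaling to keep the historical nice ratios. */
#define NEW_PRI_NRESV		((PRIO_MAX - PRIO_MIN) * 5 / 4)	/* 50 */
#define NEW_PRI_NICE(nice)	((nice) * 5 / 4)		/* -25 .. 25 */

int
main(void)
{
	printf("SCHED_PRI_NRESV: %d -> %d\n", OLD_PRI_NRESV, NEW_PRI_NRESV);
	printf("SCHED_PRI_NICE(20): %d -> %d\n",
	    OLD_PRI_NICE(20), NEW_PRI_NICE(20));
	printf("SCHED_PRI_NICE(-20): %d -> %d\n",
	    OLD_PRI_NICE(-20), NEW_PRI_NICE(-20));
	return (0);
}

With 25 levels now reserved on each side of the batch range instead of 20, the part of the range left for SCHED_PRI_TICKS() shrinks accordingly, which the unchanged SCHED_PRI_RANGE formula reflects.
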
@@ -279,6 +279,11 @@
u_char tdq_owepreempt; /* (f) Remote preemption pending. */
u_char tdq_ts_off; /* (t) TS insertion offset. */
u_char tdq_ts_deq_off; /* (t) TS dequeue offset. */
+ /*
+ * (t) Number of (stathz) ticks since last offset incrementation
+ * correction.
+ */
+ u_char tdq_ts_ticks;
int tdq_id; /* (c) cpuid. */
struct runq tdq_runq; /* (t) Run queue. */
char tdq_name[TDQ_NAME_LEN];
@@ -362,6 +367,7 @@
static void tdq_load_add(struct tdq *, struct thread *);
static void tdq_load_rem(struct tdq *, struct thread *);
static inline void tdq_runq_add(struct tdq *, struct thread *, int);
+static inline void tdq_advance_ts_deq_off(struct tdq *, bool);
static inline void tdq_runq_rem(struct tdq *, struct thread *);
static inline int sched_shouldpreempt(int, int, int);
static void tdq_print(int cpu);
@@ -557,6 +563,33 @@
runq_add(&tdq->tdq_runq, td, flags);
}
+/*
+ * Advance the timesharing dequeue offset to the next non-empty queue or the
+ * insertion offset, whichever is closer.
+ *
+ * If 'deq_queue_known_empty' is true, then the queue where timesharing threads
+ * are currently removed for execution (pointed to by 'tdq_ts_deq_off') is
+ * assumed empty. Otherwise, this condition is checked for.
+ */
+static inline void
+tdq_advance_ts_deq_off(struct tdq *tdq, bool deq_queue_known_empty)
+{
+ /*
+ * We chose a simple iterative algorithm since the difference between
+ * offsets is small in practice (see sched_clock()).
+ */
+ while (tdq->tdq_ts_deq_off != tdq->tdq_ts_off) {
+ if (deq_queue_known_empty)
+ deq_queue_known_empty = false;
+ else if (!runq_is_queue_empty(&tdq->tdq_runq,
+ tdq->tdq_ts_deq_off + RQ_TS_POL_MIN))
+ break;
+
+ tdq->tdq_ts_deq_off = (tdq->tdq_ts_deq_off + 1) %
+ RQ_TS_POL_MODULO;
+ }
+}
+
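
As a rough user-space illustration of the loop above (not part of the patch), the following sketch advances a dequeue offset toward an insert offset over a ring of queues, stopping early at the first non-empty one. NQUEUES, the occupancy array and the helper name are hypothetical stand-ins; the kernel code instead consults the tdq_runq status bits through runq_is_queue_empty() and offsets indices by RQ_TS_POL_MIN.

#include <stdbool.h>
#include <stdio.h>

#define NQUEUES	109	/* stand-in for RQ_TS_POL_MODULO */

static bool queue_empty[NQUEUES];

static void
advance_deq_off(int *deq_off, int ins_off, bool deq_queue_known_empty)
{
	/* Same iterative walk as tdq_advance_ts_deq_off(). */
	while (*deq_off != ins_off) {
		if (deq_queue_known_empty)
			deq_queue_known_empty = false;
		else if (!queue_empty[*deq_off])
			break;
		*deq_off = (*deq_off + 1) % NQUEUES;
	}
}

int
main(void)
{
	int deq = 5, ins = 9;

	for (int i = 0; i < NQUEUES; i++)
		queue_empty[i] = true;
	queue_empty[7] = false;			/* one runnable batch queue */
	advance_deq_off(&deq, ins, false);
	printf("dequeue offset: %d\n", deq);	/* 7: stopped early */
	return (0);
}

Passing true for the last argument, as tdq_runq_rem() does below, skips the emptiness check for the current queue only, so in that case the offset moves at least one step unless it already equals the insert offset.
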
/*
* Remove a thread from a run-queue. This typically happens when a thread
* is selected to run. Running threads are not on the queue and the
@@ -579,14 +612,13 @@
/*
* If thread has a batch priority and the queue from which it was
* removed is now empty, advance the batch's queue removal index if it
- * lags with respect to the batch's queue insertion index.
+ * lags with respect to the batch's queue insertion index, so that we
+ * may eventually be able to advance the latter in sched_clock().
*/
- if (queue_empty && PRI_MIN_BATCH <= td->td_priority &&
- td->td_priority <= PRI_MAX_BATCH &&
- tdq->tdq_ts_off != tdq->tdq_ts_deq_off &&
+ if (PRI_MIN_BATCH <= td->td_priority &&
+ td->td_priority <= PRI_MAX_BATCH && queue_empty &&
tdq->tdq_ts_deq_off + RQ_TS_POL_MIN == td->td_rqindex)
- tdq->tdq_ts_deq_off = (tdq->tdq_ts_deq_off + 1) %
- RQ_TS_POL_MODULO;
+ tdq_advance_ts_deq_off(tdq, true);
}
/*
@@ -2664,13 +2696,21 @@
/*
* Advance the insert offset once for each tick to ensure that all
- * threads get a chance to run.
+ * threads get a chance to run. In order not to change ULE's
+ * anti-starvation and "nice" behaviors too much after the switch to a
+ * single 256-queue runqueue, and given that the queue insert offset used
+ * to be incremented by 1 at every tick (provided the system is not too
+ * loaded) while there are now 109 distinct levels for the timesharing
+ * selection policy instead of 64 (separate runqueue), we apply a factor
+ * of 7/4 when increasing the insert offset, by incrementing it by 2
+ * instead of 1 except for one tick in four.
*/
if (tdq->tdq_ts_off == tdq->tdq_ts_deq_off) {
- tdq->tdq_ts_off = (tdq->tdq_ts_off + 1) % RQ_TS_POL_MODULO;
- if (runq_is_queue_empty(&tdq->tdq_runq,
- tdq->tdq_ts_deq_off + RQ_TS_POL_MIN))
- tdq->tdq_ts_deq_off = tdq->tdq_ts_off;
+ tdq->tdq_ts_ticks += cnt;
+ tdq->tdq_ts_off = (tdq->tdq_ts_off + 2 * cnt -
+ tdq->tdq_ts_ticks / 4) % RQ_TS_POL_MODULO;
+ tdq->tdq_ts_ticks %= 4;
+ tdq_advance_ts_deq_off(tdq, false);
}
ts = td_get_sched(td);
sched_pctcpu_update(ts, 1);
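
A quick way to check the 7/4 factor described in the comment is to run the new update rule in isolation. The sketch below (not part of the patch, with NQUEUES standing in for RQ_TS_POL_MODULO and cnt fixed at 1) shows the insert offset gaining 7 positions every 4 stathz ticks, i.e. 1.75 per tick, close to the 109/64 ~= 1.70 ratio between the new and old number of timesharing levels.

#include <stdio.h>

#define NQUEUES	109	/* stand-in for RQ_TS_POL_MODULO */

int
main(void)
{
	unsigned char ts_ticks = 0;	/* mirrors tdq_ts_ticks */
	int off = 0;			/* mirrors tdq_ts_off */
	int cnt = 1;			/* ticks elapsed per call */

	for (int tick = 1; tick <= 8; tick++) {
		ts_ticks += cnt;
		off = (off + 2 * cnt - ts_ticks / 4) % NQUEUES;
		ts_ticks %= 4;
		printf("tick %d: insert offset = %d\n", tick, off);
	}
	/* Prints 2, 4, 6, 7, 9, 11, 13, 14: +7 every 4 ticks. */
	return (0);
}

Expressing the increment as 2 * cnt - tdq_ts_ticks / 4, with tdq_ts_ticks kept modulo 4, rather than alternating literal increments, also keeps the long-term rate correct when sched_clock() is called with cnt > 1 after several missed ticks.
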
