D9791.id25825.diff

Index: sys/amd64/linux/linux_dummy.c
===================================================================
--- sys/amd64/linux/linux_dummy.c
+++ sys/amd64/linux/linux_dummy.c
@@ -110,9 +110,6 @@
/* linux 2.6.27: */
DUMMY(signalfd4);
DUMMY(inotify_init1);
-/* linux 2.6.30: */
-DUMMY(preadv);
-DUMMY(pwritev);
/* linux 2.6.31: */
DUMMY(perf_event_open);
/* linux 2.6.38: */
Index: sys/amd64/linux32/linux.h
===================================================================
--- sys/amd64/linux32/linux.h
+++ sys/amd64/linux32/linux.h
@@ -663,6 +663,7 @@
(((desc)->b >> LINUX_ENTRY_B_USEABLE) & 1)
struct iovec;
+struct uio;
struct l_iovec32 {
uint32_t iov_base;
@@ -671,6 +672,8 @@
int linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt,
struct iovec **iovp, int error);
+int linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt,
+ struct uio **uiop);
int linux_copyout_rusage(struct rusage *ru, void *uaddr);
/* robust futexes */
Index: sys/amd64/linux32/linux32_dummy.c
===================================================================
--- sys/amd64/linux32/linux32_dummy.c
+++ sys/amd64/linux32/linux32_dummy.c
@@ -110,9 +110,6 @@
/* linux 2.6.27: */
DUMMY(signalfd4);
DUMMY(inotify_init1);
-/* linux 2.6.30: */
-DUMMY(preadv);
-DUMMY(pwritev);
/* linux 2.6.31: */
DUMMY(perf_event_open);
/* linux 2.6.33: */
Index: sys/amd64/linux32/linux32_machdep.c
===================================================================
--- sys/amd64/linux32/linux32_machdep.c
+++ sys/amd64/linux32/linux32_machdep.c
@@ -144,7 +144,7 @@
CTASSERT(sizeof(struct l_iovec32) == 8);
-static int
+int
linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop)
{
struct l_iovec32 iov32;
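
Note: the hunk above only widens the linkage of linux32_copyinuio() so that linux_file.c can call it under COMPAT_LINUX32. For readers without the full file at hand, the following is a minimal userland sketch of the conversion such a routine performs, not the committed implementation: each 32-bit base/length pair is zero-extended into a native struct iovec. In the kernel the source array is copyin()ed from user space and the result is wrapped in a struct uio allocated with M_IOV; both steps are elided here.

    #include <errno.h>
    #include <limits.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/uio.h>

    struct l_iovec32 {
            uint32_t iov_base;
            uint32_t iov_len;
    };

    /* Widen 32-bit iovecs into native ones (illustration only). */
    static int
    widen_iov32(const struct l_iovec32 *iov32, size_t iovcnt,
        struct iovec **iovp)
    {
            struct iovec *iov;
            size_t i;

            if (iovcnt > IOV_MAX)
                    return (EINVAL);
            iov = calloc(iovcnt, sizeof(*iov));
            if (iov == NULL)
                    return (ENOMEM);
            for (i = 0; i < iovcnt; i++) {
                    /* Zero-extend the 32-bit user pointer and length. */
                    iov[i].iov_base = (void *)(uintptr_t)iov32[i].iov_base;
                    iov[i].iov_len = iov32[i].iov_len;
            }
            *iovp = iov;
            return (0);
    }
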
Index: sys/compat/linux/linux_file.c
===================================================================
--- sys/compat/linux/linux_file.c
+++ sys/compat/linux/linux_file.c
@@ -1028,6 +1028,62 @@
}
int
+linux_preadv(struct thread *td, struct linux_preadv_args *uap)
+{
+ struct uio *auio;
+ int error;
+ off_t offset;
+
+ /*
+ * According to http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES,
+ * pos_l and pos_h contain, respectively, the low-order and
+ * high-order 32 bits of the offset.
+ */
+ offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
+ (sizeof(offset) * 4)) | uap->pos_l;
+ if (offset < 0)
+ return (EINVAL);
+#ifdef COMPAT_LINUX32
+ error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
+#else
+ error = copyinuio(uap->vec, uap->vlen, &auio);
+#endif
+ if (error != 0)
+ return (error);
+ error = kern_preadv(td, uap->fd, auio, offset);
+ free(auio, M_IOV);
+ return (error);
+}
+
+int
+linux_pwritev(struct thread *td, struct linux_pwritev_args *uap)
+{
+ struct uio *auio;
+ int error;
+ off_t offset;
+
+ /*
+ * According to http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES,
+ * pos_l and pos_h contain, respectively, the low-order and
+ * high-order 32 bits of the offset.
+ */
+ offset = (((off_t)uap->pos_h << (sizeof(offset) * 4)) <<
+ (sizeof(offset) * 4)) | uap->pos_l;
+ if (offset < 0)
+ return (EINVAL);
+#ifdef COMPAT_LINUX32
+ error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
+#else
+ error = copyinuio(uap->vec, uap->vlen, &auio);
+#endif
+ if (error != 0)
+ return (error);
+ error = kern_pwritev(td, uap->fd, auio, offset);
+ free(auio, M_IOV);
+ return (error);
+}
+
+int
linux_mount(struct thread *td, struct linux_mount_args *args)
{
char fstypename[MFSNAMELEN];
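
A note on the double shift in the two functions above: it mirrors Linux's pos_from_hilo() helper, which splits one full-width shift (undefined behavior in C when the shift count equals the type width) into two half-width shifts. Since FreeBSD's off_t is 64 bits, sizeof(offset) * 4 is 32, so pos_h is shifted out entirely and pos_l alone carries the offset, matching the 64-bit Linux ABI in which the low argument holds the whole position. A standalone check of the arithmetic (unsigned types keep every step well-defined):

    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint64_t offset, pos_l = 0x12345678, pos_h = 0xdeadbeef;

            /* Same shape as the expression in linux_preadv() above. */
            offset = ((pos_h << (sizeof(offset) * 4)) <<
                (sizeof(offset) * 4)) | pos_l;

            /* With a 64-bit offset the two shifts total 64 bits, so   */
            /* pos_h drops out and only pos_l survives: 0x12345678.    */
            printf("offset = %#" PRIx64 "\n", offset);
            return (0);
    }
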
Index: sys/i386/linux/linux_dummy.c
===================================================================
--- sys/i386/linux/linux_dummy.c
+++ sys/i386/linux/linux_dummy.c
@@ -106,9 +106,6 @@
/* linux 2.6.27: */
DUMMY(signalfd4);
DUMMY(inotify_init1);
-/* linux 2.6.30: */
-DUMMY(preadv);
-DUMMY(pwritev);
/* linux 2.6.31: */
DUMMY(perf_event_open);
/* linux 2.6.33: */
Index: sys/kern/kern_synch.c
===================================================================
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -140,6 +140,7 @@
struct lock_class *class;
uintptr_t lock_state;
int catch, pri, rval, sleepq_flags;
+ bool using_rtc;
WITNESS_SAVE_DECL(lock_witness);
td = curthread;
@@ -213,13 +214,23 @@
lock_state = class->lc_unlock(lock);
sleepq_lock(ident);
}
- if (sbt != 0 && catch)
- rval = sleepq_timedwait_sig(ident, pri);
- else if (sbt != 0)
- rval = sleepq_timedwait(ident, pri);
- else if (catch)
+ if (sbt != 0) {
+ using_rtc = (td->td_rtcgen != 0);
+ if (catch)
+ rval = sleepq_timedwait_sig(ident, pri);
+ else
+ rval = sleepq_timedwait(ident, pri);
+ if (rval == 0 && using_rtc && td->td_rtcgen == 0) {
+ /*
+ * The thread was awoken by an adjustment of
+ * the real-time clock. It should read the
+ * RTC again and act on the new value.
+ */
+ rval = ERELOOKUP;
+ }
+ } else if (catch) {
rval = sleepq_wait_sig(ident, pri);
- else {
+ } else {
sleepq_wait(ident, pri);
rval = 0;
}
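
With this hunk, a timed sleep can return ERELOOKUP when the sleeper was woken because the real-time clock was stepped. A caller whose timeout was derived from CLOCK_REALTIME is expected to re-read the clock and retry rather than treat this as a failure; the kern_umtx.c hunk below is the patch's real consumer. As a hedged, kernel-style sketch of that pattern (recompute_timo() is a hypothetical helper that converts the absolute deadline back into ticks against the new clock):

    for (;;) {
            error = tsleep(wchan, PCATCH, "rtcslp", timo);
            if (error == ERELOOKUP) {
                    /* The RTC was stepped; the cached deadline is stale. */
                    timo = recompute_timo(&abs_deadline);
                    continue;
            }
            break;          /* 0, EWOULDBLOCK, EINTR, ERESTART, ... */
    }
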
Index: sys/kern/kern_tc.c
===================================================================
--- sys/kern/kern_tc.c
+++ sys/kern/kern_tc.c
@@ -28,7 +28,9 @@
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/proc.h>
#include <sys/sbuf.h>
+#include <sys/sleepqueue.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
@@ -126,6 +128,8 @@
sysctl_kern_timecounter_adjprecision, "I",
"Allowed time interval deviation in percents");
+volatile int rtc_generation = 1;
+
static int tc_chosen; /* Non-zero if a specific tc was chosen via sysctl. */
static void tc_windup(struct bintime *new_boottimebin);
@@ -1261,6 +1265,19 @@
return (timehands->th_counter->tc_frequency);
}
+static bool
+sleeping_on_old_rtc(struct thread *td)
+{
+
+ if (td->td_rtcgen != 0 &&
+ td->td_rtcgen != atomic_load_acq_int(&rtc_generation)) {
+ td->td_rtcgen = 0;
+ return (true);
+ }
+
+ return (false);
+}
+
static struct mtx tc_setclock_mtx;
MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN);
@@ -1284,6 +1301,7 @@
/* XXX fiddle all the little crinkly bits around the fiords... */
tc_windup(&bt);
mtx_unlock_spin(&tc_setclock_mtx);
+ sleepq_chains_remove_matching(sleeping_on_old_rtc);
if (timestepwarnings) {
nanotime(&taft);
log(LOG_INFO,
@@ -1463,6 +1481,9 @@
timehands = th;
timekeep_push_vdso();
+
+ if (new_boottimebin != NULL)
+ atomic_add_rel_int(&rtc_generation, 1);
}
/* Report or change the active timecounter hardware. */
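
rtc_generation is a classic generation counter: tc_windup() bumps it with release semantics whenever the boot time (and hence the real-time clock) is stepped, and a sleeper snapshots it with acquire semantics before computing a CLOCK_REALTIME deadline, so a later mismatch proves the deadline predates the step. A minimal runnable userland analogue using C11 atomics (all names here are illustrative, not kernel API):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int rtc_generation = 1;  /* mirrors the kernel global */

    /* Sleeper: snapshot before computing a CLOCK_REALTIME deadline. */
    static int
    rtcgen_snapshot(void)
    {
            return (atomic_load_explicit(&rtc_generation,
                memory_order_acquire));
    }

    /* Clock-setter: publish a new generation after stepping the clock. */
    static void
    rtcgen_bump(void)
    {
            (void)atomic_fetch_add_explicit(&rtc_generation, 1,
                memory_order_release);
    }

    /* Wakeup-side test: nonzero and stale means the deadline is old. */
    static bool
    sleeping_on_old_rtc(int snap)
    {
            return (snap != 0 && snap != rtcgen_snapshot());
    }

    int
    main(void)
    {
            int snap;

            snap = rtcgen_snapshot();       /* thread goes to sleep */
            rtcgen_bump();                  /* clock_settime() happens */
            printf("stale deadline: %s\n",
                sleeping_on_old_rtc(snap) ? "yes" : "no");  /* yes */
            return (0);
    }
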
Index: sys/kern/kern_umtx.c
===================================================================
--- sys/kern/kern_umtx.c
+++ sys/kern/kern_umtx.c
@@ -59,6 +59,7 @@
#include <sys/sysproto.h>
#include <sys/syscallsubr.h>
#include <sys/taskqueue.h>
+#include <sys/time.h>
#include <sys/eventhandler.h>
#include <sys/umtx.h>
@@ -70,6 +71,7 @@
#include <vm/vm_map.h>
#include <vm/vm_object.h>
+#include <machine/atomic.h>
#include <machine/cpu.h>
#ifdef COMPAT_FREEBSD32
@@ -210,6 +212,7 @@
struct abs_timeout {
int clockid;
+ bool is_realtime;
struct timespec cur;
struct timespec end;
};
@@ -257,6 +260,8 @@
static SYSCTL_NODE(_debug_umtx, OID_AUTO, chains, CTLFLAG_RD, 0, "umtx chain stats");
#endif
+static void abs_timeout_update(struct abs_timeout *timo);
+
static void umtx_shm_init(void);
static void umtxq_sysinit(void *);
static void umtxq_hash(struct umtx_key *key);
@@ -771,13 +776,15 @@
{
timo->clockid = clockid;
+ timo->is_realtime = (clockid == CLOCK_REALTIME ||
+ clockid == CLOCK_REALTIME_FAST ||
+ clockid == CLOCK_REALTIME_PRECISE);
+ abs_timeout_update(timo);
if (!absolute) {
- kern_clock_gettime(curthread, clockid, &timo->end);
- timo->cur = timo->end;
+ timo->end = timo->cur;
timespecadd(&timo->end, timeout);
} else {
timo->end = *timeout;
- kern_clock_gettime(curthread, clockid, &timo->cur);
}
}
@@ -831,26 +838,42 @@
struct umtxq_chain *uc;
int error, timo;
+ if (abstime != NULL && abstime->is_realtime) {
+ curthread->td_rtcgen = atomic_load_acq_int(&rtc_generation);
+ abs_timeout_update(abstime);
+ }
+
uc = umtxq_getchain(&uq->uq_key);
UMTXQ_LOCKED_ASSERT(uc);
for (;;) {
- if (!(uq->uq_flags & UQF_UMTXQ))
- return (0);
+ if (!(uq->uq_flags & UQF_UMTXQ)) {
+ error = 0;
+ break;
+ }
if (abstime != NULL) {
timo = abs_timeout_gethz(abstime);
- if (timo < 0)
- return (ETIMEDOUT);
+ if (timo < 0) {
+ error = ETIMEDOUT;
+ break;
+ }
} else
timo = 0;
error = msleep(uq, &uc->uc_lock, PCATCH | PDROP, wmesg, timo);
- if (error != EWOULDBLOCK) {
+ if (error != EWOULDBLOCK && error != ERELOOKUP) {
umtxq_lock(&uq->uq_key);
break;
}
- if (abstime != NULL)
+ if (abstime != NULL) {
+ if (abstime->is_realtime)
+ curthread->td_rtcgen =
+ atomic_load_acq_int(&rtc_generation);
abs_timeout_update(abstime);
+ }
umtxq_lock(&uq->uq_key);
}
+
+ curthread->td_rtcgen = 0;
+
return (error);
}
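
Two details of the reworked loop are worth spelling out. It now retries on both EWOULDBLOCK (the per-iteration tick timeout expired, though the absolute deadline may not have) and the new ERELOOKUP (the RTC was stepped, so the deadline must be recomputed), re-snapshotting rtc_generation each time around; and td_rtcgen is cleared on every exit path so the thread never appears to be sleeping on a stale clock. The negative-result convention of abs_timeout_gethz() (a negative count means the deadline already passed, visible in the timo < 0 check above) can be mimicked in userland roughly as follows; this sketch only illustrates that helper's role, not its exact code, and HZ is an assumed tick rate:

    #include <time.h>

    #define HZ      1000    /* assumed scheduler tick rate */

    /*
     * Ticks remaining until an absolute CLOCK_REALTIME deadline,
     * rounded up so the caller always sleeps before re-checking;
     * negative means the deadline has already passed.  Overflow for
     * far-future deadlines is ignored in this illustration.
     */
    static int
    deadline_to_ticks(const struct timespec *end)
    {
            struct timespec now;
            long sec, nsec;

            clock_gettime(CLOCK_REALTIME, &now);
            sec = end->tv_sec - now.tv_sec;
            nsec = end->tv_nsec - now.tv_nsec;
            if (nsec < 0) {
                    sec--;
                    nsec += 1000000000L;
            }
            if (sec < 0)
                    return (-1);
            return ((int)(sec * HZ + nsec / (1000000000L / HZ)) + 1);
    }
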
Index: sys/kern/subr_sleepqueue.c
===================================================================
--- sys/kern/subr_sleepqueue.c
+++ sys/kern/subr_sleepqueue.c
@@ -26,7 +26,7 @@
/*
* Implementation of sleep queues used to hold queue of threads blocked on
- * a wait channel. Sleep queues different from turnstiles in that wait
+ * a wait channel. Sleep queues are different from turnstiles in that wait
* channels are not owned by anyone, so there is no priority propagation.
* Sleep queues can also provide a timeout and can also be interrupted by
* signals. That said, there are several similarities between the turnstile
@@ -36,7 +36,7 @@
* a linked list of queues. An individual queue is located by using a hash
* to pick a chain, locking the chain, and then walking the chain searching
* for the queue. This means that a wait channel object does not need to
- * embed it's queue head just as locks do not embed their turnstile queue
+ * embed its queue head just as locks do not embed their turnstile queue
* head. Threads also carry around a sleep queue that they lend to the
* wait channel when blocking. Just as in turnstiles, the queue includes
* a free list of the sleep queues of other threads blocked on the same
@@ -78,6 +78,9 @@
#include <sys/sleepqueue.h>
#include <sys/stack.h>
#include <sys/sysctl.h>
+#include <sys/time.h>
+
+#include <machine/atomic.h>
#include <vm/uma.h>
@@ -98,7 +101,7 @@
#define SC_LOOKUP(wc) &sleepq_chains[SC_HASH(wc)]
#define NR_SLEEPQS 2
/*
- * There two different lists of sleep queues. Both lists are connected
+ * There are two different lists of sleep queues. Both lists are connected
* via the sq_hash entries. The first list is the sleep queue chain list
* that a sleep queue is on when it is attached to a wait channel. The
* second list is the free list hung off of a sleep queue that is attached
@@ -539,6 +542,7 @@
struct sleepqueue_chain *sc;
struct sleepqueue *sq;
struct thread *td;
+ bool rtc_changed;
td = curthread;
sc = SC_LOOKUP(wchan);
@@ -557,9 +561,16 @@
/*
* If TDF_TIMEOUT is set, then our sleep has been timed out
* already but we are still on the sleep queue, so dequeue the
- * thread and return.
+ * thread and return. Do the same if the real-time clock has
+ * been adjusted since this thread calculated its timeout
+ * based on that clock.
*/
- if (td->td_flags & TDF_TIMEOUT) {
+ rtc_changed = td->td_rtcgen != 0 &&
+ td->td_rtcgen != atomic_load_acq_int(&rtc_generation);
+ if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
+ if (rtc_changed) {
+ td->td_rtcgen = 0;
+ }
MPASS(TD_ON_SLEEPQ(td));
sq = sleepq_lookup(wchan);
if (sleepq_resume_thread(sq, td, 0)) {
@@ -572,7 +583,7 @@
#endif
}
mtx_unlock_spin(&sc->sc_lock);
- return;
+ return;
}
#ifdef SLEEPQUEUE_PROFILING
if (prof_enabled)
@@ -893,8 +904,6 @@
sleepq_broadcast(void *wchan, int flags, int pri, int queue)
{
struct sleepqueue *sq;
- struct thread *td, *tdn;
- int wakeup_swapper;
CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
@@ -905,18 +914,34 @@
KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
("%s: mismatch between sleep/wakeup and cv_*", __func__));
+ return (sleepq_remove_matching(sq, queue, NULL, pri));
+}
+
+/*
+ * Resume threads on the sleep queue that match the given predicate,
+ * or all threads if the predicate is NULL.
+ */
+int
+sleepq_remove_matching(struct sleepqueue *sq, int queue,
+ bool (*matches)(struct thread *), int pri)
+{
+ struct thread *td, *tdn;
+ int wakeup_swapper;
+
/*
- * Resume all blocked threads on the sleep queue. The last thread will
- * be given ownership of sq and may re-enqueue itself before
- * sleepq_resume_thread() returns, so we must cache the "next" queue
- * item at the beginning of the final iteration.
+ * The last thread will be given ownership of sq and may
+ * re-enqueue itself before sleepq_resume_thread() returns,
+ * so we must cache the "next" queue item at the beginning
+ * of the final iteration.
*/
wakeup_swapper = 0;
TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
thread_lock(td);
- wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
+ if (matches == NULL || matches(td))
+ wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
thread_unlock(td);
}
+
return (wakeup_swapper);
}
@@ -1052,6 +1077,32 @@
return (sleepq_resume_thread(sq, td, 0));
}
+void
+sleepq_chains_remove_matching(bool (*matches)(struct thread *))
+{
+ struct sleepqueue_chain *sc;
+ struct sleepqueue *sq;
+ int i, wakeup_swapper;
+
+ wakeup_swapper = 0;
+ for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) {
+ if (LIST_EMPTY(&sc->sc_queues)) {
+ continue;
+ }
+ mtx_lock_spin(&sc->sc_lock);
+ LIST_FOREACH(sq, &sc->sc_queues, sq_hash) {
+ for (i = 0; i < NR_SLEEPQS; ++i) {
+ wakeup_swapper |= sleepq_remove_matching(sq, i,
+ matches, 0);
+ }
+ }
+ mtx_unlock_spin(&sc->sc_lock);
+ }
+ if (wakeup_swapper) {
+ kick_proc0();
+ }
+}
+
/*
* Prints the stacks of all threads presently sleeping on wchan/queue to
* the sbuf sb. Sets count_stacks_printed to the number of stacks actually
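
sleepq_chains_remove_matching() walks every chain of the sleep-queue hash table and hands each blocked thread to the predicate, which runs with the thread lock held and returns true for threads that should be resumed; the only caller this patch adds is tc_setclock() passing sleeping_on_old_rtc. To make the calling convention concrete, here is a hypothetical second user (wake_target and belongs_to_target() are illustrative, not part of the patch):

    static struct proc *wake_target;        /* illustrative global */

    static bool
    belongs_to_target(struct thread *td)
    {
            /* Called with td's thread lock held; true resumes td. */
            return (td->td_proc == wake_target);
    }

    /* ... then, from code that wants to wake those threads: */
    sleepq_chains_remove_matching(belongs_to_target);
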
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -148,6 +148,7 @@
* o - ktrace lock
* q - td_contested lock
* r - p_peers lock
+ * s - by curthread, or by others when curthread is on sleepqueue
* t - thread lock
* u - process stat lock
* w - process timer lock
@@ -283,6 +284,7 @@
int td_dom_rr_idx; /* (k) RR Numa domain selection. */
void *td_su; /* (k) FFS SU private */
sbintime_t td_sleeptimo; /* (t) Sleep timeout. */
+ int td_rtcgen; /* (s) rtc_generation of sleep */
#define td_endzero td_sigmask
/* Copied during fork1() or create_thread(). */
Index: sys/sys/sleepqueue.h
===================================================================
--- sys/sys/sleepqueue.h
+++ sys/sys/sleepqueue.h
@@ -90,11 +90,14 @@
int flags, int queue);
struct sleepqueue *sleepq_alloc(void);
int sleepq_broadcast(void *wchan, int flags, int pri, int queue);
+void sleepq_chains_remove_matching(bool (*matches)(struct thread *));
void sleepq_free(struct sleepqueue *sq);
void sleepq_lock(void *wchan);
struct sleepqueue *sleepq_lookup(void *wchan);
void sleepq_release(void *wchan);
void sleepq_remove(struct thread *td, void *wchan);
+int sleepq_remove_matching(struct sleepqueue *sq, int queue,
+ bool (*matches)(struct thread *), int pri);
int sleepq_signal(void *wchan, int flags, int pri, int queue);
void sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt,
sbintime_t pr, int flags);
Index: sys/sys/time.h
===================================================================
--- sys/sys/time.h
+++ sys/sys/time.h
@@ -383,6 +383,8 @@
extern sbintime_t sbt_timethreshold;
extern sbintime_t sbt_tickthreshold;
+extern volatile int rtc_generation;
+
/*
* Functions for looking at our clock: [get]{bin,nano,micro}[up]time()
*
