Index: share/man/man9/Makefile =================================================================== --- share/man/man9/Makefile +++ share/man/man9/Makefile @@ -286,6 +286,7 @@ securelevel_gt.9 \ selrecord.9 \ sema.9 \ + seq.9 \ sf_buf.9 \ sglist.9 \ shm_map.9 \ @@ -1809,6 +1810,10 @@ sema.9 sema_trywait.9 \ sema.9 sema_value.9 \ sema.9 sema_wait.9 +MLINKS+=seq.9 seq_consistent.9 \ + seq.9 seq_read.9 \ + seq.9 seq_write_begin.9 \ + seq.9 seq_write_end.9 MLINKS+=sf_buf.9 sf_buf_alloc.9 \ sf_buf.9 sf_buf_free.9 \ sf_buf.9 sf_buf_kva.9 \ Index: share/man/man9/seq.9 =================================================================== --- /dev/null +++ share/man/man9/seq.9 @@ -0,0 +1,127 @@ +.\" +.\" Copyright (C) 2018 Mariusz Zaborski +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice(s), this list of conditions and the following disclaimer as +.\" the first lines of this file unmodified other than the possible +.\" addition of one or more copyright notices. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice(s), this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY +.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +.\" DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY +.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH +.\" DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd August 16, 2018 +.Dt SEQ 9 +.Os +.Sh NAME +.Nm seq_consistent , +.Nm seq_read , +.Nm seq_write_begin , +.Nm seq_write_end +.Nd "lockless read algorithm" +.Sh SYNOPSIS +.In sys/seq.h +.Ft void +.Fn seq_write_begin "seq_t *seqp" +.Ft void +.Fn seq_write_end "seq_t *seqp" +.Ft seq_t +.Fn seq_read "seq_t *seqp" +.Ft seq_t +.Fn seq_consistent "const seq_t *seqp" "seq_t oldseq" +.Sh DESCRIPTION +The +.Nm seq +interface allows readers and writers to work with a consistent snapshot +using an object sequence number instead of using heavy locks. +The writer functions increment the sequence number twice, at the beginning and +end of the transaction. +The reader at the end of a transaction checks if the sequence number has changed. +If the sequence number didn't change the object wasn't modified, and fetched +variables are valid. +If the sequence number has changed the object was modified and the fetch should +be repeated. +When the sequence number is odd, the object change is in progress and the +reader will wait until the sequence number becomes even. +.Pp +The functions +.Fn seq_write_begin +and +.Fn seq_write_end +are used to create a transaction for the writer, and notify the readers that the +object will be modified. +Some additional fencing may be needed, depending on the CPU. +Modern AMD CPUs provide strong enough guarantees to not require any fencing by +the reader or writer.
+.Pp +The +.Fn seq_read +function returns the current sequence number. +If a write is in progress, this function will spin until the +write completes. +.Pp +The +.Fn seq_consistent +function compares the current sequence number with a previously read one. +The +.Fa oldseq +variable should contain a sequence number from the beginning of the read +transaction. +.Sh EXAMPLES +The following example for a writer changes the var1 and var2 variables in the +obj structure: +.Bd -literal +lock_exclusive(&obj->lock); +seq_write_begin(&obj->seq); +obj->var1 = 1; +obj->var2 = 2; +seq_write_end(&obj->seq); +unlock_exclusive(&obj->lock); +.Ed +The following example for a reader reads the var1 and var2 variables from the +obj structure. +If the sequence number has changed, the reader restarts the whole process. +.Bd -literal +int var1, var2; +seq_t seq; + +for (;;) { + seq = seq_read(&obj->seq); + var1 = obj->var1; + var2 = obj->var2; + if (seq_consistent(&obj->seq, seq)) + break; +} +.Ed +.Sh CAVEATS +There is no guarantee of progress. +If there are a lot of writers, the reader can be starved. +.Pp +Theoretically, if reading takes a very long time and there are many writers, +the counter may overflow. +In that case the reader will not notice that the object was changed. +Given that this needs 4 billion transactional writes across a single contended +reader, it is unlikely to ever happen. +.Sh AUTHORS +The +.Nm seq +functions were implemented by +.An Mateusz Guzik Aq Mt mjg@FreeBSD.org . +This manual page was written by +.An Mariusz Zaborski Aq Mt oshogbo@FreeBSD.org . Index: sys/sys/seq.h =================================================================== --- sys/sys/seq.h +++ sys/sys/seq.h @@ -40,55 +40,6 @@ #ifdef _KERNEL -/* - * seq allows readers and writers to work with a consistent snapshot. Modifying - * operations must be enclosed within a transaction delineated by - * seq_write_beg/seq_write_end.
The trick works by having the writer increment - * the sequence number twice, at the beginning and end of the transaction. - * The reader detects that the sequence number has not changed between its start - * and end, and that the sequence number is even, to validate consistency. - * - * Some fencing (both hard fencing and compiler barriers) may be needed, - * depending on the cpu. Modern AMD cpus provide strong enough guarantees to not - * require any fencing by the reader or writer. - * - * Example usage: - * - * writers: - * lock_exclusive(&obj->lock); - * seq_write_begin(&obj->seq); - * obj->var1 = ...; - * obj->var2 = ...; - * seq_write_end(&obj->seq); - * unlock_exclusive(&obj->lock); - * - * readers: - * int var1, var2; - * seq_t seq; - * - * for (;;) { - * seq = seq_read(&obj->seq); - * var1 = obj->var1; - * var2 = obj->var2; - * if (seq_consistent(&obj->seq, seq)) - * break; - * } - * ..... - * - * Writers may not block or sleep in any way. - * - * There are 2 minor caveats in this implementation: - * - * 1. There is no guarantee of progress. That is, a large number of writers can - * interfere with the execution of the readers and cause the code to live-lock - * in a loop trying to acquire a consistent snapshot. - * - * 2. If the reader loops long enough, the counter may overflow and eventually - * wrap back to its initial value, fooling the reader into accepting the - * snapshot. Given that this needs 4 billion transactional writes across a - * single contended reader, it is unlikely to ever happen. - */ - /* A hack to get MPASS macro. */ #include