Index: head/lib/geom/sched/gsched.8
===================================================================
--- head/lib/geom/sched/gsched.8	(revision 356184)
+++ head/lib/geom/sched/gsched.8	(nonexistent)
@@ -1,162 +0,0 @@
-.\" Copyright (c) 2009-2010 Fabio Checconi
-.\" Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
-.\" All rights reserved.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\"    notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\"    notice, this list of conditions and the following disclaimer in the
-.\"    documentation and/or other materials provided with the distribution.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" $FreeBSD$
-.\"
-.Dd July 26, 2012
-.Dt GSCHED 8
-.Os
-.Sh NAME
-.Nm gsched
-.Nd "control utility for disk scheduler GEOM class"
-.Sh SYNOPSIS
-.Nm
-.Cm create
-.Op Fl v
-.Op Fl a Ar algorithm
-.Ar provider ...
-.Nm
-.Cm insert
-.Op Fl v
-.Op Fl a Ar algorithm
-.Ar provider ...
-.Nm
-.Cm configure
-.Op Fl v
-.Op Fl a Ar algorithm
-.Ar node ...
-.Nm
-.Cm destroy
-.Op Fl fv
-.Ar node ...
-.Nm
-.Cm reset
-.Op Fl v
-.Ar node ...
-.Nm
-.Cm { list | status | load | unload }
-.Sh DESCRIPTION
-The
-.Nm
-utility (also callable as
-.Nm geom sched ... )
-changes the scheduling policy of the requests going to a provider.
-.Pp
-The first argument to
-.Nm
-indicates an action to be performed:
-.Bl -tag -width ".Cm configure"
-.It Cm create
-Create a new provider and geom node using the specified scheduling algorithm.
-.Ar algorithm
-is the name of the scheduling algorithm used for the provider.
-Available algorithms include:
-.Ar rr ,
-which implements anticipatory scheduling with round robin service
-among clients;
-.Ar as ,
-which implements a simple form of anticipatory scheduling with
-no per-client queue.
-.Pp
-If the operation succeeds, the new provider should appear with name
-.Pa /dev/ Ns Ao Ar dev Ac Ns Pa .sched. .
-The kernel module
-.Pa geom_sched.ko
-will be loaded if it is not loaded already.
-.It Cm insert
-Operates as "create", but the insertion is "transparent",
-i.e., the existing provider is rerouted to the newly created geom,
-which in turn forwards requests to the existing geom.
-This operation allows one to start or stop a scheduling service
-on an already existing provider.
-.Pp
-A subsequent "destroy" will remove the newly created geom and
-hook the provider back to the original geom.
-.It Cm configure
-Configure an existing scheduling provider.
-It supports the same options as the
-.Cm create
-command.
-.It Cm destroy
-Destroy the geom specified in the parameter.
-.It Cm reset
-Do nothing.
-.It Cm list | status | load | unload
-See
-.Xr geom 8 .
-.El
-.Pp
-Additional options:
-.Bl -tag -width ".Fl f"
-.It Fl f
-Force the removal of the specified provider.
-.It Fl v
-Be more verbose.
-.El
-.Sh SYSCTL VARIABLES
-The following
-.Xr sysctl 8
-variables can be used to control the behavior of the
-.Nm SCHED
-GEOM class.
-The default value is shown next to each variable.
-.Bl -tag -width indent
-.It Va kern.geom.sched.debug : No 0
-Debug level of the
-.Nm SCHED
-GEOM class.
-This can be set to a number between 0 and 2 inclusive.
-If set to 0, minimal debug information is printed; if set to 2, the
-maximum amount of debug information is printed.
-.El
-.Sh EXIT STATUS
-Exit status is 0 on success, and 1 if the command fails.
-.Sh EXAMPLES
-The following example shows how to create a scheduling provider for disk
-.Pa /dev/ada0 ,
-and how to destroy it.
-.Bd -literal -offset indent
-# Load the geom_sched module:
-kldload geom_sched
-# Load some scheduler classes used by geom_sched:
-kldload gsched_rr
-# Configure device ada0 to use scheduler "rr":
-geom sched insert -a rr ada0
-# Now provider ada0 uses the "rr" algorithm;
-# the new geom is ada0.sched.
-# Remove the scheduler on the device:
-geom sched destroy -v ada0.sched.
-.Ed
-.Sh SEE ALSO
-.Xr geom 4 ,
-.Xr geom 8
-.Sh HISTORY
-The
-.Nm
-utility first appeared in
-.Fx 8.1 .
-.Sh AUTHORS
-.An Fabio Checconi Aq Mt fabio@FreeBSD.org
-.An Luigi Rizzo Aq Mt luigi@FreeBSD.org

Property changes on: head/lib/geom/sched/gsched.8
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property

Index: head/lib/geom/sched/Makefile.depend
===================================================================
--- head/lib/geom/sched/Makefile.depend	(revision 356184)
+++ head/lib/geom/sched/Makefile.depend	(nonexistent)
@@ -1,19 +0,0 @@
-# $FreeBSD$
-# Autogenerated - do NOT edit!
-
-DIRDEPS = \
-	gnu/lib/csu \
-	include \
-	include/xlocale \
-	lib/${CSU_DIR} \
-	lib/libc \
-	lib/libcompiler_rt \
-	lib/libgeom \
-	sbin/geom/core \
-
-
-.include <dirdeps.mk>
-
-.if ${DEP_RELDIR} == ${_DEP_RELDIR}
-# local dependencies - needed for -jN in clean tree
-.endif

Property changes on: head/lib/geom/sched/Makefile.depend
___________________________________________________________________
Deleted: svn:eol-style
## -1 +0,0 ##
-native
\ No newline at end of property
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property
Deleted: svn:mime-type
## -1 +0,0 ##
-text/plain
\ No newline at end of property

Index: head/lib/geom/sched/geom_sched.c
===================================================================
--- head/lib/geom/sched/geom_sched.c	(revision 356184)
+++ head/lib/geom/sched/geom_sched.c	(nonexistent)
@@ -1,128 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2009 Fabio Checconi
- * Copyright (c) 2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2.
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id$ - * $FreeBSD$ - * - * This file implements the userspace library used by the 'geom' - * command to load and manipulate disk schedulers. - */ - -#include -#include -#include -#include - -#include -#include -#include - -#include "core/geom.h" -#include "misc/subr.h" - -#define G_SCHED_VERSION 0 - -uint32_t lib_version = G_LIB_VERSION; -uint32_t version = G_SCHED_VERSION; - -/* - * storage for parameters used by this geom class. - * Right now only the scheduler name is used. - */ -#define GSCHED_ALGO "rr" /* default scheduler */ - -/* - * Adapt to differences in geom library. - * in V1 struct g_command misses gc_argname, eld, and G_BOOL is undefined - */ -#if G_LIB_VERSION <= 1 -#define G_TYPE_BOOL G_TYPE_NUMBER -#endif -#if G_LIB_VERSION >= 3 && G_LIB_VERSION <= 4 -#define G_ARGNAME NULL, -#else -#define G_ARGNAME -#endif - -static void -gcmd_createinsert(struct gctl_req *req, unsigned flags __unused) -{ - const char *reqalgo; - char name[64]; - - if (gctl_has_param(req, "algo")) - reqalgo = gctl_get_ascii(req, "algo"); - else - reqalgo = GSCHED_ALGO; - - snprintf(name, sizeof(name), "gsched_%s", reqalgo); - /* - * Do not complain about errors here, gctl_issue() - * will fail anyway. - */ - if (modfind(name) < 0) - kldload(name); - gctl_issue(req); -} - -struct g_command class_commands[] = { - { "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, gcmd_createinsert, - { - { 'a', "algo", GSCHED_ALGO, G_TYPE_STRING }, - G_OPT_SENTINEL - }, - G_ARGNAME "[-v] [-a algorithm_name] dev ..." - }, - { "insert", G_FLAG_VERBOSE | G_FLAG_LOADKLD, gcmd_createinsert, - { - { 'a', "algo", GSCHED_ALGO, G_TYPE_STRING }, - G_OPT_SENTINEL - }, - G_ARGNAME "[-v] [-a algorithm_name] dev ..." - }, - { "configure", G_FLAG_VERBOSE, NULL, - { - { 'a', "algo", GSCHED_ALGO, G_TYPE_STRING }, - G_OPT_SENTINEL - }, - G_ARGNAME "[-v] [-a algorithm_name] prov ..." - }, - { "destroy", G_FLAG_VERBOSE, NULL, - { - { 'f', "force", NULL, G_TYPE_BOOL }, - G_OPT_SENTINEL - }, - G_ARGNAME "[-fv] prov ..." - }, - { "reset", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, - G_ARGNAME "[-v] prov ..." 
-	},
-	G_CMD_SENTINEL
-};

Property changes on: head/lib/geom/sched/geom_sched.c
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property

Index: head/lib/geom/sched/Makefile
===================================================================
--- head/lib/geom/sched/Makefile	(revision 356184)
+++ head/lib/geom/sched/Makefile	(nonexistent)
@@ -1,9 +0,0 @@
-# GEOM_LIBRARY_PATH
-# $FreeBSD$
-
-PACKAGE=runtime
-.PATH: ${.CURDIR:H:H}/misc
-
-GEOM_CLASS=	sched
-
-.include <bsd.lib.mk>

Property changes on: head/lib/geom/sched/Makefile
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property

Index: head/lib/geom/Makefile.classes
===================================================================
--- head/lib/geom/Makefile.classes	(revision 356184)
+++ head/lib/geom/Makefile.classes	(revision 356185)
@@ -1,26 +1,25 @@
 # $FreeBSD$
 
 .if !defined(COMPAT_32BIT)
 GEOM_CLASS_DIR?=/lib/geom
 .else
 GEOM_CLASS_DIR?=/usr/lib32/geom
 .endif
 
 GEOM_CLASSES=	cache
 GEOM_CLASSES+=	concat
 .if ${MK_OPENSSL} != "no"
 GEOM_CLASSES+=	eli
 .endif
 GEOM_CLASSES+=	journal
 GEOM_CLASSES+=	label
 GEOM_CLASSES+=	mirror
 GEOM_CLASSES+=	mountver
 GEOM_CLASSES+=	multipath
 GEOM_CLASSES+=	nop
 GEOM_CLASSES+=	part
 GEOM_CLASSES+=	raid
 GEOM_CLASSES+=	raid3
-GEOM_CLASSES+=	sched
 GEOM_CLASSES+=	shsec
 GEOM_CLASSES+=	stripe
 GEOM_CLASSES+=	virstor

Index: head/sys/geom/sched/g_sched.h
===================================================================
--- head/sys/geom/sched/g_sched.h	(revision 356184)
+++ head/sys/geom/sched/g_sched.h	(nonexistent)
@@ -1,111 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2009-2010 Fabio Checconi
- * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _G_SCHED_H_
-#define _G_SCHED_H_
-
-/*
- * $Id$
- * $FreeBSD$
- *
- * Header for the geom_sched class (userland library and kernel part).
- * See g_sched.c for documentation.
- * The userland code only needs the three G_SCHED_* values below.
- */
-
-#define	G_SCHED_CLASS_NAME	"SCHED"
-#define	G_SCHED_VERSION		0
-#define	G_SCHED_SUFFIX		".sched."
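[The three definitions above are the whole userland contract: the class name, its
version, and the suffix behind the /dev/<dev>.sched. names documented in gsched.8.
A minimal sketch, editorial and not part of the commit (the helper name is
invented), of how a userland consumer could derive the scheduled provider path:]

	#include <stdio.h>

	#define G_SCHED_SUFFIX	".sched."	/* as defined in g_sched.h above */

	/* Hypothetical helper: map "ada0" to "/dev/ada0.sched.". */
	static void
	sched_provider_path(const char *dev, char *buf, size_t len)
	{
		snprintf(buf, len, "/dev/%s%s", dev, G_SCHED_SUFFIX);
	}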
-
-#ifdef _KERNEL
-#define	G_SCHED_DEBUG(lvl, ...)	\
-	_GEOM_DEBUG("GEOM_SCHED", me.gs_debug, (lvl), NULL, __VA_ARGS__)
-#define	G_SCHED_LOGREQ(bp, ...)	\
-	_GEOM_DEBUG("GEOM_SCHED", me.gs_debug, 2, (bp), __VA_ARGS__)
-
-LIST_HEAD(g_hash, g_sched_class);
-
-/*
- * Descriptor of a scheduler.
- * In addition to the obvious fields, sc_flushing and sc_pending
- * support dynamic switching of scheduling algorithm.
- * Normally, sc_flushing is 0, and requests that are scheduled are
- * also added to the sc_pending queue, and removed when we receive
- * the 'done' event.
- *
- * When we are transparently inserted on an existing provider,
- * sc_proxying is set. The detach procedure is slightly different.
- *
- * When switching schedulers, sc_flushing is set so requests bypass us,
- * and at the same time we update the pointer in the pending bios
- * to ignore us when they return up.
- * XXX it would be more efficient to implement sc_pending with
- * a generation number: the softc generation is increased when
- * we change scheduling algorithm, we store the current generation
- * number in the pending bios, and when they come back we ignore
- * the done() call if the generation numbers do not match.
- */
-struct g_sched_softc {
-	/*
-	 * Generic fields used by any scheduling algorithm:
-	 * a mutex, the class descriptor, flags, list of pending
-	 * requests (used when flushing the module) and support
-	 * for hash tables where we store per-flow queues.
-	 */
-	struct mtx	sc_mtx;
-	struct g_gsched	*sc_gsched;	/* Scheduler descriptor. */
-	int		sc_pending;	/* Pending requests. */
-	int		sc_flags;	/* Various flags. */
-
-	/*
-	 * Hash tables to store per-flow queues are generally useful
-	 * so we handle them in the common code.
-	 * sc_hash and sc_mask are parameters of the hash table,
-	 * the last two fields are used to periodically remove
-	 * expired items from the hash table.
-	 */
-	struct g_hash	*sc_hash;
-	u_long		sc_mask;
-	int		sc_flush_ticks;	/* Next tick for a flush. */
-	int		sc_flush_bucket; /* Next bucket to flush. */
-
-	/*
-	 * Pointer to the algorithm's private data, which is the value
-	 * returned by sc_gsched->gs_init() . A NULL here means failure.
-	 * XXX intptr_t might be more appropriate.
-	 */
-	void		*sc_data;
-};
-
-#define	G_SCHED_PROXYING	1
-#define	G_SCHED_FLUSHING	2
-
-#endif	/* _KERNEL */
-
-#endif	/* _G_SCHED_H_ */

Property changes on: head/sys/geom/sched/g_sched.h
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property

Index: head/sys/geom/sched/README
===================================================================
--- head/sys/geom/sched/README	(revision 356184)
+++ head/sys/geom/sched/README	(nonexistent)
@@ -1,142 +0,0 @@
-
-	--- GEOM BASED DISK SCHEDULERS FOR FREEBSD ---
-
-This code contains a framework for GEOM-based disk schedulers and a
-couple of sample scheduling algorithms that use the framework and
-implement two forms of "anticipatory scheduling" (see below for more
-details).
-
-As a quick example of what this code can give you, try to run "dd",
-"tar", or some other program with highly SEQUENTIAL access patterns,
-together with "cvs", "cvsup", "svn" or other highly RANDOM access patterns
-(this is not a made-up example: it is pretty common for developers
-to have one or more apps doing random accesses, and others that do
-sequential accesses, e.g., loading large binaries from disk, checking
-the integrity of tarballs, watching media streams and so on).
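[Returning to the XXX note in g_sched.h above: the suggested generation-number
scheme would replace the sc_pending counter. A rough editorial sketch of that
idea follows; the struct, field, and function names are invented, and it assumes
a spare per-bio pointer field (bio_caller2 here) is free for the purpose.]

	/* Softc fragment: one counter, bumped on every algorithm switch. */
	struct gen_softc {
		unsigned int	sc_gen;
	};

	/* On dispatch, tag the bio with the current generation. */
	static void
	gen_tag(struct gen_softc *sc, struct bio *bp)
	{
		bp->bio_caller2 = (void *)(uintptr_t)sc->sc_gen;
	}

	/* On completion, ignore the done() call if generations differ. */
	static int
	gen_is_stale(struct gen_softc *sc, struct bio *bp)
	{
		return ((uintptr_t)bp->bio_caller2 != (uintptr_t)sc->sc_gen);
	}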
-
-These are the results we get on a local machine (AMD BE2400 dual
-core CPU, SATA 250GB disk):
-
-    /mnt is a partition mounted on /dev/ad0s1f
-
-    cvs:	cvs -d /mnt/home/ncvs-local update -Pd /mnt/ports
-    dd-read:	dd bs=128k of=/dev/null if=/dev/ad0 (or ad0-sched-)
-    dd-write:	dd bs=128k if=/dev/zero of=/mnt/largefile
-
-			NO SCHEDULER		RR SCHEDULER
-			dd	cvs		dd	cvs
-
-    dd-read only	72 MB/s	----		72 MB/s	---
-    dd-write only	55 MB/s	---		55 MB/s	---
-    dd-read+cvs		 6 MB/s	ok		30 MB/s	ok
-    dd-write+cvs	55 MB/s	slooow		14 MB/s	ok
-
-As you can see, when a cvs is running concurrently with dd, the
-performance drops dramatically, and depending on read or write mode,
-one of the two is severely penalized. The use of the RR scheduler
-in this example makes the dd-reader go much faster when competing
-with cvs, and lets cvs progress when competing with a writer.
-
-To try it out:
-
-1. PLEASE MAKE SURE THAT THE DISK THAT YOU WILL BE USING FOR TESTS
-   DOES NOT CONTAIN PRECIOUS DATA.
-   This is experimental code, so we make no guarantees, though
-   I am routinely using it on my desktop and laptop.
-
-2. EXTRACT AND BUILD THE PROGRAMS
-   A 'make install' in the directory should work (with root privs),
-   or you can even try the binary modules.
-   If you want to build the modules yourself, look at the Makefile.
-
-3. LOAD THE MODULE, CREATE A GEOM NODE, RUN TESTS
-
-   The scheduler's module must be loaded first:
-
-   # kldload gsched_rr
-
-   substitute with gsched_as to test AS. Then, supposing that you are
-   using /dev/ad0 for testing, a scheduler can be attached to it with:
-
-   # geom sched insert ad0
-
-   The scheduler is inserted transparently in the geom chain, so
-   mounted partitions and filesystems will keep working, but
-   now requests will go through the scheduler.
-
-   To change scheduler on-the-fly, you can reconfigure the geom:
-
-   # geom sched configure -a as ad0.sched.
-
-   assuming that gsched_as was loaded previously.
-
-4. SCHEDULER REMOVAL
-
-   In principle it is possible to remove the scheduler module
-   even on an active chain by doing
-
-   # geom sched destroy ad0.sched.
-
-   However, there is some race in the geom subsystem which makes
-   the removal unsafe if there are active requests on a chain.
-   So, in order to reduce the risk of data losses, make sure
-   you don't remove a scheduler from a chain with ongoing transactions.
-
---- NOTES ON THE SCHEDULERS ---
-
-The important contribution of this code is the framework to experiment
-with different scheduling algorithms. 'Anticipatory scheduling'
-is a very powerful technique based on the following reasoning:
-
-    The disk throughput is much better if it serves sequential requests.
-    If we have a mix of sequential and random requests, and we see a
-    non-sequential request, do not serve it immediately but instead wait
-    a little bit (2..5ms) to see if there is another one coming that
-    the disk can serve more efficiently.
-
-There are many details that should be added to make sure that the
-mechanism is effective with different workloads and systems, to
-gain a few extra percent in performance, to improve fairness,
-insulation among processes etc. A discussion of the vast literature
-on the subject is beyond the purpose of this short note.
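[The rr scheduler below (gs_rr.c) turns this reasoning into two per-client
statistics: an average "thinktime" (how quickly the client issues its next
request) and an average seek distance. A condensed editorial sketch of that
decision follows; g_rr_should_anticipate() in gs_rr.c is the real version, and
the 8192 seek-distance threshold is taken from it.]

	/*
	 * Anticipate only if the client historically comes back quickly
	 * (thinktime below the anticipation window) and stays nearby
	 * (small seek distance); otherwise idling the disk is a loss.
	 */
	static int
	should_anticipate(unsigned thinktime_ticks, off_t seekdist,
	    unsigned wait_ticks)
	{
		if (thinktime_ticks > wait_ticks)
			return (0);	/* client thinks too long */
		if (seekdist > 8192)
			return (0);	/* accesses not sequential enough */
		return (1);		/* worth waiting 2..5 ms */
	}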
-
---------------------------------------------------------------------------
-
-TRANSPARENT INSERT/DELETE
-
-geom_sched is an ordinary geom module; however, it is convenient
-to plug it transparently into the geom graph, so that one can
-enable or disable scheduling on a mounted filesystem, and the
-names in /etc/fstab do not depend on the presence of the scheduler.
-
-To understand how this works in practice, remember that in GEOM
-we have "providers" and "geom" objects.
-Say that we want to hook a scheduler on provider "ad0",
-accessible through pointer 'pp'. Originally, pp is attached to
-geom "ad0" (same name, different object) accessible through pointer old_gp
-
-  BEFORE	---> [ pp --> old_gp ...]
-
-A normal "geom sched create ad0" call would create a new geom node
-on top of provider ad0/pp, and export a newly created provider
-("ad0.sched." accessible through pointer newpp).
-
-  AFTER create	---> [ newpp --> gp --> cp ]   ---> [ pp --> old_gp ... ]
-
-On top of newpp, a whole tree will be created automatically, and we
-can e.g. mount partitions on /dev/ad0.sched.s1d, and those requests
-will go through the scheduler, whereas any partition mounted on
-the pre-existing device entries will not go through the scheduler.
-
-With the transparent insert mechanism, the original provider "ad0"/pp
-is hooked to the newly created geom, as follows:
-
-  AFTER insert	---> [ pp --> gp --> cp ]  ---> [ newpp --> old_gp ... ]
-
-so anything that was previously using provider pp will now have
-the requests routed through the scheduler node.
-
-A removal ("geom sched destroy ad0.sched.") will restore the original
-configuration.
-
-# $FreeBSD$

Property changes on: head/sys/geom/sched/README
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property

Index: head/sys/geom/sched/gs_scheduler.h
===================================================================
--- head/sys/geom/sched/gs_scheduler.h	(revision 356184)
+++ head/sys/geom/sched/gs_scheduler.h	(nonexistent)
@@ -1,239 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
- *
- * Copyright (c) 2009-2010 Fabio Checconi
- * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id$ - * $FreeBSD$ - * - * Prototypes for GEOM-based disk scheduling algorithms. - * See g_sched.c for generic documentation. - * - * This file is used by the kernel modules implementing the various - * scheduling algorithms. They should provide all the methods - * defined in struct g_gsched, and also invoke the macro - * DECLARE_GSCHED_MODULE - * which registers the scheduling algorithm with the geom_sched module. - * - * The various scheduling algorithms do not need to know anything - * about geom, they only need to handle the 'bio' requests they - * receive, pass them down when needed, and use the locking interface - * defined below. - */ - -#ifndef _G_GSCHED_H_ -#define _G_GSCHED_H_ - -#ifdef _KERNEL -#include -#include -#include -#include -#include -#include -#include "g_sched.h" - -/* - * This is the interface exported to scheduling modules. - * - * gs_init() is called when our scheduling algorithm - * starts being used by a geom 'sched' - * - * gs_fini() is called when the algorithm is released. - * - * gs_start() is called when a new request comes in. It should - * enqueue the request and return 0 if success, or return non-zero - * in case of failure (meaning the request is passed down). - * The scheduler can use bio->bio_caller1 to store a non-null - * pointer meaning the request is under its control. - * - * gs_next() is called in a loop by g_sched_dispatch(), right after - * gs_start(), or on timeouts or 'done' events. It should return - * immediately, either a pointer to the bio to be served or NULL - * if no bio should be served now. If force is specified, a - * work-conserving behavior is expected. - * - * gs_done() is called when a request under service completes. - * In turn the scheduler may decide to call the dispatch loop - * to serve other pending requests (or make sure there is a pending - * timeout to avoid stalls). - * - * gs_init_class() is called when a new client (as determined by - * the classifier) starts being used. - * - * gs_hash_unref() is called right before the class hashtable is - * destroyed; after this call, the scheduler is supposed to hold no - * more references to the elements in the table. - */ - -/* Forward declarations for prototypes. 
*/ -struct g_geom; -struct g_sched_class; - -typedef void *gs_init_t (struct g_geom *geom); -typedef void gs_fini_t (void *data); -typedef int gs_start_t (void *data, struct bio *bio); -typedef void gs_done_t (void *data, struct bio *bio); -typedef struct bio *gs_next_t (void *data, int force); -typedef int gs_init_class_t (void *data, void *priv); -typedef void gs_fini_class_t (void *data, void *priv); -typedef void gs_hash_unref_t (void *data); - -struct g_gsched { - const char *gs_name; - int gs_refs; - int gs_priv_size; - - gs_init_t *gs_init; - gs_fini_t *gs_fini; - gs_start_t *gs_start; - gs_done_t *gs_done; - gs_next_t *gs_next; - g_dumpconf_t *gs_dumpconf; - - gs_init_class_t *gs_init_class; - gs_fini_class_t *gs_fini_class; - gs_hash_unref_t *gs_hash_unref; - - LIST_ENTRY(g_gsched) glist; -}; - -#define KTR_GSCHED KTR_SPARE4 - -MALLOC_DECLARE(M_GEOM_SCHED); - -/* - * Basic classification mechanism. Each request is associated to - * a g_sched_class, and each scheduler has the opportunity to set - * its own private data for the given (class, geom) pair. The - * private data have a base type of g_sched_private, and are - * extended at the end with the actual private fields of each - * scheduler. - */ -struct g_sched_class { - int gsc_refs; - int gsc_expire; - u_long gsc_key; - LIST_ENTRY(g_sched_class) gsc_clist; - - void *gsc_priv[0]; -}; - -/* - * Manipulate the classifier's data. g_sched_get_class() gets a reference - * to the class corresponding to bp in gp, allocating and initializing - * it if necessary. g_sched_put_class() releases the reference. - * The returned value points to the private data for the class. - */ -void *g_sched_get_class(struct g_geom *gp, struct bio *bp); -void g_sched_put_class(struct g_geom *gp, void *priv); - -static inline struct g_sched_class * -g_sched_priv2class(void *priv) -{ - - return ((struct g_sched_class *)((u_long)priv - - offsetof(struct g_sched_class, gsc_priv))); -} - -static inline void -g_sched_priv_ref(void *priv) -{ - struct g_sched_class *gsc; - - gsc = g_sched_priv2class(priv); - gsc->gsc_refs++; -} - -/* - * Locking interface. When each operation registered with the - * scheduler is invoked, a per-instance lock is taken to protect - * the data associated with it. If the scheduler needs something - * else to access the same data (e.g., a callout) it must use - * these functions. - */ -void g_sched_lock(struct g_geom *gp); -void g_sched_unlock(struct g_geom *gp); - -/* - * Restart request dispatching. Must be called with the per-instance - * mutex held. - */ -void g_sched_dispatch(struct g_geom *geom); - -/* - * Simple gathering of statistical data, used by schedulers to collect - * info on process history. Just keep an exponential average of the - * samples, with some extra bits of precision. - */ -struct g_savg { - uint64_t gs_avg; - unsigned int gs_smpl; -}; - -static inline void -g_savg_add_sample(struct g_savg *ss, uint64_t sample) -{ - - /* EMA with alpha = 0.125, fixed point, 3 bits of precision. */ - ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3); - ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3); -} - -static inline int -g_savg_valid(struct g_savg *ss) -{ - - /* We want at least 8 samples to deem an average as valid. */ - return (ss->gs_smpl > 7); -} - -static inline uint64_t -g_savg_read(struct g_savg *ss) -{ - - return (ss->gs_avg / ss->gs_smpl); -} - -/* - * Declaration of a scheduler module. 
- */ -int g_gsched_modevent(module_t mod, int cmd, void *arg); - -#define DECLARE_GSCHED_MODULE(name, gsched) \ - static moduledata_t name##_mod = { \ - #name, \ - g_gsched_modevent, \ - gsched, \ - }; \ - DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \ - MODULE_DEPEND(name, geom_sched, 0, 0, 0); - -#endif /* _KERNEL */ - -#endif /* _G_GSCHED_H_ */ Property changes on: head/sys/geom/sched/gs_scheduler.h ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/geom/sched/gs_rr.c =================================================================== --- head/sys/geom/sched/gs_rr.c (revision 356184) +++ head/sys/geom/sched/gs_rr.c (nonexistent) @@ -1,701 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2009-2010 Fabio Checconi - * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id$ - * $FreeBSD$ - * - * A round-robin (RR) anticipatory scheduler, with per-client queues. - * - * The goal of this implementation is to improve throughput compared - * to the pure elevator algorithm, and insure some fairness among - * clients. - * - * Requests coming from the same client are put in the same queue. - * We use anticipation to help reducing seeks, and each queue - * is never served continuously for more than a given amount of - * time or data. Queues are then served in a round-robin fashion. - * - * Each queue can be in any of the following states: - * READY immediately serve the first pending request; - * BUSY one request is under service, wait for completion; - * IDLING do not serve incoming requests immediately, unless - * they are "eligible" as defined later. - * - * Scheduling is made looking at the status of all queues, - * and the first one in round-robin order is privileged. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "gs_scheduler.h" - -/* possible states of the scheduler */ -enum g_rr_state { - G_QUEUE_READY = 0, /* Ready to dispatch. */ - G_QUEUE_BUSY, /* Waiting for a completion. */ - G_QUEUE_IDLING /* Waiting for a new request. 
*/ -}; - -/* possible queue flags */ -enum g_rr_flags { - /* G_FLAG_COMPLETED means that the field q_slice_end is valid. */ - G_FLAG_COMPLETED = 1, /* Completed a req. in the current budget. */ -}; - -struct g_rr_softc; - -/* - * Queue descriptor, containing reference count, scheduling - * state, a queue of pending requests, configuration parameters. - * Queues with pending request(s) and not under service are also - * stored in a Round Robin (RR) list. - */ -struct g_rr_queue { - struct g_rr_softc *q_sc; /* link to the parent */ - - enum g_rr_state q_status; - unsigned int q_service; /* service received so far */ - int q_slice_end; /* actual slice end time, in ticks */ - enum g_rr_flags q_flags; /* queue flags */ - struct bio_queue_head q_bioq; - - /* Scheduling parameters */ - unsigned int q_budget; /* slice size in bytes */ - unsigned int q_slice_duration; /* slice size in ticks */ - unsigned int q_wait_ticks; /* wait time for anticipation */ - - /* Stats to drive the various heuristics. */ - struct g_savg q_thinktime; /* Thinktime average. */ - struct g_savg q_seekdist; /* Seek distance average. */ - - int q_bionum; /* Number of requests. */ - - off_t q_lastoff; /* Last submitted req. offset. */ - int q_lastsub; /* Last submitted req. time. */ - - /* Expiration deadline for an empty queue. */ - int q_expire; - - TAILQ_ENTRY(g_rr_queue) q_tailq; /* RR list link field */ -}; - -/* List types. */ -TAILQ_HEAD(g_rr_tailq, g_rr_queue); - -/* list of scheduler instances */ -LIST_HEAD(g_scheds, g_rr_softc); - -/* Default quantum for RR between queues. */ -#define G_RR_DEFAULT_BUDGET 0x00800000 - -/* - * Per device descriptor, holding the Round Robin list of queues - * accessing the disk, a reference to the geom, and the timer. - */ -struct g_rr_softc { - struct g_geom *sc_geom; - - /* - * sc_active is the queue we are anticipating for. - * It is set only in gs_rr_next(), and possibly cleared - * only in gs_rr_next() or on a timeout. - * The active queue is never in the Round Robin list - * even if it has requests queued. - */ - struct g_rr_queue *sc_active; - struct callout sc_wait; /* timer for sc_active */ - - struct g_rr_tailq sc_rr_tailq; /* the round-robin list */ - int sc_nqueues; /* number of queues */ - - /* Statistics */ - int sc_in_flight; /* requests in the driver */ - - LIST_ENTRY(g_rr_softc) sc_next; -}; - -/* Descriptor for bounded values, min and max are constant. */ -struct x_bound { - const int x_min; - int x_cur; - const int x_max; -}; - -/* - * parameters, config and stats - */ -struct g_rr_params { - int queues; /* total number of queues */ - int w_anticipate; /* anticipate writes */ - int bypass; /* bypass scheduling writes */ - - int units; /* how many instances */ - /* sc_head is used for debugging */ - struct g_scheds sc_head; /* first scheduler instance */ - - struct x_bound queue_depth; /* max parallel requests */ - struct x_bound wait_ms; /* wait time, milliseconds */ - struct x_bound quantum_ms; /* quantum size, milliseconds */ - struct x_bound quantum_kb; /* quantum size, Kb (1024 bytes) */ - - /* statistics */ - int wait_hit; /* success in anticipation */ - int wait_miss; /* failure in anticipation */ -}; - -/* - * Default parameters for the scheduler. The quantum sizes target - * a 80MB/s disk; if the hw is faster or slower the minimum of the - * two will have effect: the clients will still be isolated but - * the fairness may be limited. A complete solution would involve - * the on-line measurement of the actual disk throughput to derive - * these parameters. 
Or we may just choose to ignore service domain - * fairness and accept what can be achieved with time-only budgets. - */ -static struct g_rr_params me = { - .sc_head = LIST_HEAD_INITIALIZER(&me.sc_head), - .w_anticipate = 1, - .queue_depth = { 1, 1, 50 }, - .wait_ms = { 1, 10, 30 }, - .quantum_ms = { 1, 100, 500 }, - .quantum_kb = { 16, 8192, 65536 }, -}; - -struct g_rr_params *gs_rr_me = &me; - -SYSCTL_DECL(_kern_geom_sched); -static SYSCTL_NODE(_kern_geom_sched, OID_AUTO, rr, CTLFLAG_RW, 0, - "GEOM_SCHED ROUND ROBIN stuff"); -SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, units, CTLFLAG_RD, - &me.units, 0, "Scheduler instances"); -SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queues, CTLFLAG_RD, - &me.queues, 0, "Total rr queues"); -SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_ms, CTLFLAG_RW, - &me.wait_ms.x_cur, 0, "Wait time milliseconds"); -SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_ms, CTLFLAG_RW, - &me.quantum_ms.x_cur, 0, "Quantum size milliseconds"); -SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, bypass, CTLFLAG_RW, - &me.bypass, 0, "Bypass scheduler"); -SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, w_anticipate, CTLFLAG_RW, - &me.w_anticipate, 0, "Do anticipation on writes"); -SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_kb, CTLFLAG_RW, - &me.quantum_kb.x_cur, 0, "Quantum size Kbytes"); -SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queue_depth, CTLFLAG_RW, - &me.queue_depth.x_cur, 0, "Maximum simultaneous requests"); -SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_hit, CTLFLAG_RW, - &me.wait_hit, 0, "Hits in anticipation"); -SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_miss, CTLFLAG_RW, - &me.wait_miss, 0, "Misses in anticipation"); - -#ifdef DEBUG_QUEUES -/* print the status of a queue */ -static void -gs_rr_dump_q(struct g_rr_queue *qp, int index) -{ - int l = 0; - struct bio *bp; - - TAILQ_FOREACH(bp, &(qp->q_bioq.queue), bio_queue) { - l++; - } - printf("--- rr queue %d %p status %d len %d ---\n", - index, qp, qp->q_status, l); -} - -/* - * Dump the scheduler status when writing to this sysctl variable. - * XXX right now we only dump the status of the last instance created. - * not a severe issue because this is only for debugging - */ -static int -gs_rr_sysctl_status(SYSCTL_HANDLER_ARGS) -{ - int error, val = 0; - struct g_rr_softc *sc; - - error = sysctl_handle_int(oidp, &val, 0, req); - if (error || !req->newptr ) - return (error); - - printf("called %s\n", __FUNCTION__); - - LIST_FOREACH(sc, &me.sc_head, sc_next) { - int i, tot = 0; - printf("--- sc %p active %p nqueues %d " - "callout %d in_flight %d ---\n", - sc, sc->sc_active, sc->sc_nqueues, - callout_active(&sc->sc_wait), - sc->sc_in_flight); - for (i = 0; i < G_RR_HASH_SIZE; i++) { - struct g_rr_queue *qp; - LIST_FOREACH(qp, &sc->sc_hash[i], q_hash) { - gs_rr_dump_q(qp, tot); - tot++; - } - } - } - return (0); -} - -SYSCTL_PROC(_kern_geom_sched_rr, OID_AUTO, status, - CTLTYPE_UINT | CTLFLAG_RW, - 0, sizeof(int), gs_rr_sysctl_status, "I", "status"); - -#endif /* DEBUG_QUEUES */ - -/* - * Get a bounded value, optionally convert to a min of t_min ticks. - */ -static int -get_bounded(struct x_bound *v, int t_min) -{ - int x; - - x = v->x_cur; - if (x < v->x_min) - x = v->x_min; - else if (x > v->x_max) - x = v->x_max; - if (t_min) { - x = x * hz / 1000; /* convert to ticks */ - if (x < t_min) - x = t_min; - } - return x; -} - -/* - * Get a reference to the queue for bp, using the generic - * classification mechanism. 
- */ -static struct g_rr_queue * -g_rr_queue_get(struct g_rr_softc *sc, struct bio *bp) -{ - - return (g_sched_get_class(sc->sc_geom, bp)); -} - -static int -g_rr_init_class(void *data, void *priv) -{ - struct g_rr_softc *sc = data; - struct g_rr_queue *qp = priv; - - bioq_init(&qp->q_bioq); - - /* - * Set the initial parameters for the client: - * slice size in bytes and ticks, and wait ticks. - * Right now these are constant, but we could have - * autoconfiguration code to adjust the values based on - * the actual workload. - */ - qp->q_budget = 1024 * get_bounded(&me.quantum_kb, 0); - qp->q_slice_duration = get_bounded(&me.quantum_ms, 2); - qp->q_wait_ticks = get_bounded(&me.wait_ms, 2); - - qp->q_sc = sc; /* link to the parent */ - qp->q_sc->sc_nqueues++; - me.queues++; - - return (0); -} - -/* - * Release a reference to the queue. - */ -static void -g_rr_queue_put(struct g_rr_queue *qp) -{ - - g_sched_put_class(qp->q_sc->sc_geom, qp); -} - -static void -g_rr_fini_class(void *data, void *priv) -{ - struct g_rr_queue *qp = priv; - - KASSERT(bioq_first(&qp->q_bioq) == NULL, - ("released nonempty queue")); - qp->q_sc->sc_nqueues--; - me.queues--; -} - -static inline int -g_rr_queue_expired(struct g_rr_queue *qp) -{ - - if (qp->q_service >= qp->q_budget) - return (1); - - if ((qp->q_flags & G_FLAG_COMPLETED) && - ticks - qp->q_slice_end >= 0) - return (1); - - return (0); -} - -static inline int -g_rr_should_anticipate(struct g_rr_queue *qp, struct bio *bp) -{ - int wait = get_bounded(&me.wait_ms, 2); - - if (!me.w_anticipate && (bp->bio_cmd == BIO_WRITE)) - return (0); - - if (g_savg_valid(&qp->q_thinktime) && - g_savg_read(&qp->q_thinktime) > wait) - return (0); - - if (g_savg_valid(&qp->q_seekdist) && - g_savg_read(&qp->q_seekdist) > 8192) - return (0); - - return (1); -} - -/* - * Called on a request arrival, timeout or completion. - * Try to serve a request among those queued. - */ -static struct bio * -g_rr_next(void *data, int force) -{ - struct g_rr_softc *sc = data; - struct g_rr_queue *qp; - struct bio *bp, *next; - int expired; - - qp = sc->sc_active; - if (me.bypass == 0 && !force) { - if (sc->sc_in_flight >= get_bounded(&me.queue_depth, 0)) - return (NULL); - - /* Try with the queue under service first. */ - if (qp != NULL && qp->q_status != G_QUEUE_READY) { - /* - * Queue is anticipating, ignore request. - * We should check that we are not past - * the timeout, but in that case the timeout - * will fire immediately afterwards so we - * don't bother. - */ - return (NULL); - } - } else if (qp != NULL && qp->q_status != G_QUEUE_READY) { - g_rr_queue_put(qp); - sc->sc_active = qp = NULL; - } - - /* - * No queue under service, look for the first in RR order. - * If we find it, select if as sc_active, clear service - * and record the end time of the slice. - */ - if (qp == NULL) { - qp = TAILQ_FIRST(&sc->sc_rr_tailq); - if (qp == NULL) - return (NULL); /* no queues at all, return */ - /* otherwise select the new queue for service. */ - TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq); - sc->sc_active = qp; - qp->q_service = 0; - qp->q_flags &= ~G_FLAG_COMPLETED; - } - - bp = bioq_takefirst(&qp->q_bioq); /* surely not NULL */ - qp->q_service += bp->bio_length; /* charge the service */ - - /* - * The request at the head of the active queue is always - * dispatched, and gs_rr_next() will be called again - * immediately. - * We need to prepare for what to do next: - * - * 1. have we reached the end of the (time or service) slice ? 
- * If so, clear sc_active and possibly requeue the previous - * active queue if it has more requests pending; - * 2. do we have more requests in sc_active ? - * If yes, do not anticipate, as gs_rr_next() will run again; - * if no, decide whether or not to anticipate depending - * on read or writes (e.g., anticipate only on reads). - */ - expired = g_rr_queue_expired(qp); /* are we expired ? */ - next = bioq_first(&qp->q_bioq); /* do we have one more ? */ - if (expired) { - sc->sc_active = NULL; - /* Either requeue or release reference. */ - if (next != NULL) - TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq); - else - g_rr_queue_put(qp); - } else if (next != NULL) { - qp->q_status = G_QUEUE_READY; - } else { - if (!force && g_rr_should_anticipate(qp, bp)) { - /* anticipate */ - qp->q_status = G_QUEUE_BUSY; - } else { - /* do not anticipate, release reference */ - g_rr_queue_put(qp); - sc->sc_active = NULL; - } - } - /* If sc_active != NULL, its q_status is always correct. */ - - sc->sc_in_flight++; - - return (bp); -} - -static inline void -g_rr_update_thinktime(struct g_rr_queue *qp) -{ - int delta = ticks - qp->q_lastsub, wait = get_bounded(&me.wait_ms, 2); - - if (qp->q_sc->sc_active != qp) - return; - - qp->q_lastsub = ticks; - delta = (delta > 2 * wait) ? 2 * wait : delta; - if (qp->q_bionum > 7) - g_savg_add_sample(&qp->q_thinktime, delta); -} - -static inline void -g_rr_update_seekdist(struct g_rr_queue *qp, struct bio *bp) -{ - off_t dist; - - if (qp->q_lastoff > bp->bio_offset) - dist = qp->q_lastoff - bp->bio_offset; - else - dist = bp->bio_offset - qp->q_lastoff; - - if (dist > (8192 * 8)) - dist = 8192 * 8; - - qp->q_lastoff = bp->bio_offset + bp->bio_length; - - if (qp->q_bionum > 7) - g_savg_add_sample(&qp->q_seekdist, dist); -} - -/* - * Called when a real request for disk I/O arrives. - * Locate the queue associated with the client. - * If the queue is the one we are anticipating for, reset its timeout; - * if the queue is not in the round robin list, insert it in the list. - * On any error, do not queue the request and return -1, the caller - * will take care of this request. - */ -static int -g_rr_start(void *data, struct bio *bp) -{ - struct g_rr_softc *sc = data; - struct g_rr_queue *qp; - - if (me.bypass) - return (-1); /* bypass the scheduler */ - - /* Get the queue for the request. */ - qp = g_rr_queue_get(sc, bp); - if (qp == NULL) - return (-1); /* allocation failed, tell upstream */ - - if (bioq_first(&qp->q_bioq) == NULL) { - /* - * We are inserting into an empty queue. - * Reset its state if it is sc_active, - * otherwise insert it in the RR list. - */ - if (qp == sc->sc_active) { - qp->q_status = G_QUEUE_READY; - callout_stop(&sc->sc_wait); - } else { - g_sched_priv_ref(qp); - TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq); - } - } - - qp->q_bionum = 1 + qp->q_bionum - (qp->q_bionum >> 3); - - g_rr_update_thinktime(qp); - g_rr_update_seekdist(qp, bp); - - /* Inherit the reference returned by g_rr_queue_get(). */ - bp->bio_caller1 = qp; - bioq_disksort(&qp->q_bioq, bp); - - return (0); -} - -/* - * Callout executed when a queue times out anticipating a new request. - */ -static void -g_rr_wait_timeout(void *data) -{ - struct g_rr_softc *sc = data; - struct g_geom *geom = sc->sc_geom; - - g_sched_lock(geom); - /* - * We can race with other events, so check if - * sc_active is still valid. - */ - if (sc->sc_active != NULL) { - /* Release the reference to the queue. 
*/ - g_rr_queue_put(sc->sc_active); - sc->sc_active = NULL; - me.wait_hit--; - me.wait_miss++; /* record the miss */ - } - g_sched_dispatch(geom); - g_sched_unlock(geom); -} - -/* - * Module glue: allocate descriptor, initialize its fields. - */ -static void * -g_rr_init(struct g_geom *geom) -{ - struct g_rr_softc *sc; - - /* XXX check whether we can sleep */ - sc = malloc(sizeof *sc, M_GEOM_SCHED, M_NOWAIT | M_ZERO); - sc->sc_geom = geom; - TAILQ_INIT(&sc->sc_rr_tailq); - callout_init(&sc->sc_wait, 1); - LIST_INSERT_HEAD(&me.sc_head, sc, sc_next); - me.units++; - - return (sc); -} - -/* - * Module glue -- drain the callout structure, destroy the - * hash table and its element, and free the descriptor. - */ -static void -g_rr_fini(void *data) -{ - struct g_rr_softc *sc = data; - - callout_drain(&sc->sc_wait); - KASSERT(sc->sc_active == NULL, ("still a queue under service")); - KASSERT(TAILQ_EMPTY(&sc->sc_rr_tailq), ("still scheduled queues")); - - LIST_REMOVE(sc, sc_next); - me.units--; - free(sc, M_GEOM_SCHED); -} - -/* - * Called when the request under service terminates. - * Start the anticipation timer if needed. - */ -static void -g_rr_done(void *data, struct bio *bp) -{ - struct g_rr_softc *sc = data; - struct g_rr_queue *qp; - - sc->sc_in_flight--; - - qp = bp->bio_caller1; - - /* - * When the first request for this queue completes, update the - * duration and end of the slice. We do not do it when the - * slice starts to avoid charging to the queue the time for - * the first seek. - */ - if (!(qp->q_flags & G_FLAG_COMPLETED)) { - qp->q_flags |= G_FLAG_COMPLETED; - /* - * recompute the slice duration, in case we want - * to make it adaptive. This is not used right now. - * XXX should we do the same for q_quantum and q_wait_ticks ? - */ - qp->q_slice_duration = get_bounded(&me.quantum_ms, 2); - qp->q_slice_end = ticks + qp->q_slice_duration; - } - - if (qp == sc->sc_active && qp->q_status == G_QUEUE_BUSY) { - /* The queue is trying anticipation, start the timer. */ - qp->q_status = G_QUEUE_IDLING; - /* may make this adaptive */ - qp->q_wait_ticks = get_bounded(&me.wait_ms, 2); - me.wait_hit++; - callout_reset(&sc->sc_wait, qp->q_wait_ticks, - g_rr_wait_timeout, sc); - } else - g_sched_dispatch(sc->sc_geom); - - /* Release a reference to the queue. */ - g_rr_queue_put(qp); -} - -static void -g_rr_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, - struct g_consumer *cp, struct g_provider *pp) -{ - if (indent == NULL) { /* plaintext */ - sbuf_printf(sb, " units %d queues %d", - me.units, me.queues); - } -} - -static struct g_gsched g_rr = { - .gs_name = "rr", - .gs_priv_size = sizeof(struct g_rr_queue), - .gs_init = g_rr_init, - .gs_fini = g_rr_fini, - .gs_start = g_rr_start, - .gs_done = g_rr_done, - .gs_next = g_rr_next, - .gs_dumpconf = g_rr_dumpconf, - .gs_init_class = g_rr_init_class, - .gs_fini_class = g_rr_fini_class, -}; - -DECLARE_GSCHED_MODULE(rr, &g_rr); Property changes on: head/sys/geom/sched/gs_rr.c ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/geom/sched/gs_delay.c =================================================================== --- head/sys/geom/sched/gs_delay.c (revision 356184) +++ head/sys/geom/sched/gs_delay.c (nonexistent) @@ -1,264 +0,0 @@ -/*- - * Copyright (c) 2015 Netflix, Inc. 
- * - * Derived from gs_rr.c: - * Copyright (c) 2009-2010 Fabio Checconi - * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id$ - * $FreeBSD$ - * - * A simple scheduler that just delays certain transactions by a certain - * amount. We collect all the transactions that are 'done' and put them on - * a queue. The queue is run through every so often and the transactions that - * have taken longer than the threshold delay are completed. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "gs_scheduler.h" - -/* Useful constants */ -#define BTFRAC_1US 18446744073709ULL /* 2^64 / 1000000 */ - -/* list of scheduler instances */ -LIST_HEAD(g_scheds, g_delay_softc); - -/* - * Per device descriptor, holding the Round Robin list of queues - * accessing the disk, a reference to the geom, and the timer. 
 */
-struct g_delay_softc {
-	struct g_geom	*sc_geom;
-
-	struct bio_queue_head sc_bioq;	/* queue of pending requests */
-	struct callout	sc_wait;	/* timer for completing with delays */
-
-	/* Statistics */
-	int		sc_in_flight;	/* requests in the driver */
-};
-
-/*
- * parameters, config and stats
- */
-struct g_delay_params {
-	uint64_t io;
-	int	bypass;		/* bypass scheduling */
-	int	units;		/* how many instances */
-	int	latency;	/* How big a latency we are hoping for */
-};
-
-static struct g_delay_params me = {
-	.bypass = 0,
-	.units = 0,
-	.latency = 0,
-	.io = 0,
-};
-struct g_delay_params *gs_delay_me = &me;
-
-SYSCTL_DECL(_kern_geom_sched);
-static SYSCTL_NODE(_kern_geom_sched, OID_AUTO, delay, CTLFLAG_RW, 0,
-    "GEOM_SCHED DELAY stuff");
-SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, bypass, CTLFLAG_RD,
-    &me.bypass, 0, "Scheduler bypass");
-SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, units, CTLFLAG_RD,
-    &me.units, 0, "Scheduler instances");
-SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, latency, CTLFLAG_RW,
-    &me.latency, 0, "Minimum latency for requests, in microseconds (1/hz resolution)");
-SYSCTL_QUAD(_kern_geom_sched_delay, OID_AUTO, io, CTLFLAG_RW,
-    &me.io, 0, "I/Os delayed\n");
-
-static int
-g_delay_init_class(void *data, void *priv)
-{
-	return (0);
-}
-
-static void
-g_delay_fini_class(void *data, void *priv)
-{
-}
-
-/*
- * Called on a request arrival, timeout or completion.
- * Try to serve a request among those queued.
- */
-static struct bio *
-g_delay_next(void *data, int force)
-{
-	struct g_delay_softc *sc = data;
-	struct bio *bp;
-	struct bintime bt;
-
-	bp = bioq_first(&sc->sc_bioq);
-	if (bp == NULL)
-		return (NULL);
-
-	/*
-	 * If the time isn't yet ripe for this bp to be let loose,
-	 * then the time isn't ripe for any of its friends either
-	 * since we insert in-order. Terminate if the bio hasn't
-	 * aged appropriately. Note that there's pathology here
-	 * such that we may be up to one tick early in releasing
-	 * this I/O. We could implement this up to a tick late too
-	 * but choose not to.
-	 */
-	getbinuptime(&bt);	/* BIO's bio_t0 is uptime */
-	if (bintime_cmp(&bp->bio_t0, &bt, >))
-		return (NULL);
-	me.io++;
-
-	/*
-	 * The bp has mellowed enough, let it through and update stats.
-	 * If there's others, we'll catch them next time we get called.
-	 */
-	sc->sc_in_flight++;
-
-	bp = bioq_takefirst(&sc->sc_bioq);
-	return (bp);
-}
-
-/*
- * Called when a real request for disk I/O arrives.
- * Locate the queue associated with the client.
- * If the queue is the one we are anticipating for, reset its timeout;
- * if the queue is not in the round robin list, insert it in the list.
- * On any error, do not queue the request and return -1, the caller
- * will take care of this request.
- */
-static int
-g_delay_start(void *data, struct bio *bp)
-{
-	struct g_delay_softc *sc = data;
-
-	if (me.bypass)
-		return (-1);	/* bypass the scheduler */
-
-	bp->bio_caller1 = sc;
-	getbinuptime(&bp->bio_t0);	/* BIO's bio_t0 is uptime */
-	bintime_addx(&bp->bio_t0, BTFRAC_1US * me.latency);
-
-	/*
-	 * Keep the I/Os ordered. Lower layers will reorder as we release them down.
-	 * We rely on this in g_delay_next() so that we delay all things equally. Even
-	 * if we move to multiple queues to push stuff down the stack, we'll want to
-	 * insert in order and let the lower layers do whatever reordering they want.
- */ - bioq_insert_tail(&sc->sc_bioq, bp); - - return (0); -} - -static void -g_delay_timeout(void *data) -{ - struct g_delay_softc *sc = data; - - g_sched_lock(sc->sc_geom); - g_sched_dispatch(sc->sc_geom); - g_sched_unlock(sc->sc_geom); - callout_reset(&sc->sc_wait, 1, g_delay_timeout, sc); -} - -/* - * Module glue: allocate descriptor, initialize its fields. - */ -static void * -g_delay_init(struct g_geom *geom) -{ - struct g_delay_softc *sc; - - sc = malloc(sizeof *sc, M_GEOM_SCHED, M_WAITOK | M_ZERO); - sc->sc_geom = geom; - bioq_init(&sc->sc_bioq); - callout_init(&sc->sc_wait, CALLOUT_MPSAFE); - callout_reset(&sc->sc_wait, 1, g_delay_timeout, sc); - me.units++; - - return (sc); -} - -/* - * Module glue -- drain the callout structure, destroy the - * hash table and its element, and free the descriptor. - */ -static void -g_delay_fini(void *data) -{ - struct g_delay_softc *sc = data; - - /* We're force drained before getting here */ - - /* Kick out timers */ - callout_drain(&sc->sc_wait); - me.units--; - free(sc, M_GEOM_SCHED); -} - -/* - * Called when the request under service terminates. - * Start the anticipation timer if needed. - */ -static void -g_delay_done(void *data, struct bio *bp) -{ - struct g_delay_softc *sc = data; - - sc->sc_in_flight--; - - g_sched_dispatch(sc->sc_geom); -} - -static void -g_delay_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, - struct g_consumer *cp, struct g_provider *pp) -{ -} - -static struct g_gsched g_delay = { - .gs_name = "delay", - .gs_priv_size = 0, - .gs_init = g_delay_init, - .gs_fini = g_delay_fini, - .gs_start = g_delay_start, - .gs_done = g_delay_done, - .gs_next = g_delay_next, - .gs_dumpconf = g_delay_dumpconf, - .gs_init_class = g_delay_init_class, - .gs_fini_class = g_delay_fini_class, -}; - -DECLARE_GSCHED_MODULE(delay, &g_delay); Property changes on: head/sys/geom/sched/gs_delay.c ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/geom/sched/g_sched.c =================================================================== --- head/sys/geom/sched/g_sched.c (revision 356184) +++ head/sys/geom/sched/g_sched.c (nonexistent) @@ -1,1729 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (c) 2009-2010 Fabio Checconi - * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id$ - * $FreeBSD$ - * - * Main control module for geom-based disk schedulers ('sched'). - * - * USER VIEW - * A 'sched' node is typically inserted transparently between - * an existing provider pp and its original geom gp - * - * [pp --> gp ..] - * - * using the command "geom sched insert <provider>" and - * resulting in the following topology - * - * [pp --> sched_gp --> cp] [new_pp --> gp ... ] - * - * Deletion "geom sched destroy <provider>.sched." restores the - * original chain. The normal "geom sched create <provider>" - * is also supported. - * - * INTERNALS - * Internally, the 'sched' uses the following data structures - * - * geom{} g_sched_softc{} g_gsched{} - * +----------+ +---------------+ +-------------+ - * | softc *-|--->| sc_gsched *-|-->| gs_init | - * | ... | | | | gs_fini | - * | | | [ hash table] | | gs_start | - * +----------+ | | | ... | - * | | +-------------+ - * | | - * | | g_*_softc{} - * | | +-------------+ - * | sc_data *-|-->| | - * +---------------+ | algorithm- | - * | specific | - * +-------------+ - * - * A g_sched_softc{} is created with a "geom sched insert" call. - * In turn this instantiates a specific scheduling algorithm, - * which sets sc_gsched to point to the algorithm callbacks, - * and calls gs_init() to create the g_*_softc{} . - * The other callbacks (gs_start, gs_next, ...) are invoked - * as needed - * - * g_sched_softc{} is defined in g_sched.h and mostly used here; - * g_gsched{}, and the gs_callbacks, are documented in gs_scheduler.h; - * g_*_softc{} is defined/implemented by each algorithm (gs_*.c) - * - * DATA MOVING - * When a bio is received on the provider, it goes to the - * g_sched_start() which calls gs_start() to initially queue it; - * then we call g_sched_dispatch() that loops around gs_next() - * to select zero or more bio's to be sent downstream. - * - * g_sched_dispatch() can also be called as a result of a timeout, - * e.g. when doing anticipation or pacing requests. - * - * When a bio comes back, it goes to g_sched_done() which in turn - * calls gs_done(). The latter does any necessary housekeeping in - * the scheduling algorithm, and may decide to call g_sched_dispatch() - * to send more bio's downstream. - * - * If an algorithm needs per-flow queues, these are created - * calling gs_init_class() and destroyed with gs_fini_class(), - * and they are also inserted in the hash table implemented in - * the g_sched_softc{} - * - * If an algorithm is replaced, or a transparently-inserted node is - * removed with "geom sched destroy", we need to remove all references - * to the g_*_softc{} and g_sched_softc from the bio's still in - * the scheduler. g_sched_forced_dispatch() helps doing this. - * XXX need to explain better.
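The DATA MOVING paragraph above describes a small state machine: gs_start() queues an incoming request, then g_sched_dispatch() loops around gs_next() until it returns NULL, and a completion may kick the loop again. A self-contained userland sketch of that control flow follows; every name here is illustrative (the real code operates on struct bio through the gs_* callbacks):

#include <stddef.h>
#include <stdio.h>

struct req { struct req *next; int id; };

struct sched {
	struct req *head, **tail;	/* FIFO of queued requests */
};

/* Like gs_start(): queue an incoming request. */
static int
sched_start(struct sched *s, struct req *r)
{
	r->next = NULL;
	*s->tail = r;
	s->tail = &r->next;
	return (0);
}

/* Like gs_next(): pick the next request to push downstream, if any. */
static struct req *
sched_next(struct sched *s)
{
	struct req *r = s->head;

	if (r != NULL) {
		s->head = r->next;
		if (s->head == NULL)
			s->tail = &s->head;
	}
	return (r);
}

/* Like g_sched_dispatch(): loop around sched_next() until it runs dry. */
static void
dispatch(struct sched *s)
{
	struct req *r;

	while ((r = sched_next(s)) != NULL)
		printf("sending request %d downstream\n", r->id);
}

int
main(void)
{
	struct sched s = { .head = NULL, .tail = &s.head };
	struct req a = { .id = 1 }, b = { .id = 2 };

	sched_start(&s, &a);
	sched_start(&s, &b);
	dispatch(&s);	/* prints 1 then 2 */
	return (0);
}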
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* we access curthread */ -#include -#include -#include "gs_scheduler.h" -#include "g_sched.h" /* geom hooks */ - -/* - * Size of the per-geom hash table storing traffic classes. - * We may decide to change it at a later time, it has no ABI - * implications as it is only used for run-time allocations. - */ -#define G_SCHED_HASH_SIZE 32 - -static int g_sched_destroy(struct g_geom *gp, boolean_t force); -static int g_sched_destroy_geom(struct gctl_req *req, - struct g_class *mp, struct g_geom *gp); -static void g_sched_config(struct gctl_req *req, struct g_class *mp, - const char *verb); -static struct g_geom *g_sched_taste(struct g_class *mp, - struct g_provider *pp, int flags __unused); -static void g_sched_dumpconf(struct sbuf *sb, const char *indent, - struct g_geom *gp, struct g_consumer *cp, struct g_provider *pp); -static void g_sched_init(struct g_class *mp); -static void g_sched_fini(struct g_class *mp); -static int g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data, - int fflag, struct thread *td); - -struct g_class g_sched_class = { - .name = G_SCHED_CLASS_NAME, - .version = G_VERSION, - .ctlreq = g_sched_config, - .taste = g_sched_taste, - .destroy_geom = g_sched_destroy_geom, - .init = g_sched_init, - .ioctl = g_sched_ioctl, - .fini = g_sched_fini -}; - -MALLOC_DEFINE(M_GEOM_SCHED, "GEOM_SCHED", "Geom schedulers data structures"); - -/* - * Global variables describing the state of the geom_sched module. - * There is only one static instance of this structure. - */ -LIST_HEAD(gs_list, g_gsched); /* type, link field */ -struct geom_sched_vars { - struct mtx gs_mtx; - struct gs_list gs_scheds; /* list of algorithms */ - u_int gs_debug; - u_int gs_sched_count; /* how many algorithms ? */ - u_int gs_patched; /* g_io_request was patched */ - - u_int gs_initialized; - u_int gs_expire_secs; /* expiration of hash entries */ - - struct bio_queue_head gs_pending; - u_int gs_npending; - - /* The following are for stats, usually protected by gs_mtx. 
*/ - u_long gs_requests; /* total requests */ - u_long gs_done; /* total done */ - u_int gs_in_flight; /* requests in flight */ - u_int gs_writes_in_flight; - u_int gs_bytes_in_flight; - u_int gs_write_bytes_in_flight; - - char gs_names[256]; /* names of schedulers */ -}; - -static struct geom_sched_vars me = { - .gs_expire_secs = 10, -}; - -SYSCTL_DECL(_kern_geom); -SYSCTL_NODE(_kern_geom, OID_AUTO, sched, CTLFLAG_RW, 0, - "GEOM_SCHED stuff"); - -SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_wb, CTLFLAG_RD, - &me.gs_write_bytes_in_flight, 0, "Write bytes in flight"); - -SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_b, CTLFLAG_RD, - &me.gs_bytes_in_flight, 0, "Bytes in flight"); - -SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight_w, CTLFLAG_RD, - &me.gs_writes_in_flight, 0, "Write Requests in flight"); - -SYSCTL_UINT(_kern_geom_sched, OID_AUTO, in_flight, CTLFLAG_RD, - &me.gs_in_flight, 0, "Requests in flight"); - -SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, done, CTLFLAG_RD, - &me.gs_done, 0, "Total done"); - -SYSCTL_ULONG(_kern_geom_sched, OID_AUTO, requests, CTLFLAG_RD, - &me.gs_requests, 0, "Total requests"); - -SYSCTL_STRING(_kern_geom_sched, OID_AUTO, algorithms, CTLFLAG_RD, - &me.gs_names, 0, "Algorithm names"); - -SYSCTL_UINT(_kern_geom_sched, OID_AUTO, alg_count, CTLFLAG_RD, - &me.gs_sched_count, 0, "Number of algorithms"); - -SYSCTL_UINT(_kern_geom_sched, OID_AUTO, debug, CTLFLAG_RW, - &me.gs_debug, 0, "Debug level"); - -SYSCTL_UINT(_kern_geom_sched, OID_AUTO, expire_secs, CTLFLAG_RW, - &me.gs_expire_secs, 0, "Expire time in seconds"); - -/* - * g_sched calls the scheduler algorithms with this lock held. - * The locking functions are exposed so the scheduler algorithms can also - * protect themselves e.g. when running a callout handler. - */ -void -g_sched_lock(struct g_geom *gp) -{ - struct g_sched_softc *sc = gp->softc; - - mtx_lock(&sc->sc_mtx); -} - -void -g_sched_unlock(struct g_geom *gp) -{ - struct g_sched_softc *sc = gp->softc; - - mtx_unlock(&sc->sc_mtx); -} - -/* - * Support functions to handle references to the module, - * which are coming from devices using this scheduler. - */ -static inline void -g_gsched_ref(struct g_gsched *gsp) -{ - - atomic_add_int(&gsp->gs_refs, 1); -} - -static inline void -g_gsched_unref(struct g_gsched *gsp) -{ - - atomic_add_int(&gsp->gs_refs, -1); -} - -/* - * Update the stats when this request is done. - */ -static void -g_sched_update_stats(struct bio *bio) -{ - - me.gs_done++; - me.gs_in_flight--; - me.gs_bytes_in_flight -= bio->bio_length; - if (bio->bio_cmd == BIO_WRITE) { - me.gs_writes_in_flight--; - me.gs_write_bytes_in_flight -= bio->bio_length; - } -} - -/* - * Dispatch any pending request. - */ -static void -g_sched_forced_dispatch(struct g_geom *gp) -{ - struct g_sched_softc *sc = gp->softc; - struct g_gsched *gsp = sc->sc_gsched; - struct bio *bp; - - KASSERT(mtx_owned(&sc->sc_mtx), - ("sc_mtx not owned during forced dispatch")); - - while ((bp = gsp->gs_next(sc->sc_data, 1)) != NULL) - g_io_request(bp, LIST_FIRST(&gp->consumer)); -} - -/* - * The main dispatch loop, called either here after the start - * routine, or by scheduling algorithms when they receive a timeout - * or a 'done' notification. Does not share code with the forced - * dispatch path, since the gs_done() callback can call us. 
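The in-flight counters above are maintained symmetrically: incremented when g_sched_start() issues a request, decremented by g_sched_update_stats() when it completes, with writes and write bytes tracked separately. A minimal sketch of that accounting, using hypothetical io_stats names rather than the module's globals:

#include <stdint.h>

struct io_stats {
	unsigned long requests;		/* total issued */
	unsigned long done;		/* total completed */
	unsigned in_flight;		/* requests outstanding */
	unsigned writes_in_flight;
	uint64_t bytes_in_flight;
	uint64_t write_bytes_in_flight;
};

static void
stats_issue(struct io_stats *st, uint64_t len, int is_write)
{
	st->requests++;
	st->in_flight++;
	st->bytes_in_flight += len;
	if (is_write) {
		st->writes_in_flight++;
		st->write_bytes_in_flight += len;
	}
}

static void
stats_done(struct io_stats *st, uint64_t len, int is_write)
{
	st->done++;
	st->in_flight--;
	st->bytes_in_flight -= len;
	if (is_write) {
		st->writes_in_flight--;
		st->write_bytes_in_flight -= len;
	}
}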
- */ -void -g_sched_dispatch(struct g_geom *gp) -{ - struct g_sched_softc *sc = gp->softc; - struct g_gsched *gsp = sc->sc_gsched; - struct bio *bp; - - KASSERT(mtx_owned(&sc->sc_mtx), ("sc_mtx not owned during dispatch")); - - if ((sc->sc_flags & G_SCHED_FLUSHING)) - return; - - while ((bp = gsp->gs_next(sc->sc_data, 0)) != NULL) - g_io_request(bp, LIST_FIRST(&gp->consumer)); -} - -/* - * Recent (8.0 and above) versions of FreeBSD have support to - * register classifiers of disk requests. The classifier is - * invoked by g_io_request(), and stores the information into - * bp->bio_classifier1. - * - * Support for older versions, which is left here only for - * documentation purposes, relies on two hacks: - * 1. classification info is written into the bio_caller1 - * field of the topmost node in the bio chain. This field - * is rarely used, but this module is incompatible with - * those that use bio_caller1 for other purposes, - * such as ZFS and gjournal; - * 2. g_io_request() is patched in-memory when the module is - * loaded, so that the function calls a classifier as its - * first thing. g_io_request() is restored when the module - * is unloaded. This functionality is only supported for - * x86 and amd64, other architectures need source code changes. - */ - -/* - * Lookup the identity of the issuer of the original request. - * In the current implementation we use the curthread of the - * issuer, but different mechanisms may be implemented later - * so we do not make assumptions on the return value which for - * us is just an opaque identifier. - */ - -static inline u_long -g_sched_classify(struct bio *bp) -{ - - /* we have classifier fields in the struct bio */ - return ((u_long)bp->bio_classifier1); -} - -/* Return the hash chain for the given key. */ -static inline struct g_hash * -g_sched_hash(struct g_sched_softc *sc, u_long key) -{ - - return (&sc->sc_hash[key & sc->sc_mask]); -} - -/* - * Helper function for the children classes, which takes - * a geom and a bio and returns the private descriptor - * associated to the request. This involves fetching - * the classification field and [al]locating the - * corresponding entry in the hash table. - */ -void * -g_sched_get_class(struct g_geom *gp, struct bio *bp) -{ - struct g_sched_softc *sc; - struct g_sched_class *gsc; - struct g_gsched *gsp; - struct g_hash *bucket; - u_long key; - - sc = gp->softc; - key = g_sched_classify(bp); - bucket = g_sched_hash(sc, key); - LIST_FOREACH(gsc, bucket, gsc_clist) { - if (key == gsc->gsc_key) { - gsc->gsc_refs++; - return (gsc->gsc_priv); - } - } - - gsp = sc->sc_gsched; - gsc = malloc(sizeof(*gsc) + gsp->gs_priv_size, - M_GEOM_SCHED, M_NOWAIT | M_ZERO); - if (!gsc) - return (NULL); - - if (gsp->gs_init_class(sc->sc_data, gsc->gsc_priv)) { - free(gsc, M_GEOM_SCHED); - return (NULL); - } - - gsc->gsc_refs = 2; /* 1 for the hash table, 1 for the caller. 
*/ - gsc->gsc_key = key; - LIST_INSERT_HEAD(bucket, gsc, gsc_clist); - - gsc->gsc_expire = ticks + me.gs_expire_secs * hz; - - return (gsc->gsc_priv); -} - -/* - * Release a reference to the per-client descriptor. - */ -void -g_sched_put_class(struct g_geom *gp, void *priv) -{ - struct g_sched_class *gsc; - struct g_sched_softc *sc; - - gsc = g_sched_priv2class(priv); - gsc->gsc_expire = ticks + me.gs_expire_secs * hz; - - if (--gsc->gsc_refs > 0) - return; - - sc = gp->softc; - sc->sc_gsched->gs_fini_class(sc->sc_data, priv); - - LIST_REMOVE(gsc, gsc_clist); - free(gsc, M_GEOM_SCHED); -} - -static void -g_sched_hash_fini(struct g_geom *gp, struct g_hash *hp, u_long mask, - struct g_gsched *gsp, void *data) -{ - struct g_sched_class *cp, *cp2; - int i; - - if (!hp) - return; - - if (data && gsp->gs_hash_unref) - gsp->gs_hash_unref(data); - - for (i = 0; i < G_SCHED_HASH_SIZE; i++) { - LIST_FOREACH_SAFE(cp, &hp[i], gsc_clist, cp2) - g_sched_put_class(gp, cp->gsc_priv); - } - - hashdestroy(hp, M_GEOM_SCHED, mask); -} - -static struct g_hash * -g_sched_hash_init(struct g_gsched *gsp, u_long *mask, int flags) -{ - struct g_hash *hash; - - if (gsp->gs_priv_size == 0) - return (NULL); - - hash = hashinit_flags(G_SCHED_HASH_SIZE, M_GEOM_SCHED, mask, flags); - - return (hash); -} - -static void -g_sched_flush_classes(struct g_geom *gp) -{ - struct g_sched_softc *sc; - struct g_sched_class *cp, *cp2; - int i; - - sc = gp->softc; - - if (!sc->sc_hash || ticks - sc->sc_flush_ticks <= 0) - return; - - for (i = 0; i < G_SCHED_HASH_SIZE; i++) { - LIST_FOREACH_SAFE(cp, &sc->sc_hash[i], gsc_clist, cp2) { - if (cp->gsc_refs == 1 && ticks - cp->gsc_expire > 0) - g_sched_put_class(gp, cp->gsc_priv); - } - } - - sc->sc_flush_ticks = ticks + me.gs_expire_secs * hz; -} - -/* - * Wait for the completion of any outstanding request. To ensure - * that this does not take forever, the caller has to make sure that - * no new requests enter the scheduler before calling us. - * - * Must be called with the gp mutex held and topology locked. - */ -static int -g_sched_wait_pending(struct g_geom *gp) -{ - struct g_sched_softc *sc = gp->softc; - int endticks = ticks + hz; - - g_topology_assert(); - - while (sc->sc_pending && endticks - ticks >= 0) - msleep(gp, &sc->sc_mtx, 0, "sched_wait_pending", hz / 4); - - return (sc->sc_pending ? ETIMEDOUT : 0); -} - -static int -g_sched_remove_locked(struct g_geom *gp, struct g_gsched *gsp) -{ - struct g_sched_softc *sc = gp->softc; - int error; - - /* Set the flushing flag: new bios will not enter the scheduler. */ - sc->sc_flags |= G_SCHED_FLUSHING; - - g_sched_forced_dispatch(gp); - error = g_sched_wait_pending(gp); - if (error) - goto failed; - - /* No more requests pending or in flight from the old gsp. */ - - g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, gsp, sc->sc_data); - sc->sc_hash = NULL; - - /* - * Avoid deadlock here by releasing the gp mutex and reacquiring - * it once done. It should be safe, since no reconfiguration or - * destruction can take place due to the geom topology lock; no - * new request can use the current sc_data since we flagged the - * geom as being flushed.
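The per-client lookup that feeds this machinery, g_sched_get_class() above, is a plain find-or-create on a hash bucket, with a new entry born holding two references (one for the table, one for the caller). An illustrative userland reduction, all names hypothetical:

#include <stdlib.h>

#define HASH_SIZE	32		/* like G_SCHED_HASH_SIZE, power of two */

struct klass {
	struct klass *next;
	unsigned long key;		/* opaque classifier value */
	int refs;
};

static struct klass *table[HASH_SIZE];

static struct klass *
class_get(unsigned long key)
{
	struct klass **bucket = &table[key & (HASH_SIZE - 1)];
	struct klass *kp;

	/* Fast path: the class already exists, take a reference. */
	for (kp = *bucket; kp != NULL; kp = kp->next) {
		if (kp->key == key) {
			kp->refs++;
			return (kp);
		}
	}
	/* Slow path: create it and link it into the bucket. */
	kp = calloc(1, sizeof(*kp));
	if (kp == NULL)
		return (NULL);
	kp->key = key;
	kp->refs = 2;		/* one for the table, one for the caller */
	kp->next = *bucket;
	*bucket = kp;
	return (kp);
}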
- */ - g_sched_unlock(gp); - gsp->gs_fini(sc->sc_data); - g_sched_lock(gp); - - sc->sc_gsched = NULL; - sc->sc_data = NULL; - g_gsched_unref(gsp); - -failed: - sc->sc_flags &= ~G_SCHED_FLUSHING; - - return (error); -} - -static int -g_sched_remove(struct g_geom *gp, struct g_gsched *gsp) -{ - int error; - - g_sched_lock(gp); - error = g_sched_remove_locked(gp, gsp); /* gsp is surely non-null */ - g_sched_unlock(gp); - - return (error); -} - -/* - * Support function for create/taste -- locate the desired - * algorithm and grab a reference to it. - */ -static struct g_gsched * -g_gsched_find(const char *name) -{ - struct g_gsched *gsp = NULL; - - mtx_lock(&me.gs_mtx); - LIST_FOREACH(gsp, &me.gs_scheds, glist) { - if (strcmp(name, gsp->gs_name) == 0) { - g_gsched_ref(gsp); - break; - } - } - mtx_unlock(&me.gs_mtx); - - return (gsp); -} - -/* - * Rebuild the list of scheduler names. - * To be called with me.gs_mtx lock held. - */ -static void -g_gsched_build_names(struct g_gsched *gsp) -{ - int pos, l; - struct g_gsched *cur; - - pos = 0; - LIST_FOREACH(cur, &me.gs_scheds, glist) { - l = strlen(cur->gs_name); - if (l + pos + 1 + 1 < sizeof(me.gs_names)) { - if (pos != 0) - me.gs_names[pos++] = ' '; - strcpy(me.gs_names + pos, cur->gs_name); - pos += l; - } - } - me.gs_names[pos] = '\0'; -} - -/* - * Register or unregister individual scheduling algorithms. - */ -static int -g_gsched_register(struct g_gsched *gsp) -{ - struct g_gsched *cur; - int error = 0; - - mtx_lock(&me.gs_mtx); - LIST_FOREACH(cur, &me.gs_scheds, glist) { - if (strcmp(gsp->gs_name, cur->gs_name) == 0) - break; - } - if (cur != NULL) { - G_SCHED_DEBUG(0, "A scheduler named %s already " - "exists.", gsp->gs_name); - error = EEXIST; - } else { - LIST_INSERT_HEAD(&me.gs_scheds, gsp, glist); - gsp->gs_refs = 1; - me.gs_sched_count++; - g_gsched_build_names(gsp); - } - mtx_unlock(&me.gs_mtx); - - return (error); -} - -struct g_gsched_unregparm { - struct g_gsched *gup_gsp; - int gup_error; -}; - -static void -g_gsched_unregister(void *arg, int flag) -{ - struct g_gsched_unregparm *parm = arg; - struct g_gsched *gsp = parm->gup_gsp, *cur, *tmp; - struct g_sched_softc *sc; - struct g_geom *gp, *gp_tmp; - int error; - - parm->gup_error = 0; - - g_topology_assert(); - - if (flag == EV_CANCEL) - return; - - mtx_lock(&me.gs_mtx); - - LIST_FOREACH_SAFE(gp, &g_sched_class.geom, geom, gp_tmp) { - if (gp->class != &g_sched_class) - continue; /* Should not happen. */ - - sc = gp->softc; - if (sc->sc_gsched == gsp) { - error = g_sched_remove(gp, gsp); - if (error) - goto failed; - } - } - - LIST_FOREACH_SAFE(cur, &me.gs_scheds, glist, tmp) { - if (cur != gsp) - continue; - - if (gsp->gs_refs != 1) { - G_SCHED_DEBUG(0, "%s still in use.", - gsp->gs_name); - parm->gup_error = EBUSY; - } else { - LIST_REMOVE(gsp, glist); - me.gs_sched_count--; - g_gsched_build_names(gsp); - } - break; - } - - if (cur == NULL) { - G_SCHED_DEBUG(0, "%s not registered.", gsp->gs_name); - parm->gup_error = ENOENT; - } - -failed: - mtx_unlock(&me.gs_mtx); -} - -static inline void -g_gsched_global_init(void) -{ - - if (!me.gs_initialized) { - G_SCHED_DEBUG(0, "Initializing global data."); - mtx_init(&me.gs_mtx, "gsched", NULL, MTX_DEF); - LIST_INIT(&me.gs_scheds); - bioq_init(&me.gs_pending); - me.gs_initialized = 1; - } -} - -/* - * Module event called when a scheduling algorithm module is loaded or - * unloaded.
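g_gsched_build_names() above rebuilds the space-separated catalog of algorithm names in a fixed buffer (exported through the kern.geom.sched.algorithms sysctl), silently skipping names that would overflow it. The same logic in isolation, with an illustrative signature rather than the module's globals:

#include <string.h>

static void
build_names(char *buf, size_t bufsz, const char *const names[], int n)
{
	size_t pos = 0, l;
	int i;

	for (i = 0; i < n; i++) {
		l = strlen(names[i]);
		/* need room for an optional ' ', the name and the NUL */
		if (pos + (pos != 0) + l + 1 > bufsz)
			continue;
		if (pos != 0)
			buf[pos++] = ' ';
		memcpy(buf + pos, names[i], l);
		pos += l;
	}
	buf[pos] = '\0';
}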
- */ -int -g_gsched_modevent(module_t mod, int cmd, void *arg) -{ - struct g_gsched *gsp = arg; - struct g_gsched_unregparm parm; - int error; - - G_SCHED_DEBUG(0, "Modevent %d.", cmd); - - /* - * If the module is loaded at boot, the geom thread that calls - * g_sched_init() might actually run after g_gsched_modevent(), - * so make sure that the module is properly initialized. - */ - g_gsched_global_init(); - - error = EOPNOTSUPP; - switch (cmd) { - case MOD_LOAD: - error = g_gsched_register(gsp); - G_SCHED_DEBUG(0, "Loaded module %s error %d.", - gsp->gs_name, error); - if (error == 0) - g_retaste(&g_sched_class); - break; - - case MOD_UNLOAD: - parm.gup_gsp = gsp; - parm.gup_error = 0; - - error = g_waitfor_event(g_gsched_unregister, - &parm, M_WAITOK, NULL); - if (error == 0) - error = parm.gup_error; - G_SCHED_DEBUG(0, "Unloaded module %s error %d.", - gsp->gs_name, error); - break; - } - - return (error); -} - -#ifdef KTR -#define TRC_BIO_EVENT(e, bp) g_sched_trace_bio_ ## e (bp) - -static inline char -g_sched_type(struct bio *bp) -{ - - if (bp->bio_cmd == BIO_READ) - return ('R'); - else if (bp->bio_cmd == BIO_WRITE) - return ('W'); - return ('U'); -} - -static inline void -g_sched_trace_bio_START(struct bio *bp) -{ - - CTR5(KTR_GSCHED, "S %lu %c %lu/%lu %lu", g_sched_classify(bp), - g_sched_type(bp), bp->bio_offset / ULONG_MAX, - bp->bio_offset, bp->bio_length); -} - -static inline void -g_sched_trace_bio_DONE(struct bio *bp) -{ - - CTR5(KTR_GSCHED, "D %lu %c %lu/%lu %lu", g_sched_classify(bp), - g_sched_type(bp), bp->bio_offset / ULONG_MAX, - bp->bio_offset, bp->bio_length); -} -#else /* !KTR */ -#define TRC_BIO_EVENT(e, bp) -#endif /* !KTR */ - -/* - * g_sched_done() and g_sched_start() dispatch the geom requests to - * the scheduling algorithm in use. - */ -static void -g_sched_done(struct bio *bio) -{ - struct g_geom *gp = bio->bio_caller2; - struct g_sched_softc *sc = gp->softc; - - TRC_BIO_EVENT(DONE, bio); - - KASSERT(bio->bio_caller1, ("null bio_caller1 in g_sched_done")); - - g_sched_lock(gp); - - g_sched_update_stats(bio); - sc->sc_gsched->gs_done(sc->sc_data, bio); - if (!--sc->sc_pending) - wakeup(gp); - - g_sched_flush_classes(gp); - g_sched_unlock(gp); - - g_std_done(bio); -} - -static void -g_sched_start(struct bio *bp) -{ - struct g_geom *gp = bp->bio_to->geom; - struct g_sched_softc *sc = gp->softc; - struct bio *cbp; - - TRC_BIO_EVENT(START, bp); - G_SCHED_LOGREQ(bp, "Request received."); - - cbp = g_clone_bio(bp); - if (cbp == NULL) { - g_io_deliver(bp, ENOMEM); - return; - } - cbp->bio_done = g_sched_done; - cbp->bio_to = LIST_FIRST(&gp->provider); - KASSERT(cbp->bio_to != NULL, ("NULL provider")); - - /* We only schedule reads and writes. */ - if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE) - goto bypass; - - G_SCHED_LOGREQ(cbp, "Sending request."); - - g_sched_lock(gp); - /* - * Call the algorithm's gs_start to queue the request in the - * scheduler. If gs_start fails then pass the request down, - * otherwise call g_sched_dispatch() which tries to push - * one or more requests down. - */ - if (!sc->sc_gsched || (sc->sc_flags & G_SCHED_FLUSHING) || - sc->sc_gsched->gs_start(sc->sc_data, cbp)) { - g_sched_unlock(gp); - goto bypass; - } - /* - * We use bio_caller1 to mark requests that are scheduled - * so make sure it is not NULL. - */ - if (cbp->bio_caller1 == NULL) - cbp->bio_caller1 = &me; /* anything not NULL */ - - cbp->bio_caller2 = gp; - sc->sc_pending++; - - /* Update general stats. 
*/ - me.gs_in_flight++; - me.gs_requests++; - me.gs_bytes_in_flight += bp->bio_length; - if (bp->bio_cmd == BIO_WRITE) { - me.gs_writes_in_flight++; - me.gs_write_bytes_in_flight += bp->bio_length; - } - g_sched_dispatch(gp); - g_sched_unlock(gp); - return; - -bypass: - cbp->bio_done = g_std_done; - cbp->bio_caller1 = NULL; /* not scheduled */ - g_io_request(cbp, LIST_FIRST(&gp->consumer)); -} - -/* - * The next few functions are the geom glue. - */ -static void -g_sched_orphan(struct g_consumer *cp) -{ - - g_topology_assert(); - g_sched_destroy(cp->geom, 1); -} - -static int -g_sched_access(struct g_provider *pp, int dr, int dw, int de) -{ - struct g_geom *gp; - struct g_consumer *cp; - int error; - - gp = pp->geom; - cp = LIST_FIRST(&gp->consumer); - error = g_access(cp, dr, dw, de); - - return (error); -} - -static void -g_sched_temporary_start(struct bio *bio) -{ - - mtx_lock(&me.gs_mtx); - me.gs_npending++; - bioq_disksort(&me.gs_pending, bio); - mtx_unlock(&me.gs_mtx); -} - -static void -g_sched_flush_pending(g_start_t *start) -{ - struct bio *bp; - - while ((bp = bioq_takefirst(&me.gs_pending))) - start(bp); -} - -static int -g_insert_proxy(struct g_geom *gp, struct g_provider *newpp, - struct g_geom *dstgp, struct g_provider *pp, struct g_consumer *cp) -{ - struct g_sched_softc *sc = gp->softc; - g_start_t *saved_start, *flush = g_sched_start; - int error = 0, endticks = ticks + hz; - - g_cancel_event(newpp); /* prevent taste() */ - /* copy private fields */ - newpp->private = pp->private; - newpp->index = pp->index; - - /* Queue all the early requests coming for us. */ - me.gs_npending = 0; - saved_start = pp->geom->start; - dstgp->start = g_sched_temporary_start; - - while (pp->nstart - pp->nend != me.gs_npending && - endticks - ticks >= 0) - tsleep(pp, PRIBIO, "-", hz/10); - - if (pp->nstart - pp->nend != me.gs_npending) { - flush = saved_start; - error = ETIMEDOUT; - goto fail; - } - - /* link pp to this geom */ - LIST_REMOVE(pp, provider); - pp->geom = gp; - LIST_INSERT_HEAD(&gp->provider, pp, provider); - - /* - * replicate the counts from the parent in the - * new provider and consumer nodes - */ - cp->acr = newpp->acr = pp->acr; - cp->acw = newpp->acw = pp->acw; - cp->ace = newpp->ace = pp->ace; - sc->sc_flags |= G_SCHED_PROXYING; - -fail: - dstgp->start = saved_start; - - g_sched_flush_pending(flush); - - return (error); -} - -/* - * Create a geom node for the device passed as *pp. - * If successful, add a reference to this gsp. - */ -static int -g_sched_create(struct gctl_req *req, struct g_class *mp, - struct g_provider *pp, struct g_gsched *gsp, int proxy) -{ - struct g_sched_softc *sc = NULL; - struct g_geom *gp, *dstgp; - struct g_provider *newpp = NULL; - struct g_consumer *cp = NULL; - char name[64]; - int error; - - g_topology_assert(); - - snprintf(name, sizeof(name), "%s%s", pp->name, G_SCHED_SUFFIX); - LIST_FOREACH(gp, &mp->geom, geom) { - if (strcmp(gp->name, name) == 0) { - gctl_error(req, "Geom %s already exists.", - name); - return (EEXIST); - } - } - - gp = g_new_geomf(mp, "%s", name); - dstgp = proxy ? pp->geom : gp; /* where do we link the provider */ - - sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); - sc->sc_gsched = gsp; - sc->sc_data = gsp->gs_init(gp); - if (sc->sc_data == NULL) { - error = ENOMEM; - goto fail; - } - - sc->sc_hash = g_sched_hash_init(gsp, &sc->sc_mask, HASH_WAITOK); - - /* - * Do not initialize the flush mechanism, will be initialized - * on the first insertion on the hash table. 
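g_sched_create() above and below unwinds partially constructed state through a single fail: label, releasing resources in an order that is safe whatever point the construction reached. The pattern, reduced to a minimal userland sketch with hypothetical names:

#include <stdlib.h>

struct thing { void *a, *b; };

static struct thing *
thing_create(void)
{
	struct thing *t;

	t = calloc(1, sizeof(*t));
	if (t == NULL)
		return (NULL);
	t->a = malloc(64);
	if (t->a == NULL)
		goto fail;
	t->b = malloc(64);
	if (t->b == NULL)
		goto fail;
	return (t);

fail:
	/* free(NULL) is a no-op, so partially built state is safe. */
	free(t->a);
	free(t->b);
	free(t);
	return (NULL);
}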
- */ - - mtx_init(&sc->sc_mtx, "g_sched_mtx", NULL, MTX_DEF); - - gp->softc = sc; - gp->start = g_sched_start; - gp->orphan = g_sched_orphan; - gp->access = g_sched_access; - gp->dumpconf = g_sched_dumpconf; - - newpp = g_new_providerf(dstgp, "%s", gp->name); - newpp->mediasize = pp->mediasize; - newpp->sectorsize = pp->sectorsize; - - cp = g_new_consumer(gp); - error = g_attach(cp, proxy ? newpp : pp); - if (error != 0) { - gctl_error(req, "Cannot attach to provider %s.", - pp->name); - goto fail; - } - - g_error_provider(newpp, 0); - if (proxy) { - error = g_insert_proxy(gp, newpp, dstgp, pp, cp); - if (error) - goto fail; - } - G_SCHED_DEBUG(0, "Device %s created.", gp->name); - - g_gsched_ref(gsp); - - return (0); - -fail: - if (cp != NULL) { - if (cp->provider != NULL) - g_detach(cp); - g_destroy_consumer(cp); - } - if (newpp != NULL) - g_destroy_provider(newpp); - if (sc->sc_hash) - g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, - gsp, sc->sc_data); - if (sc->sc_data) - gsp->gs_fini(sc->sc_data); - g_free(gp->softc); - g_destroy_geom(gp); - - return (error); -} - -/* - * Support for dynamic switching of scheduling algorithms. - * First initialize the data structures for the new algorithm, - * then call g_sched_remove_locked() to flush all references - * to the old one, finally link the new algorithm. - */ -static int -g_sched_change_algo(struct gctl_req *req, struct g_class *mp, - struct g_provider *pp, struct g_gsched *gsp) -{ - struct g_sched_softc *sc; - struct g_geom *gp; - struct g_hash *newh; - void *data; - u_long mask; - int error = 0; - - gp = pp->geom; - sc = gp->softc; - - data = gsp->gs_init(gp); - if (data == NULL) - return (ENOMEM); - - newh = g_sched_hash_init(gsp, &mask, HASH_WAITOK); - if (gsp->gs_priv_size && !newh) { - error = ENOMEM; - goto fail; - } - - g_sched_lock(gp); - if (sc->sc_gsched) { /* can be NULL in some cases */ - error = g_sched_remove_locked(gp, sc->sc_gsched); - if (error) - goto fail; - } - - g_gsched_ref(gsp); - sc->sc_gsched = gsp; - sc->sc_data = data; - sc->sc_hash = newh; - sc->sc_mask = mask; - - g_sched_unlock(gp); - - return (0); - -fail: - if (newh) - g_sched_hash_fini(gp, newh, mask, gsp, data); - - if (data) - gsp->gs_fini(data); - - g_sched_unlock(gp); - - return (error); -} - -/* - * Stop the request flow directed to the proxy, redirecting the new - * requests to the me.gs_pending queue. 
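Transparent insertion (g_insert_proxy() above) and removal (g_detach_proxy()/g_destroy_proxy() below) both rely on the same trick: temporarily point the provider's start routine at a queue-only stub, then flush the parked requests through whichever start routine ends up in place. A single-threaded sketch of that trick, with no locking and a plain list instead of bioq_disksort(); all names are illustrative:

#include <stddef.h>

struct xbio { struct xbio *next; };

typedef void start_fn(struct xbio *);

static struct xbio *pending;

/* Stand-in start routine: park the request on the pending list. */
static void
temporary_start(struct xbio *bp)
{
	bp->next = pending;
	pending = bp;
}

/* Re-issue everything we parked through the final start routine. */
static void
flush_pending(start_fn *start)
{
	struct xbio *bp;

	while ((bp = pending) != NULL) {
		pending = bp->next;
		start(bp);
	}
}

In the real code the flush target depends on the outcome: the new scheduler's start routine on success, the original geom's on a failed insertion, or a black hole that fails the bios with ENXIO when there is nowhere left to send them.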
- */ -static struct g_provider * -g_detach_proxy(struct g_geom *gp) -{ - struct g_consumer *cp; - struct g_provider *pp, *newpp; - - do { - pp = LIST_FIRST(&gp->provider); - if (pp == NULL) - break; - cp = LIST_FIRST(&gp->consumer); - if (cp == NULL) - break; - newpp = cp->provider; - if (newpp == NULL) - break; - - me.gs_npending = 0; - pp->geom->start = g_sched_temporary_start; - - return (pp); - } while (0); - printf("%s error detaching proxy %s\n", __FUNCTION__, gp->name); - - return (NULL); -} - -static void -g_sched_blackhole(struct bio *bp) -{ - - g_io_deliver(bp, ENXIO); -} - -static inline void -g_reparent_provider(struct g_provider *pp, struct g_geom *gp, - struct g_provider *newpp) -{ - - LIST_REMOVE(pp, provider); - if (newpp) { - pp->private = newpp->private; - pp->index = newpp->index; - } - pp->geom = gp; - LIST_INSERT_HEAD(&gp->provider, pp, provider); -} - -static inline void -g_unproxy_provider(struct g_provider *oldpp, struct g_provider *newpp) -{ - struct g_geom *gp = oldpp->geom; - - g_reparent_provider(oldpp, newpp->geom, newpp); - - /* - * Hackish: let the system destroy the old provider for us, just - * in case someone attached a consumer to it, in which case a - * direct call to g_destroy_provider() would not work. - */ - g_reparent_provider(newpp, gp, NULL); -} - -/* - * Complete the proxy destruction, linking the old provider to its - * original geom, and destroying the proxy provider. Also take care - * of issuing the pending requests collected in me.gs_pending (if any). - */ -static int -g_destroy_proxy(struct g_geom *gp, struct g_provider *oldpp) -{ - struct g_consumer *cp; - struct g_provider *newpp; - - do { - cp = LIST_FIRST(&gp->consumer); - if (cp == NULL) - break; - newpp = cp->provider; - if (newpp == NULL) - break; - - /* Relink the provider to its original geom. */ - g_unproxy_provider(oldpp, newpp); - - /* Detach consumer from provider, and destroy provider. */ - cp->acr = newpp->acr = 0; - cp->acw = newpp->acw = 0; - cp->ace = newpp->ace = 0; - g_detach(cp); - - /* Send the pending bios through the right start function. */ - g_sched_flush_pending(oldpp->geom->start); - - return (0); - } while (0); - printf("%s error destroying proxy %s\n", __FUNCTION__, gp->name); - - /* We cannot send the pending bios anywhere... */ - g_sched_flush_pending(g_sched_blackhole); - - return (EINVAL); -} - -static int -g_sched_destroy(struct g_geom *gp, boolean_t force) -{ - struct g_provider *pp, *oldpp = NULL; - struct g_sched_softc *sc; - struct g_gsched *gsp; - int error; - - g_topology_assert(); - sc = gp->softc; - if (sc == NULL) - return (ENXIO); - if (!(sc->sc_flags & G_SCHED_PROXYING)) { - pp = LIST_FIRST(&gp->provider); - if (pp && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { - const char *msg = force ? - "but we force removal" : "cannot remove"; - - G_SCHED_DEBUG(!force, - "Device %s is still open (r%dw%de%d), %s.", - pp->name, pp->acr, pp->acw, pp->ace, msg); - if (!force) - return (EBUSY); - } else { - G_SCHED_DEBUG(0, "Device %s removed.", gp->name); - } - } else - oldpp = g_detach_proxy(gp); - - gsp = sc->sc_gsched; - if (gsp) { - /* - * XXX bad hack here: force a dispatch to release - * any reference to the hash table still held by - * the scheduler. - */ - g_sched_lock(gp); - /* - * We are dying here, no new requests should enter - * the scheduler. This is granted by the topology, - * either in case we were proxying (new bios are - * being redirected) or not (see the access check - * above).
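The bounded waits used on this path (g_sched_wait_pending(), and the similar loop in g_insert_proxy()) compare tick counts by signed difference, the usual wrap-tolerant idiom, rather than by absolute value. A sketch of the test, with 'now' standing in for the kernel's ticks counter:

static int
tick_deadline_passed(int now, int deadline)
{
	/*
	 * Subtract first, then test the sign: the difference stays
	 * meaningful across counter wraparound as long as the two
	 * stamps are less than 2^31 ticks apart. This mirrors the
	 * "endticks - ticks >= 0" tests in the code above.
	 */
	return (deadline - now < 0);
}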
- */ - g_sched_forced_dispatch(gp); - error = g_sched_wait_pending(gp); - - if (error) { - /* - * Not all the requests came home: this might happen - * under heavy load, or if we were waiting for any - * bio which is served in the event path (see - * geom_slice.c for an example of how this can - * happen). Try to restore a working configuration - * if we can. - */ - if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) { - g_sched_flush_pending(force ? - g_sched_blackhole : g_sched_start); - } - - /* - * In the forced destroy case there is not much - * we can do: we have pending bios that will call - * g_sched_done() somehow, and we don't want them - * to crash the system by using freed memory. We tell - * the user that something went wrong, and leak some - * memory here. - * Note: the callers using force = 1 ignore the - * return value. - */ - if (force) { - G_SCHED_DEBUG(0, "Pending requests while " - "destroying geom, some memory leaked."); - } - - return (error); - } - - g_sched_unlock(gp); - g_sched_hash_fini(gp, sc->sc_hash, sc->sc_mask, - gsp, sc->sc_data); - sc->sc_hash = NULL; - gsp->gs_fini(sc->sc_data); - g_gsched_unref(gsp); - sc->sc_gsched = NULL; - } else - error = 0; - - if ((sc->sc_flags & G_SCHED_PROXYING) && oldpp) { - error = g_destroy_proxy(gp, oldpp); - - if (error) { - if (force) { - G_SCHED_DEBUG(0, "Unrecoverable error while " - "destroying a proxy geom, leaking some " - "memory."); - } - - return (error); - } - } - - mtx_destroy(&sc->sc_mtx); - - g_free(gp->softc); - gp->softc = NULL; - g_wither_geom(gp, ENXIO); - - return (error); -} - -static int -g_sched_destroy_geom(struct gctl_req *req, struct g_class *mp, - struct g_geom *gp) -{ - - return (g_sched_destroy(gp, 0)); -} - -/* - * Functions related to the classification of requests. - * - * On recent FreeBSD versions (8.0 and above), we store a reference - * to the issuer of a request in bp->bio_classifier1 as soon - * as the bio is posted to the geom queue (and not later, because - * requests are managed by the g_down thread afterwards). - */ - -/* - * Classifier support for recent FreeBSD versions: we use - * a very simple classifier, using only curthread to tag a request. - * The classifier is registered at module load, and unregistered - * at module unload. - */ -static int -g_sched_tag(void *arg, struct bio *bp) -{ - - bp->bio_classifier1 = curthread; - return (1); -} - -static struct g_classifier_hook g_sched_classifier = { - .func = g_sched_tag, -}; - -static inline void -g_classifier_ini(void) -{ - - g_register_classifier(&g_sched_classifier); -} - -static inline void -g_classifier_fini(void) -{ - - g_unregister_classifier(&g_sched_classifier); -} - -static void -g_sched_init(struct g_class *mp) -{ - - g_gsched_global_init(); - - G_SCHED_DEBUG(0, "Loading: mp = %p, g_sched_class = %p.", - mp, &g_sched_class); - - /* Patch g_io_request to store classification info in the bio.
*/ - g_classifier_ini(); -} - -static void -g_sched_fini(struct g_class *mp) -{ - - g_classifier_fini(); - - G_SCHED_DEBUG(0, "Unloading..."); - - KASSERT(LIST_EMPTY(&me.gs_scheds), ("still registered schedulers")); - mtx_destroy(&me.gs_mtx); -} - -static int -g_sched_ioctl(struct g_provider *pp, u_long cmd, void *data, int fflag, - struct thread *td) -{ - struct g_consumer *cp; - struct g_geom *gp; - - cp = LIST_FIRST(&pp->geom->consumer); - if (cp == NULL) - return (ENOIOCTL); - gp = cp->provider->geom; - if (gp->ioctl == NULL) - return (ENOIOCTL); - return (gp->ioctl(cp->provider, cmd, data, fflag, td)); -} - -/* - * Read the i-th argument for a request, skipping the /dev/ - * prefix if present. - */ -static const char * -g_sched_argi(struct gctl_req *req, int i) -{ - static const char *dev_prefix = "/dev/"; - const char *name; - char param[16]; - int l = strlen(dev_prefix); - - snprintf(param, sizeof(param), "arg%d", i); - name = gctl_get_asciiparam(req, param); - if (name == NULL) - gctl_error(req, "No 'arg%d' argument", i); - else if (strncmp(name, dev_prefix, l) == 0) - name += l; - return (name); -} - -/* - * Fetch nargs and do appropriate checks. - */ -static int -g_sched_get_nargs(struct gctl_req *req) -{ - int *nargs; - - nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs)); - if (nargs == NULL) { - gctl_error(req, "No 'nargs' argument"); - return (0); - } - if (*nargs <= 0) - gctl_error(req, "Missing device(s)."); - return (*nargs); -} - -/* - * Check whether we should add the class on certain volumes when - * this geom is created. Right now this is under control of a kenv - * variable containing the names of all devices that we care about. - * Probably we should only support transparent insertion as the - * preferred mode of operation. - */ -static struct g_geom * -g_sched_taste(struct g_class *mp, struct g_provider *pp, - int flags __unused) -{ - struct g_gsched *gsp = NULL; /* the . algorithm we want */ - const char *s; /* generic string pointer */ - const char *taste_names; /* devices we like */ - int l; - - g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, - mp->name, pp->name); - g_topology_assert(); - - G_SCHED_DEBUG(2, "Tasting %s.", pp->name); - - do { - /* do not taste on ourselves */ - if (pp->geom->class == mp) - break; - - taste_names = kern_getenv("geom.sched.taste"); - if (taste_names == NULL) - break; - - l = strlen(pp->name); - for (s = taste_names; *s && - (s = strstr(s, pp->name)); s++) { - /* further checks for an exact match */ - if ( (s == taste_names || s[-1] == ' ') && - (s[l] == '\0' || s[l] == ' ') ) - break; - } - if (s == NULL) - break; - G_SCHED_DEBUG(0, "Attach device %s match [%s]\n", - pp->name, s); - - /* look up the provider name in the list */ - s = kern_getenv("geom.sched.algo"); - if (s == NULL) - s = "rr"; - - gsp = g_gsched_find(s); /* also get a reference */ - if (gsp == NULL) { - G_SCHED_DEBUG(0, "Bad '%s' algorithm.", s); - break; - } - - /* XXX create with 1 as last argument ? 
*/ - g_sched_create(NULL, mp, pp, gsp, 0); - g_gsched_unref(gsp); - } while (0); - return NULL; -} - -static void -g_sched_ctl_create(struct gctl_req *req, struct g_class *mp, int proxy) -{ - struct g_provider *pp; - struct g_gsched *gsp; - const char *name; - int i, nargs; - - g_topology_assert(); - - name = gctl_get_asciiparam(req, "algo"); - if (name == NULL) { - gctl_error(req, "No '%s' argument", "algo"); - return; - } - - gsp = g_gsched_find(name); /* also get a reference */ - if (gsp == NULL) { - gctl_error(req, "Bad algorithm '%s'", name); - return; - } - - nargs = g_sched_get_nargs(req); - - /* - * Run on the arguments, and break on any error. - * We look for a device name, but skip the /dev/ prefix if any. - */ - for (i = 0; i < nargs; i++) { - name = g_sched_argi(req, i); - if (name == NULL) - break; - pp = g_provider_by_name(name); - if (pp == NULL) { - G_SCHED_DEBUG(1, "Provider %s is invalid.", name); - gctl_error(req, "Provider %s is invalid.", name); - break; - } - if (g_sched_create(req, mp, pp, gsp, proxy) != 0) - break; - } - - g_gsched_unref(gsp); -} - -static void -g_sched_ctl_configure(struct gctl_req *req, struct g_class *mp) -{ - struct g_provider *pp; - struct g_gsched *gsp; - const char *name; - int i, nargs; - - g_topology_assert(); - - name = gctl_get_asciiparam(req, "algo"); - if (name == NULL) { - gctl_error(req, "No '%s' argument", "algo"); - return; - } - - gsp = g_gsched_find(name); /* also get a reference */ - if (gsp == NULL) { - gctl_error(req, "Bad algorithm '%s'", name); - return; - } - - nargs = g_sched_get_nargs(req); - - /* - * Run on the arguments, and break on any error. - * We look for a device name, but skip the /dev/ prefix if any. - */ - for (i = 0; i < nargs; i++) { - name = g_sched_argi(req, i); - if (name == NULL) - break; - pp = g_provider_by_name(name); - if (pp == NULL || pp->geom->class != mp) { - G_SCHED_DEBUG(1, "Provider %s is invalid.", name); - gctl_error(req, "Provider %s is invalid.", name); - break; - } - if (g_sched_change_algo(req, mp, pp, gsp) != 0) - break; - } - - g_gsched_unref(gsp); -} - -static struct g_geom * -g_sched_find_geom(struct g_class *mp, const char *name) -{ - struct g_geom *gp; - - LIST_FOREACH(gp, &mp->geom, geom) { - if (strcmp(gp->name, name) == 0) - return (gp); - } - return (NULL); -} - -static void -g_sched_ctl_destroy(struct gctl_req *req, struct g_class *mp) -{ - int nargs, *force, error, i; - struct g_geom *gp; - const char *name; - - g_topology_assert(); - - nargs = g_sched_get_nargs(req); - - force = gctl_get_paraml(req, "force", sizeof(*force)); - if (force == NULL) { - gctl_error(req, "No 'force' argument"); - return; - } - - for (i = 0; i < nargs; i++) { - name = g_sched_argi(req, i); - if (name == NULL) - break; - - gp = g_sched_find_geom(mp, name); - if (gp == NULL) { - G_SCHED_DEBUG(1, "Device %s is invalid.", name); - gctl_error(req, "Device %s is invalid.", name); - break; - } - - error = g_sched_destroy(gp, *force); - if (error != 0) { - gctl_error(req, "Cannot destroy device %s (error=%d).", - gp->name, error); - break; - } - } -} - -static void -g_sched_config(struct gctl_req *req, struct g_class *mp, const char *verb) -{ - uint32_t *version; - - g_topology_assert(); - - version = gctl_get_paraml(req, "version", sizeof(*version)); - if (version == NULL) { - gctl_error(req, "No '%s' argument.", "version"); - return; - } - - if (*version != G_SCHED_VERSION) { - gctl_error(req, "Userland and kernel parts are " - "out of sync."); - return; - } - - if (strcmp(verb, "create") == 0) { - 
g_sched_ctl_create(req, mp, 0); - return; - } else if (strcmp(verb, "insert") == 0) { - g_sched_ctl_create(req, mp, 1); - return; - } else if (strcmp(verb, "configure") == 0) { - g_sched_ctl_configure(req, mp); - return; - } else if (strcmp(verb, "destroy") == 0) { - g_sched_ctl_destroy(req, mp); - return; - } - - gctl_error(req, "Unknown verb."); -} - -static void -g_sched_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, - struct g_consumer *cp, struct g_provider *pp) -{ - struct g_sched_softc *sc = gp->softc; - struct g_gsched *gsp = sc->sc_gsched; - if (indent == NULL) { /* plaintext */ - sbuf_printf(sb, " algo %s", gsp ? gsp->gs_name : "--"); - } - if (gsp != NULL && gsp->gs_dumpconf) - gsp->gs_dumpconf(sb, indent, gp, cp, pp); -} - -DECLARE_GEOM_CLASS(g_sched_class, g_sched); -MODULE_VERSION(geom_sched, 0); Property changes on: head/sys/geom/sched/g_sched.c ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/geom/geom.h =================================================================== --- head/sys/geom/geom.h (revision 356184) +++ head/sys/geom/geom.h (revision 356185) @@ -1,445 +1,432 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _GEOM_GEOM_H_ #define _GEOM_GEOM_H_ #include #include #include #include #include #include #include struct g_class; struct g_geom; struct g_consumer; struct g_provider; struct g_stat; struct thread; struct bio; struct sbuf; struct gctl_req; struct g_configargs; struct disk_zone_args; typedef int g_config_t (struct g_configargs *ca); typedef void g_ctl_req_t (struct gctl_req *, struct g_class *cp, char const *verb); typedef int g_ctl_create_geom_t (struct gctl_req *, struct g_class *cp, struct g_provider *pp); typedef int g_ctl_destroy_geom_t (struct gctl_req *, struct g_class *cp, struct g_geom *gp); typedef int g_ctl_config_geom_t (struct gctl_req *, struct g_geom *gp, const char *verb); typedef void g_init_t (struct g_class *mp); typedef void g_fini_t (struct g_class *mp); typedef struct g_geom * g_taste_t (struct g_class *, struct g_provider *, int flags); typedef int g_ioctl_t(struct g_provider *pp, u_long cmd, void *data, int fflag, struct thread *td); #define G_TF_NORMAL 0 #define G_TF_INSIST 1 #define G_TF_TRANSPARENT 2 typedef int g_access_t (struct g_provider *, int, int, int); /* XXX: not sure about the thread arg */ typedef void g_orphan_t (struct g_consumer *); typedef void g_start_t (struct bio *); typedef void g_spoiled_t (struct g_consumer *); typedef void g_attrchanged_t (struct g_consumer *, const char *attr); typedef void g_provgone_t (struct g_provider *); typedef void g_dumpconf_t (struct sbuf *, const char *indent, struct g_geom *, struct g_consumer *, struct g_provider *); typedef void g_resize_t(struct g_consumer *cp); /* * The g_class structure describes a transformation class. In other words * all BSD disklabel handlers share one g_class, all MBR handlers share * one common g_class and so on. * Certain operations are instantiated on the class, most notably the * taste and config_geom functions. */ struct g_class { const char *name; u_int version; u_int spare0; g_taste_t *taste; g_config_t *config; g_ctl_req_t *ctlreq; g_init_t *init; g_fini_t *fini; g_ctl_destroy_geom_t *destroy_geom; /* * Default values for geom methods */ g_start_t *start; g_spoiled_t *spoiled; g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; g_ioctl_t *ioctl; g_provgone_t *providergone; g_resize_t *resize; void *spare1; void *spare2; /* * The remaining elements are private */ LIST_ENTRY(g_class) class; LIST_HEAD(,g_geom) geom; }; /* * The g_geom_alias is a list node for aliases for the geom name * for device node creation. */ struct g_geom_alias { LIST_ENTRY(g_geom_alias) ga_next; const char *ga_alias; }; #define G_VERSION_00 0x19950323 #define G_VERSION_01 0x20041207 /* add fflag to g_ioctl_t */ #define G_VERSION G_VERSION_01 /* * The g_geom is an instance of a g_class. */ struct g_geom { char *name; struct g_class *class; LIST_ENTRY(g_geom) geom; LIST_HEAD(,g_consumer) consumer; LIST_HEAD(,g_provider) provider; TAILQ_ENTRY(g_geom) geoms; /* XXX: better name */ int rank; g_start_t *start; g_spoiled_t *spoiled; g_attrchanged_t *attrchanged; g_dumpconf_t *dumpconf; g_access_t *access; g_orphan_t *orphan; g_ioctl_t *ioctl; g_provgone_t *providergone; g_resize_t *resize; void *spare0; void *spare1; void *softc; unsigned flags; #define G_GEOM_WITHER 0x01 #define G_GEOM_VOLATILE_BIO 0x02 #define G_GEOM_IN_ACCESS 0x04 #define G_GEOM_ACCESS_WAIT 0x08 LIST_HEAD(,g_geom_alias) aliases; }; /* * The g_bioq is a queue of struct bio's. * XXX: possibly collection point for statistics. 
* XXX: should (possibly) be collapsed with sys/bio.h::bio_queue_head. */ struct g_bioq { TAILQ_HEAD(, bio) bio_queue; struct mtx bio_queue_lock; int bio_queue_length; }; /* * A g_consumer is an attachment point for a g_provider. One g_consumer * can only be attached to one g_provider, but multiple g_consumers * can be attached to one g_provider. */ struct g_consumer { struct g_geom *geom; LIST_ENTRY(g_consumer) consumer; struct g_provider *provider; LIST_ENTRY(g_consumer) consumers; /* XXX: better name */ int acr, acw, ace; int flags; #define G_CF_SPOILED 0x1 #define G_CF_ORPHAN 0x4 #define G_CF_DIRECT_SEND 0x10 #define G_CF_DIRECT_RECEIVE 0x20 struct devstat *stat; u_int nstart, nend; /* Two fields for the implementing class to use */ void *private; u_int index; }; /* * A g_provider is a "logical disk". */ struct g_provider { char *name; LIST_ENTRY(g_provider) provider; struct g_geom *geom; LIST_HEAD(,g_consumer) consumers; int acr, acw, ace; int error; TAILQ_ENTRY(g_provider) orphan; off_t mediasize; u_int sectorsize; off_t stripesize; off_t stripeoffset; struct devstat *stat; u_int nstart, nend; u_int flags; #define G_PF_WITHER 0x2 #define G_PF_ORPHAN 0x4 #define G_PF_ACCEPT_UNMAPPED 0x8 #define G_PF_DIRECT_SEND 0x10 #define G_PF_DIRECT_RECEIVE 0x20 /* Two fields for the implementing class to use */ void *private; u_int index; }; -/* - * Descriptor of a classifier. We can register a function and - * an argument, which is called by g_io_request() on bio's - * that are not previously classified. - */ -struct g_classifier_hook { - TAILQ_ENTRY(g_classifier_hook) link; - int (*func)(void *arg, struct bio *bp); - void *arg; -}; - /* BIO_GETATTR("GEOM::setstate") argument values. */ #define G_STATE_FAILED 0 #define G_STATE_REBUILD 1 #define G_STATE_RESYNC 2 #define G_STATE_ACTIVE 3 /* geom_dev.c */ struct cdev; void g_dev_print(void); void g_dev_physpath_changed(void); struct g_provider *g_dev_getprovider(struct cdev *dev); /* geom_dump.c */ void (g_trace)(int level, const char *, ...) __printflike(2, 3); #define G_T_TOPOLOGY 0x01 #define G_T_BIO 0x02 #define G_T_ACCESS 0x04 extern int g_debugflags; #define G_F_FOOTSHOOTING 0x10 #define G_F_DISKIOCTL 0x40 #define G_F_CTLDUMP 0x80 #define g_trace(level, fmt, ...) 
do { \ if (__predict_false(g_debugflags & (level))) \ (g_trace)(level, fmt, ## __VA_ARGS__); \ } while (0) /* geom_event.c */ typedef void g_event_t(void *, int flag); #define EV_CANCEL 1 int g_post_event(g_event_t *func, void *arg, int flag, ...); int g_waitfor_event(g_event_t *func, void *arg, int flag, ...); void g_cancel_event(void *ref); int g_attr_changed(struct g_provider *pp, const char *attr, int flag); int g_media_changed(struct g_provider *pp, int flag); int g_media_gone(struct g_provider *pp, int flag); void g_orphan_provider(struct g_provider *pp, int error); void g_waitidlelock(void); /* geom_subr.c */ int g_access(struct g_consumer *cp, int nread, int nwrite, int nexcl); int g_attach(struct g_consumer *cp, struct g_provider *pp); int g_compare_names(const char *namea, const char *nameb); void g_destroy_consumer(struct g_consumer *cp); void g_destroy_geom(struct g_geom *pp); void g_destroy_provider(struct g_provider *pp); void g_detach(struct g_consumer *cp); void g_error_provider(struct g_provider *pp, int error); struct g_provider *g_provider_by_name(char const *arg); void g_geom_add_alias(struct g_geom *gp, const char *alias); int g_getattr__(const char *attr, struct g_consumer *cp, void *var, int len); #define g_getattr(a, c, v) g_getattr__((a), (c), (v), sizeof *(v)) int g_handleattr(struct bio *bp, const char *attribute, const void *val, int len); int g_handleattr_int(struct bio *bp, const char *attribute, int val); int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val); int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val); int g_handleattr_str(struct bio *bp, const char *attribute, const char *str); struct g_consumer * g_new_consumer(struct g_geom *gp); struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...) __printflike(2, 3); struct g_provider * g_new_providerf(struct g_geom *gp, const char *fmt, ...) 
__printflike(2, 3); void g_resize_provider(struct g_provider *pp, off_t size); int g_retaste(struct g_class *mp); void g_spoil(struct g_provider *pp, struct g_consumer *cp); int g_std_access(struct g_provider *pp, int dr, int dw, int de); void g_std_done(struct bio *bp); void g_std_spoiled(struct g_consumer *cp); void g_wither_geom(struct g_geom *gp, int error); void g_wither_geom_close(struct g_geom *gp, int error); void g_wither_provider(struct g_provider *pp, int error); #if defined(DIAGNOSTIC) || defined(DDB) int g_valid_obj(void const *ptr); #endif #ifdef DIAGNOSTIC #define G_VALID_CLASS(foo) \ KASSERT(g_valid_obj(foo) == 1, ("%p is not a g_class", foo)) #define G_VALID_GEOM(foo) \ KASSERT(g_valid_obj(foo) == 2, ("%p is not a g_geom", foo)) #define G_VALID_CONSUMER(foo) \ KASSERT(g_valid_obj(foo) == 3, ("%p is not a g_consumer", foo)) #define G_VALID_PROVIDER(foo) \ KASSERT(g_valid_obj(foo) == 4, ("%p is not a g_provider", foo)) #else #define G_VALID_CLASS(foo) do { } while (0) #define G_VALID_GEOM(foo) do { } while (0) #define G_VALID_CONSUMER(foo) do { } while (0) #define G_VALID_PROVIDER(foo) do { } while (0) #endif int g_modevent(module_t, int, void *); /* geom_io.c */ struct bio * g_clone_bio(struct bio *); struct bio * g_duplicate_bio(struct bio *); void g_destroy_bio(struct bio *); void g_io_deliver(struct bio *bp, int error); int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr); int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp); int g_io_flush(struct g_consumer *cp); int g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp); -int g_register_classifier(struct g_classifier_hook *hook); -void g_unregister_classifier(struct g_classifier_hook *hook); void g_io_request(struct bio *bp, struct g_consumer *cp); struct bio *g_new_bio(void); struct bio *g_alloc_bio(void); void g_reset_bio(struct bio *); void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error); int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length); int g_delete_data(struct g_consumer *cp, off_t offset, off_t length); void g_format_bio(struct sbuf *, const struct bio *bp); void g_print_bio(const char *prefix, const struct bio *bp, const char *fmtsuffix, ...) 
__printflike(3, 4); int g_use_g_read_data(void *, off_t, void **, int); int g_use_g_write_data(void *, off_t, void *, int); /* geom_kern.c / geom_kernsim.c */ #ifdef _KERNEL extern struct sx topology_lock; struct g_kerneldump { off_t offset; off_t length; struct dumperinfo di; }; MALLOC_DECLARE(M_GEOM); static __inline void * g_malloc(int size, int flags) { void *p; p = malloc(size, M_GEOM, flags); return (p); } static __inline void g_free(void *ptr) { #ifdef DIAGNOSTIC if (sx_xlocked(&topology_lock)) { KASSERT(g_valid_obj(ptr) == 0, ("g_free(%p) of live object, type %d", ptr, g_valid_obj(ptr))); } #endif free(ptr, M_GEOM); } #define g_topology_lock() \ do { \ sx_xlock(&topology_lock); \ } while (0) #define g_topology_try_lock() sx_try_xlock(&topology_lock) #define g_topology_unlock() \ do { \ sx_xunlock(&topology_lock); \ } while (0) #define g_topology_assert() \ do { \ sx_assert(&topology_lock, SX_XLOCKED); \ } while (0) #define g_topology_assert_not() \ do { \ sx_assert(&topology_lock, SX_UNLOCKED); \ } while (0) #define g_topology_sleep(chan, timo) \ sx_sleep(chan, &topology_lock, 0, "gtopol", timo) #define DECLARE_GEOM_CLASS(class, name) \ static moduledata_t name##_mod = { \ #name, g_modevent, &class \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_SECOND); int g_is_geom_thread(struct thread *td); #endif /* _KERNEL */ /* geom_ctl.c */ int gctl_set_param(struct gctl_req *req, const char *param, void const *ptr, int len); void gctl_set_param_err(struct gctl_req *req, const char *param, void const *ptr, int len); void *gctl_get_param(struct gctl_req *req, const char *param, int *len); char const *gctl_get_asciiparam(struct gctl_req *req, const char *param); void *gctl_get_paraml(struct gctl_req *req, const char *param, int len); void *gctl_get_paraml_opt(struct gctl_req *req, const char *param, int len); int gctl_error(struct gctl_req *req, const char *fmt, ...) __printflike(2, 3); struct g_class *gctl_get_class(struct gctl_req *req, char const *arg); struct g_geom *gctl_get_geom(struct gctl_req *req, struct g_class *mpr, char const *arg); struct g_provider *gctl_get_provider(struct gctl_req *req, char const *arg); #endif /* _GEOM_GEOM_H_ */ Index: head/sys/geom/geom_io.c =================================================================== --- head/sys/geom/geom_io.c (revision 356184) +++ head/sys/geom/geom_io.c (revision 356185) @@ -1,1163 +1,1086 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Poul-Henning Kamp * Copyright (c) 2002 Networks Associates Technology, Inc. * Copyright (c) 2013 The FreeBSD Foundation * All rights reserved. * * This software was developed for the FreeBSD Project by Poul-Henning Kamp * and NAI Labs, the Security Research Division of Network Associates, Inc. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the * DARPA CHATS research program. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int g_io_transient_map_bio(struct bio *bp); static struct g_bioq g_bio_run_down; static struct g_bioq g_bio_run_up; /* * Pace is a hint that we've had some trouble recently allocating * bios, so we should back off trying to send I/O down the stack * a bit to let the problem resolve. When pacing, we also turn * off direct dispatch to also reduce memory pressure from I/Os * there, at the expense of some added latency while the memory * pressures exist. See g_io_schedule_down() for more details * and limitations. */ static volatile u_int __read_mostly pace; static uma_zone_t __read_mostly biozone; -/* - * The head of the list of classifiers used in g_io_request. - * Use g_register_classifier() and g_unregister_classifier() - * to add/remove entries to the list. - * Classifiers are invoked in registration order.
- */ -static TAILQ_HEAD(, g_classifier_hook) g_classifier_tailq __read_mostly = - TAILQ_HEAD_INITIALIZER(g_classifier_tailq); - #include static void g_bioq_lock(struct g_bioq *bq) { mtx_lock(&bq->bio_queue_lock); } static void g_bioq_unlock(struct g_bioq *bq) { mtx_unlock(&bq->bio_queue_lock); } #if 0 static void g_bioq_destroy(struct g_bioq *bq) { mtx_destroy(&bq->bio_queue_lock); } #endif static void g_bioq_init(struct g_bioq *bq) { TAILQ_INIT(&bq->bio_queue); mtx_init(&bq->bio_queue_lock, "bio queue", NULL, MTX_DEF); } static struct bio * g_bioq_first(struct g_bioq *bq) { struct bio *bp; bp = TAILQ_FIRST(&bq->bio_queue); if (bp != NULL) { KASSERT((bp->bio_flags & BIO_ONQUEUE), ("Bio not on queue bp=%p target %p", bp, bq)); bp->bio_flags &= ~BIO_ONQUEUE; TAILQ_REMOVE(&bq->bio_queue, bp, bio_queue); bq->bio_queue_length--; } return (bp); } struct bio * g_new_bio(void) { struct bio *bp; bp = uma_zalloc(biozone, M_NOWAIT | M_ZERO); #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_new_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return (bp); } struct bio * g_alloc_bio(void) { struct bio *bp; bp = uma_zalloc(biozone, M_WAITOK | M_ZERO); #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_alloc_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return (bp); } void g_destroy_bio(struct bio *bp) { #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR1(KTR_GEOM, "g_destroy_bio(): %p", bp); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif uma_zfree(biozone, bp); } struct bio * g_clone_bio(struct bio *bp) { struct bio *bp2; bp2 = uma_zalloc(biozone, M_NOWAIT | M_ZERO); if (bp2 != NULL) { bp2->bio_parent = bp; bp2->bio_cmd = bp->bio_cmd; /* * BIO_ORDERED flag may be used by disk drivers to enforce * ordering restrictions, so this flag needs to be cloned. * BIO_UNMAPPED and BIO_VLIST should be inherited, to properly * indicate which way the buffer is passed. * Other bio flags are not suitable for cloning. 
*/ bp2->bio_flags = bp->bio_flags & (BIO_ORDERED | BIO_UNMAPPED | BIO_VLIST); bp2->bio_length = bp->bio_length; bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; bp2->bio_ma = bp->bio_ma; bp2->bio_ma_n = bp->bio_ma_n; bp2->bio_ma_offset = bp->bio_ma_offset; bp2->bio_attribute = bp->bio_attribute; if (bp->bio_cmd == BIO_ZONE) bcopy(&bp->bio_zone, &bp2->bio_zone, sizeof(bp->bio_zone)); - /* Inherit classification info from the parent */ - bp2->bio_classifier1 = bp->bio_classifier1; - bp2->bio_classifier2 = bp->bio_classifier2; #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) bp2->bio_track_bp = bp->bio_track_bp; #endif bp->bio_children++; } #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR2(KTR_GEOM, "g_clone_bio(%p): %p", bp, bp2); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return(bp2); } struct bio * g_duplicate_bio(struct bio *bp) { struct bio *bp2; bp2 = uma_zalloc(biozone, M_WAITOK | M_ZERO); bp2->bio_flags = bp->bio_flags & (BIO_UNMAPPED | BIO_VLIST); bp2->bio_parent = bp; bp2->bio_cmd = bp->bio_cmd; bp2->bio_length = bp->bio_length; bp2->bio_offset = bp->bio_offset; bp2->bio_data = bp->bio_data; bp2->bio_ma = bp->bio_ma; bp2->bio_ma_n = bp->bio_ma_n; bp2->bio_ma_offset = bp->bio_ma_offset; bp2->bio_attribute = bp->bio_attribute; bp->bio_children++; #ifdef KTR if ((KTR_COMPILE & KTR_GEOM) && (ktr_mask & KTR_GEOM)) { struct stack st; CTR2(KTR_GEOM, "g_duplicate_bio(%p): %p", bp, bp2); stack_save(&st); CTRSTACK(KTR_GEOM, &st, 3); } #endif return(bp2); } void g_reset_bio(struct bio *bp) { bzero(bp, sizeof(*bp)); } void g_io_init() { g_bioq_init(&g_bio_run_down); g_bioq_init(&g_bio_run_up); biozone = uma_zcreate("g_bio", sizeof (struct bio), NULL, NULL, NULL, NULL, 0, 0); } int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_getattr(%s)", attr); bp = g_alloc_bio(); bp->bio_cmd = BIO_GETATTR; bp->bio_done = NULL; bp->bio_attribute = attr; bp->bio_length = *len; bp->bio_data = ptr; g_io_request(bp, cp); error = biowait(bp, "ggetattr"); *len = bp->bio_completed; g_destroy_bio(bp); return (error); } int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_zone(%d)", zone_args->zone_cmd); bp = g_alloc_bio(); bp->bio_cmd = BIO_ZONE; bp->bio_done = NULL; /* * XXX KDM need to handle report zone data. */ bcopy(zone_args, &bp->bio_zone, sizeof(*zone_args)); if (zone_args->zone_cmd == DISK_ZONE_REPORT_ZONES) bp->bio_length = zone_args->zone_params.report.entries_allocated * sizeof(struct disk_zone_rep_entry); else bp->bio_length = 0; g_io_request(bp, cp); error = biowait(bp, "gzone"); bcopy(&bp->bio_zone, zone_args, sizeof(*zone_args)); g_destroy_bio(bp); return (error); } /* * Send a BIO_SPEEDUP down the stack. This is used to tell the lower layers that * the upper layers have detected a resource shortage. The lower layers are * advised to stop delaying I/O that they might be holding for performance * reasons and to schedule it (non-trims) or complete it successfully (trims) as * quickly as it can. bio_length is the amount of the shortage. This call * should be non-blocking. bio_resid is used to communicate back if the lower * layers couldn't find bio_length worth of I/O to schedule or discard. A length * of 0 means to do as much as you can (schedule the h/w queues full, discard * all trims). flags are a hint from the upper layers to the lower layers what * operation should be done. 
*/ int g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp) { struct bio *bp; int error; KASSERT((flags & (BIO_SPEEDUP_TRIM | BIO_SPEEDUP_WRITE)) != 0, ("Invalid flags passed to g_io_speedup: %#x", flags)); g_trace(G_T_BIO, "bio_speedup(%s, %zu, %#x)", cp->provider->name, shortage, flags); bp = g_new_bio(); if (bp == NULL) return (ENOMEM); bp->bio_cmd = BIO_SPEEDUP; bp->bio_length = shortage; bp->bio_done = NULL; bp->bio_flags |= flags; g_io_request(bp, cp); error = biowait(bp, "gflush"); *resid = bp->bio_resid; g_destroy_bio(bp); return (error); } int g_io_flush(struct g_consumer *cp) { struct bio *bp; int error; g_trace(G_T_BIO, "bio_flush(%s)", cp->provider->name); bp = g_alloc_bio(); bp->bio_cmd = BIO_FLUSH; bp->bio_flags |= BIO_ORDERED; bp->bio_done = NULL; bp->bio_attribute = NULL; bp->bio_offset = cp->provider->mediasize; bp->bio_length = 0; bp->bio_data = NULL; g_io_request(bp, cp); error = biowait(bp, "gflush"); g_destroy_bio(bp); return (error); } static int g_io_check(struct bio *bp) { struct g_consumer *cp; struct g_provider *pp; off_t excess; int error; biotrack(bp, __func__); cp = bp->bio_from; pp = bp->bio_to; /* Fail if access counters don't allow the operation */ switch(bp->bio_cmd) { case BIO_READ: case BIO_GETATTR: if (cp->acr == 0) return (EPERM); break; case BIO_WRITE: case BIO_DELETE: case BIO_FLUSH: if (cp->acw == 0) return (EPERM); break; case BIO_ZONE: if ((bp->bio_zone.zone_cmd == DISK_ZONE_REPORT_ZONES) || (bp->bio_zone.zone_cmd == DISK_ZONE_GET_PARAMS)) { if (cp->acr == 0) return (EPERM); } else if (cp->acw == 0) return (EPERM); break; default: return (EPERM); } /* if provider is marked for error, don't disturb. */ if (pp->error) return (pp->error); if (cp->flags & G_CF_ORPHAN) return (ENXIO); switch(bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: /* Zero sectorsize or mediasize is probably a lack of media. */ if (pp->sectorsize == 0 || pp->mediasize == 0) return (ENXIO); /* Reject I/O not on sector boundary */ if (bp->bio_offset % pp->sectorsize) return (EINVAL); /* Reject I/O not integral sector long */ if (bp->bio_length % pp->sectorsize) return (EINVAL); /* Reject requests before or past the end of media. */ if (bp->bio_offset < 0) return (EIO); if (bp->bio_offset > pp->mediasize) return (EIO); /* Truncate requests to the end of the provider's media. */ excess = bp->bio_offset + bp->bio_length; if (excess > bp->bio_to->mediasize) { KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE == bp->bio_ma_n, ("excess bio %p too short", bp)); excess -= bp->bio_to->mediasize; bp->bio_length -= excess; if ((bp->bio_flags & BIO_UNMAPPED) != 0) { bp->bio_ma_n = round_page(bp->bio_ma_offset + bp->bio_length) / PAGE_SIZE; } if (excess > 0) CTR3(KTR_GEOM, "g_down truncated bio " "%p provider %s by %d", bp, bp->bio_to->name, excess); } /* Deliver zero length transfers right here. */ if (bp->bio_length == 0) { CTR2(KTR_GEOM, "g_down terminated 0-length " "bp %p provider %s", bp, bp->bio_to->name); return (0); } if ((bp->bio_flags & BIO_UNMAPPED) != 0 && (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 && (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { if ((error = g_io_transient_map_bio(bp)) >= 0) return (error); } break; default: break; } return (EJUSTRETURN); } -/* - * bio classification support. - * - * g_register_classifier() and g_unregister_classifier() - * are used to add/remove a classifier from the list.
- * The list is protected using the g_bio_run_down lock, - * because the classifiers are called in this path. - * - * g_io_request() passes bio's that are not already classified - * (i.e. those with bio_classifier1 == NULL) to g_run_classifiers(). - * Classifiers can store their result in the two fields - * bio_classifier1 and bio_classifier2. - * A classifier that updates one of the fields should - * return a non-zero value. - * If no classifier updates the field, g_run_classifiers() sets - * bio_classifier1 = BIO_NOTCLASSIFIED to avoid further calls. - */ - -int -g_register_classifier(struct g_classifier_hook *hook) -{ - - g_bioq_lock(&g_bio_run_down); - TAILQ_INSERT_TAIL(&g_classifier_tailq, hook, link); - g_bioq_unlock(&g_bio_run_down); - - return (0); -} - void -g_unregister_classifier(struct g_classifier_hook *hook) -{ - struct g_classifier_hook *entry; - - g_bioq_lock(&g_bio_run_down); - TAILQ_FOREACH(entry, &g_classifier_tailq, link) { - if (entry == hook) { - TAILQ_REMOVE(&g_classifier_tailq, hook, link); - break; - } - } - g_bioq_unlock(&g_bio_run_down); -} - -static void -g_run_classifiers(struct bio *bp) -{ - struct g_classifier_hook *hook; - int classified = 0; - - biotrack(bp, __func__); - - TAILQ_FOREACH(hook, &g_classifier_tailq, link) - classified |= hook->func(hook->arg, bp); - - if (!classified) - bp->bio_classifier1 = BIO_NOTCLASSIFIED; -} - -void g_io_request(struct bio *bp, struct g_consumer *cp) { struct g_provider *pp; struct mtx *mtxp; int direct, error, first; uint8_t cmd; biotrack(bp, __func__); KASSERT(cp != NULL, ("NULL cp in g_io_request")); KASSERT(bp != NULL, ("NULL bp in g_io_request")); pp = cp->provider; KASSERT(pp != NULL, ("consumer not attached in g_io_request")); #ifdef DIAGNOSTIC KASSERT(bp->bio_driver1 == NULL, ("bio_driver1 used by the consumer (geom %s)", cp->geom->name)); KASSERT(bp->bio_driver2 == NULL, ("bio_driver2 used by the consumer (geom %s)", cp->geom->name)); KASSERT(bp->bio_pflags == 0, ("bio_pflags used by the consumer (geom %s)", cp->geom->name)); /* * Remember consumer's private fields, so we can detect if they were * modified by the provider. 
*/ bp->_bio_caller1 = bp->bio_caller1; bp->_bio_caller2 = bp->bio_caller2; bp->_bio_cflags = bp->bio_cflags; #endif cmd = bp->bio_cmd; if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_GETATTR) { KASSERT(bp->bio_data != NULL, ("NULL bp->data in g_io_request(cmd=%hu)", bp->bio_cmd)); } if (cmd == BIO_DELETE || cmd == BIO_FLUSH) { KASSERT(bp->bio_data == NULL, ("non-NULL bp->data in g_io_request(cmd=%hu)", bp->bio_cmd)); } if (cmd == BIO_READ || cmd == BIO_WRITE || cmd == BIO_DELETE) { KASSERT(bp->bio_offset % cp->provider->sectorsize == 0, ("wrong offset %jd for sectorsize %u", bp->bio_offset, cp->provider->sectorsize)); KASSERT(bp->bio_length % cp->provider->sectorsize == 0, ("wrong length %jd for sectorsize %u", bp->bio_length, cp->provider->sectorsize)); } g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d", bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); bp->bio_from = cp; bp->bio_to = pp; bp->bio_error = 0; bp->bio_completed = 0; KASSERT(!(bp->bio_flags & BIO_ONQUEUE), ("Bio already on queue bp=%p", bp)); if ((g_collectstats & G_STATS_CONSUMERS) != 0 || ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) binuptime(&bp->bio_t0); else getbinuptime(&bp->bio_t0); #ifdef GET_STACK_USAGE direct = (cp->flags & G_CF_DIRECT_SEND) != 0 && (pp->flags & G_PF_DIRECT_RECEIVE) != 0 && !g_is_geom_thread(curthread) && ((pp->flags & G_PF_ACCEPT_UNMAPPED) != 0 || (bp->bio_flags & BIO_UNMAPPED) == 0 || THREAD_CAN_SLEEP()) && pace == 0; if (direct) { /* Block direct execution if less than half of stack left. */ size_t st, su; GET_STACK_USAGE(st, su); if (su * 2 > st) direct = 0; } #else direct = 0; #endif - - if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) { - g_bioq_lock(&g_bio_run_down); - g_run_classifiers(bp); - g_bioq_unlock(&g_bio_run_down); - } /* * The statistics collection is lockless, as such, but we * can not update one instance of the statistics from more * than one thread at a time, so grab the lock first. */ mtxp = mtx_pool_find(mtxpool_sleep, pp); mtx_lock(mtxp); if (g_collectstats & G_STATS_PROVIDERS) devstat_start_transaction(pp->stat, &bp->bio_t0); if (g_collectstats & G_STATS_CONSUMERS) devstat_start_transaction(cp->stat, &bp->bio_t0); pp->nstart++; cp->nstart++; mtx_unlock(mtxp); if (direct) { error = g_io_check(bp); if (error >= 0) { CTR3(KTR_GEOM, "g_io_request g_io_check on bp %p " "provider %s returned %d", bp, bp->bio_to->name, error); g_io_deliver(bp, error); return; } bp->bio_to->geom->start(bp); } else { g_bioq_lock(&g_bio_run_down); first = TAILQ_EMPTY(&g_bio_run_down.bio_queue); TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue); bp->bio_flags |= BIO_ONQUEUE; g_bio_run_down.bio_queue_length++; g_bioq_unlock(&g_bio_run_down); /* Pass it on down. */ if (first) wakeup(&g_wait_down); } } void g_io_deliver(struct bio *bp, int error) { struct bintime now; struct g_consumer *cp; struct g_provider *pp; struct mtx *mtxp; int direct, first; biotrack(bp, __func__); KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); pp = bp->bio_to; KASSERT(pp != NULL, ("NULL bio_to in g_io_deliver")); cp = bp->bio_from; if (cp == NULL) { bp->bio_error = error; bp->bio_done(bp); return; } KASSERT(cp != NULL, ("NULL bio_from in g_io_deliver")); KASSERT(cp->geom != NULL, ("NULL bio_from->geom in g_io_deliver")); #ifdef DIAGNOSTIC /* * Some classes - GJournal in particular - can modify bio's * private fields while the bio is in transit; G_GEOM_VOLATILE_BIO * flag means it's an expected behaviour for that particular geom.
*/ if ((cp->geom->flags & G_GEOM_VOLATILE_BIO) == 0) { KASSERT(bp->bio_caller1 == bp->_bio_caller1, ("bio_caller1 used by the provider %s", pp->name)); KASSERT(bp->bio_caller2 == bp->_bio_caller2, ("bio_caller2 used by the provider %s", pp->name)); KASSERT(bp->bio_cflags == bp->_bio_cflags, ("bio_cflags used by the provider %s", pp->name)); } #endif KASSERT(bp->bio_completed >= 0, ("bio_completed can't be less than 0")); KASSERT(bp->bio_completed <= bp->bio_length, ("bio_completed can't be greater than bio_length")); g_trace(G_T_BIO, "g_io_deliver(%p) from %p(%s) to %p(%s) cmd %d error %d off %jd len %jd", bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd, error, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); KASSERT(!(bp->bio_flags & BIO_ONQUEUE), ("Bio already on queue bp=%p", bp)); /* * XXX: next two don't belong here */ bp->bio_bcount = bp->bio_length; bp->bio_resid = bp->bio_bcount - bp->bio_completed; #ifdef GET_STACK_USAGE direct = (pp->flags & G_PF_DIRECT_SEND) && (cp->flags & G_CF_DIRECT_RECEIVE) && !g_is_geom_thread(curthread); if (direct) { /* Block direct execution if less than half of stack left. */ size_t st, su; GET_STACK_USAGE(st, su); if (su * 2 > st) direct = 0; } #else direct = 0; #endif /* * The statistics collection is lockless, as such, but we * can not update one instance of the statistics from more * than one thread at a time, so grab the lock first. */ if ((g_collectstats & G_STATS_CONSUMERS) != 0 || ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) binuptime(&now); mtxp = mtx_pool_find(mtxpool_sleep, cp); mtx_lock(mtxp); if (g_collectstats & G_STATS_PROVIDERS) devstat_end_transaction_bio_bt(pp->stat, bp, &now); if (g_collectstats & G_STATS_CONSUMERS) devstat_end_transaction_bio_bt(cp->stat, bp, &now); cp->nend++; pp->nend++; mtx_unlock(mtxp); if (error != ENOMEM) { bp->bio_error = error; if (direct) { biodone(bp); } else { g_bioq_lock(&g_bio_run_up); first = TAILQ_EMPTY(&g_bio_run_up.bio_queue); TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue); bp->bio_flags |= BIO_ONQUEUE; g_bio_run_up.bio_queue_length++; g_bioq_unlock(&g_bio_run_up); if (first) wakeup(&g_wait_up); } return; } if (bootverbose) printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); bp->bio_children = 0; bp->bio_inbed = 0; bp->bio_driver1 = NULL; bp->bio_driver2 = NULL; bp->bio_pflags = 0; g_io_request(bp, cp); pace = 1; return; } SYSCTL_DECL(_kern_geom); static long transient_maps; SYSCTL_LONG(_kern_geom, OID_AUTO, transient_maps, CTLFLAG_RD, &transient_maps, 0, "Total count of the transient mapping requests"); u_int transient_map_retries = 10; SYSCTL_UINT(_kern_geom, OID_AUTO, transient_map_retries, CTLFLAG_RW, &transient_map_retries, 0, "Max count of retries used before giving up on creating transient map"); int transient_map_hard_failures; SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_hard_failures, CTLFLAG_RD, &transient_map_hard_failures, 0, "Failures to establish the transient mapping due to retry attempts " "exhausted"); int transient_map_soft_failures; SYSCTL_INT(_kern_geom, OID_AUTO, transient_map_soft_failures, CTLFLAG_RD, &transient_map_soft_failures, 0, "Count of retried failures to establish the transient mapping"); int inflight_transient_maps; SYSCTL_INT(_kern_geom, OID_AUTO, inflight_transient_maps, CTLFLAG_RD, &inflight_transient_maps, 0, "Current count of the active transient maps"); static int g_io_transient_map_bio(struct bio *bp) { vm_offset_t addr; long size; u_int retried; KASSERT(unmapped_buf_allowed, ("unmapped disabled")); size =
round_page(bp->bio_ma_offset + bp->bio_length); KASSERT(size / PAGE_SIZE == bp->bio_ma_n, ("Bio too short %p", bp)); addr = 0; retried = 0; atomic_add_long(&transient_maps, 1); retry: if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) { if (transient_map_retries != 0 && retried >= transient_map_retries) { CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s", bp, bp->bio_to->name); atomic_add_int(&transient_map_hard_failures, 1); return (EDEADLK/* XXXKIB */); } else { /* * Naive attempt to quiesce the I/O to get more * in-flight requests completed and defragment * the transient_arena. */ CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d", bp, bp->bio_to->name, retried); pause("g_d_tra", hz / 10); retried++; atomic_add_int(&transient_map_soft_failures, 1); goto retry; } } atomic_add_int(&inflight_transient_maps, 1); pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size)); bp->bio_data = (caddr_t)addr + bp->bio_ma_offset; bp->bio_flags |= BIO_TRANSIENT_MAPPING; bp->bio_flags &= ~BIO_UNMAPPED; return (EJUSTRETURN); } void g_io_schedule_down(struct thread *tp __unused) { struct bio *bp; int error; for(;;) { g_bioq_lock(&g_bio_run_down); bp = g_bioq_first(&g_bio_run_down); if (bp == NULL) { CTR0(KTR_GEOM, "g_down going to sleep"); msleep(&g_wait_down, &g_bio_run_down.bio_queue_lock, PRIBIO | PDROP, "-", 0); continue; } CTR0(KTR_GEOM, "g_down has work to do"); g_bioq_unlock(&g_bio_run_down); biotrack(bp, __func__); if (pace != 0) { /* * There has been at least one memory allocation * failure since the last I/O completed. Pause 1ms to * give the system a chance to free up memory. We only * do this once because a large number of allocations * can fail in the direct dispatch case and there's no * relationship between the number of these failures and * the length of the outage. If there's still an outage, * we'll pause again and again until it's * resolved. Older versions paused longer and once per * allocation failure. This was OK for a single threaded * g_down, but with direct dispatch would lead to max of * 10 IOPs for minutes at a time when transient memory * issues prevented allocation for a batch of requests * from the upper layers. * * XXX This pacing is really lame. It needs to be solved * by other methods. This is OK only because the worst * case scenario is so rare. In the worst case scenario * all memory is tied up waiting for I/O to complete * which can never happen since we can't allocate bios * for that I/O.
*/ CTR0(KTR_GEOM, "g_down pacing self"); pause("g_down", min(hz/1000, 1)); pace = 0; } CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp, bp->bio_to->name); error = g_io_check(bp); if (error >= 0) { CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider " "%s returned %d", bp, bp->bio_to->name, error); g_io_deliver(bp, error); continue; } THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld " "len %ld", bp, bp->bio_to->name, bp->bio_offset, bp->bio_length); bp->bio_to->geom->start(bp); THREAD_SLEEPING_OK(); } } void g_io_schedule_up(struct thread *tp __unused) { struct bio *bp; for(;;) { g_bioq_lock(&g_bio_run_up); bp = g_bioq_first(&g_bio_run_up); if (bp == NULL) { CTR0(KTR_GEOM, "g_up going to sleep"); msleep(&g_wait_up, &g_bio_run_up.bio_queue_lock, PRIBIO | PDROP, "-", 0); continue; } g_bioq_unlock(&g_bio_run_up); THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_up biodone bp %p provider %s off " "%jd len %ld", bp, bp->bio_to->name, bp->bio_offset, bp->bio_length); biodone(bp); THREAD_SLEEPING_OK(); } } void * g_read_data(struct g_consumer *cp, off_t offset, off_t length, int *error) { struct bio *bp; void *ptr; int errorc; KASSERT(length > 0 && length >= cp->provider->sectorsize && length <= MAXPHYS, ("g_read_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_READ; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; ptr = g_malloc(length, M_WAITOK); bp->bio_data = ptr; g_io_request(bp, cp); errorc = biowait(bp, "gread"); if (error != NULL) *error = errorc; g_destroy_bio(bp); if (errorc) { g_free(ptr); ptr = NULL; } return (ptr); } /* * A read function for use by ffs_sbget when used by GEOM-layer routines. */ int g_use_g_read_data(void *devfd, off_t loc, void **bufp, int size) { struct g_consumer *cp; KASSERT(*bufp == NULL, ("g_use_g_read_data: non-NULL *bufp %p\n", *bufp)); cp = (struct g_consumer *)devfd; /* * Take care not to issue an invalid I/O request. The offset of * the superblock candidate must be a multiple of the provider's * sector size, otherwise an FFS can't exist on the provider * anyway. */ if (loc % cp->provider->sectorsize != 0) return (ENOENT); *bufp = g_read_data(cp, loc, size, NULL); if (*bufp == NULL) return (ENOENT); return (0); } int g_write_data(struct g_consumer *cp, off_t offset, void *ptr, off_t length) { struct bio *bp; int error; KASSERT(length > 0 && length >= cp->provider->sectorsize && length <= MAXPHYS, ("g_write_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_WRITE; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; bp->bio_data = ptr; g_io_request(bp, cp); error = biowait(bp, "gwrite"); g_destroy_bio(bp); return (error); } /* * A write function for use by ffs_sbput when used by GEOM-layer routines. */ int g_use_g_write_data(void *devfd, off_t loc, void *buf, int size) { return (g_write_data((struct g_consumer *)devfd, loc, buf, size)); } int g_delete_data(struct g_consumer *cp, off_t offset, off_t length) { struct bio *bp; int error; KASSERT(length > 0 && length >= cp->provider->sectorsize, ("g_delete_data(): invalid length %jd", (intmax_t)length)); bp = g_alloc_bio(); bp->bio_cmd = BIO_DELETE; bp->bio_done = NULL; bp->bio_offset = offset; bp->bio_length = length; bp->bio_data = NULL; g_io_request(bp, cp); error = biowait(bp, "gdelete"); g_destroy_bio(bp); return (error); } void g_print_bio(const char *prefix, const struct bio *bp, const char *fmtsuffix, ...)
{ #ifndef PRINTF_BUFR_SIZE #define PRINTF_BUFR_SIZE 64 #endif char bufr[PRINTF_BUFR_SIZE]; struct sbuf sb, *sbp __unused; va_list ap; sbp = sbuf_new(&sb, bufr, sizeof(bufr), SBUF_FIXEDLEN); KASSERT(sbp != NULL, ("sbuf_new misused?")); sbuf_set_drain(&sb, sbuf_printf_drain, NULL); sbuf_cat(&sb, prefix); g_format_bio(&sb, bp); va_start(ap, fmtsuffix); sbuf_vprintf(&sb, fmtsuffix, ap); va_end(ap); sbuf_nl_terminate(&sb); sbuf_finish(&sb); sbuf_delete(&sb); } void g_format_bio(struct sbuf *sb, const struct bio *bp) { const char *pname, *cmd = NULL; if (bp->bio_to != NULL) pname = bp->bio_to->name; else pname = "[unknown]"; switch (bp->bio_cmd) { case BIO_GETATTR: cmd = "GETATTR"; sbuf_printf(sb, "%s[%s(attr=%s)]", pname, cmd, bp->bio_attribute); return; case BIO_FLUSH: cmd = "FLUSH"; sbuf_printf(sb, "%s[%s]", pname, cmd); return; case BIO_ZONE: { char *subcmd = NULL; cmd = "ZONE"; switch (bp->bio_zone.zone_cmd) { case DISK_ZONE_OPEN: subcmd = "OPEN"; break; case DISK_ZONE_CLOSE: subcmd = "CLOSE"; break; case DISK_ZONE_FINISH: subcmd = "FINISH"; break; case DISK_ZONE_RWP: subcmd = "RWP"; break; case DISK_ZONE_REPORT_ZONES: subcmd = "REPORT ZONES"; break; case DISK_ZONE_GET_PARAMS: subcmd = "GET PARAMS"; break; default: subcmd = "UNKNOWN"; break; } sbuf_printf(sb, "%s[%s,%s]", pname, cmd, subcmd); return; } case BIO_READ: cmd = "READ"; break; case BIO_WRITE: cmd = "WRITE"; break; case BIO_DELETE: cmd = "DELETE"; break; default: cmd = "UNKNOWN"; sbuf_printf(sb, "%s[%s()]", pname, cmd); return; } sbuf_printf(sb, "%s[%s(offset=%jd, length=%jd)]", pname, cmd, (intmax_t)bp->bio_offset, (intmax_t)bp->bio_length); } Index: head/sys/modules/geom/geom_sched/gsched_delay/Makefile =================================================================== --- head/sys/modules/geom/geom_sched/gsched_delay/Makefile (revision 356184) +++ head/sys/modules/geom/geom_sched/gsched_delay/Makefile (nonexistent) @@ -1,7 +0,0 @@ -# $FreeBSD$ - -KMOD= gsched_delay -SRCS= gs_delay.c - -# ../Makefile.inc automatically included -.include Property changes on: head/sys/modules/geom/geom_sched/gsched_delay/Makefile ___________________________________________________________________ Deleted: svn:eol-style ## -1 +0,0 ## -native \ No newline at end of property Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Deleted: svn:mime-type ## -1 +0,0 ## -text/plain \ No newline at end of property Index: head/sys/modules/geom/geom_sched/Makefile.inc =================================================================== --- head/sys/modules/geom/geom_sched/Makefile.inc (revision 356184) +++ head/sys/modules/geom/geom_sched/Makefile.inc (nonexistent) @@ -1,9 +0,0 @@ -# $FreeBSD$ -# included by geom_sched children - -.PATH: ${SRCTOP}/sys/geom/sched - -# 6.x needs this path -#CFLAGS += -I${SRCTOP}/sys/geom/sched - -# .include Property changes on: head/sys/modules/geom/geom_sched/Makefile.inc ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/modules/geom/geom_sched/gs_sched/Makefile =================================================================== --- head/sys/modules/geom/geom_sched/gs_sched/Makefile (revision 356184) +++ head/sys/modules/geom/geom_sched/gs_sched/Makefile (nonexistent) @@ -1,6 +0,0 @@ -# $FreeBSD$ -KMOD= geom_sched -SRCS= g_sched.c - -# ../Makefile.inc automatically included -.include Property changes on: head/sys/modules/geom/geom_sched/gs_sched/Makefile 
___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/modules/geom/geom_sched/Makefile =================================================================== --- head/sys/modules/geom/geom_sched/Makefile (revision 356184) +++ head/sys/modules/geom/geom_sched/Makefile (nonexistent) @@ -1,5 +0,0 @@ -# $FreeBSD$ - -SUBDIR= gs_sched gsched_rr gsched_delay - -.include Property changes on: head/sys/modules/geom/geom_sched/Makefile ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/modules/geom/geom_sched/gsched_rr/Makefile =================================================================== --- head/sys/modules/geom/geom_sched/gsched_rr/Makefile (revision 356184) +++ head/sys/modules/geom/geom_sched/gsched_rr/Makefile (nonexistent) @@ -1,7 +0,0 @@ -# $FreeBSD$ - -KMOD= gsched_rr -SRCS= gs_rr.c - -# ../Makefile.inc automatically included -.include Property changes on: head/sys/modules/geom/geom_sched/gsched_rr/Makefile ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/modules/geom/Makefile =================================================================== --- head/sys/modules/geom/Makefile (revision 356184) +++ head/sys/modules/geom/Makefile (revision 356185) @@ -1,34 +1,33 @@ # $FreeBSD$ SYSDIR?=${SRCTOP}/sys .include "${SYSDIR}/conf/kern.opts.mk" SUBDIR= geom_bde \ geom_cache \ geom_concat \ geom_eli \ geom_flashmap \ geom_gate \ geom_journal \ geom_label \ geom_linux_lvm \ geom_mirror \ geom_mountver \ geom_multipath \ geom_nop \ geom_part \ geom_raid \ geom_raid3 \ - geom_sched \ geom_shsec \ geom_stripe \ geom_uzip \ geom_vinum \ geom_virstor \ geom_zero .if ${MK_CCD} != "no" || defined(ALL_MODULES) SUBDIR+= geom_ccd .endif .include Index: head/sys/sys/bio.h =================================================================== --- head/sys/sys/bio.h (revision 356184) +++ head/sys/sys/bio.h (revision 356185) @@ -1,191 +1,188 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 * $FreeBSD$ */ #ifndef _SYS_BIO_H_ #define _SYS_BIO_H_ #include #include /* bio_cmd */ #define BIO_READ 0x01 /* Read I/O data */ #define BIO_WRITE 0x02 /* Write I/O data */ #define BIO_DELETE 0x03 /* TRIM or free blocks, i.e. mark as unused */ #define BIO_GETATTR 0x04 /* Get GEOM attributes of object */ #define BIO_FLUSH 0x05 /* Commit outstanding I/O now */ #define BIO_CMD0 0x06 /* Available for local hacks */ #define BIO_CMD1 0x07 /* Available for local hacks */ #define BIO_CMD2 0x08 /* Available for local hacks */ #define BIO_ZONE 0x09 /* Zone command */ #define BIO_SPEEDUP 0x0a /* Upper layers face shortage */ /* bio_flags */ #define BIO_ERROR 0x01 /* An error occurred processing this bio. */ #define BIO_DONE 0x02 /* This bio is finished. */ #define BIO_ONQUEUE 0x04 /* This bio is in a queue & not yet taken. */ /* * This bio must be executed after all previous bios in the queue have been * executed, and before any successive bios can be executed. */ #define BIO_ORDERED 0x08 #define BIO_UNMAPPED 0x10 #define BIO_TRANSIENT_MAPPING 0x20 #define BIO_VLIST 0x40 #define PRINT_BIO_FLAGS "\20\7vlist\6transient_mapping\5unmapped" \ "\4ordered\3onqueue\2done\1error" #define BIO_SPEEDUP_WRITE 0x4000 /* Resource shortage at upper layers */ #define BIO_SPEEDUP_TRIM 0x8000 /* Resource shortage at upper layers */ #ifdef _KERNEL struct disk; struct bio; struct vm_map; -/* Empty classifier tag, to prevent further classification. */ -#define BIO_NOTCLASSIFIED (void *)(~0UL) - typedef void bio_task_t(void *); /* * The bio structure describes an I/O operation in the kernel. */ struct bio { uint16_t bio_cmd; /* I/O operation. */ uint16_t bio_flags; /* General flags. */ uint16_t bio_cflags; /* Private use by the consumer. */ uint16_t bio_pflags; /* Private use by the provider. */ struct cdev *bio_dev; /* Device to do I/O on. */ struct disk *bio_disk; /* Valid below geom_disk.c only */ off_t bio_offset; /* Offset into file. */ long bio_bcount; /* Valid bytes in buffer. */ caddr_t bio_data; /* Memory, superblocks, indirect etc. */ struct vm_page **bio_ma; /* Or unmapped. */ int bio_ma_offset; /* Offset in the first page of bio_ma. */ int bio_ma_n; /* Number of pages in bio_ma. */ int bio_error; /* Errno for BIO_ERROR. */ long bio_resid; /* Remaining I/O in bytes. */ void (*bio_done)(struct bio *); void *bio_driver1; /* Private use by the provider. */ void *bio_driver2; /* Private use by the provider. */ void *bio_caller1; /* Private use by the consumer. */ void *bio_caller2; /* Private use by the consumer. */ TAILQ_ENTRY(bio) bio_queue; /* Disksort queue. 
*/ const char *bio_attribute; /* Attribute for BIO_[GS]ETATTR */ struct disk_zone_args bio_zone;/* Used for BIO_ZONE */ struct g_consumer *bio_from; /* GEOM linkage */ struct g_provider *bio_to; /* GEOM linkage */ off_t bio_length; /* Like bio_bcount */ off_t bio_completed; /* Inverse of bio_resid */ u_int bio_children; /* Number of spawned bios */ u_int bio_inbed; /* Children safely home by now */ struct bio *bio_parent; /* Pointer to parent */ struct bintime bio_t0; /* Time request started */ bio_task_t *bio_task; /* Task_queue handler */ void *bio_task_arg; /* Argument to above */ - void *bio_classifier1; /* Classifier tag. */ - void *bio_classifier2; /* Classifier tag. */ + void *bio_spare1; + void *bio_spare2; #ifdef DIAGNOSTIC void *_bio_caller1; void *_bio_caller2; uint8_t _bio_cflags; #endif #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) struct buf *bio_track_bp; /* Parent buf for tracking */ #endif /* XXX: these go away when bio chaining is introduced */ daddr_t bio_pblkno; /* physical block number */ }; struct uio; struct devstat; struct bio_queue_head { TAILQ_HEAD(bio_queue, bio) queue; off_t last_offset; struct bio *insert_point; int total; int batched; }; extern struct vm_map *bio_transient_map; extern int bio_transient_maxcnt; void biodone(struct bio *bp); void biofinish(struct bio *bp, struct devstat *stat, int error); int biowait(struct bio *bp, const char *wchan); #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING) void biotrack_buf(struct bio *bp, const char *location); static __inline void biotrack(struct bio *bp, const char *location) { if (bp->bio_track_bp != NULL) biotrack_buf(bp, location); } #else static __inline void biotrack(struct bio *bp __unused, const char *location __unused) { } #endif void bioq_disksort(struct bio_queue_head *ap, struct bio *bp); struct bio *bioq_first(struct bio_queue_head *head); struct bio *bioq_takefirst(struct bio_queue_head *head); void bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error); void bioq_init(struct bio_queue_head *head); void bioq_insert_head(struct bio_queue_head *head, struct bio *bp); void bioq_insert_tail(struct bio_queue_head *head, struct bio *bp); void bioq_remove(struct bio_queue_head *head, struct bio *bp); int physio(struct cdev *dev, struct uio *uio, int ioflag); #define physread physio #define physwrite physio #endif /* _KERNEL */ #endif /* !_SYS_BIO_H_ */ Index: head/sys/sys/ktr_class.h =================================================================== --- head/sys/sys/ktr_class.h (revision 356184) +++ head/sys/sys/ktr_class.h (revision 356185) @@ -1,89 +1,89 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1996 Berkeley Software Design, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. 
* * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from BSDI $Id: ktr.h,v 1.10.2.7 2000/03/16 21:44:42 cp Exp $ * $FreeBSD$ */ #ifndef _SYS_KTR_CLASS_H_ #define _SYS_KTR_CLASS_H_ /* * KTR trace classes * * Two of the trace classes (KTR_DEV and KTR_SUBSYS) are special in that * they are really placeholders so that individual drivers and subsystems * can map their internal tracing to the general class when they wish to * have tracing enabled and map it to 0 when they don't. */ #define KTR_GEN 0x00000001 /* General (TR) */ #define KTR_NET 0x00000002 /* Network */ #define KTR_DEV 0x00000004 /* Device driver */ #define KTR_LOCK 0x00000008 /* MP locking */ #define KTR_SMP 0x00000010 /* MP general */ #define KTR_SUBSYS 0x00000020 /* Subsystem. */ #define KTR_PMAP 0x00000040 /* Pmap tracing */ #define KTR_MALLOC 0x00000080 /* Malloc tracing */ #define KTR_TRAP 0x00000100 /* Trap processing */ #define KTR_INTR 0x00000200 /* Interrupt tracing */ #define KTR_SIG 0x00000400 /* Signal processing */ #define KTR_SPARE2 0x00000800 /* cxgb, amd64, xen, clk, &c */ #define KTR_PROC 0x00001000 /* Process scheduling */ #define KTR_SYSC 0x00002000 /* System call */ #define KTR_INIT 0x00004000 /* System initialization */ #define KTR_SPARE3 0x00008000 /* cxgb, drm2, ioat, ntb */ -#define KTR_SPARE4 0x00010000 /* geom_sched */ +#define KTR_SPARE4 0x00010000 #define KTR_EVH 0x00020000 /* Eventhandler */ #define KTR_VFS 0x00040000 /* VFS events */ #define KTR_VOP 0x00080000 /* Auto-generated vop events */ #define KTR_VM 0x00100000 /* The virtual memory system */ #define KTR_INET 0x00200000 /* IPv4 stack */ #define KTR_RUNQ 0x00400000 /* Run queue */ #define KTR_SPARE5 0x00800000 #define KTR_UMA 0x01000000 /* UMA slab allocator */ #define KTR_CALLOUT 0x02000000 /* Callouts and timeouts */ #define KTR_GEOM 0x04000000 /* GEOM I/O events */ #define KTR_BUSDMA 0x08000000 /* busdma(9) events */ #define KTR_INET6 0x10000000 /* IPv6 stack */ #define KTR_SCHED 0x20000000 /* Machine parsed sched info. */ #define KTR_BUF 0x40000000 /* Buffer cache */ #define KTR_PTRACE 0x80000000 /* Process debugging. */ #define KTR_ALL 0xffffffff /* KTR trace classes to compile in */ #ifdef KTR #ifndef KTR_COMPILE #define KTR_COMPILE (KTR_ALL) #endif #else /* !KTR */ #undef KTR_COMPILE #define KTR_COMPILE 0 #endif /* KTR */ #endif /* !_SYS_KTR_CLASS_H_ */ Index: head/sys/sys/param.h =================================================================== --- head/sys/sys/param.h (revision 356184) +++ head/sys/sys/param.h (revision 356185) @@ -1,368 +1,368 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)param.h 8.3 (Berkeley) 4/4/95 * $FreeBSD$ */ #ifndef _SYS_PARAM_H_ #define _SYS_PARAM_H_ #include #define BSD 199506 /* System version (year & month). */ #define BSD4_3 1 #define BSD4_4 1 /* * __FreeBSD_version numbers are documented in the Porter's Handbook. * If you bump the version for any reason, you should update the documentation * there. * Currently this lives here in the doc/ repository: * * head/en_US.ISO8859-1/books/porters-handbook/versions/chapter.xml * * scheme is: <major><two digit minor>Rxx * 'R' is in the range 0 to 4 if this is a release branch or * X.0-CURRENT before releng/X.0 is created, otherwise 'R' is * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1300071 /* Master, propagated to newvers */ +#define __FreeBSD_version 1300072 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, * which by definition is always true on FreeBSD. This macro is also defined * on other systems that use the kernel of FreeBSD, such as GNU/kFreeBSD. * * It is tempting to use this macro in userland code when we want to enable * kernel-specific routines, and in fact it's fine to do this in code that * is part of FreeBSD itself. However, be aware that as presence of this * macro is still not widespread (e.g. older FreeBSD versions, 3rd party * compilers, etc), it is STRONGLY DISCOURAGED to check for this macro in * external applications without also checking for __FreeBSD__ as an * alternative.
*/ #undef __FreeBSD_kernel__ #define __FreeBSD_kernel__ #if defined(_KERNEL) || defined(IN_RTLD) #define P_OSREL_SIGWAIT 700000 #define P_OSREL_SIGSEGV 700004 #define P_OSREL_MAP_ANON 800104 #define P_OSREL_MAP_FSTRICT 1100036 #define P_OSREL_SHUTDOWN_ENOTCONN 1100077 #define P_OSREL_MAP_GUARD 1200035 #define P_OSREL_WRFSBASE 1200041 #define P_OSREL_CK_CYLGRP 1200046 #define P_OSREL_VMTOTAL64 1200054 #define P_OSREL_CK_SUPERBLOCK 1300000 #define P_OSREL_CK_INODE 1300005 #define P_OSREL_POWERPC_NEW_AUX_ARGS 1300070 #define P_OSREL_MAJOR(x) ((x) / 100000) #endif #ifndef LOCORE #include #endif /* * Machine-independent constants (some used in following include files). * Redefined constants are from POSIX 1003.1 limits file. * * MAXCOMLEN should be >= sizeof(ac_comm) (see ) */ #include #define MAXCOMLEN 19 /* max command name remembered */ #define MAXINTERP PATH_MAX /* max interpreter file name length */ #define MAXLOGNAME 33 /* max login name length (incl. NUL) */ #define MAXUPRC CHILD_MAX /* max simultaneous processes */ #define NCARGS ARG_MAX /* max bytes for an exec function */ #define NGROUPS (NGROUPS_MAX+1) /* max number groups */ #define NOFILE OPEN_MAX /* max open files per process */ #define NOGROUP 65535 /* marker for empty group set member */ #define MAXHOSTNAMELEN 256 /* max hostname size */ #define SPECNAMELEN 255 /* max length of devicename */ /* More types and definitions used throughout the kernel. */ #ifdef _KERNEL #include #include #ifndef LOCORE #include #include #endif #ifndef FALSE #define FALSE 0 #endif #ifndef TRUE #define TRUE 1 #endif #endif #ifndef _KERNEL /* Signals. */ #include #endif /* Machine type dependent parameters. */ #include #ifndef _KERNEL #include #endif #ifndef DEV_BSHIFT #define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */ #endif #define DEV_BSIZE (1<>PAGE_SHIFT) #endif /* * btodb() is messy and perhaps slow because `bytes' may be an off_t. We * want to shift an unsigned type to avoid sign extension and we don't * want to widen `bytes' unnecessarily. Assume that the result fits in * a daddr_t. */ #ifndef btodb #define btodb(bytes) /* calculates (bytes / DEV_BSIZE) */ \ (sizeof (bytes) > sizeof(long) \ ? (daddr_t)((unsigned long long)(bytes) >> DEV_BSHIFT) \ : (daddr_t)((unsigned long)(bytes) >> DEV_BSHIFT)) #endif #ifndef dbtob #define dbtob(db) /* calculates (db * DEV_BSIZE) */ \ ((off_t)(db) << DEV_BSHIFT) #endif #define PRIMASK 0x0ff #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define PDROP 0x200 /* OR'd with pri to stop re-entry of interlock mutex */ #define NZERO 0 /* default "nice" */ #define NBBY 8 /* number of bits in a byte */ #define NBPW sizeof(int) /* number of bytes per word (integer) */ #define CMASK 022 /* default file mask: S_IWGRP|S_IWOTH */ #define NODEV (dev_t)(-1) /* non-existent device */ /* * File system parameters and macros. * * MAXBSIZE - Filesystems are made out of blocks of at most MAXBSIZE bytes * per block. MAXBSIZE may be made larger without affecting * any existing filesystems as long as it does not exceed MAXPHYS, * and may be made smaller at the risk of not being able to use * filesystems which require a block size exceeding MAXBSIZE. * * MAXBCACHEBUF - Maximum size of a buffer in the buffer cache. This must * be >= MAXBSIZE and can be set differently for different * architectures by defining it in . * Making this larger allows NFS to do larger reads/writes. * * BKVASIZE - Nominal buffer space per buffer, in bytes. BKVASIZE is the * minimum KVM memory reservation the kernel is willing to make.
* Filesystems can of course request smaller chunks. Actual * backing memory uses a chunk size of a page (PAGE_SIZE). * The default value here can be overridden on a per-architecture * basis by defining it in . * * If you make BKVASIZE too small you risk seriously fragmenting * the buffer KVM map which may slow things down a bit. If you * make it too big the kernel will not be able to optimally use * the KVM memory reserved for the buffer cache and will wind * up with too-few buffers. * * The default is 16384, roughly 2x the block size used by a * normal UFS filesystem. */ #define MAXBSIZE 65536 /* must be power of 2 */ #ifndef MAXBCACHEBUF #define MAXBCACHEBUF MAXBSIZE /* must be a power of 2 >= MAXBSIZE */ #endif #ifndef BKVASIZE #define BKVASIZE 16384 /* must be power of 2 */ #endif #define BKVAMASK (BKVASIZE-1) /* * MAXPATHLEN defines the longest permissible path length after expanding * symbolic links. It is used to allocate a temporary buffer from the buffer * pool in which to do the name expansion, hence should be a power of two, * and must be less than or equal to MAXBSIZE. MAXSYMLINKS defines the * maximum number of symbolic links that may be expanded in a path name. * It should be set high enough to allow all legitimate uses, but halt * infinite loops reasonably quickly. */ #define MAXPATHLEN PATH_MAX #define MAXSYMLINKS 32 /* Bit map related macros. */ #define setbit(a,i) (((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY)) #define clrbit(a,i) (((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY))) #define isset(a,i) \ (((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) #define isclr(a,i) \ ((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0) /* Macros for counting and rounding. */ #ifndef howmany #define howmany(x, y) (((x)+((y)-1))/(y)) #endif #define nitems(x) (sizeof((x)) / sizeof((x)[0])) #define rounddown(x, y) (((x)/(y))*(y)) #define rounddown2(x, y) ((x)&(~((y)-1))) /* if y is power of two */ #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* to any y */ #define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is power of two */ #define powerof2(x) ((((x)-1)&(x))==0) /* Macros for min/max. */ #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) #ifdef _KERNEL /* * Basic byte order function prototypes for non-inline functions. */ #ifndef LOCORE #ifndef _BYTEORDER_PROTOTYPED #define _BYTEORDER_PROTOTYPED __BEGIN_DECLS __uint32_t htonl(__uint32_t); __uint16_t htons(__uint16_t); __uint32_t ntohl(__uint32_t); __uint16_t ntohs(__uint16_t); __END_DECLS #endif #endif #ifndef _BYTEORDER_FUNC_DEFINED #define _BYTEORDER_FUNC_DEFINED #define htonl(x) __htonl(x) #define htons(x) __htons(x) #define ntohl(x) __ntohl(x) #define ntohs(x) __ntohs(x) #endif /* !_BYTEORDER_FUNC_DEFINED */ #endif /* _KERNEL */ /* * Scale factor for scaled integers used to count %cpu time and load avgs. * * The number of CPU `tick's that map to a unique `%age' can be expressed * by the formula (1 / (2 ^ (FSHIFT - 11))). The maximum load average that * can be calculated (assuming 32 bits) can be closely approximated using * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15). * * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age', * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024. */ #define FSHIFT 11 /* bits to right of fixed binary point */ #define FSCALE (1<> (PAGE_SHIFT - DEV_BSHIFT)) #define ctodb(db) /* calculates pages to devblks */ \ ((db) << (PAGE_SHIFT - DEV_BSHIFT)) /* * Old spelling of __containerof().
*/ #define member2struct(s, m, x) \ ((struct s *)(void *)((char *)(x) - offsetof(struct s, m))) /* * Access a variable length array that has been declared as a fixed * length array. */ #define __PAST_END(array, offset) (((__typeof__(*(array)) *)(array))[offset]) #endif /* _SYS_PARAM_H_ */
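The interfaces touched by this revision are terse in header form, so a few short, hedged sketches follow; none of this code is part of the change itself, and every identifier prefixed with example_ is hypothetical. First, the DECLARE_GEOM_CLASS() macro shown in the geom.h hunk wraps a g_class in a moduledata_t and registers it through g_modevent(); a minimal class skeleton looks roughly like this (a real class also fills in methods such as .start and .access):

static struct g_class g_example_class = {
	.name =		"EXAMPLE",
	.version =	G_VERSION,	/* GEOM API version check */
};

/* Expands to a moduledata_t named g_example_mod driven by g_modevent(). */
DECLARE_GEOM_CLASS(g_example_class, g_example);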
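The block comment above g_io_speedup() in geom_io.c spells out its contract; a caller that has detected a resource shortage might use it as sketched here, where the open consumer cp and the 1 MB figure are assumptions:

size_t resid;
int error;

/* Ask the lower layers to hurry along roughly 1 MB of held-back writes. */
error = g_io_speedup(1024 * 1024, BIO_SPEEDUP_WRITE, &resid, cp);
if (error == 0 && resid > 0)
	printf("%zu of the requested bytes could not be found\n", resid);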
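g_read_data(), defined in the geom_io.c hunk, allocates its own buffer from M_GEOM and returns NULL on failure, with the biowait() status stored through the error pointer; the usual pattern, assuming an already-open consumer cp, is:

void *buf;
int error;

/* Length must be sector-aligned, non-zero, and at most MAXPHYS. */
buf = g_read_data(cp, 0, cp->provider->sectorsize, &error);
if (buf == NULL)
	return (error);		/* status of the failed read */
/* ... examine the first sector ... */
g_free(buf);			/* buffer came from g_malloc()/M_GEOM */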
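The removed classifier API also deserves a sketch, since this revision deletes its only in-tree consumer (GEOM_SCHED). Under the deleted interface, a module supplied a g_classifier_hook whose func was invoked on each not-yet-classified bio and could stash a non-NULL tag in bio_classifier1, returning non-zero once it had done so; the hook fields (func, arg, link) match the deleted code, while the classify body below is invented for illustration:

static int
example_classify(void *arg, struct bio *bp)
{

	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE)
		return (0);		/* leave the bio for later classifiers */
	bp->bio_classifier1 = arg;	/* non-NULL tag read back by the scheduler */
	return (1);
}

static struct g_classifier_hook example_hook = {
	.func =	example_classify,
	.arg =	&example_hook,		/* any non-NULL per-module cookie */
};

/* Typically paired with MOD_LOAD/MOD_UNLOAD module events: */
g_register_classifier(&example_hook);
/* ... */
g_unregister_classifier(&example_hook);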
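Finally, the bio_queue_head helpers declared near the end of the bio.h hunk survive this change unmodified; a provider commonly stages requests with them roughly as follows (bp is assumed to arrive from the geom's start method, and error handling is omitted):

struct bio_queue_head queue;
struct bio *bp;

bioq_init(&queue);			/* once, when the geom is created */
bioq_insert_tail(&queue, bp);		/* stage requests in arrival order */
/* ... later, drain the staged requests ... */
while ((bp = bioq_takefirst(&queue)) != NULL)
	biodone(bp);			/* complete each staged bio */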