diff --git a/lib/libc/sys/aio_fsync.2 b/lib/libc/sys/aio_fsync.2
index 9d5d143416b3..0ce47edfb33f 100644
--- a/lib/libc/sys/aio_fsync.2
+++ b/lib/libc/sys/aio_fsync.2
@@ -1,178 +1,188 @@
 .\" Copyright (c) 2013 Sergey Kandaurov
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 19, 2016
+.Dd January 6, 2021
 .Dt AIO_FSYNC 2
 .Os
 .Sh NAME
 .Nm aio_fsync
 .Nd asynchronous file synchronization (REALTIME)
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
 .In aio.h
 .Ft int
 .Fn aio_fsync "int op" "struct aiocb *iocb"
 .Sh DESCRIPTION
 The
 .Fn aio_fsync
 system call allows the calling process to move all modified data
 associated with the descriptor
 .Fa iocb->aio_fildes
 to a permanent storage device.
 The call returns immediately after the synchronization request has been
 enqueued to the descriptor; the synchronization may or may not have
 completed at the time the call returns.
 .Pp
 The
 .Fa op
-argument can only be set to
+argument can be set to
 .Dv O_SYNC
 to cause all currently queued I/O operations to be completed
 as if by a call to
-.Xr fsync 2 .
+.Xr fsync 2 ,
+or
+.Dv O_DSYNC
+for the behavior of
+.Xr fdatasync 2 .
 .Pp
 If _POSIX_PRIORITIZED_IO is defined, and the descriptor supports it,
 then the enqueued operation is submitted at a priority equal to that
 of the calling process minus
 .Fa iocb->aio_reqprio .
 .Pp
 The
 .Fa iocb
 pointer may be subsequently used as an argument to
 .Fn aio_return
 and
 .Fn aio_error
 in order to determine return or error status for the enqueued operation
 while it is in progress.
 .Pp
 If the request could not be enqueued (generally due to invalid arguments),
 the call returns without having enqueued the request.
 .Pp
 The
 .Fa iocb->aio_sigevent
 structure can be used to request notification of the operation's
 completion as described in
 .Xr aio 4 .
 .Sh RESTRICTIONS
 The Asynchronous I/O Control Block structure pointed to by
 .Fa iocb
 must remain valid until the
 operation has completed.
 .Pp
 The asynchronous I/O control buffer
 .Fa iocb
 should be zeroed before the
 .Fn aio_fsync
 call to avoid passing bogus context information to the kernel.
 .Pp
 Modification of the Asynchronous I/O Control Block structure is not allowed
 while the request is queued.
 .Sh RETURN VALUES
 .Rv -std aio_fsync
 .Sh ERRORS
 The
 .Fn aio_fsync
 system call will fail if:
 .Bl -tag -width Er
 .It Bq Er EAGAIN
 The request was not queued because of system resource limitations.
 .It Bq Er EINVAL
 The asynchronous notification method in
 .Fa iocb->aio_sigevent.sigev_notify
 is invalid or not supported.
 .It Bq Er EOPNOTSUPP
 Asynchronous file synchronization operations on the file descriptor
 .Fa iocb->aio_fildes
 are unsafe and unsafe asynchronous I/O operations are disabled.
 .It Bq Er EINVAL
 The value of the
 .Fa op
 argument is not set to
-.Dv O_SYNC .
+.Dv O_SYNC
+or
+.Dv O_DSYNC .
 .El
 .Pp
 The following conditions may be synchronously detected when the
 .Fn aio_fsync
 system call is made, or asynchronously, at any time thereafter.
 If they are detected at call time,
 .Fn aio_fsync
 returns -1 and sets
 .Va errno
 appropriately; otherwise the
 .Fn aio_return
 system call must be called, and will return -1, and
 .Fn aio_error
 must be called to determine the actual value that would have been
 returned in
 .Va errno .
 .Bl -tag -width Er
 .It Bq Er EBADF
 The
 .Fa iocb->aio_fildes
 argument
 is not a valid descriptor.
 .It Bq Er EINVAL
 This implementation does not support synchronized I/O for this file.
 .El
 .Pp
 If the request is successfully enqueued, but subsequently cancelled
 or an error occurs, the value returned by the
 .Fn aio_return
 system call is per the
 .Xr read 2
 and
 .Xr write 2
 system calls, and the value returned by the
 .Fn aio_error
 system call is one of the error returns from the
 .Xr read 2
 or
 .Xr write 2
 system calls.
 .Sh SEE ALSO
 .Xr aio_cancel 2 ,
 .Xr aio_error 2 ,
 .Xr aio_read 2 ,
 .Xr aio_return 2 ,
 .Xr aio_suspend 2 ,
 .Xr aio_waitcomplete 2 ,
 .Xr aio_write 2 ,
+.Xr fdatasync 2 ,
 .Xr fsync 2 ,
 .Xr sigevent 3 ,
 .Xr siginfo 3 ,
 .Xr aio 4
 .Sh STANDARDS
 The
 .Fn aio_fsync
 system call is expected to conform to the
 .St -p1003.1
 standard.
 .Sh HISTORY
 The
 .Fn aio_fsync
 system call first appeared in
 .Fx 7.0 .
+The
+.Dv O_DSYNC
+option appeared in
+.Fx 13.0 .
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index d83c9d725e68..8666d6ea4217 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -1,3141 +1,3160 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1997 John S. Dyson.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. John S. Dyson's name may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * DISCLAIMER:  This code isn't warranted to do anything useful.  Anything
  * bad that happens because of using this software isn't the responsibility
  * of the author.  This software is distributed AS-IS.
  */
 
 /*
  * This file contains support for the POSIX 1003.1B AIO/LIO facility.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/capsicum.h>
 #include <sys/eventhandler.h>
 #include <sys/sysproto.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/module.h>
 #include <sys/kthread.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/unistd.h>
 #include <sys/posix4.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/protosw.h>
 #include <sys/rwlock.h>
 #include <sys/sema.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syscall.h>
 #include <sys/sysent.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/sx.h>
 #include <sys/taskqueue.h>
 #include <sys/vnode.h>
 #include <sys/conf.h>
 #include <sys/event.h>
 #include <sys/mount.h>
 #include <geom/geom.h>
 
 #include <machine/atomic.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_object.h>
 #include <vm/uma.h>
 #include <sys/aio.h>
 
 /*
  * Counter for allocating reference ids to new jobs.  Wrapped to 1 on
  * overflow. (XXX will be removed soon.)
  */
 static u_long jobrefid;
 
 /*
  * Counter for aio_fsync.
  */
 static uint64_t jobseqno;
 
 #ifndef MAX_AIO_PER_PROC
 #define MAX_AIO_PER_PROC	32
 #endif
 
 #ifndef MAX_AIO_QUEUE_PER_PROC
 #define MAX_AIO_QUEUE_PER_PROC	256
 #endif
 
 #ifndef MAX_AIO_QUEUE
 #define MAX_AIO_QUEUE		1024 /* Bigger than MAX_AIO_QUEUE_PER_PROC */
 #endif
 
 #ifndef MAX_BUF_AIO
 #define MAX_BUF_AIO		16
 #endif
 
 FEATURE(aio, "Asynchronous I/O");
 SYSCTL_DECL(_p1003_1b);
 
 static MALLOC_DEFINE(M_LIO, "lio", "listio aio control block list");
 static MALLOC_DEFINE(M_AIOS, "aios", "aio_suspend aio control block list");
 
 static SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "Async IO management");
 
 static int enable_aio_unsafe = 0;
 SYSCTL_INT(_vfs_aio, OID_AUTO, enable_unsafe, CTLFLAG_RW, &enable_aio_unsafe, 0,
     "Permit asynchronous IO on all file types, not just known-safe types");
 
 static unsigned int unsafe_warningcnt = 1;
 SYSCTL_UINT(_vfs_aio, OID_AUTO, unsafe_warningcnt, CTLFLAG_RW,
     &unsafe_warningcnt, 0,
     "Warnings that will be triggered upon failed IO requests on unsafe files");
 
 static int max_aio_procs = MAX_AIO_PROCS;
 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, CTLFLAG_RW, &max_aio_procs, 0,
     "Maximum number of kernel processes to use for handling async IO ");
 
 static int num_aio_procs = 0;
 SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, CTLFLAG_RD, &num_aio_procs, 0,
     "Number of presently active kernel processes for async IO");
 
 /*
  * The code will adjust the actual number of AIO processes towards this
  * number when it gets a chance.
  */
 static int target_aio_procs = TARGET_AIO_PROCS;
 SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, CTLFLAG_RW, &target_aio_procs,
     0,
     "Preferred number of ready kernel processes for async IO");
 
 static int max_queue_count = MAX_AIO_QUEUE;
 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, CTLFLAG_RW, &max_queue_count, 0,
     "Maximum number of aio requests to queue, globally");
 
 static int num_queue_count = 0;
 SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, CTLFLAG_RD, &num_queue_count, 0,
     "Number of queued aio requests");
 
 static int num_buf_aio = 0;
 SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, CTLFLAG_RD, &num_buf_aio, 0,
     "Number of aio requests presently handled by the buf subsystem");
 
 static int num_unmapped_aio = 0;
 SYSCTL_INT(_vfs_aio, OID_AUTO, num_unmapped_aio, CTLFLAG_RD, &num_unmapped_aio,
     0,
     "Number of aio requests presently handled by unmapped I/O buffers");
 
 /* Number of async I/O processes in the process of being started */
 /* XXX This should be local to aio_aqueue() */
 static int num_aio_resv_start = 0;
 
 static int aiod_lifetime;
 SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, CTLFLAG_RW, &aiod_lifetime, 0,
     "Maximum lifetime for idle aiod");
 
 static int max_aio_per_proc = MAX_AIO_PER_PROC;
 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, CTLFLAG_RW, &max_aio_per_proc,
     0,
     "Maximum active aio requests per process");
 
 static int max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC;
 SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, CTLFLAG_RW,
     &max_aio_queue_per_proc, 0,
     "Maximum queued aio requests per process");
 
 static int max_buf_aio = MAX_BUF_AIO;
 SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio, CTLFLAG_RW, &max_buf_aio, 0,
     "Maximum buf aio requests per process");
 
 /* 
  * Though redundant with vfs.aio.max_aio_queue_per_proc, POSIX requires
  * sysconf(3) to support AIO_LISTIO_MAX, and we implement that with
  * vfs.aio.aio_listio_max.
  */
 SYSCTL_INT(_p1003_1b, CTL_P1003_1B_AIO_LISTIO_MAX, aio_listio_max,
     CTLFLAG_RD | CTLFLAG_CAPRD, &max_aio_queue_per_proc,
     0, "Maximum aio requests for a single lio_listio call");
 
 #ifdef COMPAT_FREEBSD6
 typedef struct oaiocb {
 	int	aio_fildes;		/* File descriptor */
 	off_t	aio_offset;		/* File offset for I/O */
 	volatile void *aio_buf;         /* I/O buffer in process space */
 	size_t	aio_nbytes;		/* Number of bytes for I/O */
 	struct	osigevent aio_sigevent;	/* Signal to deliver */
 	int	aio_lio_opcode;		/* LIO opcode */
 	int	aio_reqprio;		/* Request priority -- ignored */
 	struct	__aiocb_private	_aiocb_private;
 } oaiocb_t;
 #endif
 
 /*
  * Below is a key of locks used to protect each member of struct kaiocb
  * aioliojob and kaioinfo and any backends.
  *
  * * - need not be protected
  * a - locked by kaioinfo lock
  * b - locked by backend lock, the backend lock can be null in some cases,
  *     for example, BIO belongs to this type, in this case, proc lock is
  *     reused.
  * c - locked by aio_job_mtx, the lock for the generic file I/O backend.
  */
 
 /*
  * If the routine that services an AIO request blocks while running in an
  * AIO kernel process it can starve other I/O requests.  BIO requests
  * queued via aio_qbio() complete asynchronously and do not use AIO kernel
  * processes at all.  Socket I/O requests use a separate pool of
  * kprocs and also force non-blocking I/O.  Other file I/O requests
  * use the generic fo_read/fo_write operations which can block.  The
  * fsync and mlock operations can also block while executing.  Ideally
  * none of these requests would block while executing.
  *
  * Note that the service routines cannot toggle O_NONBLOCK in the file
  * structure directly while handling a request due to races with
  * userland threads.
  */
 
 /* jobflags */
 #define	KAIOCB_QUEUEING		0x01
 #define	KAIOCB_CANCELLED	0x02
 #define	KAIOCB_CANCELLING	0x04
 #define	KAIOCB_CHECKSYNC	0x08
 #define	KAIOCB_CLEARED		0x10
 #define	KAIOCB_FINISHED		0x20
 
 /*
  * AIO process info
  */
 #define AIOP_FREE	0x1			/* proc on free queue */
 
 /*
  * One record per AIO kernel process.  The letter in each member comment
  * refers to the lock key earlier in this file; idle records are linked on
  * aio_freeproc through 'list'.
  */
 struct aioproc {
 	int	aioprocflags;			/* (c) AIO proc flags */
 	TAILQ_ENTRY(aioproc) list;		/* (c) list of processes */
 	struct	proc *aioproc;			/* (*) the AIO proc */
 };
 
 /*
  * data-structure for lio signal management
  *
  * lioj_flags holds the LIOJ_* bits defined just below this structure.
  * A list is considered done when lioj_finished_count catches up with
  * lioj_count (see aio_bio_done_notify()).
  */
 struct aioliojob {
 	int	lioj_flags;			/* (a) listio flags */
 	int	lioj_count;			/* (a) count of jobs */
 	int	lioj_finished_count;		/* (a) count of finished jobs */
 	struct	sigevent lioj_signal;		/* (a) signal on all I/O done */
 	TAILQ_ENTRY(aioliojob) lioj_list;	/* (a) lio list */
 	struct	knlist klist;			/* (a) list of knotes */
 	ksiginfo_t lioj_ksi;			/* (a) Realtime signal info */
 };
 
 #define	LIOJ_SIGNAL		0x1	/* signal on all done (lio) */
 #define	LIOJ_SIGNAL_POSTED	0x2	/* signal has been posted */
 #define LIOJ_KEVENT_POSTED	0x4	/* kevent triggered */
 
 /*
  * per process aio data structure
  *
  * Allocated by aio_init_aioinfo() and hung off p->p_aioinfo.  kaio_mtx is
  * taken through the AIO_LOCK()/AIO_UNLOCK() macros below, and kaio_flags
  * holds the KAIO_* bits.
  */
 struct kaioinfo {
 	struct	mtx kaio_mtx;		/* the lock to protect this struct */
 	int	kaio_flags;		/* (a) per process kaio flags */
 	int	kaio_active_count;	/* (c) number of currently used AIOs */
 	int	kaio_count;		/* (a) size of AIO queue */
 	int	kaio_buffer_count;	/* (a) number of bio buffers */
 	TAILQ_HEAD(,kaiocb) kaio_all;	/* (a) all AIOs in a process */
 	TAILQ_HEAD(,kaiocb) kaio_done;	/* (a) done queue for process */
 	TAILQ_HEAD(,aioliojob) kaio_liojoblist; /* (a) list of lio jobs */
 	TAILQ_HEAD(,kaiocb) kaio_jobqueue;	/* (a) job queue for process */
 	TAILQ_HEAD(,kaiocb) kaio_syncqueue;	/* (a) queue for aio_fsync */
 	TAILQ_HEAD(,kaiocb) kaio_syncready;  /* (a) second q for aio_fsync */
 	struct	task kaio_task;		/* (*) task to kick aio processes */
 	struct	task kaio_sync_task;	/* (*) task to schedule fsync jobs */
 };
 
 #define AIO_LOCK(ki)		mtx_lock(&(ki)->kaio_mtx)
 #define AIO_UNLOCK(ki)		mtx_unlock(&(ki)->kaio_mtx)
 #define AIO_LOCK_ASSERT(ki, f)	mtx_assert(&(ki)->kaio_mtx, (f))
 #define AIO_MTX(ki)		(&(ki)->kaio_mtx)
 
 #define KAIO_RUNDOWN	0x1	/* process is being run down */
 #define KAIO_WAKEUP	0x2	/* wakeup process when AIO completes */
 
 /*
  * Operations used to interact with userland aio control blocks.
  * Different ABIs provide their own operations.
  *
  * NOTE(review): the per-member notes below are inferred from the member
  * names and prototypes only; confirm against the implementations.
  */
 struct aiocb_ops {
 	/* presumably copies the user aiocb 'ujob' into 'kjob'; 'ty' TBC */
 	int	(*aio_copyin)(struct aiocb *ujob, struct kaiocb *kjob, int ty);
 	/* read back / write the status, error and kernel-info fields of 'ujob' */
 	long	(*fetch_status)(struct aiocb *ujob);
 	long	(*fetch_error)(struct aiocb *ujob);
 	int	(*store_status)(struct aiocb *ujob, long status);
 	int	(*store_error)(struct aiocb *ujob, long error);
 	int	(*store_kernelinfo)(struct aiocb *ujob, long jobref);
 	/* presumably stores the pointer 'ujob' at the user location 'ujobp' */
 	int	(*store_aiocb)(struct aiocb **ujobp, struct aiocb *ujob);
 };
 
 static TAILQ_HEAD(,aioproc) aio_freeproc;		/* (c) Idle daemons */
 static struct sema aio_newproc_sem;
 static struct mtx aio_job_mtx;
 static TAILQ_HEAD(,kaiocb) aio_jobs;			/* (c) Async job list */
 static struct unrhdr *aiod_unr;
 
 static void	aio_biocleanup(struct bio *bp);
 void		aio_init_aioinfo(struct proc *p);
 static int	aio_onceonly(void);
 static int	aio_free_entry(struct kaiocb *job);
 static void	aio_process_rw(struct kaiocb *job);
 static void	aio_process_sync(struct kaiocb *job);
 static void	aio_process_mlock(struct kaiocb *job);
 static void	aio_schedule_fsync(void *context, int pending);
 static int	aio_newproc(int *);
 int		aio_aqueue(struct thread *td, struct aiocb *ujob,
 		    struct aioliojob *lio, int type, struct aiocb_ops *ops);
 static int	aio_queue_file(struct file *fp, struct kaiocb *job);
 static void	aio_biowakeup(struct bio *bp);
 static void	aio_proc_rundown(void *arg, struct proc *p);
 static void	aio_proc_rundown_exec(void *arg, struct proc *p,
 		    struct image_params *imgp);
 static int	aio_qbio(struct proc *p, struct kaiocb *job);
 static void	aio_daemon(void *param);
 static void	aio_bio_done_notify(struct proc *userp, struct kaiocb *job);
 static bool	aio_clear_cancel_function_locked(struct kaiocb *job);
 static int	aio_kick(struct proc *userp);
 static void	aio_kick_nowait(struct proc *userp);
 static void	aio_kick_helper(void *context, int pending);
 static int	filt_aioattach(struct knote *kn);
 static void	filt_aiodetach(struct knote *kn);
 static int	filt_aio(struct knote *kn, long hint);
 static int	filt_lioattach(struct knote *kn);
 static void	filt_liodetach(struct knote *kn);
 static int	filt_lio(struct knote *kn, long hint);
 
 /*
  * Zones for:
  * 	kaio	Per process async io info
  *	aiop	async io process data
  *	aiocb	async io jobs
  *	aiolio	list io jobs
  */
 static uma_zone_t kaio_zone, aiop_zone, aiocb_zone, aiolio_zone;
 
 /*
  * kqueue filters for aio
  *
  * aio_filtops is registered for EVFILT_AIO and lio_filtops for EVFILT_LIO
  * by aio_onceonly().  Both set f_isfd to 0.
  */
 static struct filterops aio_filtops = {
 	.f_isfd = 0,
 	.f_attach = filt_aioattach,
 	.f_detach = filt_aiodetach,
 	.f_event = filt_aio,
 };
 static struct filterops lio_filtops = {
 	.f_isfd = 0,
 	.f_attach = filt_lioattach,
 	.f_detach = filt_liodetach,
 	.f_event = filt_lio
 };
 
 static eventhandler_tag exit_tag, exec_tag;
 
 TASKQUEUE_DEFINE_THREAD(aiod_kick);
 
 /*
  * Module event handler.  MOD_LOAD performs the one-time AIO setup,
  * MOD_SHUTDOWN is accepted as a no-op, and every other event is refused
  * with EOPNOTSUPP.
  */
 static int
 aio_modload(struct module *module, int cmd, void *arg)
 {
 
 	if (cmd == MOD_LOAD) {
 		aio_onceonly();
 		return (0);
 	}
 	if (cmd == MOD_SHUTDOWN)
 		return (0);
 	return (EOPNOTSUPP);
 }
 
 /*
  * Module glue: the module is named "aio" and its events are delivered to
  * aio_modload(); no extra argument is supplied.
  */
 static moduledata_t aio_mod = {
 	"aio",
 	&aio_modload,
 	NULL
 };
 
 DECLARE_MODULE(aio, aio_mod, SI_SUB_VFS, SI_ORDER_ANY);
 MODULE_VERSION(aio, 1);
 
 /*
  * Startup initialization
  *
  * Called from aio_modload() on MOD_LOAD.  Sets up every piece of global
  * AIO state used by this file and always returns 0.
  */
 static int
 aio_onceonly(void)
 {
 
 	/* Run aio_proc_rundown() when a process exits or execs. */
 	exit_tag = EVENTHANDLER_REGISTER(process_exit, aio_proc_rundown, NULL,
 	    EVENTHANDLER_PRI_ANY);
 	exec_tag = EVENTHANDLER_REGISTER(process_exec, aio_proc_rundown_exec,
 	    NULL, EVENTHANDLER_PRI_ANY);
 	/* Make EVFILT_AIO and EVFILT_LIO available to kqueue. */
 	kqueue_add_filteropts(EVFILT_AIO, &aio_filtops);
 	kqueue_add_filteropts(EVFILT_LIO, &lio_filtops);
 	/* Global daemon list, start-up semaphore, job lock and job queue. */
 	TAILQ_INIT(&aio_freeproc);
 	sema_init(&aio_newproc_sem, 0, "aio_new_proc");
 	mtx_init(&aio_job_mtx, "aio_job", NULL, MTX_DEF);
 	TAILQ_INIT(&aio_jobs);
 	aiod_unr = new_unrhdr(1, INT_MAX, NULL);
 	/* One UMA zone per object type; all are created UMA_ZONE_NOFREE. */
 	kaio_zone = uma_zcreate("AIO", sizeof(struct kaioinfo), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	aiop_zone = uma_zcreate("AIOP", sizeof(struct aioproc), NULL,
 	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	aiocb_zone = uma_zcreate("AIOCB", sizeof(struct kaiocb), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	aiolio_zone = uma_zcreate("AIOLIO", sizeof(struct aioliojob), NULL,
 	    NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	aiod_lifetime = AIOD_LIFETIME_DEFAULT;
 	jobrefid = 1;
 	/* Publish the POSIX.1b configuration values for AIO. */
 	p31b_setcfg(CTL_P1003_1B_ASYNCHRONOUS_IO, _POSIX_ASYNCHRONOUS_IO);
 	p31b_setcfg(CTL_P1003_1B_AIO_MAX, MAX_AIO_QUEUE);
 	p31b_setcfg(CTL_P1003_1B_AIO_PRIO_DELTA_MAX, 0);
 
 	return (0);
 }
 
 /*
  * Init the per-process aioinfo structure.  The aioinfo limits are set
  * per-process for user limit (resource) management.
  */
 void
 aio_init_aioinfo(struct proc *p)
 {
 	struct kaioinfo *ki;
 
 	ki = uma_zalloc(kaio_zone, M_WAITOK);
 	mtx_init(&ki->kaio_mtx, "aiomtx", NULL, MTX_DEF | MTX_NEW);
 	ki->kaio_flags = 0;
 	ki->kaio_active_count = 0;
 	ki->kaio_count = 0;
 	ki->kaio_buffer_count = 0;
 	TAILQ_INIT(&ki->kaio_all);
 	TAILQ_INIT(&ki->kaio_done);
 	TAILQ_INIT(&ki->kaio_jobqueue);
 	TAILQ_INIT(&ki->kaio_liojoblist);
 	TAILQ_INIT(&ki->kaio_syncqueue);
 	TAILQ_INIT(&ki->kaio_syncready);
 	TASK_INIT(&ki->kaio_task, 0, aio_kick_helper, p);
 	TASK_INIT(&ki->kaio_sync_task, 0, aio_schedule_fsync, ki);
 	PROC_LOCK(p);
 	if (p->p_aioinfo == NULL) {
 		p->p_aioinfo = ki;
 		PROC_UNLOCK(p);
 	} else {
 		PROC_UNLOCK(p);
 		mtx_destroy(&ki->kaio_mtx);
 		uma_zfree(kaio_zone, ki);
 	}
 
 	while (num_aio_procs < MIN(target_aio_procs, max_aio_procs))
 		aio_newproc(NULL);
 }
 
 /*
  * Deliver the completion signal described by 'sigev' to process 'p'.
  * Nothing is sent when 'ksi' is already queued.  Returns the error from
  * sigev_findtd(), in which case no signal is sent and the process is not
  * unlocked here; otherwise the process is unlocked and 0 is returned.
  */
 static int
 aio_sendsig(struct proc *p, struct sigevent *sigev, ksiginfo_t *ksi, bool ext)
 {
 	struct thread *target;
 	int rv;
 
 	rv = sigev_findtd(p, sigev, &target);
 	if (rv != 0)
 		return (rv);
 
 	if (!KSI_ONQ(ksi)) {
 		ksiginfo_set_sigev(ksi, sigev);
 		ksi->ksi_code = SI_ASYNCIO;
 		if (ext)
 			ksi->ksi_flags |= KSI_EXT | KSI_INS;
 		tdsendsignal(p, target, ksi->ksi_signo, ksi);
 	}
 	PROC_UNLOCK(p);
 	return (rv);
 }
 
 /*
  * Free a finished job entry.  The job must already be marked
  * KAIOCB_FINISHED, must belong to the current process, and the caller
  * must hold the kaioinfo lock.  The lock is dropped while the file
  * reference, credential, I/O vector and job memory are released, and is
  * re-acquired before returning.  Always returns 0.
  */
 static int
 aio_free_entry(struct kaiocb *job)
 {
 	struct kaioinfo *ki;
 	struct aioliojob *lj;
 	struct proc *p;
 
 	p = job->userproc;
 	MPASS(curproc == p);
 	ki = p->p_aioinfo;
 	MPASS(ki != NULL);
 
 	AIO_LOCK_ASSERT(ki, MA_OWNED);
 	MPASS(job->jobflags & KAIOCB_FINISHED);
 
 	/* Drop the job from the global and per-process accounting. */
 	atomic_subtract_int(&num_queue_count, 1);
 
 	ki->kaio_count--;
 	MPASS(ki->kaio_count >= 0);
 
 	TAILQ_REMOVE(&ki->kaio_done, job, plist);
 	TAILQ_REMOVE(&ki->kaio_all, job, allist);
 
 	/* If this was the last job of an lio list, tear the list down too. */
 	lj = job->lio;
 	if (lj) {
 		lj->lioj_count--;
 		lj->lioj_finished_count--;
 
 		if (lj->lioj_count == 0) {
 			TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
 			/* lio is going away, we need to destroy any knotes */
 			knlist_delete(&lj->klist, curthread, 1);
 			PROC_LOCK(p);
 			sigqueue_take(&lj->lioj_ksi);
 			PROC_UNLOCK(p);
 			uma_zfree(aiolio_zone, lj);
 		}
 	}
 
 	/* job is going away, we need to destroy any knotes */
 	knlist_delete(&job->klist, curthread, 1);
 	PROC_LOCK(p);
 	sigqueue_take(&job->ksi);
 	PROC_UNLOCK(p);
 
 	AIO_UNLOCK(ki);
 
 	/*
 	 * The thread argument here is used to find the owning process
 	 * and is also passed to fo_close() which may pass it to various
 	 * places such as devsw close() routines.  Because of that, we
 	 * need a thread pointer from the process owning the job that is
 	 * persistent and won't disappear out from under us or move to
 	 * another process.
 	 *
 	 * Currently, all the callers of this function call it to remove
 	 * a kaiocb from the current process' job list either via a
 	 * syscall or due to the current process calling exit() or
 	 * execve().  Thus, we know that p == curproc.  We also know that
 	 * curthread can't exit since we are curthread.
 	 *
 	 * Therefore, we use curthread as the thread to pass to
 	 * knlist_delete().  This does mean that it is possible for the
 	 * thread pointer at close time to differ from the thread pointer
 	 * at open time, but this is already true of file descriptors in
 	 * a multithreaded process.
 	 */
 	if (job->fd_file)
 		fdrop(job->fd_file, curthread);
 	crfree(job->cred);
 	/* The I/O vector is only freed when it is not the embedded job->uio. */
 	if (job->uiop != &job->uio)
 		free(job->uiop, M_IOV);
 	uma_zfree(aiocb_zone, job);
 	/* Callers expect the kaioinfo lock to be held again on return. */
 	AIO_LOCK(ki);
 
 	return (0);
 }
 
 /*
  * process_exec event handler: the image parameters are not needed, so
  * simply forward to the common rundown routine.
  */
 static void
 aio_proc_rundown_exec(void *arg, struct proc *p,
     struct image_params *imgp __unused)
 {
 
 	aio_proc_rundown(arg, p);
 }
 
 /*
  * Try to cancel 'job' on behalf of process 'p'.  Must be called with the
  * kaioinfo lock held; the lock is dropped around the job's cancel
  * callback.  Returns non-zero only when the job has finished with
  * ECANCELED by the time the callback returns.  Returns 0 when the job was
  * already cancelled or finished, has no cancel routine, or was not
  * completed by the callback.
  */
 static int
 aio_cancel_job(struct proc *p, struct kaioinfo *ki, struct kaiocb *job)
 {
 	aio_cancel_fn_t *func;
 	int cancelled;
 
 	AIO_LOCK_ASSERT(ki, MA_OWNED);
 	if (job->jobflags & (KAIOCB_CANCELLED | KAIOCB_FINISHED))
 		return (0);
 	MPASS((job->jobflags & KAIOCB_CANCELLING) == 0);
 	job->jobflags |= KAIOCB_CANCELLED;
 
 	func = job->cancel_fn;
 
 	/*
 	 * If there is no cancel routine, just leave the job marked as
 	 * cancelled.  The job should be in active use by a caller who
 	 * should complete it normally or when it fails to install a
 	 * cancel routine.
 	 */
 	if (func == NULL)
 		return (0);
 
 	/*
 	 * Set the CANCELLING flag so that aio_complete() will defer
 	 * completions of this job.  This prevents the job from being
 	 * freed out from under the cancel callback.  After the
 	 * callback any deferred completion (whether from the callback
 	 * or any other source) will be completed.
 	 */
 	job->jobflags |= KAIOCB_CANCELLING;
 	AIO_UNLOCK(ki);
 	func(job);
 	AIO_LOCK(ki);
 	job->jobflags &= ~KAIOCB_CANCELLING;
 	if (job->jobflags & KAIOCB_FINISHED) {
 		/* Finished during the callback: move it to the done queue. */
 		cancelled = job->uaiocb._aiocb_private.error == ECANCELED;
 		TAILQ_REMOVE(&ki->kaio_jobqueue, job, plist);
 		aio_bio_done_notify(p, job);
 	} else {
 		/*
 		 * The cancel callback might have scheduled an
 		 * operation to cancel this request, but it is
 		 * only counted as cancelled if the request is
 		 * cancelled when the callback returns.
 		 */
 		cancelled = 0;
 	}
 	return (cancelled);
 }
 
 /*
  * Rundown the jobs for a given process.
  *
  * Registered as the process_exit handler and also reached through
  * aio_proc_rundown_exec().  Must run in the context of 'p'.  Does nothing
  * if the process never set up AIO state; otherwise it cancels and waits
  * out all requests, frees completed ones and releases the kaioinfo.
  */
 static void
 aio_proc_rundown(void *arg, struct proc *p)
 {
 	struct kaioinfo *ki;
 	struct aioliojob *lj;
 	struct kaiocb *job, *jobn;
 
 	KASSERT(curthread->td_proc == p,
 	    ("%s: called on non-curproc", __func__));
 	ki = p->p_aioinfo;
 	if (ki == NULL)
 		return;
 
 	AIO_LOCK(ki);
 	/* With RUNDOWN set, aio_bio_done_notify() skips user notifications. */
 	ki->kaio_flags |= KAIO_RUNDOWN;
 
 restart:
 
 	/*
 	 * Try to cancel all pending requests. This code simulates
 	 * aio_cancel on all pending I/O requests.
 	 */
 	TAILQ_FOREACH_SAFE(job, &ki->kaio_jobqueue, plist, jobn) {
 		aio_cancel_job(p, ki, job);
 	}
 
 	/* Wait for all running I/O to be finished */
 	if (TAILQ_FIRST(&ki->kaio_jobqueue) || ki->kaio_active_count != 0) {
 		ki->kaio_flags |= KAIO_WAKEUP;
 		/* Sleep with a timeout of hz ticks, then rescan from the top. */
 		msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO, "aioprn", hz);
 		goto restart;
 	}
 
 	/* Free all completed I/O requests. */
 	while ((job = TAILQ_FIRST(&ki->kaio_done)) != NULL)
 		aio_free_entry(job);
 
 	/* Any lio job still listed must be empty by now; otherwise panic. */
 	while ((lj = TAILQ_FIRST(&ki->kaio_liojoblist)) != NULL) {
 		if (lj->lioj_count == 0) {
 			TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
 			knlist_delete(&lj->klist, curthread, 1);
 			PROC_LOCK(p);
 			sigqueue_take(&lj->lioj_ksi);
 			PROC_UNLOCK(p);
 			uma_zfree(aiolio_zone, lj);
 		} else {
 			panic("LIO job not cleaned up: C:%d, FC:%d\n",
 			    lj->lioj_count, lj->lioj_finished_count);
 		}
 	}
 	AIO_UNLOCK(ki);
 	/* Drain both tasks before the structure they reference is freed. */
 	taskqueue_drain(taskqueue_aiod_kick, &ki->kaio_task);
 	taskqueue_drain(taskqueue_aiod_kick, &ki->kaio_sync_task);
 	mtx_destroy(&ki->kaio_mtx);
 	uma_zfree(kaio_zone, ki);
 	p->p_aioinfo = NULL;
 }
 
 /*
  * Select a job to run (called by an AIO daemon).
  */
 static struct kaiocb *
 aio_selectjob(struct aioproc *aiop)
 {
 	struct kaiocb *job;
 	struct kaioinfo *ki;
 	struct proc *userp;
 
 	mtx_assert(&aio_job_mtx, MA_OWNED);
 restart:
 	TAILQ_FOREACH(job, &aio_jobs, list) {
 		userp = job->userproc;
 		ki = userp->p_aioinfo;
 
 		if (ki->kaio_active_count < max_aio_per_proc) {
 			TAILQ_REMOVE(&aio_jobs, job, list);
 			if (!aio_clear_cancel_function(job))
 				goto restart;
 
 			/* Account for currently active jobs. */
 			ki->kaio_active_count++;
 			break;
 		}
 	}
 	return (job);
 }
 
 /*
  * Move all data to a permanent storage device.  This code
- * simulates the fsync syscall.
+ * simulates the fsync and fdatasync syscalls.
  *
  * An op of LIO_DSYNC selects VOP_FDATASYNC(); any other value selects
  * VOP_FSYNC() with MNT_WAIT.  Returns 0, or the error from
  * vn_start_write() or from the chosen VOP.
  */
 static int
-aio_fsync_vnode(struct thread *td, struct vnode *vp)
+aio_fsync_vnode(struct thread *td, struct vnode *vp, int op)
 {
 	struct mount *mp;
 	int error;
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		goto drop;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	/* Clean the vnode's VM object, if it has one, before syncing. */
 	if (vp->v_object != NULL) {
 		VM_OBJECT_WLOCK(vp->v_object);
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
 		VM_OBJECT_WUNLOCK(vp->v_object);
 	}
-	error = VOP_FSYNC(vp, MNT_WAIT, td);
+	if (op == LIO_DSYNC)
+		error = VOP_FDATASYNC(vp, td);
+	else
+		error = VOP_FSYNC(vp, MNT_WAIT, td);
 
 	VOP_UNLOCK(vp);
 	vn_finished_write(mp);
 drop:
 	return (error);
 }
 
 /*
  * The AIO processing activity for LIO_READ/LIO_WRITE.  This is the code that
  * does the I/O request for the non-bio version of the operations.  The normal
  * vn operations are used, and this code should work in all instances for every
  * type of file, including pipes, sockets, fifos, and regular files.
  *
  * XXX I don't think it works well for socket, pipe, and fifo.
  */
 static void
 aio_process_rw(struct kaiocb *job)
 {
 	struct ucred *td_savedcred;
 	struct thread *td;
 	struct aiocb *cb;
 	struct file *fp;
 	ssize_t cnt;
 	long msgsnd_st, msgsnd_end;
 	long msgrcv_st, msgrcv_end;
 	long oublock_st, oublock_end;
 	long inblock_st, inblock_end;
 	int error, opcode;
 
 	KASSERT(job->uaiocb.aio_lio_opcode == LIO_READ ||
 	    job->uaiocb.aio_lio_opcode == LIO_READV ||
 	    job->uaiocb.aio_lio_opcode == LIO_WRITE ||
 	    job->uaiocb.aio_lio_opcode == LIO_WRITEV,
 	    ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));
 
 	aio_switch_vmspace(job);
 	td = curthread;
 	td_savedcred = td->td_ucred;
 	td->td_ucred = job->cred;
 	job->uiop->uio_td = td;
 	cb = &job->uaiocb;
 	fp = job->fd_file;
 
 	opcode = job->uaiocb.aio_lio_opcode;
 	cnt = job->uiop->uio_resid;
 
 	msgrcv_st = td->td_ru.ru_msgrcv;
 	msgsnd_st = td->td_ru.ru_msgsnd;
 	inblock_st = td->td_ru.ru_inblock;
 	oublock_st = td->td_ru.ru_oublock;
 
 	/*
 	 * aio_aqueue() acquires a reference to the file that is
 	 * released in aio_free_entry().
 	 */
 	if (opcode == LIO_READ || opcode == LIO_READV) {
 		if (job->uiop->uio_resid == 0)
 			error = 0;
 		else
 			error = fo_read(fp, job->uiop, fp->f_cred, FOF_OFFSET,
 			    td);
 	} else {
 		if (fp->f_type == DTYPE_VNODE)
 			bwillwrite();
 		error = fo_write(fp, job->uiop, fp->f_cred, FOF_OFFSET, td);
 	}
 	msgrcv_end = td->td_ru.ru_msgrcv;
 	msgsnd_end = td->td_ru.ru_msgsnd;
 	inblock_end = td->td_ru.ru_inblock;
 	oublock_end = td->td_ru.ru_oublock;
 
 	job->msgrcv = msgrcv_end - msgrcv_st;
 	job->msgsnd = msgsnd_end - msgsnd_st;
 	job->inblock = inblock_end - inblock_st;
 	job->outblock = oublock_end - oublock_st;
 
 	if (error != 0 && job->uiop->uio_resid != cnt) {
 		if (error == ERESTART || error == EINTR || error == EWOULDBLOCK)
 			error = 0;
 		if (error == EPIPE &&
 		    (opcode == LIO_WRITE || opcode == LIO_WRITEV)) {
 			PROC_LOCK(job->userproc);
 			kern_psignal(job->userproc, SIGPIPE);
 			PROC_UNLOCK(job->userproc);
 		}
 	}
 
 	cnt -= job->uiop->uio_resid;
 	td->td_ucred = td_savedcred;
 	if (error)
 		aio_complete(job, -1, error);
 	else
 		aio_complete(job, cnt, 0);
 }
 
 /*
  * Service an LIO_SYNC or LIO_DSYNC job.  The sync is issued with the
  * job's credential temporarily installed on the current thread.  A file
  * with no vnode is completed successfully without doing any work.
  */
 static void
 aio_process_sync(struct kaiocb *job)
 {
 	struct thread *td = curthread;
 	struct ucred *td_savedcred = td->td_ucred;
 	struct file *fp = job->fd_file;
 	int error = 0;
 
-	KASSERT(job->uaiocb.aio_lio_opcode == LIO_SYNC,
+	KASSERT(job->uaiocb.aio_lio_opcode == LIO_SYNC ||
+	    job->uaiocb.aio_lio_opcode == LIO_DSYNC,
 	    ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));
 
 	td->td_ucred = job->cred;
-	if (fp->f_vnode != NULL)
-		error = aio_fsync_vnode(td, fp->f_vnode);
+	if (fp->f_vnode != NULL) {
+		error = aio_fsync_vnode(td, fp->f_vnode,
+		    job->uaiocb.aio_lio_opcode);
+	}
 	td->td_ucred = td_savedcred;
 	if (error)
 		aio_complete(job, -1, error);
 	else
 		aio_complete(job, 0, 0);
 }
 
 /*
  * Daemon-side handler for an LIO_MLOCK job: switch to the submitting
  * process's address space, wire the range described by aio_buf and
  * aio_nbytes with kern_mlock(), and complete the job with the outcome.
  */
 static void
 aio_process_mlock(struct kaiocb *job)
 {
 	struct aiocb *uacb;
 	int rv;
 
 	KASSERT(job->uaiocb.aio_lio_opcode == LIO_MLOCK,
 	    ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));
 
 	uacb = &job->uaiocb;
 	aio_switch_vmspace(job);
 	rv = kern_mlock(job->userproc, job->cred,
 	    __DEVOLATILE(uintptr_t, uacb->aio_buf), uacb->aio_nbytes);
 	if (rv != 0)
 		aio_complete(job, -1, rv);
 	else
 		aio_complete(job, 0, 0);
 }
 
 /*
  * Completion bookkeeping and notification for a finished job.
  *
  * The caller must hold the per-process AIO lock of userp (asserted below)
  * and must already have marked the job KAIOCB_FINISHED.  The job is moved
  * onto the process's done list, the owning lio_listio() batch (if any) is
  * credited with one more finished job, per-job and per-batch signal and
  * kevent notifications are posted unless the process is in AIO rundown,
  * sync jobs that were waiting on this job are released, and a sleeper
  * that set KAIO_WAKEUP is woken.
  */
 static void
 aio_bio_done_notify(struct proc *userp, struct kaiocb *job)
 {
 	struct aioliojob *lj;
 	struct kaioinfo *ki;
 	struct kaiocb *sjob, *sjobn;
 	int lj_done;
 	bool schedule_fsync;
 
 	ki = userp->p_aioinfo;
 	AIO_LOCK_ASSERT(ki, MA_OWNED);
 	lj = job->lio;
 	lj_done = 0;
 	if (lj) {
 		/* The batch is done once every queued member has finished. */
 		lj->lioj_finished_count++;
 		if (lj->lioj_count == lj->lioj_finished_count)
 			lj_done = 1;
 	}
 	TAILQ_INSERT_TAIL(&ki->kaio_done, job, plist);
 	MPASS(job->jobflags & KAIOCB_FINISHED);
 
 	/* No signals or kevents are posted while the process is in rundown. */
 	if (ki->kaio_flags & KAIO_RUNDOWN)
 		goto notification_done;
 
 	if (job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ||
 	    job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID)
 		aio_sendsig(userp, &job->uaiocb.aio_sigevent, &job->ksi, true);
 
 	KNOTE_LOCKED(&job->klist, 1);
 
 	if (lj_done) {
 		if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) {
 			lj->lioj_flags |= LIOJ_KEVENT_POSTED;
 			KNOTE_LOCKED(&lj->klist, 1);
 		}
 		/* Send the batch signal only if requested and not yet posted. */
 		if ((lj->lioj_flags & (LIOJ_SIGNAL | LIOJ_SIGNAL_POSTED))
 		    == LIOJ_SIGNAL &&
 		    (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL ||
 		    lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) {
 			aio_sendsig(userp, &lj->lioj_signal, &lj->lioj_ksi,
 			    true);
 			lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
 		}
 	}
 
 notification_done:
 	if (job->jobflags & KAIOCB_CHECKSYNC) {
 		/*
 		 * Some sync job was counting on this job.  Walk the waiting
 		 * sync jobs for the same file that were queued after this
 		 * job and drop their pending counts; those that reach zero
 		 * leave the wait queue and, unless a cancellation is in
 		 * progress for them, are handed to the sync task.
 		 */
 		schedule_fsync = false;
 		TAILQ_FOREACH_SAFE(sjob, &ki->kaio_syncqueue, list, sjobn) {
 			if (job->fd_file != sjob->fd_file ||
 			    job->seqno >= sjob->seqno)
 				continue;
 			if (--sjob->pending > 0)
 				continue;
 			TAILQ_REMOVE(&ki->kaio_syncqueue, sjob, list);
 			if (!aio_clear_cancel_function_locked(sjob))
 				continue;
 			TAILQ_INSERT_TAIL(&ki->kaio_syncready, sjob, list);
 			schedule_fsync = true;
 		}
 		if (schedule_fsync)
 			taskqueue_enqueue(taskqueue_aiod_kick,
 			    &ki->kaio_sync_task);
 	}
 	/* Wake a thread sleeping on p_aioinfo that asked to be notified. */
 	if (ki->kaio_flags & KAIO_WAKEUP) {
 		ki->kaio_flags &= ~KAIO_WAKEUP;
 		wakeup(&userp->p_aioinfo);
 	}
 }
 
 static void
 aio_schedule_fsync(void *context, int pending)
 {
 	struct kaioinfo *ki;
 	struct kaiocb *job;
 
 	ki = context;
 	AIO_LOCK(ki);
 	while (!TAILQ_EMPTY(&ki->kaio_syncready)) {
 		job = TAILQ_FIRST(&ki->kaio_syncready);
 		TAILQ_REMOVE(&ki->kaio_syncready, job, list);
 		AIO_UNLOCK(ki);
 		aio_schedule(job, aio_process_sync);
 		AIO_LOCK(ki);
 	}
 	AIO_UNLOCK(ki);
 }
 
 /*
  * Report whether KAIOCB_CLEARED is set on the job.
  *
  * The flag is set by aio_clear_cancel_function(); callers are expected
  * to hold the same queue lock that was held for that call so that the
  * value read here is current.  That requirement cannot be asserted.
  */
 bool
 aio_cancel_cleared(struct kaiocb *job)
 {
 	bool cleared;
 
 	cleared = (job->jobflags & KAIOCB_CLEARED) != 0;
 	return (cleared);
 }
 
 /*
  * Detach the job's cancel callback; the per-process AIO lock must be held.
  *
  * Returns true when the callback was removed.  If a cancellation is
  * already in progress (KAIOCB_CANCELLING), the callback is left alone,
  * KAIOCB_CLEARED is recorded on the job, and false is returned.
  */
 static bool
 aio_clear_cancel_function_locked(struct kaiocb *job)
 {
 
 	AIO_LOCK_ASSERT(job->userproc->p_aioinfo, MA_OWNED);
 	MPASS(job->cancel_fn != NULL);
 	if ((job->jobflags & KAIOCB_CANCELLING) == 0) {
 		job->cancel_fn = NULL;
 		return (true);
 	}
 	job->jobflags |= KAIOCB_CLEARED;
 	return (false);
 }
 
 /*
  * Locking wrapper around aio_clear_cancel_function_locked(): takes the
  * owning process's AIO lock, clears the cancel callback, and returns the
  * helper's result.
  */
 bool
 aio_clear_cancel_function(struct kaiocb *job)
 {
 	struct kaioinfo *ki = job->userproc->p_aioinfo;
 	bool cleared;
 
 	AIO_LOCK(ki);
 	cleared = aio_clear_cancel_function_locked(job);
 	AIO_UNLOCK(ki);
 	return (cleared);
 }
 
 /*
  * Install "func" as the job's cancel callback; the per-process AIO lock
  * must be held.  Returns false, leaving the callback untouched, when the
  * job has already been marked KAIOCB_CANCELLED; returns true otherwise.
  */
 static bool
 aio_set_cancel_function_locked(struct kaiocb *job, aio_cancel_fn_t *func)
 {
 	bool cancelled;
 
 	AIO_LOCK_ASSERT(job->userproc->p_aioinfo, MA_OWNED);
 	cancelled = (job->jobflags & KAIOCB_CANCELLED) != 0;
 	if (!cancelled)
 		job->cancel_fn = func;
 	return (!cancelled);
 }
 
 /*
  * Locking wrapper around aio_set_cancel_function_locked(): takes the
  * owning process's AIO lock, installs the cancel callback, and returns
  * the helper's result.
  */
 bool
 aio_set_cancel_function(struct kaiocb *job, aio_cancel_fn_t *func)
 {
 	struct kaioinfo *ki = job->userproc->p_aioinfo;
 	bool installed;
 
 	AIO_LOCK(ki);
 	installed = aio_set_cancel_function_locked(job, func);
 	AIO_UNLOCK(ki);
 	return (installed);
 }
 
 /*
  * Record the final status and error of a job and mark it finished.
  *
  * Unless the job is still being queued (KAIOCB_QUEUEING) or is in the
  * middle of a cancellation (KAIOCB_CANCELLING), it is also taken off the
  * process's job queue and passed to aio_bio_done_notify().  Completing a
  * job twice trips an assertion.
  */
 void
 aio_complete(struct kaiocb *job, long status, int error)
 {
 	struct proc *userp = job->userproc;
 	struct kaioinfo *ki = userp->p_aioinfo;
 	const int deferred = KAIOCB_QUEUEING | KAIOCB_CANCELLING;
 
 	job->uaiocb._aiocb_private.error = error;
 	job->uaiocb._aiocb_private.status = status;
 
 	AIO_LOCK(ki);
 	KASSERT((job->jobflags & KAIOCB_FINISHED) == 0,
 	    ("duplicate aio_complete"));
 	job->jobflags |= KAIOCB_FINISHED;
 	if ((job->jobflags & deferred) == 0) {
 		TAILQ_REMOVE(&ki->kaio_jobqueue, job, plist);
 		aio_bio_done_notify(userp, job);
 	}
 	AIO_UNLOCK(ki);
 }
 
 /*
  * Complete a job as cancelled: status -1, error ECANCELED.
  */
 void
 aio_cancel(struct kaiocb *job)
 {
 	const long status = -1;
 
 	aio_complete(job, status, ECANCELED);
 }
 
 /*
  * Switch the calling thread to the address space of the process that
  * submitted the job.
  */
 void
 aio_switch_vmspace(struct kaiocb *job)
 {
 	struct vmspace *uservm;
 
 	uservm = job->userproc->p_vmspace;
 	vmspace_switch_aio(uservm);
 }
 
 /*
  * The AIO daemon, most of the actual work is done in aio_process_*,
  * but the setup (and address space mgmt) is done in this routine.
  *
  * _id carries the daemon's unit number (an integer cast to a pointer by
  * aio_newproc()); it is released with free_unr() when the daemon exits.
  * The main loop runs with aio_job_mtx held, dropping it only around job
  * handlers and address-space switches.
  */
 static void
 aio_daemon(void *_id)
 {
 	struct kaiocb *job;
 	struct aioproc *aiop;
 	struct kaioinfo *ki;
 	struct proc *p;
 	struct vmspace *myvm;
 	struct thread *td = curthread;
 	int id = (intptr_t)_id;
 
 	/*
 	 * Grab an extra reference on the daemon's vmspace so that it
 	 * doesn't get freed by jobs that switch to a different
 	 * vmspace.
 	 */
 	p = td->td_proc;
 	myvm = vmspace_acquire_ref(p);
 
 	KASSERT(p->p_textvp == NULL, ("kthread has a textvp"));
 
 	/*
 	 * Allocate and ready the aio control info.  There is one aiop structure
 	 * per daemon.
 	 */
 	aiop = uma_zalloc(aiop_zone, M_WAITOK);
 	aiop->aioproc = p;
 	aiop->aioprocflags = 0;
 
 	/*
 	 * Wakeup parent process.  (Parent sleeps to keep from blasting away
 	 * and creating too many daemons.)
 	 */
 	sema_post(&aio_newproc_sem);
 
 	mtx_lock(&aio_job_mtx);
 	for (;;) {
 		/*
 		 * Take daemon off of free queue
 		 */
 		if (aiop->aioprocflags & AIOP_FREE) {
 			TAILQ_REMOVE(&aio_freeproc, aiop, list);
 			aiop->aioprocflags &= ~AIOP_FREE;
 		}
 
 		/*
 		 * Check for jobs.
 		 */
 		while ((job = aio_selectjob(aiop)) != NULL) {
 			mtx_unlock(&aio_job_mtx);
 
 			/*
 			 * Save the owner's kaioinfo before running the
 			 * handler; the job pointer is not used afterwards.
 			 */
 			ki = job->userproc->p_aioinfo;
 			job->handle_fn(job);
 
 			mtx_lock(&aio_job_mtx);
 			/* Decrement the active job count. */
 			ki->kaio_active_count--;
 		}
 
 		/*
 		 * Disconnect from user address space.
 		 */
 		if (p->p_vmspace != myvm) {
 			mtx_unlock(&aio_job_mtx);
 			vmspace_switch_aio(myvm);
 			mtx_lock(&aio_job_mtx);
 			/*
 			 * We have to restart to avoid race, we only sleep if
 			 * no job can be selected.
 			 */
 			continue;
 		}
 
 		mtx_assert(&aio_job_mtx, MA_OWNED);
 
 		/* Nothing to do: advertise this daemon as idle. */
 		TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list);
 		aiop->aioprocflags |= AIOP_FREE;
 
 		/*
 		 * If daemon is inactive for a long time, allow it to exit,
 		 * thereby freeing resources.
 		 *
 		 * The daemon only leaves the loop when the sleep timed out,
 		 * no jobs are queued, it is still marked free, and more
 		 * daemons exist than target_aio_procs.
 		 */
 		if (msleep(p, &aio_job_mtx, PRIBIO, "aiordy",
 		    aiod_lifetime) == EWOULDBLOCK && TAILQ_EMPTY(&aio_jobs) &&
 		    (aiop->aioprocflags & AIOP_FREE) &&
 		    num_aio_procs > target_aio_procs)
 			break;
 	}
 	/* Exit path: unregister the daemon and release its resources. */
 	TAILQ_REMOVE(&aio_freeproc, aiop, list);
 	num_aio_procs--;
 	mtx_unlock(&aio_job_mtx);
 	uma_zfree(aiop_zone, aiop);
 	free_unr(aiod_unr, id);
 	vmspace_free(myvm);
 
 	KASSERT(p->p_vmspace == myvm,
 	    ("AIOD: bad vmspace for exiting daemon"));
 	KASSERT(refcount_load(&myvm->vm_refcnt) > 1,
 	    ("AIOD: bad vm refcnt for exiting daemon: %d",
 	    refcount_load(&myvm->vm_refcnt)));
 	kproc_exit(0);
 }
 
 /*
  * Create a new AIO daemon. This is mostly a kernel-thread fork routine. The
  * AIO daemon modifies its environment itself.
  */
 static int
 aio_newproc(int *start)
 {
 	int error;
 	struct proc *p;
 	int id;
 
 	id = alloc_unr(aiod_unr);
 	error = kproc_create(aio_daemon, (void *)(intptr_t)id, &p,
 		RFNOWAIT, 0, "aiod%d", id);
 	if (error == 0) {
 		/*
 		 * Wait until daemon is started.
 		 */
 		sema_wait(&aio_newproc_sem);
 		mtx_lock(&aio_job_mtx);
 		num_aio_procs++;
 		if (start != NULL)
 			(*start)--;
 		mtx_unlock(&aio_job_mtx);
 	} else {
 		free_unr(aiod_unr, id);
 	}
 	return (error);
 }
 
 /*
  * Try the high-performance, low-overhead bio method for eligible
  * VCHR devices.  This method doesn't use an aio helper thread, and
  * thus has very low overhead.
  *
  * Assumes that the caller, aio_aqueue(), has incremented the file
  * structure's reference count, preventing its deallocation for the
  * duration of this call.
  */
 static int
 aio_qbio(struct proc *p, struct kaiocb *job)
 {
 	struct aiocb *cb;
 	struct file *fp;
 	struct buf *pbuf;
 	struct vnode *vp;
 	struct cdevsw *csw;
 	struct cdev *dev;
 	struct kaioinfo *ki;
 	struct bio **bios = NULL;
 	off_t offset;
 	int bio_cmd, error, i, iovcnt, opcode, poff, ref;
 	vm_prot_t prot;
 	bool use_unmapped;
 
 	cb = &job->uaiocb;
 	fp = job->fd_file;
 	opcode = cb->aio_lio_opcode;
 
 	if (!(opcode == LIO_WRITE || opcode == LIO_WRITEV ||
 	    opcode == LIO_READ || opcode == LIO_READV))
 		return (-1);
 	if (fp == NULL || fp->f_type != DTYPE_VNODE)
 		return (-1);
 
 	vp = fp->f_vnode;
 	if (vp->v_type != VCHR)
 		return (-1);
 	if (vp->v_bufobj.bo_bsize == 0)
 		return (-1);
 
 	bio_cmd = opcode == LIO_WRITE || opcode == LIO_WRITEV ? BIO_WRITE :
 	    BIO_READ;
 	iovcnt = job->uiop->uio_iovcnt;
 	if (iovcnt > max_buf_aio)
 		return (-1);
 	for (i = 0; i < iovcnt; i++) {
 		if (job->uiop->uio_iov[i].iov_len % vp->v_bufobj.bo_bsize != 0)
 			return (-1);
 		if (job->uiop->uio_iov[i].iov_len > maxphys) {
 			error = -1;
 			return (-1);
 		}
 	}
 	offset = cb->aio_offset;
 
 	ref = 0;
 	csw = devvn_refthread(vp, &dev, &ref);
 	if (csw == NULL)
 		return (ENXIO);
 
 	if ((csw->d_flags & D_DISK) == 0) {
 		error = -1;
 		goto unref;
 	}
 	if (job->uiop->uio_resid > dev->si_iosize_max) {
 		error = -1;
 		goto unref;
 	}
 
 	ki = p->p_aioinfo;
 	job->error = 0;
 
 	use_unmapped = (dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed;
 	if (!use_unmapped) {
 		AIO_LOCK(ki);
 		if (ki->kaio_buffer_count + iovcnt > max_buf_aio) {
 			AIO_UNLOCK(ki);
 			error = EAGAIN;
 			goto unref;
 		}
 		ki->kaio_buffer_count += iovcnt;
 		AIO_UNLOCK(ki);
 	}
 
 	bios = malloc(sizeof(struct bio *) * iovcnt, M_TEMP, M_WAITOK);
 	atomic_store_int(&job->nbio, iovcnt);
 	for (i = 0; i < iovcnt; i++) {
 		struct vm_page** pages;
 		struct bio *bp;
 		void *buf;
 		size_t nbytes;
 		int npages;
 
 		buf = job->uiop->uio_iov[i].iov_base;
 		nbytes = job->uiop->uio_iov[i].iov_len;
 
 		bios[i] = g_alloc_bio();
 		bp = bios[i];
 
 		poff = (vm_offset_t)buf & PAGE_MASK;
 		if (use_unmapped) {
 			pbuf = NULL;
 			pages = malloc(sizeof(vm_page_t) * (atop(round_page(
 			    nbytes)) + 1), M_TEMP, M_WAITOK | M_ZERO);
 		} else {
 			pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
 			BUF_KERNPROC(pbuf);
 			pages = pbuf->b_pages;
 		}
 
 		bp->bio_length = nbytes;
 		bp->bio_bcount = nbytes;
 		bp->bio_done = aio_biowakeup;
 		bp->bio_offset = offset;
 		bp->bio_cmd = bio_cmd;
 		bp->bio_dev = dev;
 		bp->bio_caller1 = job;
 		bp->bio_caller2 = pbuf;
 
 		prot = VM_PROT_READ;
 		if (opcode == LIO_READ || opcode == LIO_READV)
 			prot |= VM_PROT_WRITE;	/* Less backwards than it looks */
 		npages = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
 		    (vm_offset_t)buf, bp->bio_length, prot, pages,
 		    atop(maxphys) + 1);
 		if (npages < 0) {
 			if (pbuf != NULL)
 				uma_zfree(pbuf_zone, pbuf);
 			else
 				free(pages, M_TEMP);
 			error = EFAULT;
 			g_destroy_bio(bp);
 			i--;
 			goto destroy_bios;
 		}
 		if (pbuf != NULL) {
 			pmap_qenter((vm_offset_t)pbuf->b_data, pages, npages);
 			bp->bio_data = pbuf->b_data + poff;
 			pbuf->b_npages = npages;
 			atomic_add_int(&num_buf_aio, 1);
 		} else {
 			bp->bio_ma = pages;
 			bp->bio_ma_n = npages;
 			bp->bio_ma_offset = poff;
 			bp->bio_data = unmapped_buf;
 			bp->bio_flags |= BIO_UNMAPPED;
 			atomic_add_int(&num_unmapped_aio, 1);
 		}
 
 		offset += nbytes;
 	}
 
 	/* Perform transfer. */
 	for (i = 0; i < iovcnt; i++)
 		csw->d_strategy(bios[i]);
 	free(bios, M_TEMP);
 
 	dev_relthread(dev, ref);
 	return (0);
 
 destroy_bios:
 	for (; i >= 0; i--)
 		aio_biocleanup(bios[i]);
 	free(bios, M_TEMP);
 unref:
 	dev_relthread(dev, ref);
 	return (error);
 }
 
 #ifdef COMPAT_FREEBSD6
 /*
  * Translate an old-layout sigevent into the current structure.
  *
  * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are supported by AIO
  * with the old sigevent structure; any other notification type yields
  * EINVAL.  The notification type itself is always copied to "nsig".
  */
 static int
 convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig)
 {
 
 	nsig->sigev_notify = osig->sigev_notify;
 	if (nsig->sigev_notify == SIGEV_SIGNAL) {
 		nsig->sigev_signo = osig->__sigev_u.__sigev_signo;
 	} else if (nsig->sigev_notify == SIGEV_KEVENT) {
 		nsig->sigev_notify_kqueue =
 		    osig->__sigev_u.__sigev_notify_kqueue;
 		nsig->sigev_value.sival_ptr = osig->sigev_value.sival_ptr;
 	} else if (nsig->sigev_notify != SIGEV_NONE) {
 		return (EINVAL);
 	}
 	return (0);
 }
 
 /*
  * Copy-in routine for control blocks that use the old sigevent layout.
  *
  * The kernel aiocb is zeroed, sizeof(struct oaiocb) bytes are copied in
  * from userspace over it, and the embedded old-style sigevent is then
  * converted in place.  "type" is unused.  There is no aio_iov to copy
  * in, because it did not exist in FreeBSD 6.
  */
 static int
 aiocb_copyin_old_sigevent(struct aiocb *ujob, struct kaiocb *kjob,
     int type __unused)
 {
 	struct aiocb *kcb;
 	struct oaiocb *ocb;
 	int error;
 
 	kcb = &kjob->uaiocb;
 	bzero(kcb, sizeof(struct aiocb));
 	error = copyin(ujob, kcb, sizeof(struct oaiocb));
 	if (error != 0)
 		return (error);
 	ocb = (struct oaiocb *)kcb;
 	return (convert_old_sigevent(&ocb->aio_sigevent, &kcb->aio_sigevent));
 }
 #endif
 
 static int
 aiocb_copyin(struct aiocb *ujob, struct kaiocb *kjob, int type)
 {
 	struct aiocb *kcb = &kjob->uaiocb;
 	int error;
 
 	error = copyin(ujob, kcb, sizeof(struct aiocb));
 	if (error)
 		return (error);
 	if (type == LIO_READV || type == LIO_WRITEV) {
 		/* malloc a uio and copy in the iovec */
 		error = copyinuio(__DEVOLATILE(struct iovec*, kcb->aio_iov),
 		    kcb->aio_iovcnt, &kjob->uiop);
 	}
 
 	return (error);
 }
 
 /* Read the private status word of a userspace aiocb with fuword(). */
 static long
 aiocb_fetch_status(struct aiocb *ujob)
 {
 	long status;
 
 	status = fuword(&ujob->_aiocb_private.status);
 	return (status);
 }
 
 static long
 aiocb_fetch_error(struct aiocb *ujob)
 {
 
 	return (fuword(&ujob->_aiocb_private.error));
 }
 
 /* Write "status" into a userspace aiocb's private status word. */
 static int
 aiocb_store_status(struct aiocb *ujob, long status)
 {
 	int rv;
 
 	rv = suword(&ujob->_aiocb_private.status, status);
 	return (rv);
 }
 
 static int
 aiocb_store_error(struct aiocb *ujob, long error)
 {
 
 	return (suword(&ujob->_aiocb_private.error, error));
 }
 
 /* Write "jobref" into a userspace aiocb's private kernelinfo word. */
 static int
 aiocb_store_kernelinfo(struct aiocb *ujob, long jobref)
 {
 	int rv;
 
 	rv = suword(&ujob->_aiocb_private.kernelinfo, jobref);
 	return (rv);
 }
 
 /* Store the user address "ujob" into the userspace slot "ujobp". */
 static int
 aiocb_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob)
 {
 	long uaddr;
 
 	uaddr = (long)ujob;
 	return (suword(ujobp, uaddr));
 }
 
 /*
  * Accessors for native userspace control blocks: copy-in, plus the
  * fetch/store helpers for the private status, error and kernelinfo
  * words and for an aiocb pointer slot.
  */
 static struct aiocb_ops aiocb_ops = {
 	.aio_copyin = aiocb_copyin,
 	.fetch_error = aiocb_fetch_error,
 	.fetch_status = aiocb_fetch_status,
 	.store_error = aiocb_store_error,
 	.store_status = aiocb_store_status,
 	.store_kernelinfo = aiocb_store_kernelinfo,
 	.store_aiocb = aiocb_store_aiocb,
 };
 
 #ifdef COMPAT_FREEBSD6
 /*
  * Accessors for control blocks that carry the old sigevent layout:
  * identical to the native table except for the copy-in routine.
  */
 static struct aiocb_ops aiocb_ops_osigevent = {
 	.aio_copyin = aiocb_copyin_old_sigevent,
 	.fetch_error = aiocb_fetch_error,
 	.fetch_status = aiocb_fetch_status,
 	.store_error = aiocb_store_error,
 	.store_status = aiocb_store_status,
 	.store_kernelinfo = aiocb_store_kernelinfo,
 	.store_aiocb = aiocb_store_aiocb,
 };
 #endif
 
 /*
  * Queue a new AIO request.  Choosing either the threaded or direct bio VCHR
  * technique is done in this code.
  *
  * td    submitting thread
  * ujob  userspace address of the control block
  * lj    lio_listio() batch this job belongs to, or NULL
  * type  opcode to force on the job; LIO_NOP means "use the opcode stored
  *       in the user's control block"
  * ops   accessors used to read and write the userspace control block
  *
  * Returns 0 once the job has been queued (or, for an LIO_NOP job, once it
  * has been validated and discarded).  On failure an errno value is
  * returned and also stored in the user's control block.
  */
 int
 aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
     int type, struct aiocb_ops *ops)
 {
 	struct proc *p = td->td_proc;
 	struct file *fp = NULL;
 	struct kaiocb *job;
 	struct kaioinfo *ki;
 	struct kevent kev;
 	int opcode;
 	int error;
 	int fd, kqfd;
 	int jid;
 	u_short evflags;
 
 	if (p->p_aioinfo == NULL)
 		aio_init_aioinfo(p);
 
 	ki = p->p_aioinfo;
 
 	/* Mark the user's control block as "not queued" until we succeed. */
 	ops->store_status(ujob, -1);
 	ops->store_error(ujob, 0);
 	ops->store_kernelinfo(ujob, -1);
 
 	if (num_queue_count >= max_queue_count ||
 	    ki->kaio_count >= max_aio_queue_per_proc) {
 		error = EAGAIN;
 		goto err1;
 	}
 
 	job = uma_zalloc(aiocb_zone, M_WAITOK | M_ZERO);
 	knlist_init_mtx(&job->klist, AIO_MTX(ki));
 
 	error = ops->aio_copyin(ujob, job, type);
 	if (error)
 		goto err2;
 
 	if (job->uaiocb.aio_nbytes > IOSIZE_MAX) {
 		error = EINVAL;
 		goto err2;
 	}
 
 	if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT &&
 	    job->uaiocb.aio_sigevent.sigev_notify != SIGEV_SIGNAL &&
 	    job->uaiocb.aio_sigevent.sigev_notify != SIGEV_THREAD_ID &&
 	    job->uaiocb.aio_sigevent.sigev_notify != SIGEV_NONE) {
 		error = EINVAL;
 		goto err2;
 	}
 
 	if ((job->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL ||
 	     job->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) &&
 		!_SIG_VALID(job->uaiocb.aio_sigevent.sigev_signo)) {
 		error = EINVAL;
 		goto err2;
 	}
 
 	ksiginfo_init(&job->ksi);
 
 	/* Save userspace address of the job info. */
 	job->ujob = ujob;
 
 	/* Get the opcode. */
 	if (type != LIO_NOP)
 		job->uaiocb.aio_lio_opcode = type;
 	opcode = job->uaiocb.aio_lio_opcode;
 
 	/*
 	 * Validate the opcode and fetch the file object for the specified
 	 * file descriptor.
 	 *
 	 * XXXRW: Moved the opcode validation up here so that we don't
 	 * retrieve a file descriptor without knowing what the capability
 	 * should be.
 	 */
 	fd = job->uaiocb.aio_fildes;
 	switch (opcode) {
 	case LIO_WRITE:
 	case LIO_WRITEV:
 		error = fget_write(td, fd, &cap_pwrite_rights, &fp);
 		break;
 	case LIO_READ:
 	case LIO_READV:
 		error = fget_read(td, fd, &cap_pread_rights, &fp);
 		break;
 	case LIO_SYNC:
+	case LIO_DSYNC:
 		error = fget(td, fd, &cap_fsync_rights, &fp);
 		break;
 	case LIO_MLOCK:
 		break;
 	case LIO_NOP:
 		error = fget(td, fd, &cap_no_rights, &fp);
 		break;
 	default:
 		error = EINVAL;
 	}
 	if (error)
 		goto err3;
 
-	if (opcode == LIO_SYNC && fp->f_vnode == NULL) {
+	if ((opcode == LIO_SYNC || opcode == LIO_DSYNC) && fp->f_vnode == NULL) {
 		error = EINVAL;
 		goto err3;
 	}
 
 	/* Negative offsets are only accepted on character devices. */
 	if ((opcode == LIO_READ || opcode == LIO_READV ||
 	    opcode == LIO_WRITE || opcode == LIO_WRITEV) &&
 	    job->uaiocb.aio_offset < 0 &&
 	    (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) {
 		error = EINVAL;
 		goto err3;
 	}
 
 	job->fd_file = fp;
 
 	mtx_lock(&aio_job_mtx);
 	jid = jobrefid++;
 	job->seqno = jobseqno++;
 	mtx_unlock(&aio_job_mtx);
 	error = ops->store_kernelinfo(ujob, jid);
 	if (error) {
 		error = EINVAL;
 		goto err3;
 	}
 	job->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jid;
 
 	if (opcode == LIO_NOP) {
 		/* Nothing to do: drop the file and discard the job. */
 		fdrop(fp, td);
 		MPASS(job->uiop == &job->uio || job->uiop == NULL);
 		uma_zfree(aiocb_zone, job);
 		return (0);
 	}
 
 	if (job->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT)
 		goto no_kqueue;
 	evflags = job->uaiocb.aio_sigevent.sigev_notify_kevent_flags;
 	if ((evflags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0) {
 		error = EINVAL;
 		goto err3;
 	}
 	kqfd = job->uaiocb.aio_sigevent.sigev_notify_kqueue;
 	memset(&kev, 0, sizeof(kev));
 	kev.ident = (uintptr_t)job->ujob;
 	kev.filter = EVFILT_AIO;
 	kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1 | evflags;
 	kev.data = (intptr_t)job;
 	kev.udata = job->uaiocb.aio_sigevent.sigev_value.sival_ptr;
 	error = kqfd_register(kqfd, &kev, td, M_WAITOK);
 	if (error)
 		goto err3;
 
 no_kqueue:
 
 	ops->store_error(ujob, EINPROGRESS);
 	job->uaiocb._aiocb_private.error = EINPROGRESS;
 	job->userproc = p;
 	/* From here on the job holds a credential reference (see err4). */
 	job->cred = crhold(td->td_ucred);
 	job->jobflags = KAIOCB_QUEUEING;
 	job->lio = lj;
 
 	switch (opcode) {
 	case LIO_READV:
 	case LIO_WRITEV:
 		/* Use the uio copied in by aio_copyin */
 		MPASS(job->uiop != &job->uio && job->uiop != NULL);
 		break;
 	case LIO_READ:
 	case LIO_WRITE:
 		/* Setup the inline uio */
 		job->iov[0].iov_base = (void *)(uintptr_t)job->uaiocb.aio_buf;
 		job->iov[0].iov_len = job->uaiocb.aio_nbytes;
 		job->uio.uio_iov = job->iov;
 		job->uio.uio_iovcnt = 1;
 		job->uio.uio_resid = job->uaiocb.aio_nbytes;
 		job->uio.uio_segflg = UIO_USERSPACE;
 		/* FALLTHROUGH */
 	default:
 		job->uiop = &job->uio;
 		break;
 	}
 	switch (opcode) {
 	case LIO_READ:
 	case LIO_READV:
 		job->uiop->uio_rw = UIO_READ;
 		break;
 	case LIO_WRITE:
 	case LIO_WRITEV:
 		job->uiop->uio_rw = UIO_WRITE;
 		break;
 	}
 	job->uiop->uio_offset = job->uaiocb.aio_offset;
 	job->uiop->uio_td = td;
 
 	if (opcode == LIO_MLOCK) {
 		aio_schedule(job, aio_process_mlock);
 		error = 0;
 	} else if (fp->f_ops->fo_aio_queue == NULL)
 		error = aio_queue_file(fp, job);
 	else
 		error = fo_aio_queue(fp, job);
 	if (error)
 		goto err4;
 
 	AIO_LOCK(ki);
 	job->jobflags &= ~KAIOCB_QUEUEING;
 	TAILQ_INSERT_TAIL(&ki->kaio_all, job, allist);
 	ki->kaio_count++;
 	if (lj)
 		lj->lioj_count++;
 	atomic_add_int(&num_queue_count, 1);
 	if (job->jobflags & KAIOCB_FINISHED) {
 		/*
 		 * The queue callback completed the request synchronously.
 		 * The bulk of the completion is deferred in that case
 		 * until this point.
 		 */
 		aio_bio_done_notify(p, job);
 	} else
 		TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, job, plist);
 	AIO_UNLOCK(ki);
 	return (0);
 
 err4:
 	/*
 	 * The queue callback failed after the credential reference was
 	 * taken; release it here because the job is freed directly below.
 	 */
 	crfree(job->cred);
 err3:
 	if (fp)
 		fdrop(fp, td);
 	knlist_delete(&job->klist, curthread, 0);
 err2:
 	if (job->uiop != &job->uio)
 		free(job->uiop, M_IOV);
 	uma_zfree(aiocb_zone, job);
 err1:
 	ops->store_error(ujob, error);
 	return (error);
 }
 
 static void
 aio_cancel_daemon_job(struct kaiocb *job)
 {
 
 	mtx_lock(&aio_job_mtx);
 	if (!aio_cancel_cleared(job))
 		TAILQ_REMOVE(&aio_jobs, job, list);
 	mtx_unlock(&aio_job_mtx);
 	aio_cancel(job);
 }
 
 /*
  * Hand a job to the AIO daemons, to be run by "func".
  *
  * Under aio_job_mtx the daemon-queue cancel callback is installed.  If
  * that fails because the job was already cancelled, the job is completed
  * as cancelled after the mutex is dropped.  Otherwise the handler is
  * recorded, the job is appended to aio_jobs and a daemon is kicked.
  */
 void
 aio_schedule(struct kaiocb *job, aio_handle_fn_t *func)
 {
 	bool armed;
 
 	mtx_lock(&aio_job_mtx);
 	armed = aio_set_cancel_function(job, aio_cancel_daemon_job);
 	if (armed) {
 		job->handle_fn = func;
 		TAILQ_INSERT_TAIL(&aio_jobs, job, list);
 		aio_kick_nowait(job->userproc);
 	}
 	mtx_unlock(&aio_job_mtx);
 	if (!armed)
 		aio_cancel(job);
 }
 
 static void
 aio_cancel_sync(struct kaiocb *job)
 {
 	struct kaioinfo *ki;
 
 	ki = job->userproc->p_aioinfo;
 	AIO_LOCK(ki);
 	if (!aio_cancel_cleared(job))
 		TAILQ_REMOVE(&ki->kaio_syncqueue, job, list);
 	AIO_UNLOCK(ki);
 	aio_cancel(job);
 }
 
 /*
  * Queue a job on a file using the generic mechanisms in this file.
  *
  * The direct bio path (aio_qbio()) is tried first; any non-negative
  * result from it is returned as-is.  Otherwise the request is "safe" only
  * for a regular file or directory vnode whose mount is absent or flagged
  * MNT_LOCAL; unsafe requests are rejected with EOPNOTSUPP unless
  * enable_aio_unsafe is set.  Read/write jobs are then handed to the AIO
  * daemons.  Sync jobs are scheduled immediately when no earlier non-sync
  * job for the same file is on the process's job queue, and are otherwise
  * parked on the sync queue until those jobs finish.  Other opcodes yield
  * EINVAL.
  */
 int
 aio_queue_file(struct file *fp, struct kaiocb *job)
 {
 	struct kaioinfo *ki;
 	struct kaiocb *job2;
 	struct vnode *vp;
 	struct mount *mp;
 	int error;
 	bool safe;
 
 	ki = job->userproc->p_aioinfo;
 	error = aio_qbio(job->userproc, job);
 	/* A negative value means the bio path declined the request. */
 	if (error >= 0)
 		return (error);
 	safe = false;
 	if (fp->f_type == DTYPE_VNODE) {
 		vp = fp->f_vnode;
 		if (vp->v_type == VREG || vp->v_type == VDIR) {
 			mp = fp->f_vnode->v_mount;
 			if (mp == NULL || (mp->mnt_flag & MNT_LOCAL) != 0)
 				safe = true;
 		}
 	}
 	if (!(safe || enable_aio_unsafe)) {
 		counted_warning(&unsafe_warningcnt,
 		    "is attempting to use unsafe AIO requests");
 		return (EOPNOTSUPP);
 	}
 
 	switch (job->uaiocb.aio_lio_opcode) {
 	case LIO_READ:
 	case LIO_READV:
 	case LIO_WRITE:
 	case LIO_WRITEV:
 		aio_schedule(job, aio_process_rw);
 		error = 0;
 		break;
 	case LIO_SYNC:
+	case LIO_DSYNC:
 		AIO_LOCK(ki);
 		/*
 		 * Count the earlier non-sync jobs on this file that are
 		 * still queued and flag each so that its completion checks
 		 * the sync queue.
 		 */
 		TAILQ_FOREACH(job2, &ki->kaio_jobqueue, plist) {
 			if (job2->fd_file == job->fd_file &&
 			    job2->uaiocb.aio_lio_opcode != LIO_SYNC &&
+			    job2->uaiocb.aio_lio_opcode != LIO_DSYNC &&
 			    job2->seqno < job->seqno) {
 				job2->jobflags |= KAIOCB_CHECKSYNC;
 				job->pending++;
 			}
 		}
 		if (job->pending != 0) {
 			/* Park the job; it was cancelled if this fails. */
 			if (!aio_set_cancel_function_locked(job,
 				aio_cancel_sync)) {
 				AIO_UNLOCK(ki);
 				aio_cancel(job);
 				return (0);
 			}
 			TAILQ_INSERT_TAIL(&ki->kaio_syncqueue, job, list);
 			AIO_UNLOCK(ki);
 			return (0);
 		}
 		AIO_UNLOCK(ki);
 		aio_schedule(job, aio_process_sync);
 		error = 0;
 		break;
 	default:
 		error = EINVAL;
 	}
 	return (error);
 }
 
 /*
  * Get a daemon working on newly queued jobs without sleeping; the caller
  * holds aio_job_mtx.  An idle daemon is taken off the free list and woken
  * if one exists.  Otherwise, when both the global and the per-process
  * daemon limits allow it, the process's kick task is enqueued.
  */
 static void
 aio_kick_nowait(struct proc *userp)
 {
 	struct kaioinfo *ki;
 	struct aioproc *idle;
 
 	ki = userp->p_aioinfo;
 	mtx_assert(&aio_job_mtx, MA_OWNED);
 
 	idle = TAILQ_FIRST(&aio_freeproc);
 	if (idle != NULL) {
 		TAILQ_REMOVE(&aio_freeproc, idle, list);
 		idle->aioprocflags &= ~AIOP_FREE;
 		wakeup(idle->aioproc);
 		return;
 	}
 	if (num_aio_resv_start + num_aio_procs < max_aio_procs &&
 	    ki->kaio_active_count + num_aio_resv_start < max_aio_per_proc)
 		taskqueue_enqueue(taskqueue_aiod_kick, &ki->kaio_task);
 }
 
 /*
  * Get a daemon working on queued jobs, creating one if necessary; the
  * caller holds aio_job_mtx, which is dropped around daemon creation.
  *
  * Returns 0 when an idle daemon was woken or a new one was started, and
  * -1 when neither was possible because a daemon limit has been reached.
  * A failed creation attempt undoes its reservation and retries from the
  * top.
  */
 static int
 aio_kick(struct proc *userp)
 {
 	struct kaioinfo *ki = userp->p_aioinfo;
 	struct aioproc *idle;
 	int error;
 
 	mtx_assert(&aio_job_mtx, MA_OWNED);
 	for (;;) {
 		idle = TAILQ_FIRST(&aio_freeproc);
 		if (idle != NULL) {
 			TAILQ_REMOVE(&aio_freeproc, idle, list);
 			idle->aioprocflags &= ~AIOP_FREE;
 			wakeup(idle->aioproc);
 			return (0);
 		}
 		if (!(num_aio_resv_start + num_aio_procs < max_aio_procs &&
 		    ki->kaio_active_count + num_aio_resv_start <
 		    max_aio_per_proc))
 			return (-1);
 
 		/* Reserve a slot before dropping the mutex to create it. */
 		num_aio_resv_start++;
 		mtx_unlock(&aio_job_mtx);
 		error = aio_newproc(&num_aio_resv_start);
 		mtx_lock(&aio_job_mtx);
 		if (error == 0)
 			return (0);
 		num_aio_resv_start--;
 	}
 }
 
 /*
  * Task handler that calls aio_kick() once per pending kick request for
  * the process passed as "context", stopping early as soon as aio_kick()
  * reports failure.  Runs with aio_job_mtx held.
  */
 static void
 aio_kick_helper(void *context, int pending)
 {
 	struct proc *userp;
 
 	userp = context;
 	mtx_lock(&aio_job_mtx);
 	for (; pending > 0; pending--) {
 		if (aio_kick(userp) != 0)
 			break;
 	}
 	mtx_unlock(&aio_job_mtx);
 }
 
 /*
  * Support the aio_return system call, as a side-effect, kernel resources are
  * released.
  */
 static int
 kern_aio_return(struct thread *td, struct aiocb *ujob, struct aiocb_ops *ops)
 {
 	struct proc *p = td->td_proc;
 	struct kaiocb *job;
 	struct kaioinfo *ki;
 	long status, error;
 
 	ki = p->p_aioinfo;
 	if (ki == NULL)
 		return (EINVAL);
 	AIO_LOCK(ki);
 	TAILQ_FOREACH(job, &ki->kaio_done, plist) {
 		if (job->ujob == ujob)
 			break;
 	}
 	if (job != NULL) {
 		MPASS(job->jobflags & KAIOCB_FINISHED);
 		status = job->uaiocb._aiocb_private.status;
 		error = job->uaiocb._aiocb_private.error;
 		td->td_retval[0] = status;
 		td->td_ru.ru_oublock += job->outblock;
 		td->td_ru.ru_inblock += job->inblock;
 		td->td_ru.ru_msgsnd += job->msgsnd;
 		td->td_ru.ru_msgrcv += job->msgrcv;
 		aio_free_entry(job);
 		AIO_UNLOCK(ki);
 		ops->store_error(ujob, error);
 		ops->store_status(ujob, status);
 	} else {
 		error = EINVAL;
 		AIO_UNLOCK(ki);
 	}
 	return (error);
 }
 
 int
 sys_aio_return(struct thread *td, struct aio_return_args *uap)
 {
 
 	return (kern_aio_return(td, uap->aiocbp, &aiocb_ops));
 }
 
 /*
  * Allow a process to wakeup when any of the I/O requests are completed.
  */
 static int
 kern_aio_suspend(struct thread *td, int njoblist, struct aiocb **ujoblist,
     struct timespec *ts)
 {
 	struct proc *p = td->td_proc;
 	struct timeval atv;
 	struct kaioinfo *ki;
 	struct kaiocb *firstjob, *job;
 	int error, i, timo;
 
 	timo = 0;
 	if (ts) {
 		if (ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
 			return (EINVAL);
 
 		TIMESPEC_TO_TIMEVAL(&atv, ts);
 		if (itimerfix(&atv))
 			return (EINVAL);
 		timo = tvtohz(&atv);
 	}
 
 	ki = p->p_aioinfo;
 	if (ki == NULL)
 		return (EAGAIN);
 
 	if (njoblist == 0)
 		return (0);
 
 	AIO_LOCK(ki);
 	for (;;) {
 		firstjob = NULL;
 		error = 0;
 		TAILQ_FOREACH(job, &ki->kaio_all, allist) {
 			for (i = 0; i < njoblist; i++) {
 				if (job->ujob == ujoblist[i]) {
 					if (firstjob == NULL)
 						firstjob = job;
 					if (job->jobflags & KAIOCB_FINISHED)
 						goto RETURN;
 				}
 			}
 		}
 		/* All tasks were finished. */
 		if (firstjob == NULL)
 			break;
 
 		ki->kaio_flags |= KAIO_WAKEUP;
 		error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH,
 		    "aiospn", timo);
 		if (error == ERESTART)
 			error = EINTR;
 		if (error)
 			break;
 	}
 RETURN:
 	AIO_UNLOCK(ki);
 	return (error);
 }
 
 int
 sys_aio_suspend(struct thread *td, struct aio_suspend_args *uap)
 {
 	struct timespec ts, *tsp;
 	struct aiocb **ujoblist;
 	int error;
 
 	if (uap->nent < 0 || uap->nent > max_aio_queue_per_proc)
 		return (EINVAL);
 
 	if (uap->timeout) {
 		/* Get timespec struct. */
 		if ((error = copyin(uap->timeout, &ts, sizeof(ts))) != 0)
 			return (error);
 		tsp = &ts;
 	} else
 		tsp = NULL;
 
 	ujoblist = malloc(uap->nent * sizeof(ujoblist[0]), M_AIOS, M_WAITOK);
 	error = copyin(uap->aiocbp, ujoblist, uap->nent * sizeof(ujoblist[0]));
 	if (error == 0)
 		error = kern_aio_suspend(td, uap->nent, ujoblist, tsp);
 	free(ujoblist, M_AIOS);
 	return (error);
 }
 
 /*
  * aio_cancel cancels any non-bio aio operations not currently in progress.
  *
  * Every job on the process's job queue that targets uap->fd is considered
  * when uap->aiocbp is NULL; otherwise only the first job matching that
  * control block is.  The syscall itself fails only when the descriptor
  * lookup fails.  The outcome is reported through td_retval[0] as
  * AIO_CANCELED, AIO_NOTCANCELED or AIO_ALLDONE.  Requests on disk vnodes
  * are always reported as AIO_NOTCANCELED.
  */
 int
 sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap)
 {
 	struct proc *p = td->td_proc;
 	struct kaioinfo *ki;
 	struct kaiocb *kjob, *knext;
 	struct file *fp;
 	struct vnode *vp;
 	int error;
 	int ncancelled = 0;
 	int nrefused = 0;
 
 	/* Lookup file object. */
 	error = fget(td, uap->fd, &cap_no_rights, &fp);
 	if (error != 0)
 		return (error);
 
 	ki = p->p_aioinfo;
 	if (ki == NULL)
 		goto done;
 
 	if (fp->f_type == DTYPE_VNODE) {
 		vp = fp->f_vnode;
 		if (vn_isdisk(vp)) {
 			fdrop(fp, td);
 			td->td_retval[0] = AIO_NOTCANCELED;
 			return (0);
 		}
 	}
 
 	AIO_LOCK(ki);
 	TAILQ_FOREACH_SAFE(kjob, &ki->kaio_jobqueue, plist, knext) {
 		if (uap->fd != kjob->uaiocb.aio_fildes)
 			continue;
 		if (uap->aiocbp != NULL && uap->aiocbp != kjob->ujob)
 			continue;
 		if (aio_cancel_job(p, ki, kjob))
 			ncancelled++;
 		else
 			nrefused++;
 		/* A specific control block matches at most one job. */
 		if (uap->aiocbp != NULL)
 			break;
 	}
 	AIO_UNLOCK(ki);
 
 done:
 	fdrop(fp, td);
 
 	if (uap->aiocbp != NULL && ncancelled != 0)
 		td->td_retval[0] = AIO_CANCELED;
 	else if (nrefused != 0)
 		td->td_retval[0] = AIO_NOTCANCELED;
 	else if (ncancelled != 0)
 		td->td_retval[0] = AIO_CANCELED;
 	else
 		td->td_retval[0] = AIO_ALLDONE;
 
 	return (0);
 }
 
 /*
  * aio_error is implemented in the kernel level for compatibility purposes
  * only.  For a user mode async implementation, it would be best to do it in
  * a userland subroutine.
  *
  * The function itself always returns 0; the answer is delivered through
  * td_retval[0].  That is the job's recorded error if it has finished,
  * EINPROGRESS if it is still pending, and EINVAL if the process has no
  * AIO state.  For a control block the kernel does not know about, the
  * error stored in userspace is reported when its status word is -1, and
  * EINVAL otherwise.
  */
 static int
 kern_aio_error(struct thread *td, struct aiocb *ujob, struct aiocb_ops *ops)
 {
 	struct proc *p = td->td_proc;
 	struct kaioinfo *ki = p->p_aioinfo;
 	struct kaiocb *kjob;
 	int status;
 
 	if (ki == NULL) {
 		td->td_retval[0] = EINVAL;
 		return (0);
 	}
 
 	AIO_LOCK(ki);
 	TAILQ_FOREACH(kjob, &ki->kaio_all, allist) {
 		if (kjob->ujob == ujob)
 			break;
 	}
 	if (kjob != NULL) {
 		if ((kjob->jobflags & KAIOCB_FINISHED) == 0)
 			td->td_retval[0] = EINPROGRESS;
 		else
 			td->td_retval[0] =
 			    kjob->uaiocb._aiocb_private.error;
 		AIO_UNLOCK(ki);
 		return (0);
 	}
 	AIO_UNLOCK(ki);
 
 	/*
 	 * Hack for failure of aio_aqueue.
 	 */
 	status = ops->fetch_status(ujob);
 	if (status == -1)
 		td->td_retval[0] = ops->fetch_error(ujob);
 	else
 		td->td_retval[0] = EINVAL;
 	return (0);
 }
 
 int
 sys_aio_error(struct thread *td, struct aio_error_args *uap)
 {
 
 	return (kern_aio_error(td, uap->aiocbp, &aiocb_ops));
 }
 
 /* syscall - asynchronous read from a file (REALTIME) */
 #ifdef COMPAT_FREEBSD6
 /*
  * Compat entry: queue an LIO_READ request whose control block uses the
  * old sigevent layout.
  */
 int
 freebsd6_aio_read(struct thread *td, struct freebsd6_aio_read_args *uap)
 {
 	struct aiocb *ujob;
 
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (aio_aqueue(td, ujob, NULL, LIO_READ, &aiocb_ops_osigevent));
 }
 #endif
 
 int
 sys_aio_read(struct thread *td, struct aio_read_args *uap)
 {
 
 	return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READ, &aiocb_ops));
 }
 
 int
 sys_aio_readv(struct thread *td, struct aio_readv_args *uap)
 {
 
 	return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READV, &aiocb_ops));
 }
 
 /* syscall - asynchronous write to a file (REALTIME) */
 #ifdef COMPAT_FREEBSD6
 /*
  * Compat entry: queue an LIO_WRITE request whose control block uses the
  * old sigevent layout.
  */
 int
 freebsd6_aio_write(struct thread *td, struct freebsd6_aio_write_args *uap)
 {
 	struct aiocb *ujob;
 
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (aio_aqueue(td, ujob, NULL, LIO_WRITE, &aiocb_ops_osigevent));
 }
 #endif
 
 int
 sys_aio_write(struct thread *td, struct aio_write_args *uap)
 {
 
 	return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops));
 }
 
 int
 sys_aio_writev(struct thread *td, struct aio_writev_args *uap)
 {
 
 	return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITEV, &aiocb_ops));
 }
 
 int
 sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap)
 {
 
 	return (aio_aqueue(td, uap->aiocbp, NULL, LIO_MLOCK, &aiocb_ops));
 }
 
 static int
 kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list,
     struct aiocb **acb_list, int nent, struct sigevent *sig,
     struct aiocb_ops *ops)
 {
 	struct proc *p = td->td_proc;
 	struct aiocb *job;
 	struct kaioinfo *ki;
 	struct aioliojob *lj;
 	struct kevent kev;
 	int error;
 	int nagain, nerror;
 	int i;
 
 	if ((mode != LIO_NOWAIT) && (mode != LIO_WAIT))
 		return (EINVAL);
 
 	if (nent < 0 || nent > max_aio_queue_per_proc)
 		return (EINVAL);
 
 	if (p->p_aioinfo == NULL)
 		aio_init_aioinfo(p);
 
 	ki = p->p_aioinfo;
 
 	lj = uma_zalloc(aiolio_zone, M_WAITOK);
 	lj->lioj_flags = 0;
 	lj->lioj_count = 0;
 	lj->lioj_finished_count = 0;
 	knlist_init_mtx(&lj->klist, AIO_MTX(ki));
 	ksiginfo_init(&lj->lioj_ksi);
 
 	/*
 	 * Setup signal.
 	 */
 	if (sig && (mode == LIO_NOWAIT)) {
 		bcopy(sig, &lj->lioj_signal, sizeof(lj->lioj_signal));
 		if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) {
 			/* Assume only new style KEVENT */
 			memset(&kev, 0, sizeof(kev));
 			kev.filter = EVFILT_LIO;
 			kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1;
 			kev.ident = (uintptr_t)uacb_list; /* something unique */
 			kev.data = (intptr_t)lj;
 			/* pass user defined sigval data */
 			kev.udata = lj->lioj_signal.sigev_value.sival_ptr;
 			error = kqfd_register(
 			    lj->lioj_signal.sigev_notify_kqueue, &kev, td,
 			    M_WAITOK);
 			if (error) {
 				uma_zfree(aiolio_zone, lj);
 				return (error);
 			}
 		} else if (lj->lioj_signal.sigev_notify == SIGEV_NONE) {
 			;
 		} else if (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL ||
 			   lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID) {
 				if (!_SIG_VALID(lj->lioj_signal.sigev_signo)) {
 					uma_zfree(aiolio_zone, lj);
 					return EINVAL;
 				}
 				lj->lioj_flags |= LIOJ_SIGNAL;
 		} else {
 			uma_zfree(aiolio_zone, lj);
 			return EINVAL;
 		}
 	}
 
 	AIO_LOCK(ki);
 	TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj, lioj_list);
 	/*
 	 * Add extra aiocb count to avoid the lio to be freed
 	 * by other threads doing aio_waitcomplete or aio_return,
 	 * and prevent event from being sent until we have queued
 	 * all tasks.
 	 */
 	lj->lioj_count = 1;
 	AIO_UNLOCK(ki);
 
 	/*
 	 * Get pointers to the list of I/O requests.
 	 */
 	nagain = 0;
 	nerror = 0;
 	for (i = 0; i < nent; i++) {
 		job = acb_list[i];
 		if (job != NULL) {
 			error = aio_aqueue(td, job, lj, LIO_NOP, ops);
 			if (error == EAGAIN)
 				nagain++;
 			else if (error != 0)
 				nerror++;
 		}
 	}
 
 	error = 0;
 	AIO_LOCK(ki);
 	if (mode == LIO_WAIT) {
 		while (lj->lioj_count - 1 != lj->lioj_finished_count) {
 			ki->kaio_flags |= KAIO_WAKEUP;
 			error = msleep(&p->p_aioinfo, AIO_MTX(ki),
 			    PRIBIO | PCATCH, "aiospn", 0);
 			if (error == ERESTART)
 				error = EINTR;
 			if (error)
 				break;
 		}
 	} else {
 		if (lj->lioj_count - 1 == lj->lioj_finished_count) {
 			if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) {
 				lj->lioj_flags |= LIOJ_KEVENT_POSTED;
 				KNOTE_LOCKED(&lj->klist, 1);
 			}
 			if ((lj->lioj_flags & (LIOJ_SIGNAL |
 			    LIOJ_SIGNAL_POSTED)) == LIOJ_SIGNAL &&
 			    (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL ||
 			    lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) {
 				aio_sendsig(p, &lj->lioj_signal, &lj->lioj_ksi,
 				    lj->lioj_count != 1);
 				lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
 			}
 		}
 	}
 	lj->lioj_count--;
 	if (lj->lioj_count == 0) {
 		TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list);
 		knlist_delete(&lj->klist, curthread, 1);
 		PROC_LOCK(p);
 		sigqueue_take(&lj->lioj_ksi);
 		PROC_UNLOCK(p);
 		AIO_UNLOCK(ki);
 		uma_zfree(aiolio_zone, lj);
 	} else
 		AIO_UNLOCK(ki);
 
 	if (nerror)
 		return (EIO);
 	else if (nagain)
 		return (EAGAIN);
 	else
 		return (error);
 }
 
 /* syscall - list directed I/O (REALTIME) */
 #ifdef COMPAT_FREEBSD6
 /*
  * Compat lio_listio entry point for callers using the old sigevent
  * layout.  Validates mode and nent, converts the optional osigevent,
  * copies in the array of aiocb pointers and hands off to
  * kern_lio_listio() with the old-sigevent aiocb accessors.
  */
 int
 freebsd6_lio_listio(struct thread *td, struct freebsd6_lio_listio_args *uap)
 {
 	struct aiocb **acb_list;
 	struct sigevent *sigp, sig;
 	struct osigevent osig;
 	int error, nent;
 
 	if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
 		return (EINVAL);
 
 	/* Bound nent before it is used to size the allocation below. */
 	nent = uap->nent;
 	if (nent < 0 || nent > max_aio_queue_per_proc)
 		return (EINVAL);
 
 	/* The sigevent is only read for LIO_NOWAIT requests. */
 	if (uap->sig && (uap->mode == LIO_NOWAIT)) {
 		error = copyin(uap->sig, &osig, sizeof(osig));
 		if (error)
 			return (error);
 		error = convert_old_sigevent(&osig, &sig);
 		if (error)
 			return (error);
 		sigp = &sig;
 	} else
 		sigp = NULL;
 
 	acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK);
 	error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0]));
 	if (error == 0)
 		error = kern_lio_listio(td, uap->mode,
 		    (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp,
 		    &aiocb_ops_osigevent);
 	/* The kernel copy of the pointer array is released on every path. */
 	free(acb_list, M_LIO);
 	return (error);
 }
 #endif
 
 /*
  * syscall - list directed I/O (REALTIME)
  *
  * Native ABI entry point: validate the arguments, copy in the optional
  * sigevent and the array of aiocb pointers, then let kern_lio_listio()
  * do the work.
  */
 int
 sys_lio_listio(struct thread *td, struct lio_listio_args *uap)
 {
 	struct sigevent sig;
 	struct sigevent *sigp;
 	struct aiocb **acb_list;
 	int error, nent;
 
 	if (uap->mode != LIO_NOWAIT && uap->mode != LIO_WAIT)
 		return (EINVAL);
 
 	nent = uap->nent;
 	if (nent < 0 || nent > max_aio_queue_per_proc)
 		return (EINVAL);
 
 	/* The sigevent is only read for LIO_NOWAIT requests. */
 	sigp = NULL;
 	if (uap->sig != NULL && uap->mode == LIO_NOWAIT) {
 		error = copyin(uap->sig, &sig, sizeof(sig));
 		if (error != 0)
 			return (error);
 		sigp = &sig;
 	}
 
 	acb_list = malloc(nent * sizeof(acb_list[0]), M_LIO, M_WAITOK);
 	error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0]));
 	if (error == 0) {
 		error = kern_lio_listio(td, uap->mode, uap->acb_list, acb_list,
 		    nent, sigp, &aiocb_ops);
 	}
 	free(acb_list, M_LIO);
 	return (error);
 }
 
 /*
  * Release the resources attached to a bio issued on behalf of an AIO job
  * and destroy the bio.  bio_caller1 carries the owning job and
  * bio_caller2 carries the pbuf, if one was used for the request.
  */
 static void
 aio_biocleanup(struct bio *bp)
 {
 	struct kaiocb *job = (struct kaiocb *)bp->bio_caller1;
 	struct kaioinfo *ki;
 	struct buf *pbuf = (struct buf *)bp->bio_caller2;
 
 	/* Release mapping into kernel space. */
 	if (pbuf != NULL) {
 		MPASS(pbuf->b_npages <= atop(maxphys) + 1);
 		pmap_qremove((vm_offset_t)pbuf->b_data, pbuf->b_npages);
 		vm_page_unhold_pages(pbuf->b_pages, pbuf->b_npages);
 		uma_zfree(pbuf_zone, pbuf);
 		atomic_subtract_int(&num_buf_aio, 1);
 		/* The per-process buffer count is protected by the AIO lock. */
 		ki = job->userproc->p_aioinfo;
 		AIO_LOCK(ki);
 		ki->kaio_buffer_count--;
 		AIO_UNLOCK(ki);
 	} else {
 		/* No pbuf: the pages are tracked in the bio's own bio_ma array. */
 		MPASS(bp->bio_ma_n <= atop(maxphys) + 1);
 		vm_page_unhold_pages(bp->bio_ma, bp->bio_ma_n);
 		free(bp->bio_ma, M_TEMP);
 		atomic_subtract_int(&num_unmapped_aio, 1);
 	}
 	/* bp must not be touched by the caller after this point. */
 	g_destroy_bio(bp);
 }
 
 static void
 aio_biowakeup(struct bio *bp)
 {
 	struct kaiocb *job = (struct kaiocb *)bp->bio_caller1;
 	size_t nbytes;
 	long bcount = bp->bio_bcount;
 	long resid = bp->bio_resid;
 	int error, opcode, nblks;
 	int bio_error = bp->bio_error;
 	uint16_t flags = bp->bio_flags;
 
 	opcode = job->uaiocb.aio_lio_opcode;
 
 	aio_biocleanup(bp);
 
 	nbytes =bcount - resid;
 	atomic_add_acq_long(&job->nbytes, nbytes);
 	nblks = btodb(nbytes);
 	error = 0;
 	/*
 	 * If multiple bios experienced an error, the job will reflect the
 	 * error of whichever failed bio completed last.
 	 */
 	if (flags & BIO_ERROR)
 		atomic_set_int(&job->error, bio_error);
 	if (opcode == LIO_WRITE || opcode == LIO_WRITEV)
 		atomic_add_int(&job->outblock, nblks);
 	else
 		atomic_add_int(&job->inblock, nblks);
 	atomic_subtract_int(&job->nbio, 1);
 
 
 	if (atomic_load_int(&job->nbio) == 0) {
 		if (atomic_load_int(&job->error))
 			aio_complete(job, -1, job->error);
 		else
 			aio_complete(job, atomic_load_long(&job->nbytes), 0);
 	}
 }
 
 /*
  * syscall - wait for the next completion of an aio request
  *
  * Takes the first job on the process's done queue, sleeping for it if
  * necessary.  On success the user pointer of the completed aiocb is
  * stored through ujobp, its status is returned in td_retval[0] and its
  * error becomes the return value.  ujobp is first set to NULL so it is
  * defined when no job is returned.
  *
  * ts == NULL is translated to a timeout argument of 0 for msleep();
  * a zero timespec means "do not sleep" and yields EWOULDBLOCK when no
  * job is done; any other value is converted to ticks.
  */
 static int
 kern_aio_waitcomplete(struct thread *td, struct aiocb **ujobp,
     struct timespec *ts, struct aiocb_ops *ops)
 {
 	struct proc *p = td->td_proc;
 	struct timeval atv;
 	struct kaioinfo *ki;
 	struct kaiocb *job;
 	struct aiocb *ujob;
 	long error, status;
 	int timo;
 
 	ops->store_aiocb(ujobp, NULL);
 
 	if (ts == NULL) {
 		timo = 0;
 	} else if (ts->tv_sec == 0 && ts->tv_nsec == 0) {
 		/* Poll only; see the timo == -1 check in the loop below. */
 		timo = -1;
 	} else {
 		if ((ts->tv_nsec < 0) || (ts->tv_nsec >= 1000000000))
 			return (EINVAL);
 
 		TIMESPEC_TO_TIMEVAL(&atv, ts);
 		if (itimerfix(&atv))
 			return (EINVAL);
 		timo = tvtohz(&atv);
 	}
 
 	if (p->p_aioinfo == NULL)
 		aio_init_aioinfo(p);
 	ki = p->p_aioinfo;
 
 	error = 0;
 	job = NULL;
 	AIO_LOCK(ki);
 	while ((job = TAILQ_FIRST(&ki->kaio_done)) == NULL) {
 		if (timo == -1) {
 			error = EWOULDBLOCK;
 			break;
 		}
 		ki->kaio_flags |= KAIO_WAKEUP;
 		error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH,
 		    "aiowc", timo);
 		/* With a timeout in effect, report a restart as EINTR. */
 		if (timo && error == ERESTART)
 			error = EINTR;
 		if (error)
 			break;
 	}
 
 	if (job != NULL) {
 		MPASS(job->jobflags & KAIOCB_FINISHED);
 		/* Save what is needed from the job before it is freed. */
 		ujob = job->ujob;
 		status = job->uaiocb._aiocb_private.status;
 		error = job->uaiocb._aiocb_private.error;
 		td->td_retval[0] = status;
 		/* Charge the job's I/O accounting to the collecting thread. */
 		td->td_ru.ru_oublock += job->outblock;
 		td->td_ru.ru_inblock += job->inblock;
 		td->td_ru.ru_msgsnd += job->msgsnd;
 		td->td_ru.ru_msgrcv += job->msgrcv;
 		aio_free_entry(job);
 		AIO_UNLOCK(ki);
 		/* User memory is written only after the AIO lock is dropped. */
 		ops->store_aiocb(ujobp, ujob);
 		ops->store_error(ujob, error);
 		ops->store_status(ujob, status);
 	} else
 		AIO_UNLOCK(ki);
 
 	return (error);
 }
 
 int
 sys_aio_waitcomplete(struct thread *td, struct aio_waitcomplete_args *uap)
 {
 	struct timespec ts;
 	struct timespec *tsp;
 	int error;
 
 	/* A NULL timeout is passed through as "no timespec". */
 	tsp = NULL;
 	if (uap->timeout != NULL) {
 		/* Get timespec struct. */
 		error = copyin(uap->timeout, &ts, sizeof(ts));
 		if (error != 0)
 			return (error);
 		tsp = &ts;
 	}
 
 	return (kern_aio_waitcomplete(td, uap->aiocbp, tsp, &aiocb_ops));
 }
 
 /*
  * Common back end of aio_fsync(2).  Translates the "op" argument into
  * the matching list-I/O opcode (O_SYNC to LIO_SYNC, O_DSYNC to
  * LIO_DSYNC) and queues the request; any other value of op is rejected
  * with EINVAL.
  */
 static int
 kern_aio_fsync(struct thread *td, int op, struct aiocb *ujob,
     struct aiocb_ops *ops)
 {
+	int listop;
 
-	if (op != O_SYNC) /* XXX lack of O_DSYNC */
+	switch (op) {
+	case O_SYNC:
+		listop = LIO_SYNC;
+		break;
+	case O_DSYNC:
+		listop = LIO_DSYNC;
+		break;
+	default:
 		return (EINVAL);
-	return (aio_aqueue(td, ujob, NULL, LIO_SYNC, ops));
+	}
+
+	return (aio_aqueue(td, ujob, NULL, listop, ops));
 }
 
 int
 sys_aio_fsync(struct thread *td, struct aio_fsync_args *uap)
 {
 
 	return (kern_aio_fsync(td, uap->op, uap->aiocbp, &aiocb_ops));
 }
 
 /*
  * kqueue attach function for AIO jobs: links the knote onto the klist
  * of the job whose address was passed in kn_sdata.
  */
 static int
 filt_aioattach(struct knote *kn)
 {
 	struct kaiocb *job;
 
 	/*
 	 * kn_sdata is used as a kernel pointer below, so only
 	 * registrations made by the kernel itself are accepted.  Those
 	 * carry EV_FLAG1, which the user cannot set.
 	 */
 	if ((kn->kn_flags & EV_FLAG1) == 0)
 		return (EPERM);
 
 	job = (struct kaiocb *)(uintptr_t)kn->kn_sdata;
 	kn->kn_flags &= ~EV_FLAG1;
 	kn->kn_ptr.p_aio = job;
 	knlist_add(&job->klist, kn, 0);
 
 	return (0);
 }
 
 /*
  * kqueue detach function for AIO jobs: unlinks the knote from the job's
  * klist, using the lock callbacks stored in the klist itself.
  */
 static void
 filt_aiodetach(struct knote *kn)
 {
 	struct knlist *list = &kn->kn_ptr.p_aio->klist;
 
 	list->kl_lock(list->kl_lockarg);
 	/* Nothing to remove if the list has already been emptied. */
 	if (!knlist_empty(list))
 		knlist_remove(list, kn, 1);
 	list->kl_unlock(list->kl_lockarg);
 }
 
 /*
  * kqueue filter function for AIO jobs.  kn_data always receives the
  * job's current error value; the event fires, flagged EV_EOF, only once
  * the job is marked finished.
  */
 /*ARGSUSED*/
 static int
 filt_aio(struct knote *kn, long hint)
 {
 	struct kaiocb *job;
 
 	job = kn->kn_ptr.p_aio;
 	kn->kn_data = job->uaiocb._aiocb_private.error;
 	if ((job->jobflags & KAIOCB_FINISHED) != 0) {
 		kn->kn_flags |= EV_EOF;
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * kqueue attach function for list-I/O jobs: links the knote onto the
  * klist of the aioliojob whose address was passed in kn_sdata.
  */
 static int
 filt_lioattach(struct knote *kn)
 {
 	struct aioliojob *lj;
 
 	/*
 	 * kn_sdata is used as a kernel pointer below, so only
 	 * registrations made by the kernel itself are accepted.  Those
 	 * carry EV_FLAG1, which the user cannot set.
 	 */
 	if ((kn->kn_flags & EV_FLAG1) == 0)
 		return (EPERM);
 
 	lj = (struct aioliojob *)(uintptr_t)kn->kn_sdata;
 	kn->kn_flags &= ~EV_FLAG1;
 	kn->kn_ptr.p_lio = lj;
 	knlist_add(&lj->klist, kn, 0);
 
 	return (0);
 }
 
 /*
  * kqueue detach function for list-I/O jobs: unlinks the knote from the
  * aioliojob's klist, using the lock callbacks stored in the klist.
  */
 static void
 filt_liodetach(struct knote *kn)
 {
 	struct knlist *list = &kn->kn_ptr.p_lio->klist;
 
 	list->kl_lock(list->kl_lockarg);
 	/* Nothing to remove if the list has already been emptied. */
 	if (!knlist_empty(list))
 		knlist_remove(list, kn, 1);
 	list->kl_unlock(list->kl_lockarg);
 }
 
 /*
  * kqueue filter function for list-I/O jobs: the event is active exactly
  * when LIOJ_KEVENT_POSTED has been set on the aioliojob.
  */
 /*ARGSUSED*/
 static int
 filt_lio(struct knote *kn, long hint)
 {
 	struct aioliojob *lj;
 
 	lj = kn->kn_ptr.p_lio;
 	return (lj->lioj_flags & LIOJ_KEVENT_POSTED);
 }
 
 #ifdef COMPAT_FREEBSD32
 #include <sys/mount.h>
 #include <sys/socket.h>
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 #include <compat/freebsd32/freebsd32_signal.h>
 #include <compat/freebsd32/freebsd32_syscall.h>
 #include <compat/freebsd32/freebsd32_util.h>
 
 /*
  * 32-bit layout of the private area of an aiocb, as accessed by the
  * aiocb32_fetch_* and aiocb32_store_* helpers.
  */
 struct __aiocb_private32 {
 	int32_t	status;		/* read/written with fuword32/suword32 */
 	int32_t	error;		/* read/written with fuword32/suword32 */
 	uint32_t kernelinfo;	/* written by aiocb32_store_kernelinfo() */
 };
 
 #ifdef COMPAT_FREEBSD6
 /*
  * 32-bit layout of the old aiocb that embeds an osigevent32; copied in
  * by aiocb32_copyin_old_sigevent().  Pointers and sizes are carried as
  * 32-bit integers.
  */
 typedef struct oaiocb32 {
 	int	aio_fildes;		/* File descriptor */
 	/* NOTE(review): __packed presumably matches 32-bit alignment. */
 	uint64_t aio_offset __packed;	/* File offset for I/O */
 	uint32_t aio_buf;		/* I/O buffer in process space */
 	uint32_t aio_nbytes;		/* Number of bytes for I/O */
 	struct	osigevent32 aio_sigevent; /* Signal to deliver */
 	int	aio_lio_opcode;		/* LIO opcode */
 	int	aio_reqprio;		/* Request priority -- ignored */
 	struct	__aiocb_private32 _aiocb_private;
 } oaiocb32_t;
 #endif
 
 /*
  * 32-bit layout of the current aiocb; copied in by aiocb32_copyin().
  * Pointers and sizes are carried as 32-bit integers and converted
  * field by field.
  */
 typedef struct aiocb32 {
 	int32_t	aio_fildes;		/* File descriptor */
 	/* NOTE(review): __packed presumably matches 32-bit alignment. */
 	uint64_t aio_offset __packed;	/* File offset for I/O */
 	uint32_t aio_buf;	/* I/O buffer in process space */
 	uint32_t aio_nbytes;	/* Number of bytes for I/O */
 	int	__spare__[2];
 	uint32_t __spare2__;
 	int	aio_lio_opcode;		/* LIO opcode */
 	int	aio_reqprio;		/* Request priority -- ignored */
 	struct	__aiocb_private32 _aiocb_private;
 	struct	sigevent32 aio_sigevent;	/* Signal to deliver */
 } aiocb32_t;
 
 #ifdef COMPAT_FREEBSD6
 /*
  * Convert a 32-bit old-style sigevent into a native sigevent.
  * Returns 0 on success, or EINVAL for a notification type that is not
  * supported with the old structure.
  */
 static int
 convert_old_sigevent32(struct osigevent32 *osig, struct sigevent *nsig)
 {
 
 	/*
 	 * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are
 	 * supported by AIO with the old sigevent structure.
 	 */
 	CP(*osig, *nsig, sigev_notify);
 	if (nsig->sigev_notify == SIGEV_NONE)
 		return (0);
 	if (nsig->sigev_notify == SIGEV_SIGNAL) {
 		nsig->sigev_signo = osig->__sigev_u.__sigev_signo;
 		return (0);
 	}
 	if (nsig->sigev_notify == SIGEV_KEVENT) {
 		nsig->sigev_notify_kqueue =
 		    osig->__sigev_u.__sigev_notify_kqueue;
 		PTRIN_CP(*osig, *nsig, sigev_value.sival_ptr);
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * aio_copyin method for 32-bit callers using the old aiocb layout:
  * zero the kernel copy, read the user's oaiocb32 and translate it field
  * by field.  Returns a copyin error, or the result of converting the
  * embedded old sigevent.
  */
 static int
 aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct kaiocb *kjob,
     int type __unused)
 {
 	struct oaiocb32 job32;
 	struct aiocb *kcb;
 	int error;
 
 	kcb = &kjob->uaiocb;
 	bzero(kcb, sizeof(*kcb));
 	error = copyin(ujob, &job32, sizeof(job32));
 	if (error != 0)
 		return (error);
 
 	/* No need to copyin aio_iov, because it did not exist in FreeBSD 6 */
 
 	CP(job32, *kcb, aio_fildes);
 	CP(job32, *kcb, aio_offset);
 	PTRIN_CP(job32, *kcb, aio_buf);
 	CP(job32, *kcb, aio_nbytes);
 	CP(job32, *kcb, aio_lio_opcode);
 	CP(job32, *kcb, aio_reqprio);
 	CP(job32, *kcb, _aiocb_private.status);
 	CP(job32, *kcb, _aiocb_private.error);
 	PTRIN_CP(job32, *kcb, _aiocb_private.kernelinfo);
 
 	error = convert_old_sigevent32(&job32.aio_sigevent,
 	    &kcb->aio_sigevent);
 	return (error);
 }
 #endif
 
 /*
  * aio_copyin method for 32-bit callers using the current aiocb layout.
  * Reads the user's aiocb32 and translates it field by field into
  * kjob->uaiocb.  For vectored requests (type LIO_READV or LIO_WRITEV)
  * the iovec array is copied in as a uio stored in kjob->uiop.
  * Returns 0 or an errno value.
  */
 static int
 aiocb32_copyin(struct aiocb *ujob, struct kaiocb *kjob, int type)
 {
 	struct aiocb32 job32;
 	struct aiocb *kcb = &kjob->uaiocb;
 	struct iovec32 *iov32;
 	int error;
 
 	error = copyin(ujob, &job32, sizeof(job32));
 	if (error)
 		return (error);
 	CP(job32, *kcb, aio_fildes);
 	CP(job32, *kcb, aio_offset);
 	CP(job32, *kcb, aio_lio_opcode);
 	if (type == LIO_READV || type == LIO_WRITEV) {
 		/*
 		 * aio_iov and aio_iovcnt are macro aliases for aio_buf and
 		 * aio_nbytes (see sys/aio.h), so these touch the same
 		 * storage as the non-vectored branch below.
 		 */
 		iov32 = PTRIN(job32.aio_iov);
 		CP(job32, *kcb, aio_iovcnt);
 		/* malloc a uio and copy in the iovec */
 		error = freebsd32_copyinuio(iov32,
 		    kcb->aio_iovcnt, &kjob->uiop);
 		if (error)
 			return (error);
 	} else {
 		PTRIN_CP(job32, *kcb, aio_buf);
 		CP(job32, *kcb, aio_nbytes);
 	}
 	CP(job32, *kcb, aio_reqprio);
 	CP(job32, *kcb, _aiocb_private.status);
 	CP(job32, *kcb, _aiocb_private.error);
 	PTRIN_CP(job32, *kcb, _aiocb_private.kernelinfo);
 	/*
 	 * NOTE(review): if this conversion fails after a uio was allocated
 	 * above, the uio is not released here -- confirm that the caller
 	 * frees kjob->uiop on error.
 	 */
 	error = convert_sigevent32(&job32.aio_sigevent, &kcb->aio_sigevent);
 
 	return (error);
 }
 
 /* Read the status word from a 32-bit user aiocb. */
 static long
 aiocb32_fetch_status(struct aiocb *ujob)
 {
 	struct aiocb32 *ujob32 = (struct aiocb32 *)ujob;
 
 	return (fuword32(&ujob32->_aiocb_private.status));
 }
 
 /* Read the error word from a 32-bit user aiocb. */
 static long
 aiocb32_fetch_error(struct aiocb *ujob)
 {
 	struct aiocb32 *ujob32 = (struct aiocb32 *)ujob;
 
 	return (fuword32(&ujob32->_aiocb_private.error));
 }
 
 /* Write the status word of a 32-bit user aiocb. */
 static int
 aiocb32_store_status(struct aiocb *ujob, long status)
 {
 	struct aiocb32 *ujob32 = (struct aiocb32 *)ujob;
 
 	return (suword32(&ujob32->_aiocb_private.status, status));
 }
 
 /* Write the error word of a 32-bit user aiocb. */
 static int
 aiocb32_store_error(struct aiocb *ujob, long error)
 {
 	struct aiocb32 *ujob32 = (struct aiocb32 *)ujob;
 
 	return (suword32(&ujob32->_aiocb_private.error, error));
 }
 
 /* Write the kernelinfo word of a 32-bit user aiocb. */
 static int
 aiocb32_store_kernelinfo(struct aiocb *ujob, long jobref)
 {
 	struct aiocb32 *ujob32 = (struct aiocb32 *)ujob;
 
 	return (suword32(&ujob32->_aiocb_private.kernelinfo, jobref));
 }
 
 /* Store a user aiocb address into a 32-bit pointer slot in user space. */
 static int
 aiocb32_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob)
 {
 	long value;
 
 	value = (long)ujob;
 	return (suword32(ujobp, value));
 }
 
 /* Accessors for 32-bit callers using the current aiocb layout. */
 static struct aiocb_ops aiocb32_ops = {
 	.aio_copyin = aiocb32_copyin,
 	.fetch_status = aiocb32_fetch_status,
 	.fetch_error = aiocb32_fetch_error,
 	.store_status = aiocb32_store_status,
 	.store_error = aiocb32_store_error,
 	.store_kernelinfo = aiocb32_store_kernelinfo,
 	.store_aiocb = aiocb32_store_aiocb,
 };
 
 #ifdef COMPAT_FREEBSD6
 /*
  * Accessors for 32-bit callers using the old aiocb layout; identical to
  * aiocb32_ops except for the copyin method.
  */
 static struct aiocb_ops aiocb32_ops_osigevent = {
 	.aio_copyin = aiocb32_copyin_old_sigevent,
 	.fetch_status = aiocb32_fetch_status,
 	.fetch_error = aiocb32_fetch_error,
 	.store_status = aiocb32_store_status,
 	.store_error = aiocb32_store_error,
 	.store_kernelinfo = aiocb32_store_kernelinfo,
 	.store_aiocb = aiocb32_store_aiocb,
 };
 #endif
 
 int
 freebsd32_aio_return(struct thread *td, struct freebsd32_aio_return_args *uap)
 {
 	struct aiocb *ujob;
 
 	/* 32-bit entry point: use the 32-bit aiocb accessors. */
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (kern_aio_return(td, ujob, &aiocb32_ops));
 }
 
 /*
  * 32-bit entry point for aio_suspend: converts the optional 32-bit
  * timespec and the array of 32-bit aiocb pointers to native form and
  * calls kern_aio_suspend().
  */
 int
 freebsd32_aio_suspend(struct thread *td, struct freebsd32_aio_suspend_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts, *tsp;
 	struct aiocb **ujoblist;
 	uint32_t *ujoblist32;
 	int error, i;
 
 	/* Bound nent before it is used to size the allocation below. */
 	if (uap->nent < 0 || uap->nent > max_aio_queue_per_proc)
 		return (EINVAL);
 
 	if (uap->timeout) {
 		/* Get timespec struct. */
 		if ((error = copyin(uap->timeout, &ts32, sizeof(ts32))) != 0)
 			return (error);
 		CP(ts32, ts, tv_sec);
 		CP(ts32, ts, tv_nsec);
 		tsp = &ts;
 	} else
 		tsp = NULL;
 
 	/*
 	 * One buffer, sized for native pointers, serves both forms: the
 	 * 32-bit values are copied into its start and then widened in
 	 * place.
 	 */
 	ujoblist = malloc(uap->nent * sizeof(ujoblist[0]), M_AIOS, M_WAITOK);
 	ujoblist32 = (uint32_t *)ujoblist;
 	error = copyin(uap->aiocbp, ujoblist32, uap->nent *
 	    sizeof(ujoblist32[0]));
 	if (error == 0) {
 		/*
 		 * Walk from the last entry to the first; presumably so that
 		 * writing a wider native pointer never overwrites a 32-bit
 		 * value that has not been read yet.
 		 */
 		for (i = uap->nent - 1; i >= 0; i--)
 			ujoblist[i] = PTRIN(ujoblist32[i]);
 
 		error = kern_aio_suspend(td, uap->nent, ujoblist, tsp);
 	}
 	free(ujoblist, M_AIOS);
 	return (error);
 }
 
 int
 freebsd32_aio_error(struct thread *td, struct freebsd32_aio_error_args *uap)
 {
 	struct aiocb *ujob;
 
 	/* 32-bit entry point: use the 32-bit aiocb accessors. */
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (kern_aio_error(td, ujob, &aiocb32_ops));
 }
 
 #ifdef COMPAT_FREEBSD6
 int
 freebsd6_freebsd32_aio_read(struct thread *td,
     struct freebsd6_freebsd32_aio_read_args *uap)
 {
 	struct aiocb *ujob;
 
 	/* 32-bit compat entry point: old-sigevent 32-bit accessors. */
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (aio_aqueue(td, ujob, NULL, LIO_READ, &aiocb32_ops_osigevent));
 }
 #endif
 
 int
 freebsd32_aio_read(struct thread *td, struct freebsd32_aio_read_args *uap)
 {
 	struct aiocb *ujob;
 
 	/* 32-bit entry point: queue with opcode LIO_READ. */
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (aio_aqueue(td, ujob, NULL, LIO_READ, &aiocb32_ops));
 }
 
 int
 freebsd32_aio_readv(struct thread *td, struct freebsd32_aio_readv_args *uap)
 {
 	struct aiocb *ujob;
 
 	/* 32-bit entry point: queue with opcode LIO_READV. */
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (aio_aqueue(td, ujob, NULL, LIO_READV, &aiocb32_ops));
 }
 
 #ifdef COMPAT_FREEBSD6
 int
 freebsd6_freebsd32_aio_write(struct thread *td,
     struct freebsd6_freebsd32_aio_write_args *uap)
 {
 	struct aiocb *ujob;
 
 	/* 32-bit compat entry point: old-sigevent 32-bit accessors. */
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (aio_aqueue(td, ujob, NULL, LIO_WRITE, &aiocb32_ops_osigevent));
 }
 #endif
 
 int
 freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap)
 {
 	struct aiocb *ujob;
 
 	/* 32-bit entry point: queue with opcode LIO_WRITE. */
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (aio_aqueue(td, ujob, NULL, LIO_WRITE, &aiocb32_ops));
 }
 
 int
 freebsd32_aio_writev(struct thread *td, struct freebsd32_aio_writev_args *uap)
 {
 	struct aiocb *ujob;
 
 	/* 32-bit entry point: queue with opcode LIO_WRITEV. */
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (aio_aqueue(td, ujob, NULL, LIO_WRITEV, &aiocb32_ops));
 }
 
 int
 freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap)
 {
 	struct aiocb *ujob;
 
 	/* 32-bit entry point: queue with opcode LIO_MLOCK. */
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (aio_aqueue(td, ujob, NULL, LIO_MLOCK, &aiocb32_ops));
 }
 
 int
 freebsd32_aio_waitcomplete(struct thread *td,
     struct freebsd32_aio_waitcomplete_args *uap)
 {
 	struct timespec32 ts32;
 	struct timespec ts, *tsp;
 	int error;
 
 	if (uap->timeout) {
 		/* Get timespec struct. */
 		error = copyin(uap->timeout, &ts32, sizeof(ts32));
 		if (error)
 			return (error);
 		CP(ts32, ts, tv_sec);
 		CP(ts32, ts, tv_nsec);
 		tsp = &ts;
 	} else
 		tsp = NULL;
 
 	return (kern_aio_waitcomplete(td, (struct aiocb **)uap->aiocbp, tsp,
 	    &aiocb32_ops));
 }
 
 int
 freebsd32_aio_fsync(struct thread *td, struct freebsd32_aio_fsync_args *uap)
 {
 	struct aiocb *ujob;
 
 	/* 32-bit entry point: use the 32-bit aiocb accessors. */
 	ujob = (struct aiocb *)uap->aiocbp;
 	return (kern_aio_fsync(td, uap->op, ujob, &aiocb32_ops));
 }
 
 #ifdef COMPAT_FREEBSD6
 /*
  * 32-bit compat lio_listio entry point for callers using the old
  * sigevent layout.  Validates mode and nent, converts the optional
  * osigevent32, widens the array of 32-bit aiocb pointers and calls
  * kern_lio_listio() with the old-sigevent 32-bit accessors.
  */
 int
 freebsd6_freebsd32_lio_listio(struct thread *td,
     struct freebsd6_freebsd32_lio_listio_args *uap)
 {
 	struct aiocb **acb_list;
 	struct sigevent *sigp, sig;
 	struct osigevent32 osig;
 	uint32_t *acb_list32;
 	int error, i, nent;
 
 	if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT))
 		return (EINVAL);
 
 	/* Bound nent before it is used to size the allocations below. */
 	nent = uap->nent;
 	if (nent < 0 || nent > max_aio_queue_per_proc)
 		return (EINVAL);
 
 	/* The sigevent is only read for LIO_NOWAIT requests. */
 	if (uap->sig && (uap->mode == LIO_NOWAIT)) {
 		error = copyin(uap->sig, &osig, sizeof(osig));
 		if (error)
 			return (error);
 		error = convert_old_sigevent32(&osig, &sig);
 		if (error)
 			return (error);
 		sigp = &sig;
 	} else
 		sigp = NULL;
 
 	/* Copy in the 32-bit pointers, then widen them into a second array. */
 	acb_list32 = malloc(sizeof(uint32_t) * nent, M_LIO, M_WAITOK);
 	error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t));
 	if (error) {
 		free(acb_list32, M_LIO);
 		return (error);
 	}
 	acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK);
 	for (i = 0; i < nent; i++)
 		acb_list[i] = PTRIN(acb_list32[i]);
 	free(acb_list32, M_LIO);
 
 	error = kern_lio_listio(td, uap->mode,
 	    (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp,
 	    &aiocb32_ops_osigevent);
 	free(acb_list, M_LIO);
 	return (error);
 }
 #endif
 
 /*
  * 32-bit lio_listio entry point.  Validates mode and nent, converts the
  * optional sigevent32, widens the array of 32-bit aiocb pointers and
  * calls kern_lio_listio() with the 32-bit accessors.
  */
 int
 freebsd32_lio_listio(struct thread *td, struct freebsd32_lio_listio_args *uap)
 {
 	struct sigevent32 sig32;
 	struct sigevent sig;
 	struct sigevent *sigp;
 	struct aiocb **acb_list;
 	uint32_t *acb_list32;
 	int error, i, nent;
 
 	if (uap->mode != LIO_NOWAIT && uap->mode != LIO_WAIT)
 		return (EINVAL);
 
 	nent = uap->nent;
 	if (nent < 0 || nent > max_aio_queue_per_proc)
 		return (EINVAL);
 
 	/* The sigevent is only read for LIO_NOWAIT requests. */
 	sigp = NULL;
 	if (uap->sig && uap->mode == LIO_NOWAIT) {
 		error = copyin(uap->sig, &sig32, sizeof(sig32));
 		if (error != 0)
 			return (error);
 		error = convert_sigevent32(&sig32, &sig);
 		if (error != 0)
 			return (error);
 		sigp = &sig;
 	}
 
 	/* Copy in the 32-bit pointers, then widen them into a second array. */
 	acb_list32 = malloc(nent * sizeof(uint32_t), M_LIO, M_WAITOK);
 	error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t));
 	if (error != 0) {
 		free(acb_list32, M_LIO);
 		return (error);
 	}
 	acb_list = malloc(nent * sizeof(struct aiocb *), M_LIO, M_WAITOK);
 	for (i = 0; i < nent; i++)
 		acb_list[i] = PTRIN(acb_list32[i]);
 	free(acb_list32, M_LIO);
 
 	error = kern_lio_listio(td, uap->mode,
 	    (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp,
 	    &aiocb32_ops);
 	free(acb_list, M_LIO);
 	return (error);
 }
 
 #endif
diff --git a/sys/sys/aio.h b/sys/sys/aio.h
index c0e2b4eaaaf6..dbfbadcd1254 100644
--- a/sys/sys/aio.h
+++ b/sys/sys/aio.h
@@ -1,279 +1,280 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1997 John S. Dyson.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. John S. Dyson's name may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * DISCLAIMER:  This code isn't warranted to do anything useful.  Anything
  * bad that happens because of using this software isn't the responsibility
  * of the author.  This software is distributed AS-IS.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_AIO_H_
 #define	_SYS_AIO_H_
 
 #include <sys/types.h>
 #include <sys/signal.h>
 #ifdef _KERNEL
 #include <sys/queue.h>
 #include <sys/event.h>
 #include <sys/signalvar.h>
 #include <sys/uio.h>
 #endif
 
 /*
  * Returned by aio_cancel:
  */
 #define	AIO_CANCELED		0x1
 #define	AIO_NOTCANCELED		0x2
 #define	AIO_ALLDONE		0x3
 
 /*
  * LIO opcodes
  */
 #define	LIO_NOP			0x0
 #define LIO_WRITE		0x1
 #define	LIO_READ		0x2
 #ifdef _KERNEL
 #define	LIO_SYNC		0x3
 #define	LIO_MLOCK		0x4
 #define	LIO_WRITEV		0x5
 #define	LIO_READV		0x6
+#define	LIO_DSYNC		0x7
 #endif
 
 /*
  * LIO modes
  */
 #define	LIO_NOWAIT		0x0
 #define	LIO_WAIT		0x1
 
 /*
  * Maximum number of operations in a single lio_listio call
  */
 #define	AIO_LISTIO_MAX		16
 
 #ifdef _KERNEL
 
 /* Default values of tunables for the AIO worker pool. */
 
 #ifndef MAX_AIO_PROCS
 #define MAX_AIO_PROCS		32
 #endif
 
 #ifndef TARGET_AIO_PROCS
 #define TARGET_AIO_PROCS	4
 #endif
 
 #ifndef AIOD_LIFETIME_DEFAULT
 #define AIOD_LIFETIME_DEFAULT	(30 * hz)
 #endif
 
 #endif
 
 /*
  * Private members for aiocb -- don't access
  * directly.
  */
 struct __aiocb_private {
 	long	status;		/* completion status of the request */
 	long	error;		/* completion error of the request */
 	void	*kernelinfo;	/* job reference written by the kernel */
 };
 
 /*
  * I/O control block
  */
 typedef struct aiocb {
 	int	aio_fildes;		/* File descriptor */
 	off_t	aio_offset;		/* File offset for I/O */
 	/* aio_buf and aio_nbytes are also reachable as aio_iov/aio_iovcnt. */
 	volatile void *aio_buf;		/* I/O buffer in process space */
 	size_t	aio_nbytes;		/* Number of bytes for I/O */
 	int	__spare__[2];
 	void	*__spare2__;
 	int	aio_lio_opcode;		/* LIO opcode */
 	int	aio_reqprio;		/* Request priority -- ignored */
 	struct	__aiocb_private	_aiocb_private;
 	struct	sigevent aio_sigevent;	/* Signal to deliver */
 } aiocb_t;
 
 #define	aio_iov	aio_buf			/* I/O scatter/gather list */
 #define	aio_iovcnt	aio_nbytes	/* Length of aio_iov */
 
 #ifdef _KERNEL
 
 typedef void aio_cancel_fn_t(struct kaiocb *);
 typedef void aio_handle_fn_t(struct kaiocb *);
 
 /*
  * Kernel version of an I/O control block.
  *
  * Locking key:
  * * - need not be protected
  * a - locked by kaioinfo lock
  * b - locked by backend lock
  * c - locked by aio_job_mtx
  */
 struct kaiocb {
 	TAILQ_ENTRY(kaiocb) list;	/* (b) backend-specific list of jobs */
 	TAILQ_ENTRY(kaiocb) plist;	/* (a) lists of pending / done jobs */
 	TAILQ_ENTRY(kaiocb) allist;	/* (a) list of all jobs in proc */
 	int	jobflags;		/* (a) job flags */
 	int	inblock;		/* (*) input blocks */
 	int	outblock;		/* (*) output blocks */
 	int	msgsnd;			/* (*) messages sent */
 	int	msgrcv;			/* (*) messages received */
 	struct	proc *userproc;		/* (*) user process */
 	struct	ucred *cred;		/* (*) active credential when created */
 	struct	file *fd_file;		/* (*) pointer to file structure */
 	struct	aioliojob *lio;		/* (*) optional lio job */
 	struct	aiocb *ujob;		/* (*) pointer in userspace of aiocb */
 	struct	knlist klist;		/* (a) list of knotes */
 	struct	aiocb uaiocb;		/* (*) copy of user I/O control block */
 	struct	uio uio;		/* (*) storage for non-vectored uio */
 	struct	iovec iov[1];		/* (*) storage for non-vectored uio */
 	struct	uio *uiop;		/* (*) Possibly malloced uio */
 	ksiginfo_t ksi;			/* (a) realtime signal info */
 	uint64_t seqno;			/* (*) job number */
 	aio_cancel_fn_t *cancel_fn;	/* (a) backend cancel function */
 	aio_handle_fn_t *handle_fn;	/* (c) backend handle function */
 	/*
 	 * The anonymous structs below share storage; only the set that
 	 * belongs to the backend servicing the job is meaningful.
 	 */
 	union {				/* Backend-specific data fields */
 		/* Updated with atomic operations by the bio completion path. */
 		struct {		/* BIO backend */
 			int	nbio;	/* Number of remaining bios */
 			int	error;	/* Worst error of all bios */
 			long	nbytes;	/* Bytes completed so far */
 		};
 		struct {		/* fsync() requests */
 			int	pending; /* (a) number of pending I/O */
 		};
 		/* NOTE(review): meaning is defined by the socket code. */
 		struct {		/* socket backend */
 			void	*backend1;
 			long	backend3;
 			int	backend4;
 		};
 	};
 };
 
 struct socket;
 struct sockbuf;
 
 /*
  * AIO backends should permit cancellation of queued requests waiting to
  * be serviced by installing a cancel routine while the request is
  * queued.  The cancellation routine should dequeue the request if
  * necessary and cancel it.  Care must be used to handle races between
  * queueing and dequeueing requests and cancellation.
  *
  * When queueing a request somewhere such that it can be cancelled, the
  * caller should:
  *
  *  1) Acquire lock that protects the associated queue.
  *  2) Call aio_set_cancel_function() to install the cancel routine.
  *  3) If that fails, the request has a pending cancel and should be
  *     cancelled via aio_cancel().
  *  4) Queue the request.
  *
  * When dequeueing a request to service it or hand it off to somewhere else,
  * the caller should:
  *
  *  1) Acquire the lock that protects the associated queue.
  *  2) Dequeue the request.
  *  3) Call aio_clear_cancel_function() to clear the cancel routine.
  *  4) If that fails, the cancel routine is about to be called.  The
  *     caller should ignore the request.
  *
  * The cancel routine should:
  *
  *  1) Acquire the lock that protects the associated queue.
  *  2) Call aio_cancel_cleared() to determine if the request is already
  *     dequeued due to a race with dequeueing thread.
  *  3) If that fails, dequeue the request.
  *  4) Cancel the request via aio_cancel().
  */
 
 bool	aio_cancel_cleared(struct kaiocb *job);
 void	aio_cancel(struct kaiocb *job);
 bool	aio_clear_cancel_function(struct kaiocb *job);
 void	aio_complete(struct kaiocb *job, long status, int error);
 void	aio_schedule(struct kaiocb *job, aio_handle_fn_t *func);
 bool	aio_set_cancel_function(struct kaiocb *job, aio_cancel_fn_t *func);
 void	aio_switch_vmspace(struct kaiocb *job);
 
 #else /* !_KERNEL */
 
 struct timespec;
 
 __BEGIN_DECLS
 /*
  * Asynchronously read from a file
  */
 int	aio_read(struct aiocb *);
 #if __BSD_VISIBLE
 int	aio_readv(struct aiocb *);
 #endif
 
 /*
  * Asynchronously write to file
  */
 int	aio_write(struct aiocb *);
 #if __BSD_VISIBLE
 int	aio_writev(struct aiocb *);
 #endif
 
 /*
  * List I/O Asynchronously/synchronously read/write to/from file
  *	"lio_mode" specifies whether or not the I/O is synchronous.
  *	"acb_list" is an array of "nacb_listent" I/O control blocks.
  *	when all I/Os are complete, the optional signal "sig" is sent.
  */
 int	lio_listio(int, struct aiocb *__restrict const *__restrict, int,
     struct sigevent *);
 
 /*
  * Get completion status
  *	returns EINPROGRESS until I/O is complete.
  *	this routine does not block.
  */
 int	aio_error(const struct aiocb *);
 
 /*
  * Finish up I/O, releasing I/O resources and returns the value
  *	that would have been associated with a synchronous I/O request.
  *	This routine must be called once and only once for each
  *	I/O control block who has had I/O associated with it.
  */
 ssize_t	aio_return(struct aiocb *);
 
 /*
  * Cancel I/O
  */
 int	aio_cancel(int, struct aiocb *);
 
 /*
  * Suspend until all specified I/O or timeout is complete.
  */
 int	aio_suspend(const struct aiocb * const[], int, const struct timespec *);
 
 /*
  * Asynchronous mlock
  */
 int	aio_mlock(struct aiocb *);
 
 #if __BSD_VISIBLE
 ssize_t	aio_waitcomplete(struct aiocb **, struct timespec *);
 #endif
 
 int	aio_fsync(int op, struct aiocb *aiocbp);
 __END_DECLS
 
 #endif /* !_KERNEL */
 
 #endif /* !_SYS_AIO_H_ */
diff --git a/tests/sys/aio/aio_test.c b/tests/sys/aio/aio_test.c
index 891892e5e757..f563ec5fa5d9 100644
--- a/tests/sys/aio/aio_test.c
+++ b/tests/sys/aio/aio_test.c
@@ -1,1804 +1,1817 @@
 /*-
  * Copyright (c) 2004 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * Regression test to do some very basic AIO exercising on several types of
  * file descriptors.  Currently, the tests consist of initializing a fixed
  * size buffer with pseudo-random data, writing it to one fd using AIO, then
  * reading it from a second descriptor using AIO.  For some targets, the same
  * fd is used for write and read (i.e., file, md device), but for others the
  * operation is performed on a peer (pty, socket, fifo, etc).  For each file
  * descriptor type, several completion methods are tested.  This test program
  * does not attempt to exercise error cases or more subtle asynchronous
  * behavior, just make sure that the basic operations work on some basic object
  * types.
  */
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/resource.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/mdioctl.h>
 
 #include <aio.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <libutil.h>
 #include <limits.h>
 #include <semaphore.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <termios.h>
 #include <unistd.h>
 
 #include <atf-c.h>
 
 #include "freebsd_test_suite/macros.h"
 #include "local.h"
 
 /*
  * GLOBAL_MAX sets the largest usable buffer size to be read and written, as
  * it sizes ac_buffer in the aio_context structure.  It is also the default
  * size for file I/O.  For other types, we use smaller blocks or we risk
  * blocking (and we run in a single process/thread so that would be bad).
  */
 #define	GLOBAL_MAX	16384
 
 #define	BUFFER_MAX	GLOBAL_MAX
 
 /*
  * A completion function will block until the aio has completed, then return
  * the result of the aio.  errno will be set appropriately.
  */
 typedef ssize_t (*completion)(struct aiocb*);
 
 /*
  * Per-test state shared by the write and read helpers.
  *
  * ac_read_fd / ac_write_fd are the descriptors the read and write requests
  * are issued on (they may be the same descriptor).  ac_seed is the PRNG seed
  * that ac_buffer was filled from and is later verified against.  ac_buflen
  * is the number of bytes of ac_buffer actually transferred.  ac_seconds is
  * not referenced by the code in this part of the file.
  */
 struct aio_context {
 	int		 ac_read_fd, ac_write_fd;
 	long		 ac_seed;
 	char		 ac_buffer[GLOBAL_MAX];
 	int		 ac_buflen;
 	int		 ac_seconds;
 };
 
 /*
  * Counts completion notifications: posted by sigusr1_handler() and
  * thr_handler(), waited on by poll_signaled().
  */
 static sem_t		completions;
 
 
 /*
  * Populate the first len bytes of buffer with the low byte of successive
  * random() values.  The generator is seeded with srandom(seed) first, so a
  * given seed always produces the same contents.
  */
 static void
 aio_fill_buffer(char *buffer, int len, long seed)
 {
 	char *cursor;
 	int remaining;
 
 	srandom(seed);
 	cursor = buffer;
 	for (remaining = len; remaining > 0; remaining--)
 		*cursor++ = random() & 0xff;
 }
 
 /*
  * Check that buffer holds exactly what aio_fill_buffer() would have produced
  * for the given seed and length.  Returns (1) when every byte agrees and (0)
  * as soon as one differs.
  */
 static int
 aio_test_buffer(char *buffer, int len, long seed)
 {
 	char expected;
 	int matched, pos;
 
 	srandom(seed);
 	matched = 1;
 	for (pos = 0; matched && pos < len; pos++) {
 		expected = random() & 0xff;
 		matched = (buffer[pos] == expected);
 	}
 	return (matched);
 }
 
 /*
  * Reset a test context and bind it to the descriptors supplied by the test
  * setup.  A fresh random seed is chosen, ac_buffer is filled from it, and the
  * fill is immediately verified as a sanity check.  buflen must not exceed
  * BUFFER_MAX.
  */
 static void
 aio_context_init(struct aio_context *ac, int read_fd,
     int write_fd, int buflen)
 {
 	long seed;
 
 	ATF_REQUIRE_MSG(buflen <= BUFFER_MAX,
 	    "aio_context_init: buffer too large (%d > %d)",
 	    buflen, BUFFER_MAX);
 
 	memset(ac, 0, sizeof(*ac));
 	ac->ac_buflen = buflen;
 	ac->ac_write_fd = write_fd;
 	ac->ac_read_fd = read_fd;
 
 	srandomdev();
 	seed = random();
 	ac->ac_seed = seed;
 	aio_fill_buffer(ac->ac_buffer, buflen, seed);
 	ATF_REQUIRE_MSG(aio_test_buffer(ac->ac_buffer, buflen,
 	    seed) != 0, "aio_test_buffer: internal error");
 }
 
 /*
  * Completion function that polls aio_error() every 25ms until the request
  * leaves the EINPROGRESS state.
  *
  * Returns the result of aio_return() when the request succeeded.  When it
  * failed, returns -1 with errno set to the request's error, so callers that
  * test for a negative result and then print strerror(errno) report the real
  * failure.
  */
 static ssize_t
 poll(struct aiocb *aio)
 {
 	int error;
 
 	while ((error = aio_error(aio)) == EINPROGRESS)
 		usleep(25000);
 	if (error == 0)
 		return (aio_return(aio));
 	/*
 	 * aio_error() reports a failed request as a positive errno value;
 	 * a return of -1 means the query itself failed and errno is already
 	 * set.  Either way hand back -1 rather than the raw error number,
 	 * which callers would otherwise mistake for a byte count.
 	 */
 	if (error > 0)
 		errno = error;
 	return (-1);
 }
 
 /*
  * SIGUSR1 handler installed by setup_signal(): records one completion
  * notification by posting the "completions" semaphore.
  */
 static void
 sigusr1_handler(int sig __unused)
 {
 	ATF_REQUIRE_EQ(0, sem_post(&completions));
 }
 
 /*
  * SIGEV_THREAD notification function registered by setup_thread(): records
  * one completion notification by posting the "completions" semaphore.
  */
 static void
 thr_handler(union sigval sv __unused)
 {
 	ATF_REQUIRE_EQ(0, sem_post(&completions));
 }
 
 /*
  * Completion function for the signal- and thread-notification tests.
  *
  * Blocks on the "completions" semaphore until a notification has been
  * posted, then inspects the request.  Returns the result of aio_return()
  * on success.  Returns -1 with errno set to EINTR if the request is still
  * in progress after the notification, or -1 with errno set to the request's
  * error if it failed.
  */
 static ssize_t
 poll_signaled(struct aiocb *aio)
 {
 	int error;
 
 	ATF_REQUIRE_EQ(0, sem_wait(&completions));
 	error = aio_error(aio);
 	switch (error) {
 		case EINPROGRESS:
 			errno = EINTR;
 			return (-1);
 		case 0:
 			return (aio_return(aio));
 		default:
 			/*
 			 * A positive value is the request's errno; -1 means
 			 * aio_error() itself failed and errno is already set.
 			 * Return -1 so callers do not mistake the error
 			 * number for a byte count.
 			 */
 			if (error > 0)
 				errno = error;
 			return (-1);
 	}
 }
 
 /*
  * Setup a signal handler for signal delivery tests.
  *
  * Initializes the "completions" semaphore to zero, installs
  * sigusr1_handler() for SIGUSR1, and returns a pointer to a static sigevent
  * that requests SIGEV_SIGNAL delivery of SIGUSR1.
  *
  * This isn't thread safe, but it's ok since ATF runs each testcase in a
  * separate process
  */
 static struct sigevent*
 setup_signal(void)
 {
 	static struct sigevent sev;
 
 	ATF_REQUIRE_EQ(0, sem_init(&completions, false, 0));
 	sev.sigev_notify = SIGEV_SIGNAL;
 	sev.sigev_signo = SIGUSR1;
 	ATF_REQUIRE(SIG_ERR != signal(SIGUSR1, sigusr1_handler));
 	return (&sev);
 }
 
 /*
  * Setup a thread for thread delivery tests.
  *
  * Initializes the "completions" semaphore to zero and returns a pointer to
  * a static sigevent that requests SIGEV_THREAD notification through
  * thr_handler() with default (NULL) thread attributes.
  *
  * This isn't thread safe, but it's ok since ATF runs each testcase in a
  * separate process
  */
 static struct sigevent*
 setup_thread(void)
 {
 	static struct sigevent sev;
 
 	ATF_REQUIRE_EQ(0, sem_init(&completions, false, 0));
 	sev.sigev_notify = SIGEV_THREAD;
 	sev.sigev_notify_function = thr_handler;
 	sev.sigev_notify_attributes = NULL;
 	return (&sev);
 }
 
 /*
  * Completion function built on aio_suspend(): waits without a timeout on
  * the single request, then returns the result of aio_return().  If
  * aio_suspend() itself fails, its return value is passed back unchanged.
  */
 static ssize_t
 suspend(struct aiocb *aio)
 {
 	const struct aiocb *const pending[1] = { aio };
 	int rc;
 
 	rc = aio_suspend(pending, 1, NULL);
 	if (rc != 0)
 		return (rc);
 	return (aio_return(aio));
 }
 
 /*
  * Completion function built on aio_waitcomplete(): waits without a timeout
  * for a request to complete, requires that the completed request is the
  * one passed in, and returns the status reported by aio_waitcomplete().
  */
 static ssize_t
 waitcomplete(struct aiocb *aio)
 {
 	struct aiocb *aiop;
 	ssize_t ret;
 
 	ret = aio_waitcomplete(&aiop, NULL);
 	ATF_REQUIRE_EQ(aio, aiop);
 	return (ret);
 }
 
 /*
  * Write the context's initialized buffer to ac_write_fd at offset 0 with a
  * single aio_write() request, wait for it using the supplied completion
  * function, and fail the test case unless exactly ac_buflen bytes were
  * written.  If sev is non-NULL it is copied into the request to select the
  * completion notification.
  */
 static void
 aio_write_test(struct aio_context *ac, completion comp, struct sigevent *sev)
 {
 	struct aiocb req;
 	ssize_t written;
 
 	memset(&req, 0, sizeof(req));
 	req.aio_fildes = ac->ac_write_fd;
 	req.aio_offset = 0;
 	req.aio_buf = ac->ac_buffer;
 	req.aio_nbytes = ac->ac_buflen;
 	if (sev != NULL)
 		req.aio_sigevent = *sev;
 
 	if (aio_write(&req) < 0)
 		atf_tc_fail("aio_write failed: %s", strerror(errno));
 
 	written = comp(&req);
 	if (written < 0)
 		atf_tc_fail("aio failed: %s", strerror(errno));
 	if (written != ac->ac_buflen)
 		atf_tc_fail("aio short write (%jd)", (intmax_t)written);
 }
 
 /*
  * Vectored counterpart of aio_write_test(): write the context's buffer to
  * ac_write_fd at offset 0 with a single aio_writev() request.
  *
  * The linear buffer is split into a leading quarter and a trailing three
  * quarters, and the two pieces are submitted in swapped order (trailing
  * piece first).  The test case fails unless ac_buflen bytes were written.
  */
 static void
 aio_writev_test(struct aio_context *ac, completion comp, struct sigevent *sev)
 {
 	struct iovec vec[2];
 	struct aiocb req;
 	size_t head_len, tail_len;
 	ssize_t written;
 
 	head_len = ac->ac_buflen / 4;
 	tail_len = ac->ac_buflen * 3 / 4;
 
 	/* Trailing piece goes out first, leading piece second. */
 	vec[0].iov_base = ac->ac_buffer + head_len;
 	vec[0].iov_len = tail_len;
 	vec[1].iov_base = ac->ac_buffer;
 	vec[1].iov_len = head_len;
 
 	memset(&req, 0, sizeof(req));
 	req.aio_fildes = ac->ac_write_fd;
 	req.aio_offset = 0;
 	req.aio_iov = vec;
 	req.aio_iovcnt = 2;
 	if (sev != NULL)
 		req.aio_sigevent = *sev;
 
 	if (aio_writev(&req) < 0)
 		atf_tc_fail("aio_writev failed: %s", strerror(errno));
 
 	written = comp(&req);
 	if (written < 0)
 		atf_tc_fail("aio failed: %s", strerror(errno));
 	if (written != ac->ac_buflen)
 		atf_tc_fail("aio short write (%jd)", (intmax_t)written);
 }
 
 /*
  * Read ac_buflen bytes from ac_read_fd at offset 0 with a single aio_read()
  * request and check that they match the data generated from ac_seed.  The
  * buffer is zeroed beforehand so stale contents cannot satisfy the check.
  * If sev is non-NULL it is copied into the request to select the completion
  * notification.
  */
 static void
 aio_read_test(struct aio_context *ac, completion comp, struct sigevent *sev)
 {
 	struct aiocb req;
 	ssize_t len;
 
 	memset(ac->ac_buffer, 0, ac->ac_buflen);
 
 	memset(&req, 0, sizeof(req));
 	req.aio_fildes = ac->ac_read_fd;
 	req.aio_offset = 0;
 	req.aio_buf = ac->ac_buffer;
 	req.aio_nbytes = ac->ac_buflen;
 	if (sev != NULL)
 		req.aio_sigevent = *sev;
 
 	if (aio_read(&req) < 0)
 		atf_tc_fail("aio_read failed: %s", strerror(errno));
 
 	len = comp(&req);
 	if (len < 0)
 		atf_tc_fail("aio failed: %s", strerror(errno));
 
 	ATF_REQUIRE_EQ_MSG(len, ac->ac_buflen,
 	    "aio short read (%jd)", (intmax_t)len);
 
 	if (!aio_test_buffer(ac->ac_buffer, ac->ac_buflen, ac->ac_seed))
 		atf_tc_fail("buffer mismatched");
 }
 
 /*
  * Vectored counterpart of aio_read_test(): read ac_buflen bytes from
  * ac_read_fd at offset 0 with a single aio_readv() request and check that
  * the reassembled buffer matches the data generated from ac_seed.
  *
  * The iovec layout mirrors aio_writev_test(): the first vector covers the
  * trailing three quarters of the buffer and the second covers the leading
  * quarter, so data written by aio_writev_test() lands back in linear order.
  * The buffer is zeroed beforehand so stale contents cannot satisfy the
  * check.
  */
 static void
 aio_readv_test(struct aio_context *ac, completion comp, struct sigevent *sev)
 {
 	struct aiocb aio;
 	struct iovec iov[2];
 	size_t len0, len1;
 	ssize_t len;
 
 	bzero(ac->ac_buffer, ac->ac_buflen);
 	bzero(&aio, sizeof(aio));
 	aio.aio_fildes = ac->ac_read_fd;
 	aio.aio_offset = 0;
 	len0 = ac->ac_buflen * 3 / 4;
 	len1 = ac->ac_buflen / 4;
 	iov[0].iov_base = ac->ac_buffer + len1;
 	iov[0].iov_len = len0;
 	iov[1].iov_base = ac->ac_buffer;
 	iov[1].iov_len = len1;
 	aio.aio_iov = iov;
 	aio.aio_iovcnt = 2;
 	if (sev)
 		aio.aio_sigevent = *sev;
 
 	/* Name the call that actually failed (this is aio_readv, not aio_read). */
 	if (aio_readv(&aio) < 0)
 		atf_tc_fail("aio_readv failed: %s", strerror(errno));
 
 	len = comp(&aio);
 	if (len < 0)
 		atf_tc_fail("aio failed: %s", strerror(errno));
 
 	ATF_REQUIRE_EQ_MSG(len, ac->ac_buflen,
 	    "aio short read (%jd)", (intmax_t)len);
 
 	if (aio_test_buffer(ac->ac_buffer, ac->ac_buflen, ac->ac_seed) == 0)
 		atf_tc_fail("buffer mismatched");
 }
 
 /*
  * Series of type-specific tests for AIO.  For now, we just make sure we can
  * issue a write and then a read to each type.  We assume that once a write
  * is issued, a read can follow.
  */
 
 /*
  * Test with a classic file.  Assumes we can create a moderate size temporary
  * file.
  */
 #define	FILE_LEN	GLOBAL_MAX
 #define	FILE_PATHNAME	"testfile"
 
 /*
  * Write-then-read round trip through a regular file (FILE_PATHNAME), using
  * the same descriptor for both directions.  "vectored" selects the
  * aio_writev()/aio_readv() helpers instead of aio_write()/aio_read().
  */
 static void
 aio_file_test(completion comp, struct sigevent *sev, bool vectored)
 {
 	void (*do_write)(struct aio_context *, completion, struct sigevent *);
 	void (*do_read)(struct aio_context *, completion, struct sigevent *);
 	struct aio_context ac;
 	int fd;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	do_write = vectored ? aio_writev_test : aio_write_test;
 	do_read = vectored ? aio_readv_test : aio_read_test;
 
 	fd = open(FILE_PATHNAME, O_RDWR | O_CREAT, 0600);
 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
 
 	aio_context_init(&ac, fd, fd, FILE_LEN);
 	do_write(&ac, comp, sev);
 	do_read(&ac, comp, sev);
 	close(fd);
 }
 
 /* Regular file, non-vectored I/O, completion found by polling aio_error(). */
 ATF_TC_WITHOUT_HEAD(file_poll);
 ATF_TC_BODY(file_poll, tc)
 {
 	aio_file_test(poll, NULL, false);
 }
 
 /* Regular file, non-vectored I/O, completion notified by SIGUSR1. */
 ATF_TC_WITHOUT_HEAD(file_signal);
 ATF_TC_BODY(file_signal, tc)
 {
 	aio_file_test(poll_signaled, setup_signal(), false);
 }
 
 /* Regular file, non-vectored I/O, completion awaited with aio_suspend(). */
 ATF_TC_WITHOUT_HEAD(file_suspend);
 ATF_TC_BODY(file_suspend, tc)
 {
 	aio_file_test(suspend, NULL, false);
 }
 
 /* Regular file, non-vectored I/O, completion notified via SIGEV_THREAD. */
 ATF_TC_WITHOUT_HEAD(file_thread);
 ATF_TC_BODY(file_thread, tc)
 {
 	aio_file_test(poll_signaled, setup_thread(), false);
 }
 
 /* Regular file, non-vectored I/O, completion awaited with aio_waitcomplete(). */
 ATF_TC_WITHOUT_HEAD(file_waitcomplete);
 ATF_TC_BODY(file_waitcomplete, tc)
 {
 	aio_file_test(waitcomplete, NULL, false);
 }
 
 #define	FIFO_LEN	256
 #define	FIFO_PATHNAME	"testfifo"
 
 /*
  * Write-then-read round trip through a named pipe (FIFO_PATHNAME).
  *
  * The read side is opened first with O_NONBLOCK and the write side is then
  * opened with O_WRONLY.  FIFO_LEN bytes are written on the write side and
  * read back from the read side.
  */
 static void
 aio_fifo_test(completion comp, struct sigevent *sev)
 {
 	int read_fd, write_fd;
 	struct aio_context ac;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	ATF_REQUIRE_MSG(mkfifo(FIFO_PATHNAME, 0600) != -1,
 	    "mkfifo failed: %s", strerror(errno));
 
 	/*
 	 * Nothing runs between the failing open() and strerror(errno), so
 	 * errno needs no save/restore before reporting.
 	 */
 	read_fd = open(FIFO_PATHNAME, O_RDONLY | O_NONBLOCK);
 	if (read_fd == -1)
 		atf_tc_fail("read_fd open failed: %s",
 		    strerror(errno));
 
 	write_fd = open(FIFO_PATHNAME, O_WRONLY);
 	if (write_fd == -1)
 		atf_tc_fail("write_fd open failed: %s",
 		    strerror(errno));
 
 	aio_context_init(&ac, read_fd, write_fd, FIFO_LEN);
 	aio_write_test(&ac, comp, sev);
 	aio_read_test(&ac, comp, sev);
 
 	close(read_fd);
 	close(write_fd);
 }
 
 /* Named pipe, completion found by polling aio_error(). */
 ATF_TC_WITHOUT_HEAD(fifo_poll);
 ATF_TC_BODY(fifo_poll, tc)
 {
 	aio_fifo_test(poll, NULL);
 }
 
 /* Named pipe, completion notified by SIGUSR1. */
 ATF_TC_WITHOUT_HEAD(fifo_signal);
 ATF_TC_BODY(fifo_signal, tc)
 {
 	aio_fifo_test(poll_signaled, setup_signal());
 }
 
 /* Named pipe, completion awaited with aio_suspend(). */
 ATF_TC_WITHOUT_HEAD(fifo_suspend);
 ATF_TC_BODY(fifo_suspend, tc)
 {
 	aio_fifo_test(suspend, NULL);
 }
 
 /* Named pipe, completion notified via SIGEV_THREAD. */
 ATF_TC_WITHOUT_HEAD(fifo_thread);
 ATF_TC_BODY(fifo_thread, tc)
 {
 	aio_fifo_test(poll_signaled, setup_thread());
 }
 
 /* Named pipe, completion awaited with aio_waitcomplete(). */
 ATF_TC_WITHOUT_HEAD(fifo_waitcomplete);
 ATF_TC_BODY(fifo_waitcomplete, tc)
 {
 	aio_fifo_test(waitcomplete, NULL);
 }
 
 #define	UNIX_SOCKETPAIR_LEN	256
 /*
  * Write-then-read round trip across a PF_UNIX stream socketpair: data is
  * written on sockets[1] and read from sockets[0].  "vectored" selects the
  * aio_writev()/aio_readv() helpers.
  *
  * The process's rusage is sampled around the transfer, and the test
  * requires that ru_msgsnd and ru_msgrcv each grew by exactly one.
  */
 static void
 aio_unix_socketpair_test(completion comp, struct sigevent *sev, bool vectored)
 {
 	void (*do_write)(struct aio_context *, completion, struct sigevent *);
 	void (*do_read)(struct aio_context *, completion, struct sigevent *);
 	struct rusage ru_before, ru_after;
 	struct aio_context ac;
 	int sockets[2];
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 
 	do_write = vectored ? aio_writev_test : aio_write_test;
 	do_read = vectored ? aio_readv_test : aio_read_test;
 
 	ATF_REQUIRE_MSG(socketpair(PF_UNIX, SOCK_STREAM, 0, sockets) != -1,
 	    "socketpair failed: %s", strerror(errno));
 
 	aio_context_init(&ac, sockets[0], sockets[1], UNIX_SOCKETPAIR_LEN);
 
 	ATF_REQUIRE_MSG(getrusage(RUSAGE_SELF, &ru_before) != -1,
 	    "getrusage failed: %s", strerror(errno));
 	do_write(&ac, comp, sev);
 	do_read(&ac, comp, sev);
 	ATF_REQUIRE_MSG(getrusage(RUSAGE_SELF, &ru_after) != -1,
 	    "getrusage failed: %s", strerror(errno));
 
 	ATF_REQUIRE(ru_after.ru_msgsnd == ru_before.ru_msgsnd + 1);
 	ATF_REQUIRE(ru_after.ru_msgrcv == ru_before.ru_msgrcv + 1);
 
 	close(sockets[0]);
 	close(sockets[1]);
 }
 
 /* Unix socketpair, non-vectored I/O, completion found by polling aio_error(). */
 ATF_TC_WITHOUT_HEAD(socket_poll);
 ATF_TC_BODY(socket_poll, tc)
 {
 	aio_unix_socketpair_test(poll, NULL, false);
 }
 
 /* Unix socketpair, non-vectored I/O, completion notified by SIGUSR1. */
 ATF_TC_WITHOUT_HEAD(socket_signal);
 ATF_TC_BODY(socket_signal, tc)
 {
 	aio_unix_socketpair_test(poll_signaled, setup_signal(), false);
 }
 
 /* Unix socketpair, non-vectored I/O, completion awaited with aio_suspend(). */
 ATF_TC_WITHOUT_HEAD(socket_suspend);
 ATF_TC_BODY(socket_suspend, tc)
 {
 	aio_unix_socketpair_test(suspend, NULL, false);
 }
 
 /* Unix socketpair, non-vectored I/O, completion notified via SIGEV_THREAD. */
 ATF_TC_WITHOUT_HEAD(socket_thread);
 ATF_TC_BODY(socket_thread, tc)
 {
 	aio_unix_socketpair_test(poll_signaled, setup_thread(), false);
 }
 
 /* Unix socketpair, non-vectored I/O, completion awaited with aio_waitcomplete(). */
 ATF_TC_WITHOUT_HEAD(socket_waitcomplete);
 ATF_TC_BODY(socket_waitcomplete, tc)
 {
 	aio_unix_socketpair_test(waitcomplete, NULL, false);
 }
 
 /*
  * Pair of pty descriptors (read side and write side).
  * NOTE(review): nothing in this part of the file references this
  * structure; confirm whether it is still needed.
  */
 struct aio_pty_arg {
 	int	apa_read_fd;
 	int	apa_write_fd;
 };
 
 #define	PTY_LEN		256
 /*
  * Write-then-read round trip through a pseudo-terminal pair.
  *
  * openpty() supplies the two descriptors: the first (master) is used as the
  * read side and the second (slave) as the write side.  The write side is
  * switched to raw mode with cfmakeraw() before PTY_LEN bytes are written to
  * it and read back from the other end.
  */
 static void
 aio_pty_test(completion comp, struct sigevent *sev)
 {
 	struct aio_context ac;
 	int read_fd, write_fd;
 	struct termios ts;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	ATF_REQUIRE_MSG(openpty(&read_fd, &write_fd, NULL, NULL, NULL) == 0,
 	    "openpty failed: %s", strerror(errno));
 
 	/*
 	 * Nothing runs between a failing call and strerror(errno) below, so
 	 * errno needs no save/restore before reporting.
 	 */
 	if (tcgetattr(write_fd, &ts) < 0)
 		atf_tc_fail("tcgetattr failed: %s", strerror(errno));
 	cfmakeraw(&ts);
 	if (tcsetattr(write_fd, TCSANOW, &ts) < 0)
 		atf_tc_fail("tcsetattr failed: %s", strerror(errno));
 	aio_context_init(&ac, read_fd, write_fd, PTY_LEN);
 
 	aio_write_test(&ac, comp, sev);
 	aio_read_test(&ac, comp, sev);
 
 	close(read_fd);
 	close(write_fd);
 }
 
 /* Pseudo-terminal, completion found by polling aio_error(). */
 ATF_TC_WITHOUT_HEAD(pty_poll);
 ATF_TC_BODY(pty_poll, tc)
 {
 	aio_pty_test(poll, NULL);
 }
 
 /* Pseudo-terminal, completion notified by SIGUSR1. */
 ATF_TC_WITHOUT_HEAD(pty_signal);
 ATF_TC_BODY(pty_signal, tc)
 {
 	aio_pty_test(poll_signaled, setup_signal());
 }
 
 /* Pseudo-terminal, completion awaited with aio_suspend(). */
 ATF_TC_WITHOUT_HEAD(pty_suspend);
 ATF_TC_BODY(pty_suspend, tc)
 {
 	aio_pty_test(suspend, NULL);
 }
 
 /* Pseudo-terminal, completion notified via SIGEV_THREAD. */
 ATF_TC_WITHOUT_HEAD(pty_thread);
 ATF_TC_BODY(pty_thread, tc)
 {
 	aio_pty_test(poll_signaled, setup_thread());
 }
 
 /* Pseudo-terminal, completion awaited with aio_waitcomplete(). */
 ATF_TC_WITHOUT_HEAD(pty_waitcomplete);
 ATF_TC_BODY(pty_waitcomplete, tc)
 {
 	aio_pty_test(waitcomplete, NULL);
 }
 
 #define	PIPE_LEN	256
 /*
  * Write-then-read round trip through an anonymous pipe: PIPE_LEN bytes are
  * written to the pipe's write end and read back from its read end.
  */
 static void
 aio_pipe_test(completion comp, struct sigevent *sev)
 {
 	struct aio_context ac;
 	int fds[2];
 	int rd, wr;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	ATF_REQUIRE_MSG(pipe(fds) != -1,
 	    "pipe failed: %s", strerror(errno));
 	rd = fds[0];
 	wr = fds[1];
 
 	aio_context_init(&ac, rd, wr, PIPE_LEN);
 	aio_write_test(&ac, comp, sev);
 	aio_read_test(&ac, comp, sev);
 
 	close(rd);
 	close(wr);
 }
 
 /* Anonymous pipe, completion found by polling aio_error(). */
 ATF_TC_WITHOUT_HEAD(pipe_poll);
 ATF_TC_BODY(pipe_poll, tc)
 {
 	aio_pipe_test(poll, NULL);
 }
 
 /* Anonymous pipe, completion notified by SIGUSR1. */
 ATF_TC_WITHOUT_HEAD(pipe_signal);
 ATF_TC_BODY(pipe_signal, tc)
 {
 	aio_pipe_test(poll_signaled, setup_signal());
 }
 
 /* Anonymous pipe, completion awaited with aio_suspend(). */
 ATF_TC_WITHOUT_HEAD(pipe_suspend);
 ATF_TC_BODY(pipe_suspend, tc)
 {
 	aio_pipe_test(suspend, NULL);
 }
 
 /* Anonymous pipe, completion notified via SIGEV_THREAD. */
 ATF_TC_WITHOUT_HEAD(pipe_thread);
 ATF_TC_BODY(pipe_thread, tc)
 {
 	aio_pipe_test(poll_signaled, setup_thread());
 }
 
 /* Anonymous pipe, completion awaited with aio_waitcomplete(). */
 ATF_TC_WITHOUT_HEAD(pipe_waitcomplete);
 ATF_TC_BODY(pipe_waitcomplete, tc)
 {
 	aio_pipe_test(waitcomplete, NULL);
 }
 
 #define	MD_LEN		GLOBAL_MAX
 #define	MDUNIT_LINK	"mdunit_link"
 
 /*
  * Attach a malloc-backed memory disk of GLOBAL_MAX bytes (512-byte sectors,
  * automatically assigned unit) through the md control device and open the
  * resulting /dev/md<unit> node read/write.
  *
  * The unit number is stored as the target of the MDUNIT_LINK symlink so
  * that aio_md_cleanup() can detach the device later.
  *
  * Returns the open descriptor for the md device; any failure aborts the
  * test case.
  */
 static int
 aio_md_setup(void)
 {
 	int fd, mdctl_fd, unit;
 	char pathname[PATH_MAX];
 	struct md_ioctl mdio;
 	char buf[80];
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 
 	mdctl_fd = open("/dev/" MDCTL_NAME, O_RDWR, 0);
 	ATF_REQUIRE_MSG(mdctl_fd != -1,
 	    "opening /dev/%s failed: %s", MDCTL_NAME, strerror(errno));
 
 	bzero(&mdio, sizeof(mdio));
 	mdio.md_version = MDIOVERSION;
 	mdio.md_type = MD_MALLOC;
 	mdio.md_options = MD_AUTOUNIT | MD_COMPRESS;
 	mdio.md_mediasize = GLOBAL_MAX;
 	mdio.md_sectorsize = 512;
 
 	/*
 	 * Nothing runs between the failing ioctl() and strerror(errno), so
 	 * errno needs no save/restore before reporting.
 	 */
 	if (ioctl(mdctl_fd, MDIOCATTACH, &mdio) < 0)
 		atf_tc_fail("ioctl MDIOCATTACH failed: %s", strerror(errno));
 	close(mdctl_fd);
 
 	/* Store the md unit number in a symlink for future cleanup */
 	unit = mdio.md_unit;
 	snprintf(buf, sizeof(buf), "%d", unit);
 	ATF_REQUIRE_EQ(0, symlink(buf, MDUNIT_LINK));
 	snprintf(pathname, sizeof(pathname), "/dev/md%d", unit);
 	fd = open(pathname, O_RDWR);
 	ATF_REQUIRE_MSG(fd != -1,
 	    "opening %s failed: %s", pathname, strerror(errno));
 
 	return (fd);
 }
 
 /*
  * Detach the memory disk created by aio_md_setup().
  *
  * The unit number is recovered from the target of the MDUNIT_LINK symlink.
  * If the link is missing or does not hold a non-negative integer, nothing
  * is detached.  A failing MDIOCDETACH fails the test case.
  */
 static void
 aio_md_cleanup(void)
 {
 	struct md_ioctl mdio;
 	int mdctl_fd, error, unit;
 	ssize_t n;
 	char buf[80];
 
 	mdctl_fd = open("/dev/" MDCTL_NAME, O_RDWR, 0);
 	ATF_REQUIRE(mdctl_fd >= 0);
 	/*
 	 * readlink() does not NUL-terminate its output, so reserve one byte
 	 * and terminate the string ourselves before handing it to sscanf().
 	 */
 	n = readlink(MDUNIT_LINK, buf, sizeof(buf) - 1);
 	if (n > 0) {
 		buf[n] = '\0';
 		if (sscanf(buf, "%d", &unit) == 1 && unit >= 0) {
 			bzero(&mdio, sizeof(mdio));
 			mdio.md_version = MDIOVERSION;
 			mdio.md_unit = unit;
 			if (ioctl(mdctl_fd, MDIOCDETACH, &mdio) == -1) {
 				/* close() may clobber errno; preserve it. */
 				error = errno;
 				close(mdctl_fd);
 				errno = error;
 				atf_tc_fail("ioctl MDIOCDETACH failed: %s",
 				    strerror(errno));
 			}
 		}
 	}
 
 	close(mdctl_fd);
 }
 
 /*
  * Write-then-read round trip through a freshly attached memory disk, using
  * the same descriptor for both directions.  "vectored" selects the
  * aio_writev()/aio_readv() helpers.  The device itself is detached by the
  * test case's cleanup routine, not here.
  */
 static void
 aio_md_test(completion comp, struct sigevent *sev, bool vectored)
 {
 	void (*do_write)(struct aio_context *, completion, struct sigevent *);
 	void (*do_read)(struct aio_context *, completion, struct sigevent *);
 	struct aio_context ac;
 	int md_fd;
 
 	do_write = vectored ? aio_writev_test : aio_write_test;
 	do_read = vectored ? aio_readv_test : aio_read_test;
 
 	md_fd = aio_md_setup();
 	aio_context_init(&ac, md_fd, md_fd, MD_LEN);
 	do_write(&ac, comp, sev);
 	do_read(&ac, comp, sev);
 
 	close(md_fd);
 }
 
 /* Memory disk, non-vectored I/O, completion found by polling aio_error(). */
 ATF_TC_WITH_CLEANUP(md_poll);
 ATF_TC_HEAD(md_poll, tc)
 {
 
 	/* Run only as root. */
 	atf_tc_set_md_var(tc, "require.user", "root");
 }
 ATF_TC_BODY(md_poll, tc)
 {
 	aio_md_test(poll, NULL, false);
 }
 ATF_TC_CLEANUP(md_poll, tc)
 {
 	/* Detach the memory disk attached by the body. */
 	aio_md_cleanup();
 }
 
 /* Memory disk, non-vectored I/O, completion notified by SIGUSR1. */
 ATF_TC_WITH_CLEANUP(md_signal);
 ATF_TC_HEAD(md_signal, tc)
 {
 
 	/* Run only as root. */
 	atf_tc_set_md_var(tc, "require.user", "root");
 }
 ATF_TC_BODY(md_signal, tc)
 {
 	aio_md_test(poll_signaled, setup_signal(), false);
 }
 ATF_TC_CLEANUP(md_signal, tc)
 {
 	/* Detach the memory disk attached by the body. */
 	aio_md_cleanup();
 }
 
 /* Memory disk, non-vectored I/O, completion awaited with aio_suspend(). */
 ATF_TC_WITH_CLEANUP(md_suspend);
 ATF_TC_HEAD(md_suspend, tc)
 {
 
 	/* Run only as root. */
 	atf_tc_set_md_var(tc, "require.user", "root");
 }
 ATF_TC_BODY(md_suspend, tc)
 {
 	aio_md_test(suspend, NULL, false);
 }
 ATF_TC_CLEANUP(md_suspend, tc)
 {
 	/* Detach the memory disk attached by the body. */
 	aio_md_cleanup();
 }
 
 /* Memory disk, non-vectored I/O, completion notified via SIGEV_THREAD. */
 ATF_TC_WITH_CLEANUP(md_thread);
 ATF_TC_HEAD(md_thread, tc)
 {
 
 	/* Run only as root. */
 	atf_tc_set_md_var(tc, "require.user", "root");
 }
 ATF_TC_BODY(md_thread, tc)
 {
 	aio_md_test(poll_signaled, setup_thread(), false);
 }
 ATF_TC_CLEANUP(md_thread, tc)
 {
 	/* Detach the memory disk attached by the body. */
 	aio_md_cleanup();
 }
 
 /* Memory disk, non-vectored I/O, completion awaited with aio_waitcomplete(). */
 ATF_TC_WITH_CLEANUP(md_waitcomplete);
 ATF_TC_HEAD(md_waitcomplete, tc)
 {
 
 	/* Run only as root. */
 	atf_tc_set_md_var(tc, "require.user", "root");
 }
 ATF_TC_BODY(md_waitcomplete, tc)
 {
 	aio_md_test(waitcomplete, NULL, false);
 }
 ATF_TC_CLEANUP(md_waitcomplete, tc)
 {
 	/* Detach the memory disk attached by the body. */
 	aio_md_cleanup();
 }
 
 #define	ZVOL_VDEV_PATHNAME	"test_vdev"
 #define POOL_SIZE		(1 << 28)	/* 256 MB */
 #define ZVOL_SIZE		"64m"
 #define POOL_NAME		"aio_testpool"
 #define ZVOL_NAME		"aio_testvol"
 
 /*
  * Create a throwaway ZFS pool backed by a file in the current directory,
  * create a volume in it, and open the volume's device node.
  *
  * The pool is named POOL_NAME.<pid>.  The pid is also written to "pidfile"
  * so that aio_zvol_cleanup() can reconstruct the pool name.
  *
  * Returns an O_RDWR descriptor for /dev/zvol/<pool>/ZVOL_NAME; any failure
  * aborts the test case.
  */
 static int
 aio_zvol_setup(void)
 {
 	FILE *pidfile;
 	int fd;
 	pid_t pid;
 	char pool_name[80];
 	char cmd[160];
 	char zvol_name[160];
 	char devname[160];
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_KERNEL_MODULE("zfs");
 
 	/* Create the backing file for the pool and size it to POOL_SIZE. */
 	fd = open(ZVOL_VDEV_PATHNAME, O_RDWR | O_CREAT, 0600);
 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
 	ATF_REQUIRE_EQ_MSG(0,
 	    ftruncate(fd, POOL_SIZE), "ftruncate failed: %s", strerror(errno));
 	close(fd);
 
 	/* Record our pid; the cleanup routine reads it back. */
 	pid = getpid();
 	pidfile = fopen("pidfile", "w");
 	ATF_REQUIRE_MSG(NULL != pidfile, "fopen: %s", strerror(errno));
 	fprintf(pidfile, "%d", pid);
 	fclose(pidfile);
 
 	/*
 	 * NOTE(review): the checks below print strerror(errno) after
 	 * system(); system() reports command failure through its return
 	 * value, so errno may not describe why the command failed.
 	 */
 	snprintf(pool_name, sizeof(pool_name), POOL_NAME ".%d", pid);
 	snprintf(zvol_name, sizeof(zvol_name), "%s/" ZVOL_NAME, pool_name);
 	snprintf(cmd, sizeof(cmd), "zpool create %s $PWD/" ZVOL_VDEV_PATHNAME,
 	    pool_name);
 	ATF_REQUIRE_EQ_MSG(0, system(cmd),
 	    "zpool create failed: %s", strerror(errno));
 	snprintf(cmd, sizeof(cmd),
 	    "zfs create -o volblocksize=8192 -o volmode=dev -V "
 		ZVOL_SIZE " %s", zvol_name);
 	ATF_REQUIRE_EQ_MSG(0, system(cmd),
 	    "zfs create failed: %s", strerror(errno));
 	/*
 	 * XXX Due to bug 251828, we need an extra "zfs set" here
 	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=251828
 	 */
 	snprintf(cmd, sizeof(cmd), "zfs set volmode=dev %s", zvol_name);
 	ATF_REQUIRE_EQ_MSG(0, system(cmd),
 	    "zfs set failed: %s", strerror(errno));
 
 	/* Open the volume's device node, retrying if interrupted. */
 	snprintf(devname, sizeof(devname), "/dev/zvol/%s", zvol_name);
 	do {
 		fd = open(devname, O_RDWR);
 	} while (fd == -1 && errno == EINTR) ;
 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
 	return (fd);
 }
 
 /*
  * Destroy the pool created by aio_zvol_setup().  The pool name is rebuilt
  * from the pid stored in "pidfile".  The exit status of "zpool destroy" is
  * not checked.
  */
 static void
 aio_zvol_cleanup(void)
 {
 	FILE *pidfile;
 	pid_t testpid;
 	char cmd[160];
 
 	pidfile = fopen("pidfile", "r");
 	ATF_REQUIRE_MSG(NULL != pidfile, "fopen: %s", strerror(errno));
 	ATF_REQUIRE_EQ(1, fscanf(pidfile, "%d", &testpid));
 	fclose(pidfile);
 
 	snprintf(cmd, sizeof(cmd), "zpool destroy " POOL_NAME ".%d", testpid);
 	system(cmd);
 }
 
 
 /*
  * Check the upper bound on the size of an aio_read() request.
  *
  * A read of the maximum size (SSIZE_MAX, or INT_MAX on LP64 systems when
  * the debug.iosize_max_clamp sysctl is non-zero) from an empty file must
  * complete and return 0 bytes.  A read one byte larger must be rejected
  * with EINVAL, either by aio_read() itself or when the request is reaped.
  */
 ATF_TC_WITHOUT_HEAD(aio_large_read_test);
 ATF_TC_BODY(aio_large_read_test, tc)
 {
 	struct aiocb cb, *cbp;
 	ssize_t nread;
 	size_t len;
 	int fd;
 #ifdef __LP64__
 	int clamped;
 #endif
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 #ifdef __LP64__
 	len = sizeof(clamped);
 	if (sysctlbyname("debug.iosize_max_clamp", &clamped, &len, NULL, 0) ==
 	    -1)
 		atf_libc_error(errno, "Failed to read debug.iosize_max_clamp");
 #endif
 
 	/* Determine the maximum supported read(2) size. */
 	len = SSIZE_MAX;
 #ifdef __LP64__
 	if (clamped)
 		len = INT_MAX;
 #endif
 
 	fd = open(FILE_PATHNAME, O_RDWR | O_CREAT, 0600);
 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
 
 	/* The file is only needed through fd; remove its name right away. */
 	unlink(FILE_PATHNAME);
 
 	/* Maximum-size read: must be accepted and return no data. */
 	memset(&cb, 0, sizeof(cb));
 	cb.aio_nbytes = len;
 	cb.aio_fildes = fd;
 	cb.aio_buf = NULL;
 	if (aio_read(&cb) == -1)
 		atf_tc_fail("aio_read() of maximum read size failed: %s",
 		    strerror(errno));
 
 	nread = aio_waitcomplete(&cbp, NULL);
 	if (nread == -1)
 		atf_tc_fail("aio_waitcomplete() failed: %s", strerror(errno));
 	if (nread != 0)
 		atf_tc_fail("aio_read() from empty file returned data: %zd",
 		    nread);
 
 	/* One byte over the maximum: EINVAL at queue or completion time. */
 	memset(&cb, 0, sizeof(cb));
 	cb.aio_nbytes = len + 1;
 	cb.aio_fildes = fd;
 	cb.aio_buf = NULL;
 	if (aio_read(&cb) == -1) {
 		if (errno == EINVAL)
 			goto finished;
 		atf_tc_fail("aio_read() of too large read size failed: %s",
 		    strerror(errno));
 	}
 
 	nread = aio_waitcomplete(&cbp, NULL);
 	if (nread == -1) {
 		if (errno == EINVAL)
 			goto finished;
 		atf_tc_fail("aio_waitcomplete() failed: %s", strerror(errno));
 	}
 	/* The oversized read completed without EINVAL: that is a failure. */
 	atf_tc_fail("aio_read() of too large read size returned: %zd", nread);
 
 finished:
 	close(fd);
 }
 
 /*
  * This tests for a bug where arriving socket data can wakeup multiple
  * AIO read requests resulting in an uncancellable request.
  *
  * Two reads are queued on one end of a socketpair and a single byte is
  * written to the other end.  Exactly one read must complete with that byte;
  * the other must still be in progress and must be cancellable.
  */
 ATF_TC_WITHOUT_HEAD(aio_socket_two_reads);
 ATF_TC_BODY(aio_socket_two_reads, tc)
 {
 	struct ioreq {
 		struct aiocb iocb;
 		char buffer[1024];
 	} ioreq[2];
 	struct aiocb *iocb;
 	unsigned i;
 	int s[2];
 	char c;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 #if __FreeBSD_version < 1100101
 	/* Skip on kernels that predate the fix being tested. */
 	atf_tc_skip("kernel version %d is too old (%d required)",
 	    __FreeBSD_version, 1100101);
 #endif
 
 	ATF_REQUIRE(socketpair(PF_UNIX, SOCK_STREAM, 0, s) != -1);
 
 	/* Queue two read requests. */
 	memset(&ioreq, 0, sizeof(ioreq));
 	for (i = 0; i < nitems(ioreq); i++) {
 		ioreq[i].iocb.aio_nbytes = sizeof(ioreq[i].buffer);
 		ioreq[i].iocb.aio_fildes = s[0];
 		ioreq[i].iocb.aio_buf = ioreq[i].buffer;
 		ATF_REQUIRE(aio_read(&ioreq[i].iocb) == 0);
 	}
 
 	/* Send a single byte.  This should complete one request. */
 	c = 0xc3;
 	ATF_REQUIRE(write(s[1], &c, sizeof(c)) == 1);
 
 	ATF_REQUIRE(aio_waitcomplete(&iocb, NULL) == 1);
 
 	/* Determine which request completed and verify the data was read. */
 	if (iocb == &ioreq[0].iocb)
 		i = 0;
 	else
 		i = 1;
 	ATF_REQUIRE(ioreq[i].buffer[0] == c);
 
 	/* Switch to the index of the request that did not complete. */
 	i ^= 1;
 
 	/*
 	 * Try to cancel the other request.  On broken systems this
 	 * will fail and the process will hang on exit.
 	 */
 	ATF_REQUIRE(aio_error(&ioreq[i].iocb) == EINPROGRESS);
 	ATF_REQUIRE(aio_cancel(s[0], &ioreq[i].iocb) == AIO_CANCELED);
 
 	close(s[1]);
 	close(s[0]);
 }
 
 /*
  * Queue one large asynchronous write on a blocking Unix stream socket and
  * require that it completes in full rather than with a short count.
  *
  * The write is twice the larger of the peer's receive buffer and the
  * sender's send buffer.  The peer drains it with recv(MSG_WAITALL) and the
  * received bytes are compared with what was sent.  "vectored" submits the
  * same data through aio_writev() as two unequal iovecs instead of through
  * aio_write().
  */
 static void
 aio_socket_blocking_short_write_test(bool vectored)
 {
 	struct aiocb iocb, *iocbp;
 	struct iovec iov[2];
 	char *buffer[2];
 	ssize_t done, r;
 	int buffer_size, sb_size;
 	socklen_t len;
 	int s[2];
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 
 	ATF_REQUIRE(socketpair(PF_UNIX, SOCK_STREAM, 0, s) != -1);
 
 	len = sizeof(sb_size);
 	ATF_REQUIRE(getsockopt(s[0], SOL_SOCKET, SO_RCVBUF, &sb_size, &len) !=
 	    -1);
 	ATF_REQUIRE(len == sizeof(sb_size));
 	buffer_size = sb_size;
 
 	ATF_REQUIRE(getsockopt(s[1], SOL_SOCKET, SO_SNDBUF, &sb_size, &len) !=
 	    -1);
 	ATF_REQUIRE(len == sizeof(sb_size));
 	if (sb_size > buffer_size)
 		buffer_size = sb_size;
 
 	/*
 	 * Use twice the size of the MAX(receive buffer, send buffer)
 	 * to ensure that the write is split up into multiple writes
 	 * internally.
 	 */
 	buffer_size *= 2;
 
 	/* buffer[0] receives the data; buffer[1] holds what is sent. */
 	buffer[0] = malloc(buffer_size);
 	ATF_REQUIRE(buffer[0] != NULL);
 	buffer[1] = malloc(buffer_size);
 	ATF_REQUIRE(buffer[1] != NULL);
 
 	srandomdev();
 	aio_fill_buffer(buffer[1], buffer_size, random());
 
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.aio_fildes = s[1];
 	if (vectored) {
 		iov[0].iov_base = buffer[1];
 		iov[0].iov_len = buffer_size / 2 + 1;
 		iov[1].iov_base = buffer[1] + buffer_size / 2 + 1;
 		iov[1].iov_len = buffer_size / 2 - 1;
 		iocb.aio_iov = iov;
 		iocb.aio_iovcnt = 2;
 		r = aio_writev(&iocb);
 		ATF_CHECK_EQ_MSG(0, r, "aio_writev returned %zd", r);
 	} else {
 		iocb.aio_buf = buffer[1];
 		iocb.aio_nbytes = buffer_size;
 		r = aio_write(&iocb);
 		/* Name the call that was actually made in this branch. */
 		ATF_CHECK_EQ_MSG(0, r, "aio_write returned %zd", r);
 	}
 
 	done = recv(s[0], buffer[0], buffer_size, MSG_WAITALL);
 	ATF_REQUIRE(done == buffer_size);
 
 	done = aio_waitcomplete(&iocbp, NULL);
 	ATF_REQUIRE(iocbp == &iocb);
 	ATF_REQUIRE(done == buffer_size);
 
 	ATF_REQUIRE(memcmp(buffer[0], buffer[1], buffer_size) == 0);
 
 	free(buffer[0]);
 	free(buffer[1]);
 	close(s[1]);
 	close(s[0]);
 }
 
 /*
  * This test ensures that aio_write() on a blocking socket of a "large"
  * buffer does not return a short completion.  It runs the shared helper in
  * its non-vectored mode.
  */
 ATF_TC_WITHOUT_HEAD(aio_socket_blocking_short_write);
 ATF_TC_BODY(aio_socket_blocking_short_write, tc)
 {
 	aio_socket_blocking_short_write_test(false);
 }
 
 /*
  * Like aio_socket_blocking_short_write, but also tests that partially
  * completed vectored sends can be retried correctly.  It runs the shared
  * helper in its vectored (aio_writev()) mode.
  */
 ATF_TC_WITHOUT_HEAD(aio_socket_blocking_short_write_vectored);
 ATF_TC_BODY(aio_socket_blocking_short_write_vectored, tc)
 {
 	aio_socket_blocking_short_write_test(true);
 }
 
 /*
  * This test verifies that cancelling a partially completed socket write
  * returns a short write rather than ECANCELED.
  *
  * A write of three buffer-sizes is queued, one buffer-size is drained by
  * the peer, and the still-running request is then cancelled.  The
  * cancellation must report AIO_NOTCANCELED and the request must complete
  * with a byte count between one and two buffer-sizes.
  */
 ATF_TC_WITHOUT_HEAD(aio_socket_short_write_cancel);
 ATF_TC_BODY(aio_socket_short_write_cancel, tc)
 {
 	struct aiocb iocb, *iocbp;
 	char *buffer[2];
 	ssize_t done;
 	int buffer_size, sb_size;
 	socklen_t len;
 	int s[2];
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 
 	ATF_REQUIRE(socketpair(PF_UNIX, SOCK_STREAM, 0, s) != -1);
 
 	/* buffer_size = MAX(receive buffer of s[0], send buffer of s[1]). */
 	len = sizeof(sb_size);
 	ATF_REQUIRE(getsockopt(s[0], SOL_SOCKET, SO_RCVBUF, &sb_size, &len) !=
 	    -1);
 	ATF_REQUIRE(len == sizeof(sb_size));
 	buffer_size = sb_size;
 
 	ATF_REQUIRE(getsockopt(s[1], SOL_SOCKET, SO_SNDBUF, &sb_size, &len) !=
 	    -1);
 	ATF_REQUIRE(len == sizeof(sb_size));
 	if (sb_size > buffer_size)
 		buffer_size = sb_size;
 
 	/*
 	 * Use three times the size of the MAX(receive buffer, send
 	 * buffer) for the write to ensure that the write is split up
 	 * into multiple writes internally.  The recv() ensures that
 	 * the write has partially completed, but a remaining size of
 	 * two buffers should ensure that the write has not completed
 	 * fully when it is cancelled.
 	 */
 	/* buffer[0] receives one buffer-size; buffer[1] is the data sent. */
 	buffer[0] = malloc(buffer_size);
 	ATF_REQUIRE(buffer[0] != NULL);
 	buffer[1] = malloc(buffer_size * 3);
 	ATF_REQUIRE(buffer[1] != NULL);
 
 	srandomdev();
 	aio_fill_buffer(buffer[1], buffer_size * 3, random());
 
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.aio_fildes = s[1];
 	iocb.aio_buf = buffer[1];
 	iocb.aio_nbytes = buffer_size * 3;
 	ATF_REQUIRE(aio_write(&iocb) == 0);
 
 	/* Drain exactly one buffer-size so the write is partially done. */
 	done = recv(s[0], buffer[0], buffer_size, MSG_WAITALL);
 	ATF_REQUIRE(done == buffer_size);
 
 	ATF_REQUIRE(aio_error(&iocb) == EINPROGRESS);
 	ATF_REQUIRE(aio_cancel(s[1], &iocb) == AIO_NOTCANCELED);
 
 	done = aio_waitcomplete(&iocbp, NULL);
 	ATF_REQUIRE(iocbp == &iocb);
 	ATF_REQUIRE(done >= buffer_size && done <= buffer_size * 2);
 
 	/* The bytes that did arrive must match the start of the data sent. */
 	ATF_REQUIRE(memcmp(buffer[0], buffer[1], buffer_size) == 0);
 
 	close(s[1]);
 	close(s[0]);
 }
 
 /*
  * Test aio_fsync()'s behavior with bad inputs: an unsupported op, an
  * invalid file descriptor, and an invalid sigev_notify value.  Each case
  * uses a freshly zeroed control block and varies exactly one input.
  */
 ATF_TC_WITHOUT_HEAD(aio_fsync_errors);
 ATF_TC_BODY(aio_fsync_errors, tc)
 {
 	int fd;
 	struct aiocb iocb;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	fd = open(FILE_PATHNAME, O_RDWR | O_CREAT, 0600);
 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
 	unlink(FILE_PATHNAME);
 
-	/* aio_fsync should return EINVAL unless op is O_SYNC */
+	/* aio_fsync should return EINVAL unless op is O_SYNC or O_DSYNC */
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.aio_fildes = fd;
 	ATF_CHECK_EQ(-1, aio_fsync(666, &iocb));
 	ATF_CHECK_EQ(EINVAL, errno);
 
 	/* aio_fsync should return EBADF if fd is not a valid descriptor */
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.aio_fildes = 666;
 	ATF_CHECK_EQ(-1, aio_fsync(O_SYNC, &iocb));
 	ATF_CHECK_EQ(EBADF, errno);
 
 	/*
 	 * aio_fsync should return EINVAL if sigev_notify is invalid.  Pass a
 	 * valid op and descriptor so that the bad sigev_notify is the only
 	 * possible source of the error.
 	 */
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.aio_fildes = fd;
 	iocb.aio_sigevent.sigev_notify = 666;
 	ATF_CHECK_EQ(-1, aio_fsync(O_SYNC, &iocb));
 	ATF_CHECK_EQ(EINVAL, errno);
 }
 
 /*
- * This test just performs a basic test of aio_fsync().
+ * Basic aio_fsync() test, run with the op (O_SYNC or O_DSYNC) given by caller.
  */
-ATF_TC_WITHOUT_HEAD(aio_fsync_test);
-ATF_TC_BODY(aio_fsync_test, tc)
+static void
+aio_fsync_test(int op)
 {
 	struct aiocb synccb, *iocbp;
 	struct {
 		struct aiocb iocb;
 		bool done;
 		char *buffer;
 	} buffers[16];
 	struct stat sb;
 	ssize_t rval;
 	unsigned i;
 	int fd;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	fd = open(FILE_PATHNAME, O_RDWR | O_CREAT, 0600);
 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
 	unlink(FILE_PATHNAME);
 
 	ATF_REQUIRE(fstat(fd, &sb) == 0);
 	ATF_REQUIRE(sb.st_blksize != 0);
 	ATF_REQUIRE(ftruncate(fd, sb.st_blksize * nitems(buffers)) == 0);
 
 	/*
 	 * Queue several asynchronous write requests.  Hopefully this
 	 * forces the aio_fsync() request to be deferred.  There is no
 	 * reliable way to guarantee that however.
 	 */
 	srandomdev();
 	for (i = 0; i < nitems(buffers); i++) {
 		buffers[i].done = false;
 		memset(&buffers[i].iocb, 0, sizeof(buffers[i].iocb));
 		buffers[i].buffer = malloc(sb.st_blksize);
 		aio_fill_buffer(buffers[i].buffer, sb.st_blksize, random());
 		buffers[i].iocb.aio_fildes = fd;
 		buffers[i].iocb.aio_buf = buffers[i].buffer;
 		buffers[i].iocb.aio_nbytes = sb.st_blksize;
 		buffers[i].iocb.aio_offset = sb.st_blksize * i;
 		ATF_REQUIRE(aio_write(&buffers[i].iocb) == 0);
 	}
 
 	/* Queue the aio_fsync request. */
 	memset(&synccb, 0, sizeof(synccb));
 	synccb.aio_fildes = fd;
-	ATF_REQUIRE(aio_fsync(O_SYNC, &synccb) == 0);
+	ATF_REQUIRE(aio_fsync(op, &synccb) == 0);
 
 	/* Wait for requests to complete. */
 	for (;;) {
 	next:
 		rval = aio_waitcomplete(&iocbp, NULL);
 		ATF_REQUIRE(iocbp != NULL);
 		if (iocbp == &synccb) {
 			ATF_REQUIRE(rval == 0);
 			break;
 		}
 
 		for (i = 0; i < nitems(buffers); i++) {
 			if (iocbp == &buffers[i].iocb) {
 				ATF_REQUIRE(buffers[i].done == false);
 				ATF_REQUIRE(rval == sb.st_blksize);
 				buffers[i].done = true;
 				goto next;
 			}
 		}
 
 		ATF_REQUIRE_MSG(false, "unmatched AIO request");
 	}
 
 	for (i = 0; i < nitems(buffers); i++)
 		ATF_REQUIRE_MSG(buffers[i].done,
 		    "AIO request %u did not complete", i);
 
 	close(fd);
 }
 
+ATF_TC_WITHOUT_HEAD(aio_fsync_sync_test);
+ATF_TC_BODY(aio_fsync_sync_test, tc)
+{
+	aio_fsync_test(O_SYNC);
+}
+
+ATF_TC_WITHOUT_HEAD(aio_fsync_dsync_test);
+ATF_TC_BODY(aio_fsync_dsync_test, tc)
+{
+	aio_fsync_test(O_DSYNC);
+}
+
 /*
  * We shouldn't be able to DoS the system by setting iov_len to an insane
  * value
  */
 ATF_TC_WITHOUT_HEAD(aio_writev_dos_iov_len);
 ATF_TC_BODY(aio_writev_dos_iov_len, tc)
 {
 	struct aiocb aio;
 	const struct aiocb *const iocbs[] = {&aio};
 	const char *wbuf = "Hello, world!";
 	struct iovec iov[1];
 	ssize_t len, r;
 	int fd;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	fd = open("testfile", O_RDWR | O_CREAT, 0600);
 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
 
 	len = strlen(wbuf);
 	iov[0].iov_base = __DECONST(void*, wbuf);
 	iov[0].iov_len = 1 << 30;
 	bzero(&aio, sizeof(aio));
 	aio.aio_fildes = fd;
 	aio.aio_offset = 0;
 	aio.aio_iov = iov;
 	aio.aio_iovcnt = 1;
 
 	r = aio_writev(&aio);
 	ATF_CHECK_EQ_MSG(0, r, "aio_writev returned %zd", r);
 	ATF_REQUIRE_EQ(0, aio_suspend(iocbs, 1, NULL));
 	r = aio_return(&aio);
 	ATF_CHECK_EQ_MSG(-1, r, "aio_return returned %zd", r);
 	ATF_CHECK_MSG(errno == EFAULT || errno == EINVAL,
 	    "aio_writev: %s", strerror(errno));
 
 	close(fd);
 }
 
 /*
  * We shouldn't be able to DoS the system by setting aio_iovcnt to an insane
  * value
  */
 ATF_TC_WITHOUT_HEAD(aio_writev_dos_iovcnt);
 ATF_TC_BODY(aio_writev_dos_iovcnt, tc)
 {
 	struct aiocb aio;
 	const char *wbuf = "Hello, world!";
 	struct iovec iov[1];
 	ssize_t len;
 	int fd;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	fd = open("testfile", O_RDWR | O_CREAT, 0600);
 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
 
 	len = strlen(wbuf);
 	iov[0].iov_base = __DECONST(void*, wbuf);
 	iov[0].iov_len = len;
 	bzero(&aio, sizeof(aio));
 	aio.aio_fildes = fd;
 	aio.aio_offset = 0;
 	aio.aio_iov = iov;
 	aio.aio_iovcnt = 1 << 30;
 
 	ATF_REQUIRE_EQ(-1, aio_writev(&aio));
 	ATF_CHECK_EQ(EINVAL, errno);
 
 	close(fd);
 }
 
 ATF_TC_WITH_CLEANUP(aio_writev_efault);
 ATF_TC_HEAD(aio_writev_efault, tc)
 {
 	atf_tc_set_md_var(tc, "descr",
 	    "Vectored AIO should gracefully handle invalid addresses");
 	atf_tc_set_md_var(tc, "require.user", "root");
 }
 ATF_TC_BODY(aio_writev_efault, tc)
 {
 	struct aiocb aio;
 	ssize_t buflen;
 	char *buffer;
 	struct iovec iov[2];
 	long seed;
 	int fd;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	fd = aio_md_setup();
 
 	seed = random();
 	buflen = 4096;
 	buffer = malloc(buflen);
 	aio_fill_buffer(buffer, buflen, seed);
 	iov[0].iov_base = buffer;
 	iov[0].iov_len = buflen;
 	iov[1].iov_base = (void*)-1;	/* Invalid! */
 	iov[1].iov_len = buflen;
 	bzero(&aio, sizeof(aio));
 	aio.aio_fildes = fd;
 	aio.aio_offset = 0;
 	aio.aio_iov = iov;
 	aio.aio_iovcnt = nitems(iov);
 
 	ATF_REQUIRE_EQ(-1, aio_writev(&aio));
 	ATF_CHECK_EQ(EFAULT, errno);
 
 	close(fd);
 }
 ATF_TC_CLEANUP(aio_writev_efault, tc)
 {
 	aio_md_cleanup();
 }
 
 ATF_TC_WITHOUT_HEAD(aio_writev_empty_file_poll);
 ATF_TC_BODY(aio_writev_empty_file_poll, tc)
 {
 	struct aiocb aio;
 	int fd;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	fd = open("testfile", O_RDWR | O_CREAT, 0600);
 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
 
 	bzero(&aio, sizeof(aio));
 	aio.aio_fildes = fd;
 	aio.aio_offset = 0;
 	aio.aio_iovcnt = 0;
 
 	ATF_REQUIRE_EQ(0, aio_writev(&aio));
 	ATF_REQUIRE_EQ(0, suspend(&aio));
 
 	close(fd);
 }
 
 ATF_TC_WITHOUT_HEAD(aio_writev_empty_file_signal);
 ATF_TC_BODY(aio_writev_empty_file_signal, tc)
 {
 	struct aiocb aio;
 	int fd;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	fd = open("testfile", O_RDWR | O_CREAT, 0600);
 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
 
 	bzero(&aio, sizeof(aio));
 	aio.aio_fildes = fd;
 	aio.aio_offset = 0;
 	aio.aio_iovcnt = 0;
 	aio.aio_sigevent = *setup_signal();
 
 	ATF_REQUIRE_EQ(0, aio_writev(&aio));
 	ATF_REQUIRE_EQ(0, poll_signaled(&aio));
 
 	close(fd);
 }
 
 // aio_writev and aio_readv should still work even if the iovcnt is greater
 // than the number of buffered AIO operations permitted per process.
 ATF_TC_WITH_CLEANUP(vectored_big_iovcnt);
 ATF_TC_HEAD(vectored_big_iovcnt, tc)
 {
 	atf_tc_set_md_var(tc, "descr",
 	    "Vectored AIO should still work even if the iovcnt is greater than "
 	    "the number of buffered AIO operations permitted by the process");
 	atf_tc_set_md_var(tc, "require.user", "root");
 }
 ATF_TC_BODY(vectored_big_iovcnt, tc)
 {
 	struct aiocb aio;
 	struct iovec *iov;
 	ssize_t len, buflen;
 	char *buffer;
 	const char *oid = "vfs.aio.max_buf_aio";
 	long seed;
 	int max_buf_aio;
 	int fd, i;
 	ssize_t sysctl_len = sizeof(max_buf_aio);
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	if (sysctlbyname(oid, &max_buf_aio, &sysctl_len, NULL, 0) == -1)
 		atf_libc_error(errno, "Failed to read %s", oid);
 
 	seed = random();
 	buflen = 512 * (max_buf_aio + 1);
 	buffer = malloc(buflen);
 	aio_fill_buffer(buffer, buflen, seed);
 	iov = calloc(max_buf_aio + 1, sizeof(struct iovec));
 
 	fd = aio_md_setup();
 
 	bzero(&aio, sizeof(aio));
 	aio.aio_fildes = fd;
 	aio.aio_offset = 0;
 	for (i = 0; i < max_buf_aio + 1; i++) {
 		iov[i].iov_base = &buffer[i * 512];
 		iov[i].iov_len = 512;
 	}
 	aio.aio_iov = iov;
 	aio.aio_iovcnt = max_buf_aio + 1;
 
 	if (aio_writev(&aio) < 0)
 		atf_tc_fail("aio_writev failed: %s", strerror(errno));
 
 	len = poll(&aio);
 	if (len < 0)
 		atf_tc_fail("aio failed: %s", strerror(errno));
 
 	if (len != buflen)
 		atf_tc_fail("aio short write (%jd)", (intmax_t)len);
 
 	bzero(&aio, sizeof(aio));
 	aio.aio_fildes = fd;
 	aio.aio_offset = 0;
 	aio.aio_iov = iov;
 	aio.aio_iovcnt = max_buf_aio + 1;
 
 	if (aio_readv(&aio) < 0)
 		atf_tc_fail("aio_readv failed: %s", strerror(errno));
 
 	len = poll(&aio);
 	if (len < 0)
 		atf_tc_fail("aio failed: %s", strerror(errno));
 
 	if (len != buflen)
 		atf_tc_fail("aio short read (%jd)", (intmax_t)len);
 
 	if (aio_test_buffer(buffer, buflen, seed) == 0)
 		atf_tc_fail("buffer mismatched");
 
 	close(fd);
 }
 ATF_TC_CLEANUP(vectored_big_iovcnt, tc)
 {
 	aio_md_cleanup();
 }
 
 ATF_TC_WITHOUT_HEAD(vectored_file_poll);
 ATF_TC_BODY(vectored_file_poll, tc)
 {
 	aio_file_test(poll, NULL, true);
 }
 
 ATF_TC_WITH_CLEANUP(vectored_md_poll);
 ATF_TC_HEAD(vectored_md_poll, tc)
 {
 	atf_tc_set_md_var(tc, "require.user", "root");
 }
 ATF_TC_BODY(vectored_md_poll, tc)
 {
 	aio_md_test(poll, NULL, true);
 }
 ATF_TC_CLEANUP(vectored_md_poll, tc)
 {
 	aio_md_cleanup();
 }
 
 ATF_TC_WITHOUT_HEAD(vectored_socket_poll);
 ATF_TC_BODY(vectored_socket_poll, tc)
 {
 	aio_unix_socketpair_test(poll, NULL, true);
 }
 
 // aio_writev and aio_readv should still work even if the iov contains elements
 // that aren't a multiple of the device's sector size, and even if the total
-// amount if I/O _is_ a multiple of the device's sector size.
+// amount of I/O _is_ a multiple of the device's sector size.
 ATF_TC_WITH_CLEANUP(vectored_unaligned);
 ATF_TC_HEAD(vectored_unaligned, tc)
 {
 	atf_tc_set_md_var(tc, "descr",
 	    "Vectored AIO should still work even if the iov contains elements "
 	    "that aren't a multiple of the sector size.");
 	atf_tc_set_md_var(tc, "require.user", "root");
 }
 ATF_TC_BODY(vectored_unaligned, tc)
 {
 	struct aio_context ac;
 	struct aiocb aio;
 	struct iovec iov[3];
 	ssize_t len, total_len;
 	int fd;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
 	ATF_REQUIRE_UNSAFE_AIO();
 
 	/* 
 	 * Use a zvol with volmode=dev, so it will allow .d_write with
 	 * unaligned uio.  geom devices use physio, which doesn't allow that.
 	 */
 	fd = aio_zvol_setup();
 	aio_context_init(&ac, fd, fd, FILE_LEN);
 
 	/* Break the buffer into 3 parts:
 	 * * A 4kB part, aligned to 4kB
 	 * * Two other parts that add up to 4kB:
 	 *   - 256B
 	 *   - 4kB - 256B
 	 */
 	iov[0].iov_base = ac.ac_buffer;
 	iov[0].iov_len = 4096;
 	iov[1].iov_base = (void*)((uintptr_t)iov[0].iov_base + iov[0].iov_len);
 	iov[1].iov_len = 256;
 	iov[2].iov_base = (void*)((uintptr_t)iov[1].iov_base + iov[1].iov_len);
 	iov[2].iov_len = 4096 - iov[1].iov_len;
 	total_len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
 	bzero(&aio, sizeof(aio));
 	aio.aio_fildes = ac.ac_write_fd;
 	aio.aio_offset = 0;
 	aio.aio_iov = iov;
 	aio.aio_iovcnt = 3;
 
 	if (aio_writev(&aio) < 0)
 		atf_tc_fail("aio_writev failed: %s", strerror(errno));
 
 	len = poll(&aio);
 	if (len < 0)
 		atf_tc_fail("aio failed: %s", strerror(errno));
 
 	if (len != total_len)
 		atf_tc_fail("aio short write (%jd)", (intmax_t)len);
 
 	bzero(&aio, sizeof(aio));
 	aio.aio_fildes = ac.ac_read_fd;
 	aio.aio_offset = 0;
 	aio.aio_iov = iov;
 	aio.aio_iovcnt = 3;
 
 	if (aio_readv(&aio) < 0)
 		atf_tc_fail("aio_readv failed: %s", strerror(errno));
 	len = poll(&aio);
 
 	ATF_REQUIRE_MSG(aio_test_buffer(ac.ac_buffer, total_len,
 	    ac.ac_seed) != 0, "aio_test_buffer: internal error");
 
 	close(fd);
 }
 ATF_TC_CLEANUP(vectored_unaligned, tc)
 {
 	aio_zvol_cleanup();
 }
 
 static void
 aio_zvol_test(completion comp, struct sigevent *sev, bool vectored)
 {
 	struct aio_context ac;
 	int fd;
 
 	fd = aio_zvol_setup();
 	aio_context_init(&ac, fd, fd, MD_LEN);
 	if (vectored) {
 		aio_writev_test(&ac, comp, sev);
 		aio_readv_test(&ac, comp, sev);
 	} else {
 		aio_write_test(&ac, comp, sev);
 		aio_read_test(&ac, comp, sev);
 	}
 
 	close(fd);
 }
 
 /*
  * Note that unlike md, the zvol is not a geom device, does not allow unmapped
  * buffers, and does not use physio.
  */
 ATF_TC_WITH_CLEANUP(vectored_zvol_poll);
 ATF_TC_HEAD(vectored_zvol_poll, tc)
 {
 	atf_tc_set_md_var(tc, "require.user", "root");
 }
 ATF_TC_BODY(vectored_zvol_poll, tc)
 {
 	aio_zvol_test(poll, NULL, true);
 }
 ATF_TC_CLEANUP(vectored_zvol_poll, tc)
 {
 	aio_zvol_cleanup();
 }
 
 ATF_TP_ADD_TCS(tp)
 {
 
 	ATF_TP_ADD_TC(tp, file_poll);
 	ATF_TP_ADD_TC(tp, file_signal);
 	ATF_TP_ADD_TC(tp, file_suspend);
 	ATF_TP_ADD_TC(tp, file_thread);
 	ATF_TP_ADD_TC(tp, file_waitcomplete);
 	ATF_TP_ADD_TC(tp, fifo_poll);
 	ATF_TP_ADD_TC(tp, fifo_signal);
 	ATF_TP_ADD_TC(tp, fifo_suspend);
 	ATF_TP_ADD_TC(tp, fifo_thread);
 	ATF_TP_ADD_TC(tp, fifo_waitcomplete);
 	ATF_TP_ADD_TC(tp, socket_poll);
 	ATF_TP_ADD_TC(tp, socket_signal);
 	ATF_TP_ADD_TC(tp, socket_suspend);
 	ATF_TP_ADD_TC(tp, socket_thread);
 	ATF_TP_ADD_TC(tp, socket_waitcomplete);
 	ATF_TP_ADD_TC(tp, pty_poll);
 	ATF_TP_ADD_TC(tp, pty_signal);
 	ATF_TP_ADD_TC(tp, pty_suspend);
 	ATF_TP_ADD_TC(tp, pty_thread);
 	ATF_TP_ADD_TC(tp, pty_waitcomplete);
 	ATF_TP_ADD_TC(tp, pipe_poll);
 	ATF_TP_ADD_TC(tp, pipe_signal);
 	ATF_TP_ADD_TC(tp, pipe_suspend);
 	ATF_TP_ADD_TC(tp, pipe_thread);
 	ATF_TP_ADD_TC(tp, pipe_waitcomplete);
 	ATF_TP_ADD_TC(tp, md_poll);
 	ATF_TP_ADD_TC(tp, md_signal);
 	ATF_TP_ADD_TC(tp, md_suspend);
 	ATF_TP_ADD_TC(tp, md_thread);
 	ATF_TP_ADD_TC(tp, md_waitcomplete);
 	ATF_TP_ADD_TC(tp, aio_fsync_errors);
-	ATF_TP_ADD_TC(tp, aio_fsync_test);
+	ATF_TP_ADD_TC(tp, aio_fsync_sync_test);
+	ATF_TP_ADD_TC(tp, aio_fsync_dsync_test);
 	ATF_TP_ADD_TC(tp, aio_large_read_test);
 	ATF_TP_ADD_TC(tp, aio_socket_two_reads);
 	ATF_TP_ADD_TC(tp, aio_socket_blocking_short_write);
 	ATF_TP_ADD_TC(tp, aio_socket_blocking_short_write_vectored);
 	ATF_TP_ADD_TC(tp, aio_socket_short_write_cancel);
 	ATF_TP_ADD_TC(tp, aio_writev_dos_iov_len);
 	ATF_TP_ADD_TC(tp, aio_writev_dos_iovcnt);
 	ATF_TP_ADD_TC(tp, aio_writev_efault);
 	ATF_TP_ADD_TC(tp, aio_writev_empty_file_poll);
 	ATF_TP_ADD_TC(tp, aio_writev_empty_file_signal);
 	ATF_TP_ADD_TC(tp, vectored_big_iovcnt);
 	ATF_TP_ADD_TC(tp, vectored_file_poll);
 	ATF_TP_ADD_TC(tp, vectored_md_poll);
 	ATF_TP_ADD_TC(tp, vectored_zvol_poll);
 	ATF_TP_ADD_TC(tp, vectored_unaligned);
 	ATF_TP_ADD_TC(tp, vectored_socket_poll);
 
 	return (atf_no_error());
 }