Index: lib/libc/sys/aio_fsync.2
===================================================================
--- lib/libc/sys/aio_fsync.2
+++ lib/libc/sys/aio_fsync.2
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 19, 2016
+.Dd June 2, 2020
 .Dt AIO_FSYNC 2
 .Os
 .Sh NAME
@@ -49,11 +49,15 @@
 .Pp
 The
 .Fa op
-argument can only be set to
+argument can be set to
 .Dv O_SYNC
 to cause all currently queued I/O operations to be completed as if by a
 call to
-.Xr fsync 2 .
+.Xr fsync 2 ,
+or
+.Dv O_DSYNC
+for
+.Xr fdatasync 2 .
 .Pp
 If _POSIX_PRIORITIZED_IO is defined, and the descriptor supports it,
 then the enqueued operation is submitted at a priority equal to that
@@ -176,3 +180,7 @@
 .Fn aio_fsync
 system call first appeared in
 .Fx 7.0 .
+The
+.Dv O_DSYNC
+option appeared in
+.Fx 13.0 .
Index: sys/kern/vfs_aio.c
===================================================================
--- sys/kern/vfs_aio.c
+++ sys/kern/vfs_aio.c
@@ -715,10 +715,10 @@
 
 /*
  * Move all data to a permanent storage device. This code
- * simulates the fsync syscall.
+ * simulates the fsync and fdatasync syscalls.
  */
 static int
-aio_fsync_vnode(struct thread *td, struct vnode *vp)
+aio_fsync_vnode(struct thread *td, struct vnode *vp, bool fullsync)
 {
 	struct mount *mp;
 	int error;
@@ -731,7 +731,7 @@
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
 		VM_OBJECT_WUNLOCK(vp->v_object);
 	}
-	error = VOP_FSYNC(vp, MNT_WAIT, td);
+	error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
 
 	VOP_UNLOCK(vp);
 	vn_finished_write(mp);
@@ -841,13 +841,16 @@
 	struct ucred *td_savedcred = td->td_ucred;
 	struct file *fp = job->fd_file;
 	int error = 0;
+	bool fullsync;
 
-	KASSERT(job->uaiocb.aio_lio_opcode == LIO_SYNC,
+	KASSERT(job->uaiocb.aio_lio_opcode == LIO_SYNC ||
+	    job->uaiocb.aio_lio_opcode == LIO_DSYNC,
 	    ("%s: opcode %d", __func__, job->uaiocb.aio_lio_opcode));
+	fullsync = job->uaiocb.aio_lio_opcode == LIO_SYNC;
 
 	td->td_ucred = job->cred;
 	if (fp->f_vnode != NULL)
-		error = aio_fsync_vnode(td, fp->f_vnode);
+		error = aio_fsync_vnode(td, fp->f_vnode, fullsync);
 	td->td_ucred = td_savedcred;
 	if (error)
 		aio_complete(job, -1, error);
@@ -1533,6 +1536,7 @@
 		error = fget_read(td, fd, &cap_pread_rights, &fp);
 		break;
 	case LIO_SYNC:
+	case LIO_DSYNC:
 		error = fget(td, fd, &cap_fsync_rights, &fp);
 		break;
 	case LIO_MLOCK:
@@ -1550,7 +1554,7 @@
 		return (error);
 	}
 
-	if (opcode == LIO_SYNC && fp->f_vnode == NULL) {
+	if ((opcode == LIO_SYNC || opcode == LIO_DSYNC) && fp->f_vnode == NULL) {
 		error = EINVAL;
 		goto aqueue_fail;
 	}
@@ -1722,10 +1726,12 @@
 		error = 0;
 		break;
 	case LIO_SYNC:
+	case LIO_DSYNC:
 		AIO_LOCK(ki);
 		TAILQ_FOREACH(job2, &ki->kaio_jobqueue, plist) {
 			if (job2->fd_file == job->fd_file &&
 			    job2->uaiocb.aio_lio_opcode != LIO_SYNC &&
+			    job2->uaiocb.aio_lio_opcode != LIO_DSYNC &&
 			    job2->seqno < job->seqno) {
 				job2->jobflags |= KAIOCB_CHECKSYNC;
 				job->pending++;
@@ -2467,10 +2473,12 @@
 kern_aio_fsync(struct thread *td, int op, struct aiocb *ujob,
     struct aiocb_ops *ops)
 {
+	int listop;
 
-	if (op != O_SYNC) /* XXX lack of O_DSYNC */
+	if (op != O_SYNC && op != O_DSYNC)
 		return (EINVAL);
-	return (aio_aqueue(td, ujob, NULL, LIO_SYNC, ops));
+	listop = op == O_SYNC ? LIO_SYNC : LIO_DSYNC;
+	return (aio_aqueue(td, ujob, NULL, listop, ops));
 }
 
 int
Index: sys/sys/aio.h
===================================================================
--- sys/sys/aio.h
+++ sys/sys/aio.h
@@ -45,6 +45,7 @@
 #ifdef _KERNEL
 #define	LIO_SYNC	0x3
 #define	LIO_MLOCK	0x4
+#define	LIO_DSYNC	0x5
 #endif
 
 /*
Index: tests/sys/aio/aio_test.c
===================================================================
--- tests/sys/aio/aio_test.c
+++ tests/sys/aio/aio_test.c
@@ -1055,7 +1055,7 @@
 	ATF_REQUIRE_MSG(fd != -1, "open failed: %s", strerror(errno));
 	unlink(FILE_PATHNAME);
 
-	/* aio_fsync should return EINVAL unless op is O_SYNC */
+	/* aio_fsync should return EINVAL unless op is O_SYNC or O_DSYNC */
 	memset(&iocb, 0, sizeof(iocb));
 	iocb.aio_fildes = fd;
 	ATF_CHECK_EQ(-1, aio_fsync(666, &iocb));
@@ -1075,21 +1075,88 @@
 	ATF_CHECK_EQ(EINVAL, errno);
 }
 
+struct aio_buffer {
+	struct aiocb ab_iocb;
+	bool ab_done;
+	char *ab_buffer;
+};
+
+static void
+queue_random_writes(struct aio_buffer *buffers, size_t nbuffers,
+    size_t blksize, int fd)
+{
+	size_t i;
+
+	for (i = 0; i < nbuffers; i++) {
+		buffers[i].ab_done = false;
+		memset(&buffers[i].ab_iocb, 0, sizeof(buffers[i].ab_iocb));
+		buffers[i].ab_buffer = malloc(blksize);
+		aio_fill_buffer(buffers[i].ab_buffer, blksize, random());
+		buffers[i].ab_iocb.aio_fildes = fd;
+		buffers[i].ab_iocb.aio_buf = buffers[i].ab_buffer;
+		buffers[i].ab_iocb.aio_nbytes = blksize;
+		buffers[i].ab_iocb.aio_offset = blksize * i;
+		ATF_REQUIRE(aio_write(&buffers[i].ab_iocb) == 0);
+	}
+}
+
+static void
+check_fsync_completion(struct aio_buffer *buffers, size_t nbuffers,
+    size_t blksize, struct aiocb *synccb)
+{
+	struct aiocb *iocbp;
+	ssize_t rval;
+	size_t i;
+
+	for (;;) {
+	next:
+		rval = aio_waitcomplete(&iocbp, NULL);
+		ATF_REQUIRE(iocbp != NULL);
+
+		/* If it was the sync, then we are done. */
+		if (iocbp == synccb) {
+			ATF_REQUIRE(rval == 0);
+			break;
+		}
+
+		/* A write: find it and mark it completed. */
+		for (i = 0; i < nbuffers; i++) {
+			if (iocbp == &buffers[i].ab_iocb) {
+				ATF_REQUIRE(buffers[i].ab_done == false);
+				ATF_REQUIRE(rval == (ssize_t)blksize);
+				buffers[i].ab_done = true;
+				goto next;
+			}
+		}
+
+		ATF_REQUIRE_MSG(false, "unmatched AIO request");
+	}
+
+	/* Writes should have completed before the sync did. */
+	for (i = 0; i < nbuffers; i++) {
+		ATF_REQUIRE_MSG(buffers[i].ab_done,
+		    "AIO request %zu did not complete", i);
+		free(buffers[i].ab_buffer);
+	}
+}
+
+static void
+queue_fsync(int op, int fd, struct aiocb *synccb)
+{
+	memset(synccb, 0, sizeof(*synccb));
+	synccb->aio_fildes = fd;
+	ATF_REQUIRE(aio_fsync(op, synccb) == 0);
+}
+
 /*
  * This test just performs a basic test of aio_fsync().
  */
 ATF_TC_WITHOUT_HEAD(aio_fsync_test);
 ATF_TC_BODY(aio_fsync_test, tc)
 {
-	struct aiocb synccb, *iocbp;
-	struct {
-		struct aiocb iocb;
-		bool done;
-		char *buffer;
-	} buffers[16];
+	struct aiocb synccb;
+	struct aio_buffer buffers[16];
 	struct stat sb;
-	ssize_t rval;
-	unsigned i;
 	int fd;
 
 	ATF_REQUIRE_KERNEL_MODULE("aio");
@@ -1103,54 +1170,21 @@
 	ATF_REQUIRE(sb.st_blksize != 0);
 	ATF_REQUIRE(ftruncate(fd, sb.st_blksize * nitems(buffers)) == 0);
 
+	srandomdev();
+
 	/*
 	 * Queue several asynchronous write requests. Hopefully this
 	 * forces the aio_fsync() request to be deferred. There is no
 	 * reliable way to guarantee that however.
 	 */
-	srandomdev();
-	for (i = 0; i < nitems(buffers); i++) {
-		buffers[i].done = false;
-		memset(&buffers[i].iocb, 0, sizeof(buffers[i].iocb));
-		buffers[i].buffer = malloc(sb.st_blksize);
-		aio_fill_buffer(buffers[i].buffer, sb.st_blksize, random());
-		buffers[i].iocb.aio_fildes = fd;
-		buffers[i].iocb.aio_buf = buffers[i].buffer;
-		buffers[i].iocb.aio_nbytes = sb.st_blksize;
-		buffers[i].iocb.aio_offset = sb.st_blksize * i;
-		ATF_REQUIRE(aio_write(&buffers[i].iocb) == 0);
-	}
-
-	/* Queue the aio_fsync request. */
-	memset(&synccb, 0, sizeof(synccb));
-	synccb.aio_fildes = fd;
-	ATF_REQUIRE(aio_fsync(O_SYNC, &synccb) == 0);
-
-	/* Wait for requests to complete. */
-	for (;;) {
-	next:
-		rval = aio_waitcomplete(&iocbp, NULL);
-		ATF_REQUIRE(iocbp != NULL);
-		if (iocbp == &synccb) {
-			ATF_REQUIRE(rval == 0);
-			break;
-		}
-
-		for (i = 0; i < nitems(buffers); i++) {
-			if (iocbp == &buffers[i].iocb) {
-				ATF_REQUIRE(buffers[i].done == false);
-				ATF_REQUIRE(rval == sb.st_blksize);
-				buffers[i].done = true;
-				goto next;
-			}
-		}
-
-		ATF_REQUIRE_MSG(false, "unmatched AIO request");
-	}
-
-	for (i = 0; i < nitems(buffers); i++)
-		ATF_REQUIRE_MSG(buffers[i].done,
-		    "AIO request %u did not complete", i);
+	queue_random_writes(buffers, nitems(buffers), sb.st_blksize, fd);
+	queue_fsync(O_SYNC, fd, &synccb);
+	check_fsync_completion(buffers, nitems(buffers), sb.st_blksize, &synccb);
+
+	/* Same again, but this time with O_DSYNC. */
+	queue_random_writes(buffers, nitems(buffers), sb.st_blksize, fd);
+	queue_fsync(O_DSYNC, fd, &synccb);
+	check_fsync_completion(buffers, nitems(buffers), sb.st_blksize, &synccb);
 
 	close(fd);
 }
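For reference, a minimal userland sketch of the interface added above. It is not part of the change: the file name "scratch", the buffer size, and the error handling are arbitrary illustrative choices, and it assumes a kernel built with this patch and with the aio facility available (e.g. the aio kernel module loaded, as the test requires).

/*
 * Sketch: queue one asynchronous write, then an aio_fsync(O_DSYNC, ...)
 * against the same descriptor, and wait for both to complete.
 * The file name and sizes here are made up for illustration only.
 */
#include <aio.h>
#include <err.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct aiocb wcb, scb, *done;
	static char buf[4096];
	int fd;

	fd = open("scratch", O_RDWR | O_CREAT | O_TRUNC, 0644);
	if (fd == -1)
		err(1, "open");

	/* Queue one asynchronous write. */
	memset(buf, 'x', sizeof(buf));
	memset(&wcb, 0, sizeof(wcb));
	wcb.aio_fildes = fd;
	wcb.aio_buf = buf;
	wcb.aio_nbytes = sizeof(buf);
	wcb.aio_offset = 0;
	if (aio_write(&wcb) != 0)
		err(1, "aio_write");

	/*
	 * Queue the sync.  O_DSYNC asks for fdatasync(2) semantics;
	 * passing O_SYNC instead would request a full fsync(2).
	 */
	memset(&scb, 0, sizeof(scb));
	scb.aio_fildes = fd;
	if (aio_fsync(O_DSYNC, &scb) != 0)
		err(1, "aio_fsync");

	/* Drain completions until the sync itself is returned. */
	do {
		if (aio_waitcomplete(&done, NULL) == -1)
			err(1, "aio_waitcomplete");
	} while (done != &scb);

	close(fd);
	return (0);
}

As in the test above, the sync request is expected to complete only after the previously queued write on the same descriptor, the difference from O_SYNC being the fdatasync(2) rather than fsync(2) behavior in the kernel.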