Index: sys/kern/kern_sendfile.c
===================================================================
--- sys/kern/kern_sendfile.c
+++ sys/kern/kern_sendfile.c
@@ -34,18 +34,18 @@
 #include
 #include
+#include
 #include
 #include
 #include
 #include
-#include
 #include
+#include
 #include
 #include
-#include
+#include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -103,9 +103,8 @@
  * Structure used to track requests with SF_SYNC flag.
  */
 struct sendfile_sync {
-	struct mtx	mtx;
-	struct cv	cv;
-	unsigned	count;
+	u_int		refcount;	/* structure references */
+	blockcount_t	count;		/* outstanding mbufs */
 };
 
 counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
@@ -134,9 +133,27 @@
     sfstat_sysctl, "I",
     "sendfile statistics");
 
+static void
+sendfile_sync_init(struct mbuf *m, struct sendfile_sync *sfs)
+{
+	m->m_ext.ext_flags |= EXT_FLAG_SYNC;
+	if (m->m_ext.ext_type == EXT_PGS)
+		m->m_ext.ext_arg1 = sfs;
+	else
+		m->m_ext.ext_arg2 = sfs;
+}
+
+static void
+sendfile_sync_release(struct sendfile_sync *sfs)
+{
+	if (refcount_release(&sfs->refcount))
+		free(sfs, M_SENDFILE);
+}
+
 static void
 sendfile_free_mext(struct mbuf *m)
 {
+	struct sendfile_sync *sfs;
 	struct sf_buf *sf;
 	vm_page_t pg;
 	int flags;
@@ -152,13 +169,9 @@
 	vm_page_release(pg, flags);
 
 	if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
-		struct sendfile_sync *sfs = m->m_ext.ext_arg2;
-
-		mtx_lock(&sfs->mtx);
-		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
-		if (--sfs->count == 0)
-			cv_signal(&sfs->cv);
-		mtx_unlock(&sfs->mtx);
+		sfs = m->m_ext.ext_arg2;
+		blockcount_release(&sfs->count, 1);
+		sendfile_sync_release(sfs);
 	}
 }
 
@@ -166,6 +179,7 @@
 sendfile_free_mext_pg(struct mbuf *m)
 {
 	struct mbuf_ext_pgs *ext_pgs;
+	struct sendfile_sync *sfs;
 	vm_page_t pg;
 	int flags, i;
 	bool cache_last;
@@ -185,13 +199,9 @@
 	}
 
 	if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
-		struct sendfile_sync *sfs = m->m_ext.ext_arg1;
-
-		mtx_lock(&sfs->mtx);
-		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
-		if (--sfs->count == 0)
-			cv_signal(&sfs->cv);
-		mtx_unlock(&sfs->mtx);
+		sfs = m->m_ext.ext_arg1;
+		blockcount_release(&sfs->count, 1);
+		sendfile_sync_release(sfs);
 	}
 }
 
@@ -367,9 +377,10 @@
 		ktls_enqueue(sfio->m, so, sfio->npages);
 		goto out_with_ref;
 #endif
-	} else
+	} else {
 		(void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m,
 		    sfio->npages);
+	}
 
 	SOCK_LOCK(so);
 	sorele(so);
@@ -661,6 +672,7 @@
 	struct sendfile_sync *sfs;
 	struct vattr va;
 	off_t off, sbytes, rem, obj_size;
+	u_int sfscount;
 	int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr;
 #ifdef KERN_TLS
 	int tls_enq_cnt;
@@ -695,10 +707,11 @@
 	SFSTAT_INC(sf_syscalls);
 	SFSTAT_ADD(sf_rhpages_requested, SF_READAHEAD(flags));
 
-	if (flags & SF_SYNC) {
-		sfs = malloc(sizeof(*sfs), M_SENDFILE, M_WAITOK | M_ZERO);
-		mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
-		cv_init(&sfs->cv, "sendfile");
+	if ((flags & SF_SYNC) != 0) {
+		sfs = malloc(sizeof(*sfs), M_SENDFILE, M_WAITOK);
+		refcount_init(&sfs->refcount, 1);
+		blockcount_init(&sfs->count);
+		sfscount = 0;
 	}
 
 	rem = nbytes ? omin(nbytes, obj_size - offset) : obj_size - offset;
@@ -977,18 +990,8 @@
 					    EXT_FLAG_CACHE_LAST;
 			}
 			if (sfs != NULL) {
-				m0->m_ext.ext_flags |=
-				    EXT_FLAG_SYNC;
-				if (m0->m_ext.ext_type ==
-				    EXT_PGS)
-					m0->m_ext.ext_arg1 =
-					    sfs;
-				else
-					m0->m_ext.ext_arg2 =
-					    sfs;
-				mtx_lock(&sfs->mtx);
-				sfs->count++;
-				mtx_unlock(&sfs->mtx);
+				sendfile_sync_init(m0, sfs);
+				sfscount++;
 			}
 			ext_pgs = &m0->m_ext_pgs;
 			ext_pgs_idx = 0;
@@ -1060,15 +1063,8 @@
 			    !(rem > space || rhpages > 0)))
 				m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE;
 			if (sfs != NULL) {
-				m0->m_ext.ext_flags |= EXT_FLAG_SYNC;
-				if (m0->m_ext.ext_type == EXT_PGS)
-					m0->m_ext.ext_arg1 = sfs;
-				else
-					m0->m_ext.ext_arg2 = sfs;
-				m0->m_ext.ext_arg2 = sfs;
-				mtx_lock(&sfs->mtx);
-				sfs->count++;
-				mtx_unlock(&sfs->mtx);
+				sendfile_sync_init(m0, sfs);
+				sfscount++;
 			}
 			m0->m_ext.ext_count = 1;
 			m0->m_flags |= (M_EXT | M_RDONLY);
@@ -1122,6 +1118,11 @@
 		if (tls != NULL)
 			ktls_frame(m, tls, &tls_enq_cnt, TLS_RLTYPE_APP);
 #endif
+		if (sfs != NULL) {
+			blockcount_acquire(&sfs->count, sfscount);
+			refcount_acquiren(&sfs->refcount, sfscount);
+			sfscount = 0;
+		}
 		if (nios == 0) {
 			/*
 			 * If sendfile_swapin() didn't initiate any I/Os,
@@ -1199,13 +1200,13 @@
 	m_freem(mh);
 
 	if (sfs != NULL) {
-		mtx_lock(&sfs->mtx);
-		if (sfs->count != 0)
-			cv_wait(&sfs->cv, &sfs->mtx);
-		KASSERT(sfs->count == 0, ("sendfile sync still busy"));
-		cv_destroy(&sfs->cv);
-		mtx_destroy(&sfs->mtx);
-		free(sfs, M_SENDFILE);
+		if (error == 0) {
+			error = blockcount_sleep(&sfs->count, NULL, "sfsync",
+			    PUSER | PCATCH);
+			if (error == EAGAIN)
+				error = 0;
+		}
+		sendfile_sync_release(sfs);
 	}
 
 #ifdef KERN_TLS
 	if (tls != NULL)
Index: sys/kern/kern_synch.c
===================================================================
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -400,12 +400,12 @@
 }
 
 /*
- * Wait for a wakeup.  This does not guarantee that the count is still zero on
- * return and may be subject to transient wakeups.  Callers wanting a precise
- * answer should use blockcount_wait() with an interlock.
+ * Wait for a wakeup or a signal.  This does not guarantee that the count is
+ * still zero on return.  Callers wanting a precise answer should use
+ * blockcount_wait() with an interlock.
  *
- * Return 0 if there is no work to wait for, and 1 if we slept waiting for work
- * to complete.  In the latter case the counter value must be re-read.
+ * If there is no work to wait for, return 0.  If the sleep was interrupted by a
+ * signal, return EINTR or ERESTART, and return EAGAIN otherwise.
 */
 int
 _blockcount_sleep(blockcount_t *bc, struct lock_object *lock, const char *wmesg,
@@ -415,10 +415,15 @@
 	uintptr_t lock_state;
 	u_int old;
 	int ret;
+	bool catch, drop;
 
 	KASSERT(lock != &Giant.lock_object,
 	    ("%s: cannot use Giant as the interlock", __func__));
 
+	catch = (prio & PCATCH) != 0;
+	drop = (prio & PDROP) != 0;
+	prio &= PRIMASK;
+
 	/*
 	 * Synchronize with the fence in blockcount_release().  If we end up
 	 * waiting, the sleepqueue lock acquisition will provide the required
@@ -428,7 +433,7 @@
 	 * ourselves to sleep to avoid jumping ahead.
 	 */
 	if (atomic_load_acq_int(&bc->__count) == 0) {
-		if (lock != NULL && (prio & PDROP) != 0)
+		if (lock != NULL && drop)
 			LOCK_CLASS(lock)->lc_unlock(lock);
 		return (0);
 	}
@@ -439,23 +444,27 @@
 	if (lock != NULL)
 		lock_state = LOCK_CLASS(lock)->lc_unlock(lock);
 	old = blockcount_read(bc);
+	ret = 0;
 	do {
 		if (_BLOCKCOUNT_COUNT(old) == 0) {
 			sleepq_release(wchan);
-			ret = 0;
 			goto out;
 		}
 		if (_BLOCKCOUNT_WAITERS(old))
 			break;
 	} while (!atomic_fcmpset_int(&bc->__count, &old,
 	    old | _BLOCKCOUNT_WAITERS_FLAG));
-	sleepq_add(wchan, NULL, wmesg, 0, 0);
-	sleepq_wait(wchan, prio);
-	ret = 1;
+	sleepq_add(wchan, NULL, wmesg, catch ? SLEEPQ_INTERRUPTIBLE : 0, 0);
+	if (catch)
+		ret = sleepq_wait_sig(wchan, prio);
+	else
+		sleepq_wait(wchan, prio);
+	if (ret == 0)
+		ret = EAGAIN;
 out:
 	PICKUP_GIANT();
-	if (lock != NULL && (prio & PDROP) == 0)
+	if (lock != NULL && !drop)
 		LOCK_CLASS(lock)->lc_lock(lock, lock_state);
 	return (ret);
Index: sys/sys/blockcount.h
===================================================================
--- sys/sys/blockcount.h
+++ sys/sys/blockcount.h
@@ -80,9 +80,9 @@
 _blockcount_wait(blockcount_t *bc, struct lock_object *lo, const char *wmesg,
     int prio)
 {
-	KASSERT((prio & PDROP) == 0, ("%s: invalid prio %x", __func__, prio));
+	KASSERT((prio & ~PRIMASK) == 0, ("%s: invalid prio %x", __func__, prio));
 
-	while (_blockcount_sleep(bc, lo, wmesg, prio) != 0)
+	while (_blockcount_sleep(bc, lo, wmesg, prio) == EAGAIN)
 		;
 }
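
Reviewer note, not part of the patch: the change splits struct sendfile_sync's
old mutex/condvar pair into two counters, a blockcount(9) tracking outstanding
mbufs (the drain condition) and a refcount(9) tracking references to the
structure itself (its lifetime), which is what allows sendfile to return early
on a signal while late mbuf completions still run safely. The following is a
minimal sketch of that pattern in isolation. All names in it (xfer_tracker,
xfer_alloc, xfer_start, xfer_done, xfer_drain, M_XFER, "xfrdrn") are
hypothetical; only the blockcount(9)/refcount(9) calls mirror the patched
kern_sendfile.c, and it assumes the blockcount_sleep() semantics introduced by
the kern_synch.c hunk above (PCATCH honored, EAGAIN on a normal wakeup).

/*
 * Illustrative sketch only; see the note above for assumptions.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/blockcount.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/priority.h>
#include <sys/refcount.h>

static MALLOC_DEFINE(M_XFER, "xfer", "hypothetical transfer tracker");

struct xfer_tracker {
	u_int		refcount;	/* references to this structure */
	blockcount_t	count;		/* outstanding operations */
};

static struct xfer_tracker *
xfer_alloc(void)
{
	struct xfer_tracker *xt;

	xt = malloc(sizeof(*xt), M_XFER, M_WAITOK);
	refcount_init(&xt->refcount, 1);	/* the caller's reference */
	blockcount_init(&xt->count);
	return (xt);
}

/* Account for one operation handed off for asynchronous completion. */
static void
xfer_start(struct xfer_tracker *xt)
{
	blockcount_acquire(&xt->count, 1);
	refcount_acquire(&xt->refcount);
}

/* Completion path: wake the drainer, then drop this operation's reference. */
static void
xfer_done(struct xfer_tracker *xt)
{
	blockcount_release(&xt->count, 1);
	if (refcount_release(&xt->refcount))
		free(xt, M_XFER);
}

/*
 * Sleep until the outstanding count drains to zero, then drop the caller's
 * reference.  As in the patch, a single interruptible sleep is used: EAGAIN
 * means a normal wakeup and is not an error, while EINTR or ERESTART from a
 * signal is propagated, leaving any stragglers to free the tracker via
 * xfer_done().
 */
static int
xfer_drain(struct xfer_tracker *xt)
{
	int error;

	error = blockcount_sleep(&xt->count, NULL, "xfrdrn", PUSER | PCATCH);
	if (error == EAGAIN)
		error = 0;
	if (refcount_release(&xt->refcount))
		free(xt, M_XFER);
	return (error);
}

The batching in the patch (sfscount accumulated locally, then published with
one blockcount_acquire()/refcount_acquiren() pair per loop iteration) is an
optimization over the per-operation xfer_start() shown here; the invariant is
the same, one blockcount unit and one structure reference per outstanding mbuf.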