Index: sys/kern/kern_sendfile.c
===================================================================
--- sys/kern/kern_sendfile.c
+++ sys/kern/kern_sendfile.c
@@ -34,18 +34,18 @@
 #include
 #include
+#include
 #include
 #include
 #include
 #include
-#include
 #include
+#include
 #include
 #include
-#include
+#include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -103,9 +103,8 @@
  * Structure used to track requests with SF_SYNC flag.
  */
 struct sendfile_sync {
-	struct mtx	mtx;
-	struct cv	cv;
-	unsigned	count;
+	u_int		refcount;	/* structure references */
+	blockcount_t	count;		/* outstanding mbufs */
 };
 
 counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
@@ -134,9 +133,27 @@
     sfstat_sysctl, "I",
     "sendfile statistics");
 
+static void
+sendfile_sync_init(struct mbuf *m, struct sendfile_sync *sfs)
+{
+	m->m_ext.ext_flags |= EXT_FLAG_SYNC;
+	if (m->m_ext.ext_type == EXT_PGS)
+		m->m_ext.ext_arg1 = sfs;
+	else
+		m->m_ext.ext_arg2 = sfs;
+}
+
+static void
+sendfile_sync_release(struct sendfile_sync *sfs)
+{
+	if (refcount_release(&sfs->refcount))
+		free(sfs, M_SENDFILE);
+}
+
 static void
 sendfile_free_mext(struct mbuf *m)
 {
+	struct sendfile_sync *sfs;
 	struct sf_buf *sf;
 	vm_page_t pg;
 	int flags;
@@ -152,13 +169,9 @@
 	vm_page_release(pg, flags);
 
 	if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
-		struct sendfile_sync *sfs = m->m_ext.ext_arg2;
-
-		mtx_lock(&sfs->mtx);
-		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
-		if (--sfs->count == 0)
-			cv_signal(&sfs->cv);
-		mtx_unlock(&sfs->mtx);
+		sfs = m->m_ext.ext_arg2;
+		blockcount_release(&sfs->count, 1);
+		sendfile_sync_release(sfs);
 	}
 }
 
@@ -166,6 +179,7 @@
 sendfile_free_mext_pg(struct mbuf *m)
 {
 	struct mbuf_ext_pgs *ext_pgs;
+	struct sendfile_sync *sfs;
 	vm_page_t pg;
 	int flags, i;
 	bool cache_last;
@@ -185,13 +199,9 @@
 	}
 
 	if (m->m_ext.ext_flags & EXT_FLAG_SYNC) {
-		struct sendfile_sync *sfs = m->m_ext.ext_arg1;
-
-		mtx_lock(&sfs->mtx);
-		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
-		if (--sfs->count == 0)
-			cv_signal(&sfs->cv);
-		mtx_unlock(&sfs->mtx);
+		sfs = m->m_ext.ext_arg1;
+		blockcount_release(&sfs->count, 1);
+		sendfile_sync_release(sfs);
 	}
 }
 
@@ -367,9 +377,10 @@
 		ktls_enqueue(sfio->m, so, sfio->npages);
 		goto out_with_ref;
 #endif
-	} else
+	} else {
 		(void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m,
 		    sfio->npages);
+	}
 
 	SOCK_LOCK(so);
 	sorele(so);
@@ -661,6 +672,7 @@
 	struct sendfile_sync *sfs;
 	struct vattr va;
 	off_t off, sbytes, rem, obj_size;
+	u_int sfscount;
 	int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr;
 #ifdef KERN_TLS
 	int tls_enq_cnt;
@@ -695,10 +707,11 @@
 	SFSTAT_INC(sf_syscalls);
 	SFSTAT_ADD(sf_rhpages_requested, SF_READAHEAD(flags));
 
-	if (flags & SF_SYNC) {
-		sfs = malloc(sizeof(*sfs), M_SENDFILE, M_WAITOK | M_ZERO);
-		mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
-		cv_init(&sfs->cv, "sendfile");
+	if ((flags & SF_SYNC) != 0) {
+		sfs = malloc(sizeof(*sfs), M_SENDFILE, M_WAITOK);
+		refcount_init(&sfs->refcount, 1);
+		blockcount_init(&sfs->count);
+		sfscount = 0;
 	}
 
 	rem = nbytes ? omin(nbytes, obj_size - offset) : obj_size - offset;
@@ -977,18 +990,8 @@
 					    EXT_FLAG_CACHE_LAST;
 			}
 			if (sfs != NULL) {
-				m0->m_ext.ext_flags |=
-				    EXT_FLAG_SYNC;
-				if (m0->m_ext.ext_type ==
-				    EXT_PGS)
-					m0->m_ext.ext_arg1 =
-					    sfs;
-				else
-					m0->m_ext.ext_arg2 =
-					    sfs;
-				mtx_lock(&sfs->mtx);
-				sfs->count++;
-				mtx_unlock(&sfs->mtx);
+				sendfile_sync_init(m0, sfs);
+				sfscount++;
 			}
 			ext_pgs = &m0->m_ext_pgs;
 			ext_pgs_idx = 0;
@@ -1060,15 +1063,8 @@
 			    !(rem > space || rhpages > 0)))
 				m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE;
 			if (sfs != NULL) {
-				m0->m_ext.ext_flags |= EXT_FLAG_SYNC;
-				if (m0->m_ext.ext_type == EXT_PGS)
-					m0->m_ext.ext_arg1 = sfs;
-				else
-					m0->m_ext.ext_arg2 = sfs;
-				m0->m_ext.ext_arg2 = sfs;
-				mtx_lock(&sfs->mtx);
-				sfs->count++;
-				mtx_unlock(&sfs->mtx);
+				sendfile_sync_init(m0, sfs);
+				sfscount++;
 			}
 			m0->m_ext.ext_count = 1;
 			m0->m_flags |= (M_EXT | M_RDONLY);
@@ -1122,6 +1118,11 @@
 		if (tls != NULL)
 			ktls_frame(m, tls, &tls_enq_cnt, TLS_RLTYPE_APP);
 #endif
+		if (sfs != NULL) {
+			blockcount_acquire(&sfs->count, sfscount);
+			refcount_acquiren(&sfs->refcount, sfscount);
+			sfscount = 0;
+		}
 		if (nios == 0) {
 			/*
 			 * If sendfile_swapin() didn't initiate any I/Os,
@@ -1199,13 +1200,13 @@
 	m_freem(mh);
 
 	if (sfs != NULL) {
-		mtx_lock(&sfs->mtx);
-		if (sfs->count != 0)
-			cv_wait(&sfs->cv, &sfs->mtx);
-		KASSERT(sfs->count == 0, ("sendfile sync still busy"));
-		cv_destroy(&sfs->cv);
-		mtx_destroy(&sfs->mtx);
-		free(sfs, M_SENDFILE);
+		if (error == 0) {
+			error = blockcount_sleep(&sfs->count, NULL, "sfsync",
+			    PUSER | PCATCH);
+			if (error == EAGAIN)
+				error = 0;
+		}
+		sendfile_sync_release(sfs);
 	}
 
 #ifdef KERN_TLS
 	if (tls != NULL)
Index: sys/kern/kern_synch.c
===================================================================
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -400,12 +400,12 @@
 }
 
 /*
- * Wait for a wakeup.  This does not guarantee that the count is still zero on
- * return and may be subject to transient wakeups.  Callers wanting a precise
- * answer should use blockcount_wait() with an interlock.
+ * Wait for a wakeup or a signal.  This does not guarantee that the count is
+ * still zero on return.  Callers wanting a precise answer should use
+ * blockcount_wait() with an interlock.
  *
- * Return 0 if there is no work to wait for, and 1 if we slept waiting for work
- * to complete.  In the latter case the counter value must be re-read.
+ * If there is no work to wait for, return 0.  If the sleep was interrupted by a
+ * signal, return EINTR or ERESTART, and return EAGAIN otherwise.
 */
 int
 _blockcount_sleep(blockcount_t *bc, struct lock_object *lock, const char *wmesg,
@@ -415,10 +415,15 @@
 	uintptr_t lock_state;
 	u_int old;
 	int ret;
+	bool catch, drop;
 
 	KASSERT(lock != &Giant.lock_object,
 	    ("%s: cannot use Giant as the interlock", __func__));
 
+	catch = (prio & PCATCH) != 0;
+	drop = (prio & PDROP) != 0;
+	prio &= PRIMASK;
+
 	/*
 	 * Synchronize with the fence in blockcount_release().  If we end up
 	 * waiting, the sleepqueue lock acquisition will provide the required
@@ -428,7 +433,7 @@
 	 * ourselves to sleep to avoid jumping ahead.
 	 */
 	if (atomic_load_acq_int(&bc->__count) == 0) {
-		if (lock != NULL && (prio & PDROP) != 0)
+		if (lock != NULL && drop)
 			LOCK_CLASS(lock)->lc_unlock(lock);
 		return (0);
 	}
@@ -439,23 +444,27 @@
 	if (lock != NULL)
 		lock_state = LOCK_CLASS(lock)->lc_unlock(lock);
 	old = blockcount_read(bc);
+	ret = 0;
 	do {
 		if (_BLOCKCOUNT_COUNT(old) == 0) {
 			sleepq_release(wchan);
-			ret = 0;
 			goto out;
 		}
 		if (_BLOCKCOUNT_WAITERS(old))
 			break;
 	} while (!atomic_fcmpset_int(&bc->__count, &old,
 	    old | _BLOCKCOUNT_WAITERS_FLAG));
-	sleepq_add(wchan, NULL, wmesg, 0, 0);
-	sleepq_wait(wchan, prio);
-	ret = 1;
+	sleepq_add(wchan, NULL, wmesg, catch ? SLEEPQ_INTERRUPTIBLE : 0, 0);
+	if (catch)
+		ret = sleepq_wait_sig(wchan, prio);
+	else
+		sleepq_wait(wchan, prio);
+	if (ret == 0)
+		ret = EAGAIN;
 out:
 	PICKUP_GIANT();
-	if (lock != NULL && (prio & PDROP) == 0)
+	if (lock != NULL && !drop)
 		LOCK_CLASS(lock)->lc_lock(lock, lock_state);
 	return (ret);
Index: sys/sys/blockcount.h
===================================================================
--- sys/sys/blockcount.h
+++ sys/sys/blockcount.h
@@ -80,9 +80,9 @@
 _blockcount_wait(blockcount_t *bc, struct lock_object *lo, const char *wmesg,
     int prio)
 {
-	KASSERT((prio & PDROP) == 0, ("%s: invalid prio %x", __func__, prio));
+	KASSERT((prio & ~PRIMASK) == 0, ("%s: invalid prio %x", __func__, prio));
 
-	while (_blockcount_sleep(bc, lo, wmesg, prio) != 0)
+	while (_blockcount_sleep(bc, lo, wmesg, prio) == EAGAIN)
 		;
 }
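
Reviewer note, not part of the patch: the change splits struct sendfile_sync's
old mutex/condvar pair into two counters, a blockcount(9) tracking outstanding
mbufs (the drain condition) and a refcount(9) tracking references to the
structure itself (its lifetime), which is what allows sendfile to return early
on a signal while late mbuf completions still run safely. The following is a
minimal sketch of that pattern in isolation. All names in it (xfer_tracker,
xfer_alloc, xfer_start, xfer_done, xfer_drain, M_XFER, "xfrdrn") are
hypothetical; only the blockcount(9)/refcount(9) calls mirror the patched
kern_sendfile.c, and it assumes the blockcount_sleep() semantics introduced by
the kern_synch.c hunk above (PCATCH honored, EAGAIN on a normal wakeup).

/*
 * Illustrative sketch only; see the note above for assumptions.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/blockcount.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/priority.h>
#include <sys/refcount.h>

static MALLOC_DEFINE(M_XFER, "xfer", "hypothetical transfer tracker");

struct xfer_tracker {
	u_int		refcount;	/* references to this structure */
	blockcount_t	count;		/* outstanding operations */
};

static struct xfer_tracker *
xfer_alloc(void)
{
	struct xfer_tracker *xt;

	xt = malloc(sizeof(*xt), M_XFER, M_WAITOK);
	refcount_init(&xt->refcount, 1);	/* the caller's reference */
	blockcount_init(&xt->count);
	return (xt);
}

/* Account for one operation handed off for asynchronous completion. */
static void
xfer_start(struct xfer_tracker *xt)
{
	blockcount_acquire(&xt->count, 1);
	refcount_acquire(&xt->refcount);
}

/* Completion path: wake the drainer, then drop this operation's reference. */
static void
xfer_done(struct xfer_tracker *xt)
{
	blockcount_release(&xt->count, 1);
	if (refcount_release(&xt->refcount))
		free(xt, M_XFER);
}

/*
 * Sleep until the outstanding count drains to zero, then drop the caller's
 * reference.  As in the patch, a single interruptible sleep is used: EAGAIN
 * means a normal wakeup and is not an error, while EINTR or ERESTART from a
 * signal is propagated, leaving any stragglers to free the tracker via
 * xfer_done().
 */
static int
xfer_drain(struct xfer_tracker *xt)
{
	int error;

	error = blockcount_sleep(&xt->count, NULL, "xfrdrn", PUSER | PCATCH);
	if (error == EAGAIN)
		error = 0;
	if (refcount_release(&xt->refcount))
		free(xt, M_XFER);
	return (error);
}

The batching in the patch (sfscount accumulated locally, then published with
one blockcount_acquire()/refcount_acquiren() pair per loop iteration) is an
optimization over the per-operation xfer_start() shown here; the invariant is
the same, one blockcount unit and one structure reference per outstanding mbuf.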