Changeset View
Changeset View
Standalone View
Standalone View
sys/kern/kern_sendfile.c
Show All 24 Lines | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | ||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | ||||
* SUCH DAMAGE. | * SUCH DAMAGE. | ||||
*/ | */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include "opt_kern_tls.h" | |||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/capsicum.h> | #include <sys/capsicum.h> | ||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <netinet/in.h> | #include <netinet/in.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/ktls.h> | |||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/sysproto.h> | #include <sys/sysproto.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/mman.h> | #include <sys/mman.h> | ||||
#include <sys/mount.h> | #include <sys/mount.h> | ||||
#include <sys/mbuf.h> | #include <sys/mbuf.h> | ||||
#include <sys/protosw.h> | #include <sys/protosw.h> | ||||
Show All 33 Lines | |||||
* of the data. | * of the data. | ||||
*/ | */ | ||||
struct sf_io { | struct sf_io { | ||||
/* Reference count; set to 1 with refcount_init() when the sf_io is allocated and dropped with refcount_release() on I/O completion. */
volatile u_int nios; | volatile u_int nios; | ||||
/* Non-zero error recorded by the completion code; cleared to 0 at allocation. */
u_int error; | u_int error; | ||||
/* Page count handed to pru_ready(), mb_free_notready() or ktls_enqueue() once the I/O is finished. */
int npages; | int npages; | ||||
/* Socket the request is being sent on. */
struct socket *so; | struct socket *so; | ||||
/* mbuf chain acted on at completion. NOTE(review): its assignment is not visible in this excerpt. */
struct mbuf *m; | struct mbuf *m; | ||||
/*
 * TLS session copied from the sender without an extra ktls_hold().
 * NOTE(review): only assigned under KERN_TLS in the visible code, while the
 * INVARIANTS check on completion reads it unconditionally -- confirm it is
 * initialized on builds without KERN_TLS.
 */
struct ktls_session *tls; | |||||
/* Trailing page array; the allocation reserves npages entries. */
vm_page_t pa[]; | vm_page_t pa[]; | ||||
}; | }; | ||||
/* | /* | ||||
* Structure used to track requests with SF_SYNC flag. | * Structure used to track requests with SF_SYNC flag. | ||||
*/ | */ | ||||
struct sendfile_sync { | struct sendfile_sync { | ||||
struct mtx mtx; | struct mtx mtx; | ||||
▲ Show 20 Lines • Show All 161 Lines • ▼ Show 20 Lines | if (pg[i] != bogus_page) | ||||
vm_page_xunbusy(pg[i]); | vm_page_xunbusy(pg[i]); | ||||
if (error) | if (error) | ||||
sfio->error = error; | sfio->error = error; | ||||
if (!refcount_release(&sfio->nios)) | if (!refcount_release(&sfio->nios)) | ||||
return; | return; | ||||
#ifdef INVARIANTS | |||||
if ((sfio->m->m_flags & M_EXT) != 0 && | |||||
sfio->m->m_ext.ext_type == EXT_PGS) | |||||
KASSERT(sfio->tls == sfio->m->m_ext.ext_pgs->tls, | |||||
("TLS session mismatch")); | |||||
else | |||||
KASSERT(sfio->tls == NULL, | |||||
("non-ext_pgs mbuf with TLS session")); | |||||
#endif | |||||
CURVNET_SET(so->so_vnet); | CURVNET_SET(so->so_vnet); | ||||
if (sfio->error) { | if (sfio->error) { | ||||
/* | /* | ||||
* I/O operation failed. The state of data in the socket | * I/O operation failed. The state of data in the socket | ||||
* is now inconsistent, and all we can do is to tear | * is now inconsistent, and all we can do is to tear | ||||
* it down. Protocol abort method would tear down protocol | * it down. Protocol abort method would tear down protocol | ||||
* state, free all ready mbufs and detach not ready ones. | * state, free all ready mbufs and detach not ready ones. | ||||
* We will free the mbufs corresponding to this I/O manually. | * We will free the mbufs corresponding to this I/O manually. | ||||
* | * | ||||
* The socket would be marked with EIO and made available | * The socket would be marked with EIO and made available | ||||
* for read, so that application receives EIO on next | * for read, so that application receives EIO on next | ||||
* syscall and eventually closes the socket. | * syscall and eventually closes the socket. | ||||
*/ | */ | ||||
so->so_proto->pr_usrreqs->pru_abort(so); | so->so_proto->pr_usrreqs->pru_abort(so); | ||||
so->so_error = EIO; | so->so_error = EIO; | ||||
mb_free_notready(sfio->m, sfio->npages); | mb_free_notready(sfio->m, sfio->npages); | ||||
#ifdef KERN_TLS | |||||
} else if (sfio->tls != NULL && sfio->tls->sw_encrypt != NULL) { | |||||
/* | |||||
* I/O operation is complete, but we still need to | |||||
* encrypt. We cannot do this in the interrupt thread | |||||
* of the disk controller, so forward the mbufs to a | |||||
* different thread. | |||||
* | |||||
* Donate the socket reference from sfio to ktls_enqueue() rather | |||||
* than explicitly invoking soref(). | |||||
*/ | |||||
ktls_enqueue(sfio->m, so, sfio->npages); | |||||
goto out_with_ref; | |||||
#endif | |||||
} else | } else | ||||
(void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m, | (void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m, | ||||
sfio->npages); | sfio->npages); | ||||
SOCK_LOCK(so); | SOCK_LOCK(so); | ||||
sorele(so); | sorele(so); | ||||
#ifdef KERN_TLS | |||||
out_with_ref: | |||||
#endif | |||||
CURVNET_RESTORE(); | CURVNET_RESTORE(); | ||||
free(sfio, M_TEMP); | free(sfio, M_TEMP); | ||||
} | } | ||||
/* | /* | ||||
* Iterate through pages vector and request paging for non-valid pages. | * Iterate through pages vector and request paging for non-valid pages. | ||||
*/ | */ | ||||
static int | static int | ||||
▲ Show 20 Lines • Show All 225 Lines • ▼ Show 20 Lines | |||||
vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, | vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, | ||||
struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, | struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, | ||||
struct thread *td) | struct thread *td) | ||||
{ | { | ||||
struct file *sock_fp; | struct file *sock_fp; | ||||
struct vnode *vp; | struct vnode *vp; | ||||
struct vm_object *obj; | struct vm_object *obj; | ||||
struct socket *so; | struct socket *so; | ||||
#ifdef KERN_TLS | |||||
struct ktls_session *tls; | |||||
#endif | |||||
struct mbuf_ext_pgs *ext_pgs; | struct mbuf_ext_pgs *ext_pgs; | ||||
struct mbuf *m, *mh, *mhtail; | struct mbuf *m, *mh, *mhtail; | ||||
struct sf_buf *sf; | struct sf_buf *sf; | ||||
struct shmfd *shmfd; | struct shmfd *shmfd; | ||||
struct sendfile_sync *sfs; | struct sendfile_sync *sfs; | ||||
struct vattr va; | struct vattr va; | ||||
off_t off, sbytes, rem, obj_size; | off_t off, sbytes, rem, obj_size; | ||||
int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr; | int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr; | ||||
#ifdef KERN_TLS | |||||
int tls_enq_cnt; | |||||
#endif | |||||
bool use_ext_pgs; | bool use_ext_pgs; | ||||
obj = NULL; | obj = NULL; | ||||
so = NULL; | so = NULL; | ||||
m = mh = NULL; | m = mh = NULL; | ||||
sfs = NULL; | sfs = NULL; | ||||
#ifdef KERN_TLS | |||||
tls = NULL; | |||||
#endif | |||||
hdrlen = sbytes = 0; | hdrlen = sbytes = 0; | ||||
softerr = 0; | softerr = 0; | ||||
use_ext_pgs = false; | use_ext_pgs = false; | ||||
error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize); | error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize); | ||||
if (error != 0) | if (error != 0) | ||||
return (error); | return (error); | ||||
Show All 20 Lines | #endif | ||||
/* | /* | ||||
* Protect against multiple writers to the socket. | * Protect against multiple writers to the socket. | ||||
* | * | ||||
* XXXRW: Historically this has assumed non-interruptibility, so now | * XXXRW: Historically this has assumed non-interruptibility, so now | ||||
* we implement that, but possibly shouldn't. | * we implement that, but possibly shouldn't. | ||||
*/ | */ | ||||
(void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); | (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); | ||||
#ifdef KERN_TLS | |||||
tls = ktls_hold(so->so_snd.sb_tls_info); | |||||
#endif | |||||
/* | /* | ||||
* Loop through the pages of the file, starting with the requested | * Loop through the pages of the file, starting with the requested | ||||
* offset. Get a file page (do I/O if necessary), map the file page | * offset. Get a file page (do I/O if necessary), map the file page | ||||
* into an sf_buf, attach an mbuf header to the sf_buf, and queue | * into an sf_buf, attach an mbuf header to the sf_buf, and queue | ||||
* it on the socket. | * it on the socket. | ||||
* This is done in two loops. The inner loop turns as many pages | * This is done in two loops. The inner loop turns as many pages | ||||
* as it can, up to available socket buffer space, without blocking | * as it can, up to available socket buffer space, without blocking | ||||
▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines | retry_space: | ||||
* At the beginning of the first loop check if any headers | * At the beginning of the first loop check if any headers | ||||
* are specified and copy them into mbufs. Reduce space in | * are specified and copy them into mbufs. Reduce space in | ||||
* the socket buffer by the size of the header mbuf chain. | * the socket buffer by the size of the header mbuf chain. | ||||
* Clear hdr_uio here and hdrlen at the end of the first loop. | * Clear hdr_uio here and hdrlen at the end of the first loop. | ||||
*/ | */ | ||||
if (hdr_uio != NULL && hdr_uio->uio_resid > 0) { | if (hdr_uio != NULL && hdr_uio->uio_resid > 0) { | ||||
hdr_uio->uio_td = td; | hdr_uio->uio_td = td; | ||||
hdr_uio->uio_rw = UIO_WRITE; | hdr_uio->uio_rw = UIO_WRITE; | ||||
mh = m_uiotombuf(hdr_uio, M_WAITOK, space, 0, 0); | #ifdef KERN_TLS | ||||
if (tls != NULL) | |||||
mh = m_uiotombuf(hdr_uio, M_WAITOK, space, | |||||
tls->params.max_frame_len, M_NOMAP); | |||||
else | |||||
#endif | |||||
mh = m_uiotombuf(hdr_uio, M_WAITOK, | |||||
space, 0, 0); | |||||
hdrlen = m_length(mh, &mhtail); | hdrlen = m_length(mh, &mhtail); | ||||
space -= hdrlen; | space -= hdrlen; | ||||
/* | /* | ||||
* If header consumed all the socket buffer space, | * If header consumed all the socket buffer space, | ||||
* don't waste CPU cycles and jump to the end. | * don't waste CPU cycles and jump to the end. | ||||
*/ | */ | ||||
if (space == 0) { | if (space == 0) { | ||||
sfio = NULL; | sfio = NULL; | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | rhpages = min(howmany(obj_size - trunc_page(off), PAGE_SIZE) - | ||||
npages, rhpages); | npages, rhpages); | ||||
sfio = malloc(sizeof(struct sf_io) + | sfio = malloc(sizeof(struct sf_io) + | ||||
npages * sizeof(vm_page_t), M_TEMP, M_WAITOK); | npages * sizeof(vm_page_t), M_TEMP, M_WAITOK); | ||||
refcount_init(&sfio->nios, 1); | refcount_init(&sfio->nios, 1); | ||||
sfio->so = so; | sfio->so = so; | ||||
sfio->error = 0; | sfio->error = 0; | ||||
#ifdef KERN_TLS | |||||
/* | |||||
* This doesn't use ktls_hold() because sfio->m will | |||||
* also have a reference on 'tls' that will be valid | |||||
* for all of sfio's lifetime. | |||||
*/ | |||||
sfio->tls = tls; | |||||
#endif | |||||
error = sendfile_swapin(obj, sfio, &nios, off, space, npages, | error = sendfile_swapin(obj, sfio, &nios, off, space, npages, | ||||
rhpages, flags); | rhpages, flags); | ||||
if (error != 0) { | if (error != 0) { | ||||
if (vp != NULL) | if (vp != NULL) | ||||
VOP_UNLOCK(vp, 0); | VOP_UNLOCK(vp, 0); | ||||
free(sfio, M_TEMP); | free(sfio, M_TEMP); | ||||
goto done; | goto done; | ||||
} | } | ||||
/* | /* | ||||
* Loop and construct maximum sized mbuf chain to be bulk | * Loop and construct maximum sized mbuf chain to be bulk | ||||
* dumped into socket buffer. | * dumped into socket buffer. | ||||
*/ | */ | ||||
pa = sfio->pa; | pa = sfio->pa; | ||||
/* | /* | ||||
* Use unmapped mbufs if enabled for TCP. Unmapped | * Use unmapped mbufs if enabled for TCP. Unmapped | ||||
* bufs are restricted to TCP as that is what has been | * bufs are restricted to TCP as that is what has been | ||||
* tested. In particular, unmapped mbufs have not | * tested. In particular, unmapped mbufs have not | ||||
* been tested with UNIX-domain sockets. | * been tested with UNIX-domain sockets. | ||||
* | |||||
* TLS frames always require unmapped mbufs. | |||||
*/ | */ | ||||
if (mb_use_ext_pgs && | if ((mb_use_ext_pgs && | ||||
so->so_proto->pr_protocol == IPPROTO_TCP) { | so->so_proto->pr_protocol == IPPROTO_TCP) | ||||
#ifdef KERN_TLS | |||||
|| tls != NULL | |||||
#endif | |||||
) { | |||||
use_ext_pgs = true; | use_ext_pgs = true; | ||||
#ifdef KERN_TLS | |||||
if (tls != NULL) | |||||
max_pgs = num_pages(tls->params.max_frame_len); | |||||
else | |||||
#endif | |||||
max_pgs = MBUF_PEXT_MAX_PGS; | max_pgs = MBUF_PEXT_MAX_PGS; | ||||
/* Start at last index, to wrap on first use. */ | /* Start at last index, to wrap on first use. */ | ||||
ext_pgs_idx = max_pgs - 1; | ext_pgs_idx = max_pgs - 1; | ||||
} | } | ||||
for (int i = 0; i < npages; i++) { | for (int i = 0; i < npages; i++) { | ||||
struct mbuf *m0; | struct mbuf *m0; | ||||
▲ Show 20 Lines • Show All 162 Lines • ▼ Show 20 Lines | prepend_header: | ||||
} | } | ||||
/* Add the buffer chain to the socket buffer. */ | /* Add the buffer chain to the socket buffer. */ | ||||
KASSERT(m_length(m, NULL) == space + hdrlen, | KASSERT(m_length(m, NULL) == space + hdrlen, | ||||
("%s: mlen %u space %d hdrlen %d", | ("%s: mlen %u space %d hdrlen %d", | ||||
__func__, m_length(m, NULL), space, hdrlen)); | __func__, m_length(m, NULL), space, hdrlen)); | ||||
CURVNET_SET(so->so_vnet); | CURVNET_SET(so->so_vnet); | ||||
#ifdef KERN_TLS | |||||
if (tls != NULL) { | |||||
error = ktls_frame(m, tls, &tls_enq_cnt, | |||||
TLS_RLTYPE_APP); | |||||
if (error != 0) | |||||
goto done; | |||||
} | |||||
#endif | |||||
if (nios == 0) { | if (nios == 0) { | ||||
/* | /* | ||||
* If sendfile_swapin() didn't initiate any I/Os, | * If sendfile_swapin() didn't initiate any I/Os, | ||||
* which happens if all data is cached in VM, then | * which happens if all data is cached in VM, then | ||||
* we can send data right now without the | * we can send data right now without the | ||||
* PRUS_NOTREADY flag. | * PRUS_NOTREADY flag. | ||||
*/ | */ | ||||
free(sfio, M_TEMP); | free(sfio, M_TEMP); | ||||
#ifdef KERN_TLS | |||||
if (tls != NULL && tls->sw_encrypt != NULL) { | |||||
error = (*so->so_proto->pr_usrreqs->pru_send) | error = (*so->so_proto->pr_usrreqs->pru_send) | ||||
(so, PRUS_NOTREADY, m, NULL, NULL, td); | |||||
soref(so); | |||||
ktls_enqueue(m, so, tls_enq_cnt); | |||||
} else | |||||
#endif | |||||
error = (*so->so_proto->pr_usrreqs->pru_send) | |||||
(so, 0, m, NULL, NULL, td); | (so, 0, m, NULL, NULL, td); | ||||
} else { | } else { | ||||
sfio->npages = npages; | sfio->npages = npages; | ||||
soref(so); | soref(so); | ||||
error = (*so->so_proto->pr_usrreqs->pru_send) | error = (*so->so_proto->pr_usrreqs->pru_send) | ||||
(so, PRUS_NOTREADY, m, NULL, NULL, td); | (so, PRUS_NOTREADY, m, NULL, NULL, td); | ||||
sendfile_iodone(sfio, NULL, 0, 0); | sendfile_iodone(sfio, NULL, 0, 0); | ||||
} | } | ||||
CURVNET_RESTORE(); | CURVNET_RESTORE(); | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | if (sfs != NULL) { | ||||
mtx_lock(&sfs->mtx); | mtx_lock(&sfs->mtx); | ||||
if (sfs->count != 0) | if (sfs->count != 0) | ||||
cv_wait(&sfs->cv, &sfs->mtx); | cv_wait(&sfs->cv, &sfs->mtx); | ||||
KASSERT(sfs->count == 0, ("sendfile sync still busy")); | KASSERT(sfs->count == 0, ("sendfile sync still busy")); | ||||
cv_destroy(&sfs->cv); | cv_destroy(&sfs->cv); | ||||
mtx_destroy(&sfs->mtx); | mtx_destroy(&sfs->mtx); | ||||
free(sfs, M_TEMP); | free(sfs, M_TEMP); | ||||
} | } | ||||
#ifdef KERN_TLS | |||||
if (tls != NULL) | |||||
ktls_free(tls); | |||||
#endif | |||||
if (error == ERESTART) | if (error == ERESTART) | ||||
error = EINTR; | error = EINTR; | ||||
return (error); | return (error); | ||||
} | } | ||||
static int | static int | ||||
▲ Show 20 Lines • Show All 106 Lines • Show Last 20 Lines |