Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F142298666
D48918.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
9 KB
Referenced Files
None
Subscribers
None
D48918.diff
View Options
diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c
--- a/sys/kern/kern_sendfile.c
+++ b/sys/kern/kern_sendfile.c
@@ -667,6 +667,77 @@
return (0);
}
+/*
+ * Check socket state and wait (or EAGAIN) for needed amount of space.
+ */
+int
+sendfile_wait_generic(struct socket *so, off_t need, int *space)
+{
+ int error;
+
+ MPASS(need > 0);
+ MPASS(space != NULL);
+
+ /*
+ * XXXGL: the hack with sb_lowat originates from d99b0dd2c5297. To
+ * achieve high performance sending with sendfile(2) a non-blocking
+ * socket needs a fairly high low watermark. Otherwise, the socket
+ * will be reported as writable too early, and sendfile(2) will send
+ * just a few bytes each time. It is important to understand that
+ * we are changing sb_lowat not for the current invocation of the
+ * syscall, but for the *next* syscall. So there is no way to
+ * workaround the problem, e.g. provide a special version of sbspace().
+ * Since this hack has been in the kernel for a long time, we
+ * anticipate that there is a lot of software that will suffer if we
+ * remove it. See also b21104487324.
+ */
+ error = 0;
+ SOCK_SENDBUF_LOCK(so);
+ if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
+ so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
+ if (so->so_snd.sb_lowat < PAGE_SIZE && so->so_snd.sb_hiwat >= PAGE_SIZE)
+ so->so_snd.sb_lowat = PAGE_SIZE;
+retry_space:
+ if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
+ error = EPIPE;
+ goto done;
+ } else if (so->so_error) {
+ error = so->so_error;
+ so->so_error = 0;
+ goto done;
+ }
+ if ((so->so_state & SS_ISCONNECTED) == 0) {
+ error = ENOTCONN;
+ goto done;
+ }
+
+ *space = sbspace(&so->so_snd);
+ if (*space < need && (*space <= 0 || *space < so->so_snd.sb_lowat)) {
+ if (so->so_state & SS_NBIO) {
+ error = EAGAIN;
+ goto done;
+ }
+ /*
+ * sbwait() drops the lock while sleeping. When we loop back
+ * to retry_space the state may have changed and we retest
+ * for it.
+ */
+ error = sbwait(so, SO_SND);
+ /*
+ * An error from sbwait() usually indicates that we've been
+ * interrupted by a signal. If we've sent anything then return
+ * bytes sent, otherwise return the error.
+ */
+ if (error != 0)
+ goto done;
+ goto retry_space;
+ }
+done:
+ SOCK_SENDBUF_UNLOCK(so);
+
+ return (error);
+}
+
int
vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
@@ -677,6 +748,7 @@
struct vm_object *obj;
vm_page_t pga;
struct socket *so;
+ const struct protosw *pr;
#ifdef KERN_TLS
struct ktls_session *tls;
#endif
@@ -710,6 +782,7 @@
error = sendfile_getsock(td, sockfd, &sock_fp, &so);
if (error != 0)
goto out;
+ pr = so->so_proto;
#ifdef MAC
error = mac_socket_check_send(td->td_ucred, so);
@@ -760,74 +833,8 @@
int nios, space, npages, rhpages;
mtail = NULL;
- /*
- * Check the socket state for ongoing connection,
- * no errors and space in socket buffer.
- * If space is low allow for the remainder of the
- * file to be processed if it fits the socket buffer.
- * Otherwise block in waiting for sufficient space
- * to proceed, or if the socket is nonblocking, return
- * to userland with EAGAIN while reporting how far
- * we've come.
- * We wait until the socket buffer has significant free
- * space to do bulk sends. This makes good use of file
- * system read ahead and allows packet segmentation
- * offloading hardware to take over lots of work. If
- * we were not careful here we would send off only one
- * sfbuf at a time.
- */
- SOCKBUF_LOCK(&so->so_snd);
- if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2)
- so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2;
- if (so->so_snd.sb_lowat < PAGE_SIZE &&
- so->so_snd.sb_hiwat >= PAGE_SIZE)
- so->so_snd.sb_lowat = PAGE_SIZE;
-retry_space:
- if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
- error = EPIPE;
- SOCKBUF_UNLOCK(&so->so_snd);
- goto done;
- } else if (so->so_error) {
- error = so->so_error;
- so->so_error = 0;
- SOCKBUF_UNLOCK(&so->so_snd);
- goto done;
- }
- if ((so->so_state & SS_ISCONNECTED) == 0) {
- SOCKBUF_UNLOCK(&so->so_snd);
- error = ENOTCONN;
+ if ((error = pr->pr_sendfile_wait(so, rem, &space)) != 0)
goto done;
- }
-
- space = sbspace(&so->so_snd);
- if (space < rem &&
- (space <= 0 ||
- space < so->so_snd.sb_lowat)) {
- if (so->so_state & SS_NBIO) {
- SOCKBUF_UNLOCK(&so->so_snd);
- error = EAGAIN;
- goto done;
- }
- /*
- * sbwait drops the lock while sleeping.
- * When we loop back to retry_space the
- * state may have changed and we retest
- * for it.
- */
- error = sbwait(so, SO_SND);
- /*
- * An error from sbwait usually indicates that we've
- * been interrupted by a signal. If we've sent anything
- * then return bytes sent, otherwise return the error.
- */
- if (error != 0) {
- SOCKBUF_UNLOCK(&so->so_snd);
- goto done;
- }
- goto retry_space;
- }
- SOCKBUF_UNLOCK(&so->so_snd);
-
/*
* At the beginning of the first loop check if any headers
* are specified and copy them into mbufs. Reduce space in
@@ -966,8 +973,7 @@
*
* TLS frames always require unmapped mbufs.
*/
- if ((mb_use_ext_pgs &&
- so->so_proto->pr_protocol == IPPROTO_TCP)
+ if ((mb_use_ext_pgs && pr->pr_protocol == IPPROTO_TCP)
#ifdef KERN_TLS
|| tls != NULL
#endif
@@ -1172,8 +1178,8 @@
sendfile_iodone(sfio, NULL, 0, 0);
#ifdef KERN_TLS
if (tls != NULL && tls->mode == TCP_TLS_MODE_SW) {
- error = so->so_proto->pr_send(so,
- PRUS_NOTREADY, m, NULL, NULL, td);
+ error = pr->pr_send(so, PRUS_NOTREADY, m, NULL,
+ NULL, td);
if (error != 0) {
m_freem(m);
} else {
@@ -1182,14 +1188,13 @@
}
} else
#endif
- error = so->so_proto->pr_send(so, 0, m, NULL,
- NULL, td);
+ error = pr->pr_send(so, 0, m, NULL, NULL, td);
} else {
sfio->so = so;
sfio->m = m0;
soref(so);
- error = so->so_proto->pr_send(so, PRUS_NOTREADY, m,
- NULL, NULL, td);
+ error = pr->pr_send(so, PRUS_NOTREADY, m, NULL, NULL,
+ td);
sendfile_iodone(sfio, NULL, 0, error);
}
#ifdef TCP_REQUEST_TRK
diff --git a/sys/kern/uipc_domain.c b/sys/kern/uipc_domain.c
--- a/sys/kern/uipc_domain.c
+++ b/sys/kern/uipc_domain.c
@@ -144,6 +144,12 @@
return (EOPNOTSUPP);
}
+static int
+pr_sendfile_wait_notsupp(struct socket *so, off_t need, int *space)
+{
+ return (EOPNOTSUPP);
+}
+
static int
pr_ready_notsupp(struct socket *so, struct mbuf *m, int count)
{
@@ -208,6 +214,7 @@
NOTSUPP(pr_rcvd);
NOTSUPP(pr_rcvoob);
NOTSUPP(pr_send);
+ NOTSUPP(pr_sendfile_wait);
NOTSUPP(pr_shutdown);
NOTSUPP(pr_sockaddr);
NOTSUPP(pr_sosend);
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -3373,6 +3373,7 @@
.pr_peeraddr = uipc_peeraddr,
.pr_rcvd = uipc_rcvd,
.pr_send = uipc_send,
+ .pr_sendfile_wait = sendfile_wait_generic,
.pr_ready = uipc_ready,
.pr_sense = uipc_sense,
.pr_shutdown = uipc_shutdown,
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1419,6 +1419,7 @@
.pr_rcvd = tcp_usr_rcvd,
.pr_rcvoob = tcp_usr_rcvoob,
.pr_send = tcp_usr_send,
+ .pr_sendfile_wait = sendfile_wait_generic,
.pr_ready = tcp_usr_ready,
.pr_shutdown = tcp_usr_shutdown,
.pr_sockaddr = in_getsockaddr,
@@ -1447,6 +1448,7 @@
.pr_rcvd = tcp_usr_rcvd,
.pr_rcvoob = tcp_usr_rcvoob,
.pr_send = tcp_usr_send,
+ .pr_sendfile_wait = sendfile_wait_generic,
.pr_ready = tcp_usr_ready,
.pr_shutdown = tcp_usr_shutdown,
.pr_sockaddr = in6_mapped_sockaddr,
diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h
--- a/sys/sys/protosw.h
+++ b/sys/sys/protosw.h
@@ -81,6 +81,7 @@
} pr_send_flags_t;
typedef int pr_send_t(struct socket *, int, struct mbuf *,
struct sockaddr *, struct mbuf *, struct thread *);
+typedef int pr_sendfile_wait_t(struct socket *, off_t, int *);
typedef int pr_ready_t(struct socket *, struct mbuf *, int);
typedef int pr_sense_t(struct socket *, struct stat *);
typedef int pr_shutdown_t(struct socket *, enum shutdown_how);
@@ -109,9 +110,9 @@
struct domain *pr_domain; /* domain protocol a member of */
pr_soreceive_t *pr_soreceive; /* recv(2) */
- pr_rcvd_t *pr_rcvd; /* soreceive_generic() if PR_WANTRCVD */
pr_sosend_t *pr_sosend; /* send(2) */
pr_send_t *pr_send; /* send(2) via sosend_generic() */
+ pr_sendfile_wait_t *pr_sendfile_wait; /* sendfile helper */
pr_ready_t *pr_ready; /* sendfile/ktls readyness */
pr_sopoll_t *pr_sopoll; /* poll(2) */
/* Cache line #2 */
@@ -121,7 +122,7 @@
pr_disconnect_t *pr_disconnect; /* sodisconnect() */
pr_close_t *pr_close; /* close(2) */
pr_shutdown_t *pr_shutdown; /* shutdown(2) */
- pr_abort_t *pr_abort; /* abrupt tear down: soabort() */
+ pr_rcvd_t *pr_rcvd; /* soreceive_generic() if PR_WANTRCVD */
pr_aio_queue_t *pr_aio_queue; /* aio(9) */
/* Cache line #3 */
pr_bind_t *pr_bind; /* bind(2) */
@@ -133,6 +134,7 @@
pr_control_t *pr_control; /* ioctl(2) */
pr_rcvoob_t *pr_rcvoob; /* soreceive_rcvoob() */
/* Cache line #4 */
+ pr_abort_t *pr_abort; /* abrupt tear down: soabort() */
pr_ctloutput_t *pr_ctloutput; /* control output (from above) */
pr_peeraddr_t *pr_peeraddr; /* getpeername(2) */
pr_sockaddr_t *pr_sockaddr; /* getsockname(2) */
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -555,6 +555,7 @@
int sosend_generic(struct socket *so, struct sockaddr *addr,
struct uio *uio, struct mbuf *top, struct mbuf *control,
int flags, struct thread *td);
+int sendfile_wait_generic(struct socket *so, off_t need, int *space);
int sosetfib(struct socket *so, int fibnum);
int soshutdown(struct socket *so, enum shutdown_how);
void soupcall_clear(struct socket *, sb_which);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Jan 19, 7:53 AM (9 h, 6 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27736560
Default Alt Text
D48918.diff (9 KB)
Attached To
Mode
D48918: sendfile: factor out socket send buffer space sensing into a method
Attached
Detach File
Event Timeline
Log In to Comment