diff --git a/lib/libc/sys/recv.2 b/lib/libc/sys/recv.2 --- a/lib/libc/sys/recv.2 +++ b/lib/libc/sys/recv.2 @@ -163,6 +163,7 @@ .Bl -column ".Dv MSG_CMSG_CLOEXEC" -offset indent .It Dv MSG_OOB Ta process out-of-band data .It Dv MSG_PEEK Ta peek at incoming message +.It Dv MSG_TRUNC Ta return real packet or datagram length .It Dv MSG_WAITALL Ta wait for full request or error .It Dv MSG_DONTWAIT Ta do not block .It Dv MSG_CMSG_CLOEXEC Ta set received fds close-on-exec @@ -185,6 +186,17 @@ data from the queue. Thus, a subsequent receive call will return the same data. The +.Dv MSG_TRUNC +flag causes the receive operation to return the full length of the packet +or datagram even if larger than provided buffer. The flag is supported +on SOCK_DGRAM sockets for +.Dv AF_INET +, +.Dv AF_INET6 +and +.Dv AF_UNIX +families. +The .Dv MSG_WAITALL flag requests that the operation block until the full request is satisfied. diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1896,15 +1896,18 @@ struct mbuf *nextrecord; int moff, type = 0; ssize_t orig_resid = uio->uio_resid; + bool report_real_len = false; mp = mp0; if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; - if (flagsp != NULL) + if (flagsp != NULL) { + report_real_len = *flagsp & MSG_TRUNC; + *flagsp &= ~MSG_TRUNC; flags = *flagsp &~ MSG_EOR; - else + } else flags = 0; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); @@ -1978,7 +1981,7 @@ error = ENOTCONN; goto release; } - if (uio->uio_resid == 0) { + if (uio->uio_resid == 0 && !report_real_len) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } @@ -2326,6 +2329,8 @@ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m != NULL && pr->pr_flags & PR_ATOMIC) { + if (report_real_len) + uio->uio_resid -= m_length(m, NULL) - moff; flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord_locked(&so->so_rcv); @@ -2624,7 +2629,7 @@ * For any complicated cases, fall back to the full 
* soreceive_generic(). */ - if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB)) + if (mp0 != NULL || (flags & (MSG_PEEK | MSG_OOB | MSG_TRUNC))) return (soreceive_generic(so, psa, uio, mp0, controlp, flagsp)); diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1417,7 +1417,7 @@ uipc_peek_dgram(struct socket *so, struct mbuf *m, struct sockaddr **psa, struct uio *uio, struct mbuf **controlp, int *flagsp) { - ssize_t len; + ssize_t len = 0; int error; so->so_rcv.uxdg_peeked = m; @@ -1459,8 +1459,16 @@ } SOCK_IO_RECV_UNLOCK(so); - if (m != NULL && flagsp != NULL) - *flagsp |= MSG_TRUNC; + if (flagsp != NULL) { + if (m != NULL) { + if (*flagsp & MSG_TRUNC) { + /* Report real length of the packet */ + uio->uio_resid -= m_length(m, NULL) - len; + } + *flagsp |= MSG_TRUNC; + } else + *flagsp &= ~MSG_TRUNC; + } return (0); } @@ -1475,7 +1483,7 @@ struct sockbuf *sb = NULL; struct mbuf *m; int flags, error; - ssize_t len; + ssize_t len = 0; bool nonblock; MPASS(mp0 == NULL); @@ -1619,11 +1627,16 @@ SOCK_IO_RECV_UNLOCK(so); if (m != NULL) { - flags |= MSG_TRUNC; + if (flagsp != NULL) { + if (flags & MSG_TRUNC) { + /* Report real length of the packet */ + uio->uio_resid -= m_length(m, NULL); + } + *flagsp |= MSG_TRUNC; + } m_freem(m); - } - if (flagsp != NULL) - *flagsp |= flags; + } else if (flagsp != NULL) + *flagsp &= ~MSG_TRUNC; return (0); } diff --git a/tests/sys/kern/Makefile b/tests/sys/kern/Makefile --- a/tests/sys/kern/Makefile +++ b/tests/sys/kern/Makefile @@ -29,6 +29,7 @@ ATF_TESTS_C+= sched_affinity ATF_TESTS_C+= sigaltstack ATF_TESTS_C+= sigwait +ATF_TESTS_C+= socket_msg_trunc TEST_METADATA.sigwait+= is_exclusive="true" .if ${MACHINE_ARCH} != "i386" && ${MACHINE_ARCH:Mpowerpc*} == "" ATF_TESTS_C+= subr_physmem_test diff --git a/tests/sys/kern/socket_msg_trunc.c b/tests/sys/kern/socket_msg_trunc.c new file mode 100644 --- /dev/null +++ b/tests/sys/kern/socket_msg_trunc.c @@ -0,0 
+1,187 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include <netinet/in.h>
+
+#include <string.h>
+#include <unistd.h>
+
+#include <atf-c.h>
+
+/* Pathname the AF_UNIX receiving socket is bound to. */
+#define	UNIX_SOCK_PATH	"/tmp/test_check_recvmsg_socket"
+
+/*
+ * Largest datagram sent by any test case.  The send buffer is sized from
+ * this value so sendto(2) never reads past the end of it.
+ */
+#define	MAX_DGRAM_SIZE	9000
+
+/* Datagram sizes exercised for each address family. */
+static const int inet_sizes[] = { 80, 255, 256, 1024, 4096, MAX_DGRAM_SIZE };
+static const int unix_sizes[] = { 80, 255, 256, 1024, 2000 };
+
+/*
+ * Verify that recv(2) with MSG_TRUNC reports the full datagram length even
+ * when the supplied buffer is shorter than the datagram.
+ *
+ * test_name selects the transport: "udp" (AF_INET), "udp6" (AF_INET6) or
+ * "unix" (AF_UNIX); any other value fails the test case.  For every size in
+ * the per-family table one datagram is sent from a client socket to a bound
+ * server socket and then received three times: a zero-length peek, a
+ * one-byte peek and a one-byte consuming read.  Each call is expected to
+ * return the size that was sent.
+ */
+static void
+check_recvmsg(const char *test_name)
+{
+	const struct in6_addr in6loopback = IN6ADDR_LOOPBACK_INIT;
+	struct sockaddr_in sin;
+	struct sockaddr_in6 sin6;
+	struct sockaddr_un saun;
+	struct sockaddr *sa;
+	const int *sizes;
+	const char *unix_path = NULL;
+	char buf[MAX_DGRAM_SIZE], tbuf[1];
+	ssize_t len;
+	size_t sizes_count;
+	int cs, domain, one = 1, rc, ss;
+
+	if (strcmp(test_name, "udp") == 0) {
+		domain = PF_INET;
+		bzero(&sin, sizeof(sin));
+		sin.sin_family = AF_INET;
+		sin.sin_len = sizeof(sin);
+		sin.sin_port = htons(6666);
+		sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		sa = (struct sockaddr *)&sin;
+		sizes = inet_sizes;
+		sizes_count = nitems(inet_sizes);
+	} else if (strcmp(test_name, "udp6") == 0) {
+		domain = PF_INET6;
+		bzero(&sin6, sizeof(sin6));
+		sin6.sin6_family = AF_INET6;
+		sin6.sin6_len = sizeof(sin6);
+		sin6.sin6_port = htons(6666);
+		sin6.sin6_addr = in6loopback;
+		sa = (struct sockaddr *)&sin6;
+		sizes = inet_sizes;
+		sizes_count = nitems(inet_sizes);
+	} else if (strcmp(test_name, "unix") == 0) {
+		domain = PF_UNIX;
+		unix_path = UNIX_SOCK_PATH;
+		bzero(&saun, sizeof(saun));
+		saun.sun_family = AF_UNIX;
+		strlcpy(saun.sun_path, unix_path, sizeof(saun.sun_path));
+		saun.sun_len = sizeof(saun);
+		sa = (struct sockaddr *)&saun;
+		sizes = unix_sizes;
+		sizes_count = nitems(unix_sizes);
+	} else {
+		/* Fail loudly instead of passing without testing anything. */
+		atf_tc_fail("unknown test name \"%s\"", test_name);
+	}
+
+	/*
+	 * Both descriptors and the bound address are used by everything that
+	 * follows, so a socket(2) or bind(2) failure aborts the test case.
+	 */
+	ss = socket(domain, SOCK_DGRAM, 0);
+	ATF_REQUIRE(ss >= 0);
+	rc = setsockopt(ss, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+	ATF_CHECK_EQ(0, rc);
+	if (unix_path != NULL) {
+		/* Remove a stale socket file left by an earlier run. */
+		(void)unlink(unix_path);
+	}
+	rc = bind(ss, sa, sa->sa_len);
+	ATF_REQUIRE_EQ(0, rc);
+
+	cs = socket(domain, SOCK_DGRAM, 0);
+	ATF_REQUIRE(cs >= 0);
+
+	memset(buf, 0xFF, sizeof(buf));
+	for (size_t i = 0; i < sizes_count; i++) {
+		int sz = sizes[i];
+
+		ATF_REQUIRE((size_t)sz <= sizeof(buf));
+		len = sendto(cs, buf, sz, 0, sa, sa->sa_len);
+		ATF_REQUIRE_EQ((ssize_t)sz, len);
+
+		/* Peek with no buffer at all. */
+		len = recv(ss, NULL, 0, MSG_PEEK | MSG_TRUNC);
+		ATF_CHECK_EQ((ssize_t)sz, len);
+
+		/* Peek into a buffer shorter than the datagram. */
+		len = recv(ss, tbuf, sizeof(tbuf), MSG_PEEK | MSG_TRUNC);
+		ATF_CHECK_EQ((ssize_t)sz, len);
+
+		/* Consume the datagram through the same short buffer. */
+		len = recv(ss, tbuf, sizeof(tbuf), MSG_TRUNC);
+		ATF_CHECK_EQ((ssize_t)sz, len);
+	}
+
+	close(ss);
+	close(cs);
+	if (unix_path != NULL)
+		(void)unlink(unix_path);
+}
+
+ATF_TC_WITHOUT_HEAD(socket_afinet_udp_recv_trunc);
+ATF_TC_BODY(socket_afinet_udp_recv_trunc, tc)
+{
+	check_recvmsg("udp");
+}
+
+ATF_TC_WITHOUT_HEAD(socket_afinet6_udp_recv_trunc);
+ATF_TC_BODY(socket_afinet6_udp_recv_trunc, tc)
+{
+	check_recvmsg("udp6");
+}
+
+ATF_TC_WITHOUT_HEAD(socket_afunix_recv_trunc);
+ATF_TC_BODY(socket_afunix_recv_trunc, tc)
+{
+	check_recvmsg("unix");
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+
+	ATF_TP_ADD_TC(tp, socket_afinet_udp_recv_trunc);
+	ATF_TP_ADD_TC(tp, socket_afinet6_udp_recv_trunc);
+	ATF_TP_ADD_TC(tp, socket_afunix_recv_trunc);
+
+	return (atf_no_error());
+}