diff --git a/lib/libc/sys/getsockopt.2 b/lib/libc/sys/getsockopt.2
--- a/lib/libc/sys/getsockopt.2
+++ b/lib/libc/sys/getsockopt.2
@@ -28,7 +28,7 @@
 .\" @(#)getsockopt.2 8.4 (Berkeley) 5/2/95
 .\" $FreeBSD$
 .\"
-.Dd February 8, 2021
+.Dd August 24, 2022
 .Dt GETSOCKOPT 2
 .Os
 .Sh NAME
@@ -160,6 +160,7 @@
 .It Dv SO_OOBINLINE Ta "enables reception of out-of-band data in band"
 .It Dv SO_SNDBUF Ta "set buffer size for output"
 .It Dv SO_RCVBUF Ta "set buffer size for input"
+.It Dv SO_RCVBUFFORCE Ta "set buffer size for input ignoring system limit"
 .It Dv SO_SNDLOWAT Ta "set minimum count for output"
 .It Dv SO_RCVLOWAT Ta "set minimum count for input"
 .It Dv SO_SNDTIMEO Ta "set timeout value for output"
@@ -294,6 +295,10 @@
 .Xr sysctl 3
 MIB variable
 .Dq Li kern.ipc.maxsockbuf .
+.Dv SO_RCVBUFFORCE
+is a privileged version of
+.Dv SO_RCVBUF
+adjusting the input buffer size ignoring system-wide limits.
 .Pp
 .Dv SO_SNDLOWAT
 is an option to set the minimum count for output operations.
diff --git a/sys/compat/linux/linux_socket.c b/sys/compat/linux/linux_socket.c
--- a/sys/compat/linux/linux_socket.c
+++ b/sys/compat/linux/linux_socket.c
@@ -528,8 +528,9 @@
 	case LINUX_SO_SNDBUFFORCE:
 		return (SO_SNDBUF);
 	case LINUX_SO_RCVBUF:
-	case LINUX_SO_RCVBUFFORCE:
 		return (SO_RCVBUF);
+	case LINUX_SO_RCVBUFFORCE:
+		return (SO_RCVBUFFORCE);
 	case LINUX_SO_KEEPALIVE:
 		return (SO_KEEPALIVE);
 	case LINUX_SO_OOBINLINE:
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -650,6 +650,30 @@
 	return (true);
 }
 
+/*
+ * Asks chgsbsize() to set the socket buffer byte limit (sb_hiwat) to cc and
+ * sets the mbuf limit (sb_mbmax) to cc * sb_efficiency, ignoring the sb_max
+ * default.  The RLIMIT_SBSIZE limit of td is still passed to chgsbsize();
+ * returns false if chgsbsize() refuses the new size.
+ */
+static bool
+sbreserve_force_locked(struct socket *so, sb_which which, u_long cc,
+    struct thread *td)
+{
+	struct sockbuf *sb = sobuf(so, which);
+	rlim_t sbsize_limit;
+
+	SOCK_BUF_LOCK_ASSERT(so, which);
+
+	sbsize_limit = td != NULL ? lim_cur(td, RLIMIT_SBSIZE) : RLIM_INFINITY;
+	if (!chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, sbsize_limit))
+		return (false);
+	sb->sb_mbmax = cc * sb_efficiency;
+	if (sb->sb_lowat > sb->sb_hiwat)
+		sb->sb_lowat = sb->sb_hiwat;
+	return (true);
+}
+
 int
 sbsetopt(struct socket *so, int cmd, u_long cc)
 {
@@ -671,6 +695,7 @@
 				break;
 			case SO_RCVLOWAT:
 			case SO_RCVBUF:
+			case SO_RCVBUFFORCE:
 				lowat = &so->sol_sbrcv_lowat;
 				hiwat = &so->sol_sbrcv_hiwat;
 				flags = &so->sol_sbrcv_flags;
@@ -685,6 +710,7 @@
 				break;
 			case SO_RCVLOWAT:
 			case SO_RCVBUF:
+			case SO_RCVBUFFORCE:
 				sb = &so->so_rcv;
 				wh = SO_RCV;
 				break;
@@ -714,6 +740,18 @@
 		if (error == 0)
 			*flags &= ~SB_AUTOSIZE;
 		break;
+	case SO_RCVBUFFORCE:
+		if (SOLISTENING(so)) {
+			*hiwat = cc;
+			if (*lowat > *hiwat)
+				*lowat = *hiwat;
+		} else {
+			if (!sbreserve_force_locked(so, wh, cc, curthread))
+				error = ENOBUFS;
+		}
+		if (error == 0)
+			*flags &= ~SB_AUTOSIZE;
+		break;
 	case SO_SNDLOWAT:
 	case SO_RCVLOWAT:
 		/*
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -129,6 +129,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -3098,6 +3099,7 @@
 
 		case SO_SNDBUF:
 		case SO_RCVBUF:
+		case SO_RCVBUFFORCE:
 		case SO_SNDLOWAT:
 		case SO_RCVLOWAT:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
@@ -3114,6 +3116,14 @@
 				goto bad;
 			}
 
+			if (sopt->sopt_name == SO_RCVBUFFORCE) {
+				if (sopt->sopt_td != NULL)
+					error = priv_check(sopt->sopt_td,
+					    PRIV_NETINET_RCVBUFFORCE);
+				if (error != 0)
+					goto bad;
+			}
+
 			error = sbsetopt(so, sopt->sopt_name, optval);
 			break;
 
diff --git a/sys/sys/priv.h b/sys/sys/priv.h
--- a/sys/sys/priv.h
+++ b/sys/sys/priv.h
@@ -403,6 +403,7 @@
 #define	PRIV_NETINET_SETHDROPTS	505	/* Set certain IPv4/6 header options. */
 #define	PRIV_NETINET_BINDANY	506	/* Allow bind to any address. */
 #define	PRIV_NETINET_HASHKEY	507	/* Get and set hash keys for IPv4/6. */
+#define	PRIV_NETINET_RCVBUFFORCE	508	/* Allow setting SO_RCVBUFFORCE option. */
 
 /*
  * Placeholders for IPX/SPX privileges, not supported any more.
diff --git a/sys/sys/socket.h b/sys/sys/socket.h
--- a/sys/sys/socket.h
+++ b/sys/sys/socket.h
@@ -173,6 +173,7 @@
 #define	SO_TS_CLOCK	0x1017		/* clock type used for SO_TIMESTAMP */
 #define	SO_MAX_PACING_RATE	0x1018	/* socket's max TX pacing rate (Linux name) */
 #define	SO_DOMAIN	0x1019		/* get socket domain */
+#define	SO_RCVBUFFORCE	0x1020		/* receive buffer size, ignoring limits */
 #endif
 
 #if __BSD_VISIBLE
diff --git a/tests/sys/kern/Makefile b/tests/sys/kern/Makefile
--- a/tests/sys/kern/Makefile
+++ b/tests/sys/kern/Makefile
@@ -31,6 +31,7 @@
 ATF_TESTS_C+=	sigwait
 ATF_TESTS_C+=	socket_accf
 ATF_TESTS_C+=	socket_msg_trunc
+ATF_TESTS_C+=	socket_rcvbufforce
 TEST_METADATA.sigwait+=	is_exclusive="true"
 .if ${MACHINE_ARCH} != "i386" && ${MACHINE_ARCH:Mpowerpc*} == ""
 ATF_TESTS_C+=	subr_physmem_test
diff --git a/tests/sys/kern/socket_rcvbufforce.c b/tests/sys/kern/socket_rcvbufforce.c
new file mode 100644
--- /dev/null
+++ b/tests/sys/kern/socket_rcvbufforce.c
@@ -0,0 +1,301 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#define	TEST_PORT	35471
+
+/*
+ * Returns the current kern.ipc.maxsockbuf sysctl value; the test case is
+ * aborted if the sysctl cannot be read.
+ */
+static unsigned long
+get_maxsockbuf(void)
+{
+	unsigned long maxsockbuf;
+	size_t llen = sizeof(maxsockbuf);
+
+	ATF_REQUIRE(sysctlbyname("kern.ipc.maxsockbuf", &maxsockbuf, &llen,
+	    NULL, 0) == 0);
+
+	return (maxsockbuf);
+}
+
+/*
+ * Returns twice kern.ipc.maxsockbuf as an int, the type in which socket
+ * buffer sizes are passed to setsockopt(2).  The test case is aborted if
+ * the value does not fit.
+ */
+static int
+get_forced_bufsize(void)
+{
+	unsigned long want = get_maxsockbuf() * 2;
+	int sz = (int)want;
+
+	ATF_REQUIRE(sz > 0 && (unsigned long)sz == want);
+
+	return (sz);
+}
+
+ATF_TC(socket_rcvbufforce_noroot);
+ATF_TC_HEAD(socket_rcvbufforce_noroot, tc)
+{
+
+	atf_tc_set_md_var(tc, "descr",
+	    "Checks that an unprivileged user can't use SO_RCVBUFFORCE");
+	atf_tc_set_md_var(tc, "require.user", "unprivileged");
+}
+ATF_TC_BODY(socket_rcvbufforce_noroot, tc)
+{
+	int families[] = { AF_INET, AF_INET6, 0 };
+	int types[] = { SOCK_STREAM, SOCK_DGRAM, 0 };
+	int sz = get_forced_bufsize();
+
+	for (int *pfam = families; *pfam != 0; pfam++) {
+		for (int *ptype = types; *ptype != 0; ptype++) {
+			int ss = socket(*pfam, *ptype, 0);
+			ATF_REQUIRE(ss >= 0);
+
+			ATF_REQUIRE_ERRNO(EPERM, setsockopt(ss, SOL_SOCKET,
+			    SO_RCVBUFFORCE, &sz, sizeof(sz)) != 0);
+			close(ss);
+		}
+	}
+}
+
+/*
+ * Maps a test name ("udp", "tcp", "udp6" or "tcp6") to its address family.
+ * An unknown name fails the test case instead of silently passing.
+ */
+static int
+family_from_name(const char *test_name)
+{
+
+	if (strcmp(test_name, "udp") == 0 || strcmp(test_name, "tcp") == 0)
+		return (AF_INET);
+	if (strcmp(test_name, "udp6") == 0 || strcmp(test_name, "tcp6") == 0)
+		return (AF_INET6);
+	atf_tc_fail("unknown test name: %s", test_name);
+}
+
+/*
+ * Creates a server socket of the given type bound to TEST_PORT on the
+ * loopback address of the given family, and a second, unbound socket of
+ * the same family and type to act as the client.  The bound address is
+ * stored in *addr.  Any failure aborts the test case.
+ */
+static void
+setup_sockets(int family, int type, struct sockaddr_storage *addr, int *ssp,
+    int *csp)
+{
+	const struct in6_addr in6loopback = IN6ADDR_LOOPBACK_INIT;
+	struct sockaddr_in6 *sin6;
+	struct sockaddr_in *sin;
+	struct sockaddr *sa = (struct sockaddr *)addr;
+	int one = 1;
+
+	*ssp = socket(family, type, 0);
+	ATF_REQUIRE(*ssp >= 0);
+	ATF_REQUIRE_EQ(0, setsockopt(*ssp, SOL_SOCKET, SO_REUSEPORT, &one,
+	    sizeof(one)));
+
+	memset(addr, 0, sizeof(*addr));
+	if (family == AF_INET) {
+		sin = (struct sockaddr_in *)addr;
+		sin->sin_family = AF_INET;
+		sin->sin_len = sizeof(*sin);
+		sin->sin_port = htons(TEST_PORT);
+		sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	} else {
+		sin6 = (struct sockaddr_in6 *)addr;
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_len = sizeof(*sin6);
+		sin6->sin6_port = htons(TEST_PORT);
+		sin6->sin6_addr = in6loopback;
+	}
+	ATF_REQUIRE_EQ(0, bind(*ssp, sa, sa->sa_len));
+
+	*csp = socket(family, type, 0);
+	ATF_REQUIRE(*csp >= 0);
+}
+
+/*
+ * Forces the receive buffer of a bound datagram socket to twice
+ * kern.ipc.maxsockbuf, then sends chunk_size-byte datagrams to it from a
+ * second socket until just under that amount has been sent.
+ *
+ * NOTE(review): nothing is read back, so this only shows that
+ * SO_RCVBUFFORCE is accepted and that the sends succeed; it does not
+ * verify how much data the receive buffer actually queued.
+ */
+static void
+check_buffer_nostream(const char *test_name)
+{
+	struct sockaddr_storage addr;
+	struct sockaddr *sa = (struct sockaddr *)&addr;
+	const int chunk_size = 1024;
+	ssize_t rc;
+	char *buf;
+	int ss, cs, sz;
+
+	setup_sockets(family_from_name(test_name), SOCK_DGRAM, &addr, &ss,
+	    &cs);
+
+	/* Set large buffer on the server socket */
+	sz = get_forced_bufsize();
+	ATF_REQUIRE_EQ(0, setsockopt(ss, SOL_SOCKET, SO_RCVBUFFORCE, &sz,
+	    sizeof(sz)));
+
+	buf = malloc(chunk_size);
+	ATF_REQUIRE(buf != NULL);
+	memset(buf, 0xFE, chunk_size);
+
+	for (unsigned long i = chunk_size; i < (unsigned long)sz;
+	    i += chunk_size) {
+		rc = sendto(cs, buf, chunk_size, 0, sa, sa->sa_len);
+		ATF_REQUIRE_EQ(rc, chunk_size);
+	}
+
+	free(buf);
+	close(ss);
+	close(cs);
+}
+
+/*
+ * Establishes a loopback stream connection, forces the receive buffer of
+ * the accepted socket to twice kern.ipc.maxsockbuf and checks that a
+ * single send() of that size from the peer completes.
+ *
+ * NOTE(review): send() is blocking and nothing reads from the accepted
+ * socket, so if the data cannot all be queued the test will presumably
+ * hang until the test harness times out -- confirm on a test machine.
+ */
+static void
+check_buffer_stream(const char *test_name)
+{
+	struct sockaddr_storage addr, sa_from;
+	struct sockaddr *sa = (struct sockaddr *)&addr;
+	socklen_t sa_size = sizeof(sa_from);
+	char *buf;
+	int ss, cs, as, sz;
+
+	setup_sockets(family_from_name(test_name), SOCK_STREAM, &addr, &ss,
+	    &cs);
+
+	/* Connect first; the server socket has to be listening for that. */
+	ATF_REQUIRE_EQ(0, listen(ss, 1));
+	ATF_REQUIRE_EQ(0, connect(cs, sa, sa->sa_len));
+	as = accept(ss, (struct sockaddr *)&sa_from, &sa_size);
+	ATF_REQUIRE(as >= 0);
+
+	sz = get_forced_bufsize();
+	ATF_REQUIRE_EQ(0, setsockopt(as, SOL_SOCKET, SO_RCVBUFFORCE, &sz,
+	    sizeof(sz)));
+
+	buf = malloc(sz);
+	ATF_REQUIRE(buf != NULL);
+	memset(buf, 0xFE, sz);
+
+	ATF_REQUIRE_EQ(send(cs, buf, sz, 0), sz);
+
+	free(buf);
+	close(ss);
+	close(cs);
+	close(as);
+}
+
+ATF_TC(socket_afinet_udp_rcvbufforce);
+ATF_TC_HEAD(socket_afinet_udp_rcvbufforce, tc)
+{
+
+	atf_tc_set_md_var(tc, "descr", "Checks SO_RCVBUFFORCE with inet/udp");
+	atf_tc_set_md_var(tc, "require.user", "root");
+}
+ATF_TC_BODY(socket_afinet_udp_rcvbufforce, tc)
+{
+	check_buffer_nostream("udp");
+}
+
+ATF_TC(socket_afinet6_udp_rcvbufforce);
+ATF_TC_HEAD(socket_afinet6_udp_rcvbufforce, tc)
+{
+
+	atf_tc_set_md_var(tc, "descr", "Checks SO_RCVBUFFORCE with inet6/udp");
+	atf_tc_set_md_var(tc, "require.user", "root");
+}
+ATF_TC_BODY(socket_afinet6_udp_rcvbufforce, tc)
+{
+	check_buffer_nostream("udp6");
+}
+
+ATF_TC(socket_afinet_tcp_rcvbufforce);
+ATF_TC_HEAD(socket_afinet_tcp_rcvbufforce, tc)
+{
+
+	atf_tc_set_md_var(tc, "descr", "Checks SO_RCVBUFFORCE with inet/tcp");
+	atf_tc_set_md_var(tc, "require.user", "root");
+}
+ATF_TC_BODY(socket_afinet_tcp_rcvbufforce, tc)
+{
+	check_buffer_stream("tcp");
+}
+
+ATF_TC(socket_afinet6_tcp_rcvbufforce);
+ATF_TC_HEAD(socket_afinet6_tcp_rcvbufforce, tc)
+{
+
+	atf_tc_set_md_var(tc, "descr", "Checks SO_RCVBUFFORCE with inet6/tcp");
+	atf_tc_set_md_var(tc, "require.user", "root");
+}
+ATF_TC_BODY(socket_afinet6_tcp_rcvbufforce, tc)
+{
+	check_buffer_stream("tcp6");
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+
+	ATF_TP_ADD_TC(tp, socket_afinet_udp_rcvbufforce);
+	ATF_TP_ADD_TC(tp, socket_afinet6_udp_rcvbufforce);
+	ATF_TP_ADD_TC(tp, socket_afinet_tcp_rcvbufforce);
+	ATF_TP_ADD_TC(tp, socket_afinet6_tcp_rcvbufforce);
+	ATF_TP_ADD_TC(tp, socket_rcvbufforce_noroot);
+
+	return atf_no_error();
+}