diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4 --- a/share/man/man4/tcp.4 +++ b/share/man/man4/tcp.4 @@ -31,7 +31,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd August 3, 2024 +.Dd January 10, 2025 .Dt TCP 4 .Os .Sh NAME @@ -200,6 +200,35 @@ To list the available TCP stacks, see .Va functions_available in the +.Sx FIB support +TCP sockets are FIB-aware. +They inherit the FIB of the process which created the socket, or that of the +listening socket for sockets created by +.Xr accept 2 . +In particular, the FIB is not inherited from that of the interface where the +initiating SYN packet was received. +When an incoming connection request arrives to a listening socket, the initial +handshake also occurs in the FIB of the listening socket, not that of the +received packet. +.Pp +By default, a TCP listening socket can accept connections originating from any +FIB. +If the +.Va net.inet.tcp.bind_all_fibs +tunable is set to 0, a listening socket will only accept connections +originating +from the FIB's listening socket. +Connection requests from other FIBs will be treated as though there is no +listening socket for the destination address and port. +In this mode, multiple listening sockets owned by the same user can listen on +the same address and port so long as they belong to different FIBs, similar to +the behavior of the +.Dv SO_REUSEPORT +socket option. +If the tunable is set to 0, all sockets added to a load-balancing group created +with the +.Dv SO_REUSEPORT_LB +socket option must belong to the same FIB. .Sx MIB (sysctl) Variables section further down. To list the default TCP stack, see @@ -1041,6 +1070,7 @@ .El .Sh SEE ALSO .Xr getsockopt 2 , +.Xr setfib 2 , .Xr socket 2 , .Xr stats 3 , .Xr sysctl 3 , diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -135,6 +135,11 @@ &VNET_NAME(tcp_log_in_vain), 0, "Log all incoming TCP segments to closed ports"); +VNET_DEFINE(int, tcp_bind_all_fibs) = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, bind_all_fibs, CTLFLAG_VNET | CTLFLAG_RDTUN, + &VNET_NAME(tcp_bind_all_fibs), 0, + "Bound sockets receive traffic from all FIBs"); + VNET_DEFINE(int, blackhole) = 0; #define V_blackhole VNET(blackhole) SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_VNET | CTLFLAG_RW, @@ -833,7 +838,8 @@ */ lookupflag = INPLOOKUP_WILDCARD | ((thflags & (TH_ACK|TH_SYN)) == TH_SYN ? - INPLOOKUP_RLOCKPCB : INPLOOKUP_WLOCKPCB); + INPLOOKUP_RLOCKPCB : INPLOOKUP_WLOCKPCB) | + (V_tcp_bind_all_fibs ? 0 : INPLOOKUP_FIB); findpcb: tp = NULL; #ifdef INET6 diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -262,7 +262,8 @@ goto out; } INP_HASH_WLOCK(&V_tcbinfo); - error = in_pcbbind(inp, sinp, 0, td->td_ucred); + error = in_pcbbind(inp, sinp, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, + td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); out: tcp_bblog_pru(tp, PRU_BIND, error); @@ -336,7 +337,8 @@ } } #endif - error = in6_pcbbind(inp, sin6, 0, td->td_ucred); + error = in6_pcbbind(inp, sin6, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, + td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); out: if (error != 0) @@ -376,7 +378,8 @@ } if (inp->inp_lport == 0) { INP_HASH_WLOCK(&V_tcbinfo); - error = in_pcbbind(inp, NULL, 0, td->td_ucred); + error = in_pcbbind(inp, NULL, + V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred); INP_HASH_WUNLOCK(&V_tcbinfo); } if (error == 0) { @@ -435,7 +438,8 @@ inp->inp_vflag &= ~INP_IPV4; if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) inp->inp_vflag |= INP_IPV4; - error = in6_pcbbind(inp, NULL, 0, td->td_ucred); + error = in6_pcbbind(inp, NULL, + V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred); } INP_HASH_WUNLOCK(&V_tcbinfo); if (error == 0) { diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1271,6 +1271,7 @@ VNET_DECLARE(int, tcp_autorcvbuf_max); VNET_DECLARE(int, tcp_autosndbuf_inc); VNET_DECLARE(int, tcp_autosndbuf_max); +VNET_DECLARE(int, tcp_bind_all_fibs); VNET_DECLARE(int, tcp_delack_enabled); VNET_DECLARE(int, tcp_do_autorcvbuf); VNET_DECLARE(int, tcp_do_autosndbuf); @@ -1324,6 +1325,7 @@ #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) +#define V_tcp_bind_all_fibs VNET(tcp_bind_all_fibs) #define V_tcp_delack_enabled VNET(tcp_delack_enabled) #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf)