diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -28,7 +28,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd April 24, 2025 +.Dd May 1, 2025 .Dt IFCONFIG 8 .Os .Sh NAME @@ -450,8 +450,13 @@ (Inet6 only.) Specify that the address configured is an anycast address, as described in RFC 4291 section 2.6. -Anycast addresses will not be used as source address of any outgoing -IPv6 packets unless an application explicitly binds to the address. +By default, anycast addresses may not be used for TCP connections or as +the source address of any outgoing packet (e.g., with UDP). +See the +.Sx ANYCAST ADDRESSES +section of +.Xr inet6 4 +for more details. .It Cm arp Enable the use of the Address Resolution Protocol .Pq Xr arp 4 diff --git a/share/man/man4/inet6.4 b/share/man/man4/inet6.4 --- a/share/man/man4/inet6.4 +++ b/share/man/man4/inet6.4 @@ -27,7 +27,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd February 22, 2023 +.Dd May 1, 2025 .Dt INET6 4 .Os .Sh NAME @@ -436,6 +436,56 @@ from IPv4 mapped address to .Dv AF_INET6 sockets. +.Sh ANYCAST ADDRESSES +IPv6 supports the concept of an anycast address, defined in RFC 4291 +section 2.6. +An anycast address is indistinguishable from a unicast address at the +protocol level, +but the same anycast address may be configured on several interfaces +across multiple hosts, +with a routing protocol (such as BGP or OSPF) used to route traffic from +the source system to the nearest anycast node. +.Pp +An anycast address may be configured by setting the +.Dv IN6_IFF_ANYCAST +flag on an address, or using the +.Dq anycast +option to +.Xr ifconfig 8 . 
+.Pp +By default, applications may bind a UDP or raw IPv6 socket to an anycast +address and receive traffic, +but are not permitted to send traffic from an anycast address or to +either accept or initiate a TCP connection using a local anycast +address. +This is to avoid applications establishing stateful network connections +over an anycast address, +which may be unreliable if the underlying routing changes and the +packets are redirected to a different anycast node. +This is particularly an issue with TCP connections, +but may also apply to UDP connections which keep state. +.Pp +This behaviour is controlled by the sysctl variable +.Va net.inet6.ip6.ip6_rfc4291_anycast . +If set to 0, which is the default, then incoming UDP anycast traffic +will be permitted, +but applications may not send any traffic from an anycast address, +and incoming TCP connections to an anycast address will be blocked. +If set to 1, then outgoing UDP and raw IPv6 traffic from an anycast +address is additionally permitted. +If set to 2, then incoming and outgoing UDP and TCP connections are +permitted using a local anycast address. +.Pp +Before changing this setting, +ensure any necessary configuration has been done to make the application +anycast-aware. +.Pp +Regardless of the setting of +.Va ip6_rfc4291_anycast , +an anycast address will never be automatically chosen as the outgoing +address of a connection; +the application must set the source address explicitly, +for example by binding the socket. .Sh SEE ALSO .Xr ioctl 2 , .Xr socket 2 , @@ -446,6 +496,13 @@ .Xr tcp 4 , .Xr udp 4 .Rs +.%A R. Hinden +.%A S. Deering +.%R RFC 4291 +.%D February 2006 +.%T "IP Version 6 Addressing Architecture" +.Re +.Rs .%A A. Conta .%A S. Deering .%A M. 
Gupta diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -567,8 +567,6 @@ tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) { struct mbuf *m; - struct in6_ifaddr *ia6; - struct ip6_hdr *ip6; m = *mp; if (m->m_len < *offp + sizeof(struct tcphdr)) { @@ -580,19 +578,6 @@ } } - /* - * draft-itojun-ipv6-tcp-to-anycast - * better place to put this in? - */ - ip6 = mtod(m, struct ip6_hdr *); - ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false); - if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) { - icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR, - (caddr_t)&ip6->ip6_dst - (caddr_t)ip6); - *mp = NULL; - return (IPPROTO_DONE); - } - *mp = m; return (tcp_input_with_port(mp, offp, proto, port)); } @@ -630,6 +615,7 @@ struct m_tag *fwd_tag = NULL; #ifdef INET6 struct ip6_hdr *ip6 = NULL; + struct in6_ifaddr *ia6; int isipv6; #else const void *ip6 = NULL; @@ -1234,40 +1220,41 @@ INP_RLOCK_ASSERT(inp); #ifdef INET6 /* - * If deprecated address is forbidden, - * we do not accept SYN to deprecated interface - * address to prevent any new inbound connection from - * getting established. - * When we do not accept SYN, we send a TCP RST, - * with deprecated source address (instead of dropping - * it). We compromise it as it is much better for peer - * to send a RST, and RST will be the final packet - * for the exchange. - * - * If we do not forbid deprecated addresses, we accept - * the SYN packet. RFC2462 does not suggest dropping - * SYN in this case. - * If we decipher RFC2462 5.5.4, it says like this: - * 1. use of deprecated addr with existing - * communication is okay - "SHOULD continue to be - * used" - * 2. use of it with new communication: - * (2a) "SHOULD NOT be used if alternate address - * with sufficient scope is available" - * (2b) nothing mentioned otherwise. 
- * Here we fall into (2b) case as we have no choice in - * our source address selection - we must obey the peer. - * - * The wording in RFC2462 is confusing, and there are - * multiple description text for deprecated address - * handling - worse, they are not exactly the same. - * I believe 5.5.4 is the best one, so we follow 5.5.4. + * Check for deprecated or anycast addresses. */ - if (isipv6 && !V_ip6_use_deprecated) { - struct in6_ifaddr *ia6; - - ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0 /* XXX */, false); - if (ia6 != NULL && + if (isipv6 && + (!V_ip6_use_deprecated || (V_ip6_rfc4291_anycast < 2)) && + (ia6 = in6ifa_ifwithaddr(&ip6->ip6_dst, 0, false))) { + /* + * If deprecated address is forbidden, we do not accept + * SYN to deprecated interface address to prevent any + * new inbound connection from getting established. + * + * When we do not accept the SYN, we send a TCP RST with the + * deprecated source address (instead of dropping it). + * We accept this compromise because it is much better for + * the peer to receive a RST, and the RST will be the final + * packet of the exchange. + * + * If we do not forbid deprecated addresses, we accept + * the SYN packet. RFC2462 does not suggest dropping + * SYN in this case. + * Our reading of RFC2462 5.5.4 is as follows: + * 1. use of deprecated addr with existing communication + * is okay - "SHOULD continue to be used" + * 2. use of it with new communication: + * (2a) "SHOULD NOT be used if alternate address + * with sufficient scope is available" + * (2b) nothing mentioned otherwise. + * Here we fall into (2b) case as we have no choice in + * our source address selection - we must obey the peer. + * + * The wording in RFC2462 is confusing, and it contains + * multiple descriptions of deprecated address + * handling - worse, they are not exactly the same. + * I believe 5.5.4 is the best one, so we follow 5.5.4. 
+ */ + if (!V_ip6_use_deprecated && (ia6->ia6_flags & IN6_IFF_DEPRECATED)) { if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: Listen socket: " @@ -1277,6 +1264,22 @@ rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } + + /* + * If rfc4291_anycast is less than 2, do not accept TCP + * connections to an anycast address. Sending a RST is + * fine in this case since RFC4291 permits it. + */ + if ((V_ip6_rfc4291_anycast < 2) && + (ia6->ia6_flags & IN6_IFF_ANYCAST)) { + if ((s = tcp_log_addrs(&inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Listen socket: " + "Connection attempt to anycast " + "IPv6 address rejected\n", + s, __func__); + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } } #endif /* INET6 */ /* diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -197,33 +197,45 @@ if ((sooptions & (SO_REUSEADDR | SO_REUSEPORT_LB)) != 0) reuseport_lb = SO_REUSEADDR | SO_REUSEPORT_LB; } else if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) { - struct sockaddr_in6 sin6; struct epoch_tracker et; - struct ifaddr *ifa; - - memset(&sin6, 0, sizeof(sin6)); - sin6.sin6_family = AF_INET6; - sin6.sin6_len = sizeof(sin6); - sin6.sin6_addr = *laddr; + struct in6_ifaddr *ifa; NET_EPOCH_ENTER(et); - if ((ifa = ifa_ifwithaddr((const struct sockaddr *)&sin6)) == - NULL && (inp->inp_flags & INP_BINDANY) == 0) { - NET_EPOCH_EXIT(et); - return (EADDRNOTAVAIL); - } - /* - * We used to prohibit binding to an anycast address here, - * based on RFC3513, but that restriction was removed in - * RFC4291. - */ - if (ifa != NULL && - ((struct in6_ifaddr *)ifa)->ia6_flags & - (IN6_IFF_NOTREADY | IN6_IFF_DETACHED)) { - NET_EPOCH_EXIT(et); - return (EADDRNOTAVAIL); + ifa = in6ifa_ifwithaddr(laddr, 0, false); + if (ifa) { + /* + * If rfc4291_anycast is less than 2, do not allow + * binding a stream socket to an anycast address. 
For + * listen sockets, doing so is pointless because all + * incoming connections will be rejected anyway, and for + * outgoing connections, we don't want to permit TCP + * over anycast unless it's been explicitly enabled. + * + * We allow binding datagram sockets here regardless of + * the rfc4291_anycast setting so that applications can + * bind to receive incoming packets. + */ + if ((inp->inp_socket->so_type == SOCK_STREAM) && + (ifa->ia6_flags & IN6_IFF_ANYCAST) && + (V_ip6_rfc4291_anycast < 2)) { + NET_EPOCH_EXIT(et); + return (EADDRNOTAVAIL); + } + + /* Reject addresses which are not valid. */ + if (ifa->ia6_flags & + (IN6_IFF_NOTREADY | IN6_IFF_DETACHED)) { + NET_EPOCH_EXIT(et); + return (EADDRNOTAVAIL); + } + } else { + if ((inp->inp_flags & INP_BINDANY) == 0) { + NET_EPOCH_EXIT(et); + return (EADDRNOTAVAIL); + } } + NET_EPOCH_EXIT(et); } diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c --- a/sys/netinet6/in6_proto.c +++ b/sys/netinet6/in6_proto.c @@ -172,6 +172,7 @@ VNET_DEFINE(int, ip6stealth) = 0; #endif VNET_DEFINE(bool, ip6_log_cannot_forward) = 1; +VNET_DEFINE(int, ip6_rfc4291_anycast) = 0; /* * BSDI4 defines these variables in in_proto.c... 
@@ -345,3 +346,6 @@ log_cannot_forward, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_log_cannot_forward), 1, "Log packets that cannot be forwarded"); +SYSCTL_INT(_net_inet6_ip6, OID_AUTO, ip6_rfc4291_anycast, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip6_rfc4291_anycast), 0, + "Allow outgoing UDP (1) or UDP and TCP (2) anycast traffic"); diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -250,9 +250,21 @@ */ if ((inp->inp_flags & INP_BINDANY) == 0) { ia = in6ifa_ifwithaddr(&tmp, 0 /* XXX */, false); - if (ia == NULL || (ia->ia6_flags & (IN6_IFF_ANYCAST | - IN6_IFF_NOTREADY))) + + if (ia == NULL || (ia->ia6_flags & IN6_IFF_NOTREADY)) + return (EADDRNOTAVAIL); + + /* + * Do not allow an anycast source address unless + * explicitly enabled by the administrator. This is to + * avoid people accidentally configuring stateful + * protocols over anycast without understanding the + * implications of that. + */ + if ((ia->ia6_flags & IN6_IFF_ANYCAST) && + !V_ip6_rfc4291_anycast) return (EADDRNOTAVAIL); + bcopy(&ia->ia_addr.sin6_addr, srcp, sizeof(*srcp)); } else bcopy(&tmp, srcp, sizeof(*srcp)); @@ -270,6 +282,18 @@ (error = prison_local_ip6(cred, &inp->in6p_laddr, ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) return (error); + + /* + * In the UDP case, we allow binding to an anycast address in + * order to receive packets sent to that address, but we don't + * want to allow sending packets from that address unless + * rfc4291_anycast is enabled. 
+ */ + ia = in6ifa_ifwithaddr(&inp->in6p_laddr, 0, false); + if ((ia != NULL) && (ia->ia6_flags & IN6_IFF_ANYCAST) && + !V_ip6_rfc4291_anycast) + return (EADDRNOTAVAIL); + bcopy(&inp->in6p_laddr, srcp, sizeof(*srcp)); return (0); } diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -361,6 +361,9 @@ VNET_DECLARE(bool, ip6_log_cannot_forward); #define V_ip6_log_cannot_forward VNET(ip6_log_cannot_forward) +VNET_DECLARE(int, ip6_rfc4291_anycast); +#define V_ip6_rfc4291_anycast VNET(ip6_rfc4291_anycast) + extern struct pr_usrreqs rip6_usrreqs; struct sockopt; diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -766,8 +766,7 @@ } if (ifa != NULL && ((struct in6_ifaddr *)ifa)->ia6_flags & - (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY| - IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { + (IN6_IFF_NOTREADY|IN6_IFF_DETACHED|IN6_IFF_DEPRECATED)) { NET_EPOCH_EXIT(et); return (EADDRNOTAVAIL); } diff --git a/tests/sys/net/Makefile b/tests/sys/net/Makefile --- a/tests/sys/net/Makefile +++ b/tests/sys/net/Makefile @@ -14,6 +14,7 @@ ATF_TESTS_SH+= if_tun_test ATF_TESTS_SH+= if_vlan ATF_TESTS_SH+= if_wg +ATF_TESTS_SH+= anycast TESTS_SUBDIRS+= bpf TESTS_SUBDIRS+= if_ovpn diff --git a/tests/sys/net/anycast.sh b/tests/sys/net/anycast.sh new file mode 100755 --- /dev/null +++ b/tests/sys/net/anycast.sh @@ -0,0 +1,126 @@ +# +# SPDX-License-Identifier: ISC +# +# Copyright (c) 2025 Lexi Winter + +. $(atf_get_srcdir)/../common/vnet.subr + +atf_test_case "tcp_listen" "cleanup" +tcp_listen_head() +{ + atf_set descr "listening on a TCP anycast address" + atf_set require.user root +} + +tcp_listen_body() +{ + vnet_init + + vnet_mkjail ajail + + jexec ajail ifconfig lo0 inet6 2001:db8::1/128 anycast + + # With ip6_rfc4291_anycast < 2, binding to an anycast address should be + # disallowed. 
+ + jexec ajail sysctl net.inet6.ip6.ip6_rfc4291_anycast=0 + atf_check -s exit:1 -o ignore -e ignore \ + jexec ajail nc -l 2001:db8::1 8080 + + jexec ajail sysctl net.inet6.ip6.ip6_rfc4291_anycast=1 + atf_check -s exit:1 -o ignore -e ignore \ + jexec ajail nc -l 2001:db8::1 8080 + + # With ip6_rfc4291_anycast=2, we should be able to bind and receive + # traffic. + jexec ajail sysctl net.inet6.ip6.ip6_rfc4291_anycast=2 + jexec ajail nc -l 2001:db8::1 8080 & sleep 1 + atf_check -s exit:0 -e ignore jexec ajail nc -z 2001:db8::1 8080 +} + +tcp_listen_cleanup() +{ + vnet_cleanup +} + +atf_test_case "tcp_connect" "cleanup" +tcp_connect_head() +{ + atf_set descr "connecting from a TCP anycast address" + atf_set require.user root +} + +tcp_connect_body() +{ + vnet_init + + vnet_mkjail ajail + + jexec ajail ifconfig lo0 inet6 2001:db8::1/128 anycast + + # Start a listener we can connect to + jexec ajail nc -l ::1 8080 & sleep 1 + + # With ip6_rfc4291_anycast < 2, connecting from an anycast address + # should be disallowed. + + jexec ajail sysctl net.inet6.ip6.ip6_rfc4291_anycast=0 + atf_check -s exit:1 -o ignore -e ignore \ + jexec ajail nc -z -s 2001:db8::1 ::1 8080 + + jexec ajail sysctl net.inet6.ip6.ip6_rfc4291_anycast=1 + atf_check -s exit:1 -o ignore -e ignore \ + jexec ajail nc -z -s 2001:db8::1 ::1 8080 + + # With ip6_rfc4291_anycast=2, we should be able to connect. + jexec ajail sysctl net.inet6.ip6.ip6_rfc4291_anycast=2 + atf_check -s exit:0 -e ignore jexec ajail nc -z -s 2001:db8::1 ::1 8080 +} + +tcp_connect_cleanup() +{ + vnet_cleanup +} + +atf_test_case "ping" "cleanup" +ping_head() +{ + atf_set descr "ping (raw socket) using an anycast address" +} + +ping_body() +{ + vnet_init + + vnet_mkjail ajail + + jexec ajail ifconfig lo0 inet6 2001:db8::1/128 anycast + + # With rfc4291_anycast disabled, we should be able to ping the anycast + # address, but not use it as a source address. 
+ jexec ajail sysctl net.inet6.ip6.ip6_rfc4291_anycast=0 + + atf_check -s exit:0 -o ignore jexec ajail ping -c1 2001:db8::1 + atf_check -s exit:1 -o ignore -e ignore \ + jexec ajail ping -c1 -S 2001:db8::1 ::1 + + # With rfc4291_anycast enabled, we should be able to use the anycast + # address as a source address. + jexec ajail sysctl net.inet6.ip6.ip6_rfc4291_anycast=1 + + atf_check -s exit:0 -o ignore jexec ajail ping -c1 2001:db8::1 + atf_check -s exit:0 -o ignore -e ignore \ + jexec ajail ping -c1 -S 2001:db8::1 ::1 +} + +ping_cleanup() +{ + vnet_cleanup +} + +atf_init_test_cases() +{ + atf_add_test_case "tcp_listen" + atf_add_test_case "tcp_connect" + atf_add_test_case "ping" +}