diff --git a/share/man/man5/pf.conf.5 b/share/man/man5/pf.conf.5 --- a/share/man/man5/pf.conf.5 +++ b/share/man/man5/pf.conf.5 @@ -27,7 +27,7 @@ .\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd June 6, 2024 +.Dd June 24, 2024 .Dt PF.CONF 5 .Os .Sh NAME @@ -1400,9 +1400,14 @@ .Xr udp 4 connections; implicitly in the case of .Ar nat -rules and explicitly in the case of +rules and both implicitly and explicitly in the case of .Ar rdr rules. +A +.Ar rdr +rule may cause the source port to be modified if doing so avoids a conflict +with an existing connection. +A random source port in the range 50001-65535 is chosen in this case. Port numbers are never translated with a .Ar binat rule. diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -583,7 +583,7 @@ { struct pf_krule *r = NULL; struct pf_addr *naddr; - uint16_t *nport; + uint16_t *nportp; uint16_t low, high; PF_RULES_RASSERT(); @@ -626,9 +626,8 @@ return (NULL); } - /* XXX We only modify one side for now. */ naddr = &(*nkp)->addr[1]; - nport = &(*nkp)->port[1]; + nportp = &(*nkp)->port[1]; switch (r->action) { case PF_NAT: @@ -641,7 +640,7 @@ } if (r->rpool.mape.offset > 0) { if (pf_get_mape_sport(pd->af, pd->proto, r, saddr, - sport, daddr, dport, naddr, nport, sn)) { + sport, daddr, dport, naddr, nportp, sn)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: MAP-E port allocation (%u/%u/%u)" " failed\n", @@ -651,7 +650,7 @@ goto notrans; } } else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport, - daddr, dport, naddr, nport, low, high, sn)) { + daddr, dport, naddr, nportp, low, high, sn)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: NAT proxy port allocation (%u-%u) failed\n", r->rpool.proxy_port[0], r->rpool.proxy_port[1])); @@ -725,6 +724,9 @@ } break; case PF_RDR: { + struct pf_state_key_cmp key; + uint16_t cut, low, high, nport; + if (pf_map_addr(pd->af, r, saddr, naddr, NULL, NULL, sn)) goto notrans; if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) @@ -745,9 +747,63 @@ /* Wrap around if necessary. */ if (tmp_nport > 65535) tmp_nport -= 65535; - *nport = htons((uint16_t)tmp_nport); + nport = htons((uint16_t)tmp_nport); } else if (r->rpool.proxy_port[0]) - *nport = htons(r->rpool.proxy_port[0]); + nport = htons(r->rpool.proxy_port[0]); + else + nport = dport; + + /* + * Update the destination port. + */ + *nportp = nport; + + /* + * Do we have a source port conflict in the stack state? Try to + * modulate the source port if so. Note that this is racy since + * the state lookup may not find any matches here but will once + * pf_create_state() actually instantiates the state. + */ + bzero(&key, sizeof(key)); + key.af = pd->af; + key.proto = pd->proto; + key.port[0] = sport; + PF_ACPY(&key.addr[0], saddr, key.af); + key.port[1] = nport; + PF_ACPY(&key.addr[1], naddr, key.af); + + if (!pf_find_state_all_exists(&key, PF_OUT)) + break; + + low = 50001; /* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */ + high = 65535; + cut = arc4random() % (1 + high - low) + low; + for (uint32_t tmp = cut; + tmp <= high && tmp <= UINT16_MAX; tmp++) { + key.port[0] = htons(tmp); + if (!pf_find_state_all_exists(&key, PF_OUT)) { + /* Update the source port. */ + (*nkp)->port[0] = htons(tmp); + goto out; + } + } + for (uint32_t tmp = cut - 1; tmp >= low; tmp--) { + key.port[0] = htons(tmp); + if (!pf_find_state_all_exists(&key, PF_OUT)) { + /* Update the source port. */ + (*nkp)->port[0] = htons(tmp); + goto out; + } + } + + DPFPRINTF(PF_DEBUG_MISC, + ("pf: RDR source port allocation failed\n")); + if (0) { +out: + DPFPRINTF(PF_DEBUG_MISC, + ("pf: RDR source port allocation %u->%u\n", + ntohs(sport), ntohs((*nkp)->port[0]))); + } break; } default: diff --git a/tests/sys/netpfil/pf/Makefile b/tests/sys/netpfil/pf/Makefile --- a/tests/sys/netpfil/pf/Makefile +++ b/tests/sys/netpfil/pf/Makefile @@ -70,6 +70,7 @@ pfsync_defer.py \ pft_ether.py \ pft_read_ipfix.py \ + rdr-srcport.py \ utils.subr ${PACKAGE}FILESMODE_CVE-2019-5597.py= 0555 diff --git a/tests/sys/netpfil/pf/rdr-srcport.py b/tests/sys/netpfil/pf/rdr-srcport.py new file mode 100644 --- /dev/null +++ b/tests/sys/netpfil/pf/rdr-srcport.py @@ -0,0 +1,20 @@ +# +# A helper script which accepts TCP connections and writes the remote port +# number to the stream. +# + +import socket + +def main(): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + s.bind(('0.0.0.0', 8888)) + s.listen(5) + + while True: + cs, addr = s.accept() + cs.sendall(str(addr[1]).encode()) + cs.close() + +if __name__ == '__main__': + main() diff --git a/tests/sys/netpfil/pf/rdr.sh b/tests/sys/netpfil/pf/rdr.sh --- a/tests/sys/netpfil/pf/rdr.sh +++ b/tests/sys/netpfil/pf/rdr.sh @@ -121,7 +121,108 @@ pft_cleanup } + +atf_test_case "srcport" "cleanup" +srcport_head() +{ + atf_set descr 'TCP rdr srcport modulation' + atf_set require.user root + atf_set require.progs python3 + atf_set timeout 9999 +} + +# +# Test that rdr works for multiple TCP with same srcip and srcport. +# +# Four jails, a, b, c, d, are used: +# - jail d runs a server on port 8888, +# - jail a makes connections to the server, routed through jails b and c, +# - jail b uses NAT to rewrite source addresses and ports to the same 2-tuple, +# avoiding the need to use SO_REUSEADDR in jail a, +# - jail c uses a redirect rule to map the destination address to the same +# address and port, resulting in a NAT state conflict. +# +# In this case, the rdr rule should also rewrite the source port (again) to +# resolve the state conflict. +# +srcport_body() +{ + pft_init + + j="rdr:srcport" + epair1=$(vnet_mkepair) + epair2=$(vnet_mkepair) + epair3=$(vnet_mkepair) + + echo $epair_one + echo $epair_two + + vnet_mkjail ${j}a ${epair1}a + vnet_mkjail ${j}b ${epair1}b ${epair2}a + vnet_mkjail ${j}c ${epair2}b ${epair3}a + vnet_mkjail ${j}d ${epair3}b + + # configure addresses for a + jexec ${j}a ifconfig lo0 up + jexec ${j}a ifconfig ${epair1}a inet 198.51.100.50/24 up + jexec ${j}a ifconfig ${epair1}a inet alias 198.51.100.51/24 + jexec ${j}a ifconfig ${epair1}a inet alias 198.51.100.52/24 + + # configure addresses for b + jexec ${j}b ifconfig lo0 up + jexec ${j}b ifconfig ${epair1}b inet 198.51.100.1/24 up + jexec ${j}b ifconfig ${epair2}a inet 198.51.101.2/24 up + + # configure addresses for c + jexec ${j}c ifconfig lo0 up + jexec ${j}c ifconfig ${epair2}b inet 198.51.101.3/24 up + jexec ${j}c ifconfig ${epair2}b inet alias 198.51.101.4/24 + jexec ${j}c ifconfig ${epair2}b inet alias 198.51.101.5/24 + jexec ${j}c ifconfig ${epair3}a inet 203.0.113.1/24 up + + # configure addresses for d + jexec ${j}d ifconfig lo0 up + jexec ${j}d ifconfig ${epair3}b inet 203.0.113.50/24 up + + jexec ${j}b sysctl net.inet.ip.forwarding=1 + jexec ${j}c sysctl net.inet.ip.forwarding=1 + jexec ${j}b pfctl -e + jexec ${j}c pfctl -e + + pft_set_rules ${j}b \ + "set debug misc" \ + "nat on ${epair2}a inet from 198.51.100.0/24 to any -> ${epair2}a static-port" + + pft_set_rules ${j}c \ + "set debug misc" \ + "rdr on ${epair2}b proto tcp from any to ${epair2}b port 7777 -> 203.0.113.50 port 8888" + + jexec ${j}a route add default 198.51.100.1 + jexec ${j}c route add 198.51.100.0/24 198.51.101.2 + jexec ${j}d route add 198.51.101.0/24 203.0.113.1 + + jexec ${j}d python3 $(atf_get_srcdir)/rdr-srcport.py & + echo $! > rdr-srcport.pid + sleep 1 + + echo a | jexec ${j}a nc -w 3 -s 198.51.100.50 -p 1234 198.51.101.3 7777 > port1 + + jexec ${j}a nc -s 198.51.100.51 -p 1234 198.51.101.4 7777 > port2 & + jexec ${j}a nc -s 198.51.100.52 -p 1234 198.51.101.5 7777 > port3 & + sleep 1 + + atf_check -o inline:"1234" cat port1 + atf_check -o match:"[0-9]+" -o not-inline:"1234" cat port2 + atf_check -o match:"[0-9]+" -o not-inline:"1234" cat port3 +} + +srcport_cleanup() +{ + pft_cleanup +} + atf_init_test_cases() { atf_add_test_case "tcp_v6" + atf_add_test_case "srcport" }