diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c index 5d85e16f18e3..4b1d74e0e61f 100644 --- a/sys/netpfil/pf/pf_lb.c +++ b/sys/netpfil/pf/pf_lb.c @@ -1,1326 +1,1338 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002 - 2008 Henning Brauer * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. * * $OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $ */ #include #include "opt_pf.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #endif /* INET */ #ifdef INET6 #include #endif /* INET6 */ /* * Limit the amount of work we do to find a free source port for redirects that * introduce a state conflict. 
*/ #define V_pf_rdr_srcport_rewrite_tries VNET(pf_rdr_srcport_rewrite_tries) VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16; static uint64_t pf_hash(struct pf_addr *, struct pf_addr *, struct pf_poolhashkey *, sa_family_t); static struct pf_krule *pf_match_translation(int, struct pf_test_ctx *); static enum pf_test_status pf_step_into_translation_anchor(int, struct pf_test_ctx *, struct pf_krule *); static int pf_get_sport(struct pf_pdesc *, struct pf_krule *, struct pf_addr *, uint16_t *, uint16_t, uint16_t, struct pf_kpool *, struct pf_udp_mapping **, pf_sn_types_t); static bool pf_islinklocal(const sa_family_t, const struct pf_addr *); static uint64_t pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, struct pf_poolhashkey *key, sa_family_t af) { SIPHASH_CTX ctx; #ifdef INET6 union { uint64_t hash64; uint32_t hash32[2]; } h; #endif /* INET6 */ uint64_t res = 0; _Static_assert(sizeof(*key) >= SIPHASH_KEY_LENGTH, ""); switch (af) { #ifdef INET case AF_INET: res = SipHash24(&ctx, (const uint8_t *)key, &inaddr->addr32[0], sizeof(inaddr->addr32[0])); hash->addr32[0] = res; break; #endif /* INET */ #ifdef INET6 case AF_INET6: res = SipHash24(&ctx, (const uint8_t *)key, &inaddr->addr32[0], 4 * sizeof(inaddr->addr32[0])); h.hash64 = res; hash->addr32[0] = h.hash32[0]; hash->addr32[1] = h.hash32[1]; /* * siphash isn't big enough, but flipping it around is * good enough here. */ hash->addr32[2] = ~h.hash32[1]; hash->addr32[3] = ~h.hash32[0]; break; #endif /* INET6 */ default: unhandled_af(af); } return (res); } #define PF_TEST_ATTRIB(t, a) \ if (t) { \ r = a; \ continue; \ } else do { \ } while (0) static enum pf_test_status pf_match_translation_rule(int rs_num, struct pf_test_ctx *ctx, struct pf_kruleset *ruleset) { struct pf_krule *r; struct pf_pdesc *pd = ctx->pd; int rtableid = -1; r = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); while (r != NULL) { struct pf_rule_addr *src = NULL, *dst = NULL; struct pf_addr_wrap *xdst = NULL; if (r->action == PF_BINAT && pd->dir == PF_IN) { src = &r->dst; if (r->rdr.cur != NULL) xdst = &r->rdr.cur->addr; } else { src = &r->src; dst = &r->dst; } pf_counter_u64_add(&r->evaluations, 1); PF_TEST_ATTRIB(pfi_kkif_match(r->kif, pd->kif) == r->ifnot, r->skip[PF_SKIP_IFP]); PF_TEST_ATTRIB(r->direction && r->direction != pd->dir, r->skip[PF_SKIP_DIR]); PF_TEST_ATTRIB(r->af && r->af != pd->af, r->skip[PF_SKIP_AF]); PF_TEST_ATTRIB(r->proto && r->proto != pd->proto, r->skip[PF_SKIP_PROTO]); PF_TEST_ATTRIB(PF_MISMATCHAW(&src->addr, &pd->nsaddr, pd->af, src->neg, pd->kif, M_GETFIB(pd->m)), r->skip[src == &r->src ? PF_SKIP_SRC_ADDR : PF_SKIP_DST_ADDR]); PF_TEST_ATTRIB(src->port_op && !pf_match_port(src->port_op, src->port[0], src->port[1], pd->nsport), r->skip[src == &r->src ? PF_SKIP_SRC_PORT : PF_SKIP_DST_PORT]); PF_TEST_ATTRIB(dst != NULL && PF_MISMATCHAW(&dst->addr, &pd->ndaddr, pd->af, dst->neg, NULL, M_GETFIB(pd->m)), r->skip[PF_SKIP_DST_ADDR]); PF_TEST_ATTRIB(xdst != NULL && PF_MISMATCHAW(xdst, &pd->ndaddr, pd->af, 0, NULL, M_GETFIB(pd->m)), TAILQ_NEXT(r, entries)); PF_TEST_ATTRIB(dst != NULL && dst->port_op && !pf_match_port(dst->port_op, dst->port[0], dst->port[1], pd->ndport), r->skip[PF_SKIP_DST_PORT]); PF_TEST_ATTRIB(r->match_tag && !pf_match_tag(pd->m, r, &ctx->tag, pd->pf_mtag ? 
pd->pf_mtag->tag : 0), TAILQ_NEXT(r, entries)); PF_TEST_ATTRIB(r->os_fingerprint != PF_OSFP_ANY && (pd->proto != IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, &pd->hdr.tcp), r->os_fingerprint)), TAILQ_NEXT(r, entries)); if (r->tag) ctx->tag = r->tag; if (r->rtableid >= 0) rtableid = r->rtableid; if (r->anchor == NULL) { if (r->action == PF_NONAT || r->action == PF_NORDR || r->action == PF_NOBINAT) { *ctx->rm = NULL; } else { /* * found matching r */ ctx->tr = r; /* * anchor, with ruleset, where r belongs to */ *ctx->am = ctx->a; /* * ruleset where r belongs to */ *ctx->rsm = ruleset; /* * ruleset, where anchor belongs to. */ ctx->arsm = ctx->aruleset; } break; } else { ctx->a = r; /* remember anchor */ ctx->aruleset = ruleset; /* and its ruleset */ if (pf_step_into_translation_anchor(rs_num, ctx, r) != PF_TEST_OK) { break; } } r = TAILQ_NEXT(r, entries); } if (ctx->tag > 0 && pf_tag_packet(pd, ctx->tag)) return (PF_TEST_FAIL); if (rtableid >= 0) M_SETFIB(pd->m, rtableid); return (PF_TEST_OK); } static enum pf_test_status pf_step_into_translation_anchor(int rs_num, struct pf_test_ctx *ctx, struct pf_krule *r) { enum pf_test_status rv; PF_RULES_RASSERT(); if (ctx->depth >= PF_ANCHOR_STACK_MAX) { printf("%s: anchor stack overflow on %s\n", __func__, r->anchor->name); return (PF_TEST_FAIL); } ctx->depth++; if (r->anchor_wildcard) { struct pf_kanchor *child; rv = PF_TEST_OK; RB_FOREACH(child, pf_kanchor_node, &r->anchor->children) { rv = pf_match_translation_rule(rs_num, ctx, &child->ruleset); if ((rv == PF_TEST_QUICK) || (rv == PF_TEST_FAIL)) { /* * we either hit a rule with quick action * (more likely), or hit some runtime * error (e.g. pool_get() failure). */ break; } } } else { rv = pf_match_translation_rule(rs_num, ctx, &r->anchor->ruleset); } ctx->depth--; return (rv); } static struct pf_krule * pf_match_translation(int rs_num, struct pf_test_ctx *ctx) { enum pf_test_status rv; MPASS(ctx->depth == 0); rv = pf_match_translation_rule(rs_num, ctx, &pf_main_ruleset); MPASS(ctx->depth == 0); if (rv != PF_TEST_OK) return (NULL); return (ctx->tr); } static int pf_get_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr, uint16_t *nport, uint16_t low, uint16_t high, struct pf_kpool *rpool, struct pf_udp_mapping **udp_mapping, pf_sn_types_t sn_type) { struct pf_state_key_cmp key; struct pf_addr init_addr; int dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN; int sidx = pd->sidx; int didx = pd->didx; bzero(&init_addr, sizeof(init_addr)); if (udp_mapping) { MPASS(*udp_mapping == NULL); } /* * If we are UDP and have an existing mapping we can get source port * from the mapping. In this case we have to look up the src_node as * pf_map_addr would. */ if (pd->proto == IPPROTO_UDP && (rpool->opts & PF_POOL_ENDPI)) { struct pf_udp_endpoint_cmp udp_source; bzero(&udp_source, sizeof(udp_source)); udp_source.af = pd->af; pf_addrcpy(&udp_source.addr, &pd->nsaddr, pd->af); udp_source.port = pd->nsport; if (udp_mapping) { struct pf_ksrc_node *sn = NULL; struct pf_srchash *sh = NULL; *udp_mapping = pf_udp_mapping_find(&udp_source); if (*udp_mapping) { pf_addrcpy(naddr, &(*udp_mapping)->endpoints[1].addr, pd->af); *nport = (*udp_mapping)->endpoints[1].port; /* * Try to find a src_node as per pf_map_addr(). * XXX: Why? This code seems to do nothing.
*/ if (rpool->opts & PF_POOL_STICKYADDR && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) sn = pf_find_src_node(&pd->nsaddr, r, pd->af, &sh, sn_type, false); if (sn != NULL) PF_SRC_NODE_UNLOCK(sn); return (0); } else { *udp_mapping = pf_udp_mapping_create(pd->af, &pd->nsaddr, pd->nsport, &init_addr, 0); if (*udp_mapping == NULL) return (1); } } } if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, &(pd->naf), NULL, &init_addr, rpool, sn_type)) goto failed; if (pd->proto == IPPROTO_ICMP) { if (pd->ndport == htons(ICMP_ECHO)) { low = 1; high = 65535; } else return (0); /* Don't try to modify non-echo ICMP */ } #ifdef INET6 if (pd->proto == IPPROTO_ICMPV6) { if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) { low = 1; high = 65535; } else return (0); /* Don't try to modify non-echo ICMP */ } #endif /* INET6 */ bzero(&key, sizeof(key)); key.af = pd->naf; key.proto = pd->proto; do { pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af); pf_addrcpy(&key.addr[sidx], naddr, key.af); key.port[didx] = pd->ndport; if (udp_mapping && *udp_mapping) pf_addrcpy(&(*udp_mapping)->endpoints[1].addr, naddr, pd->af); /* * port search; start random, step; * similar 2 portloop in in_pcbbind */ if (pd->proto == IPPROTO_SCTP) { key.port[sidx] = pd->nsport; if (!pf_find_state_all_exists(&key, dir)) { *nport = pd->nsport; return (0); } else { return (1); /* Fail mapping. */ } } else if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP || pd->proto == IPPROTO_ICMP) || (low == 0 && high == 0)) { /* * XXX bug: icmp states don't use the id on both sides. * (traceroute -I through nat) */ key.port[sidx] = pd->nsport; if (!pf_find_state_all_exists(&key, dir)) { *nport = pd->nsport; return (0); } } else if (low == high) { key.port[sidx] = htons(low); if (!pf_find_state_all_exists(&key, dir)) { if (udp_mapping && *udp_mapping != NULL) { (*udp_mapping)->endpoints[1].port = htons(low); if (pf_udp_mapping_insert(*udp_mapping) == 0) { *nport = htons(low); return (0); } } else { *nport = htons(low); return (0); } } } else { uint32_t tmp; uint16_t cut; if (low > high) { tmp = low; low = high; high = tmp; } /* low < high */ cut = arc4random() % (1 + high - low) + low; /* low <= cut <= high */ for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) { if (udp_mapping && *udp_mapping != NULL) { (*udp_mapping)->endpoints[sidx].port = htons(tmp); if (pf_udp_mapping_insert(*udp_mapping) == 0) { *nport = htons(tmp); return (0); } } else { key.port[sidx] = htons(tmp); if (!pf_find_state_all_exists(&key, dir)) { *nport = htons(tmp); return (0); } } } tmp = cut; for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) { if (pd->proto == IPPROTO_UDP && (rpool->opts & PF_POOL_ENDPI && udp_mapping != NULL)) { (*udp_mapping)->endpoints[1].port = htons(tmp); if (pf_udp_mapping_insert(*udp_mapping) == 0) { *nport = htons(tmp); return (0); } } else { key.port[sidx] = htons(tmp); if (!pf_find_state_all_exists(&key, dir)) { *nport = htons(tmp); return (0); } } } } switch (rpool->opts & PF_POOL_TYPEMASK) { case PF_POOL_RANDOM: case PF_POOL_ROUNDROBIN: /* * pick a different source address since we're out * of free port choices for the current one. */ if (pf_map_addr_sn(pd->naf, r, &pd->nsaddr, naddr, &(pd->naf), NULL, &init_addr, rpool, sn_type)) return (1); break; case PF_POOL_NONE: case PF_POOL_SRCHASH: case PF_POOL_BITMASK: default: return (1); } } while (! 
PF_AEQ(&init_addr, naddr, pd->naf) ); failed: if (udp_mapping) { uma_zfree(V_pf_udp_mapping_z, *udp_mapping); *udp_mapping = NULL; } return (1); /* none available */ } static bool pf_islinklocal(const sa_family_t af, const struct pf_addr *addr) { if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6)) return (true); return (false); } static int pf_get_mape_sport(struct pf_pdesc *pd, struct pf_krule *r, struct pf_addr *naddr, uint16_t *nport, struct pf_udp_mapping **udp_mapping, struct pf_kpool *rpool) { uint16_t psmask, low, highmask; uint16_t i, ahigh, cut; int ashift, psidshift; ashift = 16 - rpool->mape.offset; psidshift = ashift - rpool->mape.psidlen; psmask = rpool->mape.psid & ((1U << rpool->mape.psidlen) - 1); psmask = psmask << psidshift; highmask = (1U << psidshift) - 1; ahigh = (1U << rpool->mape.offset) - 1; cut = arc4random() & ahigh; if (cut == 0) cut = 1; for (i = cut; i <= ahigh; i++) { low = (i << ashift) | psmask; if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, rpool, udp_mapping, PF_SN_NAT)) return (0); } for (i = cut - 1; i > 0; i--) { low = (i << ashift) | psmask; if (!pf_get_sport(pd, r, naddr, nport, low, low | highmask, rpool, udp_mapping, PF_SN_NAT)) return (0); } return (1); } u_short pf_map_addr(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, struct pf_addr *naddr, struct pfi_kkif **nkif, sa_family_t *naf, struct pf_addr *init_addr, struct pf_kpool *rpool) { u_short reason = PFRES_MATCH; struct pf_addr *raddr = NULL, *rmask = NULL; struct pfr_ktable *kt; uint64_t hashidx; int cnt; sa_family_t wanted_af; u_int8_t pool_type; bool prefer_ipv6_nexthop = rpool->opts & PF_POOL_IPV6NH; KASSERT(saf != 0, ("%s: saf == 0", __func__)); KASSERT(naf != NULL, ("%s: naf = NULL", __func__)); KASSERT((*naf) != 0, ("%s: *naf = 0", __func__)); /* * Given (*naf) is a hint about AF of the forwarded packet. * It might be changed if prefer_ipv6_nexthop is enabled and * the combination of nexthop AF and packet AF allows for it. */ wanted_af = (*naf); mtx_lock(&rpool->mtx); /* Find the route using chosen algorithm. Store the found route in src_node if it was given or found. */ if (rpool->cur->addr.type == PF_ADDR_NOROUTE) { reason = PFRES_MAPFAILED; goto done_pool_mtx; } if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { switch (wanted_af) { #ifdef INET case AF_INET: if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && !PF_POOL_DYNTYPE(rpool->opts)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; } raddr = &rpool->cur->addr.p.dyn->pfid_addr4; rmask = &rpool->cur->addr.p.dyn->pfid_mask4; break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && !PF_POOL_DYNTYPE(rpool->opts)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; } raddr = &rpool->cur->addr.p.dyn->pfid_addr6; rmask = &rpool->cur->addr.p.dyn->pfid_mask6; break; #endif /* INET6 */ default: unhandled_af(wanted_af); } } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!PF_POOL_DYNTYPE(rpool->opts)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } } else { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; } /* * For pools with a single host with the prefer-ipv6-nexthop option * we can return pool address of any AF, unless the forwarded packet * is IPv6, then we can return only if pool address is IPv6. * For non-prefer-ipv6-nexthop we can return pool address only * of wanted AF, unless the pool address'es AF is unknown, which * happens in case old ioctls have been used to set up the pool. 
* * Round-robin pools have their own logic for retrying next addresses. */ pool_type = rpool->opts & PF_POOL_TYPEMASK; if (pool_type == PF_POOL_NONE || pool_type == PF_POOL_BITMASK || ((pool_type == PF_POOL_RANDOM || pool_type == PF_POOL_SRCHASH) && rpool->cur->addr.type != PF_ADDR_TABLE && rpool->cur->addr.type != PF_ADDR_DYNIFTL)) { if (prefer_ipv6_nexthop) { if (rpool->cur->af == AF_INET && (*naf) == AF_INET6) { reason = PFRES_MAPFAILED; goto done_pool_mtx; } wanted_af = rpool->cur->af; } else { if (rpool->cur->af != 0 && rpool->cur->af != (*naf)) { reason = PFRES_MAPFAILED; goto done_pool_mtx; } } } switch (pool_type) { case PF_POOL_NONE: pf_addrcpy(naddr, raddr, wanted_af); break; case PF_POOL_BITMASK: pf_poolmask(naddr, raddr, rmask, saddr, wanted_af); break; case PF_POOL_RANDOM: if (rpool->cur->addr.type == PF_ADDR_TABLE || rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (rpool->cur->addr.type == PF_ADDR_TABLE) kt = rpool->cur->addr.p.tbl; else kt = rpool->cur->addr.p.dyn->pfid_kt; kt = pfr_ktable_select_active(kt); if (kt == NULL) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } cnt = kt->pfrkt_cnt; if (cnt == 0) rpool->tblidx = 0; else rpool->tblidx = (int)arc4random_uniform(cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); if (prefer_ipv6_nexthop) wanted_af = AF_INET6; retry_other_af_random: if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, false)) { /* Retry with IPv4 nexthop for IPv4 traffic */ if (prefer_ipv6_nexthop && wanted_af == AF_INET6 && (*naf) == AF_INET) { wanted_af = AF_INET; goto retry_other_af_random; } else { /* no hosts in wanted AF */ reason = PFRES_MAPFAILED; goto done_pool_mtx; } } pf_addrcpy(naddr, &rpool->counter, wanted_af); } else if (init_addr != NULL && PF_AZERO(init_addr, wanted_af)) { switch (wanted_af) { #ifdef INET case AF_INET: rpool->counter.addr32[0] = arc4random(); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (rmask->addr32[3] != 0xffffffff) rpool->counter.addr32[3] = arc4random(); else break; if (rmask->addr32[2] != 0xffffffff) rpool->counter.addr32[2] = arc4random(); else break; if (rmask->addr32[1] != 0xffffffff) rpool->counter.addr32[1] = arc4random(); else break; if (rmask->addr32[0] != 0xffffffff) rpool->counter.addr32[0] = arc4random(); break; #endif /* INET6 */ } pf_poolmask(naddr, raddr, rmask, &rpool->counter, wanted_af); pf_addrcpy(init_addr, naddr, wanted_af); } else { pf_addr_inc(&rpool->counter, wanted_af); pf_poolmask(naddr, raddr, rmask, &rpool->counter, wanted_af); } break; case PF_POOL_SRCHASH: { unsigned char hash[16]; hashidx = pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, wanted_af); if (rpool->cur->addr.type == PF_ADDR_TABLE || rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (rpool->cur->addr.type == PF_ADDR_TABLE) kt = rpool->cur->addr.p.tbl; else kt = rpool->cur->addr.p.dyn->pfid_kt; kt = pfr_ktable_select_active(kt); if (kt == NULL) { reason = PFRES_MAPFAILED; goto done_pool_mtx; /* unsupported */ } cnt = kt->pfrkt_cnt; if (cnt == 0) rpool->tblidx = 0; else rpool->tblidx = (int)(hashidx % cnt); memset(&rpool->counter, 0, sizeof(rpool->counter)); if (prefer_ipv6_nexthop) wanted_af = AF_INET6; retry_other_af_srchash: if (pfr_pool_get(kt, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, false)) { /* Retry with IPv4 nexthop for IPv4 traffic */ if (prefer_ipv6_nexthop && wanted_af == AF_INET6 && (*naf) == AF_INET) { wanted_af = AF_INET; goto retry_other_af_srchash; } else { /* no hosts in wanted AF */ reason = PFRES_MAPFAILED; goto 
done_pool_mtx; } } pf_addrcpy(naddr, &rpool->counter, wanted_af); } else { pf_poolmask(naddr, raddr, rmask, (struct pf_addr *)&hash, wanted_af); } break; } case PF_POOL_ROUNDROBIN: { struct pf_kpooladdr *acur = rpool->cur; retry_other_af_rr: if (prefer_ipv6_nexthop) wanted_af = rpool->ipv6_nexthop_af; if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, wanted_af, NULL, true)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, true)) goto get_addr; } else if (rpool->cur->af == wanted_af && pf_match_addr(0, raddr, rmask, &rpool->counter, wanted_af)) goto get_addr; if (prefer_ipv6_nexthop && (*naf) == AF_INET && wanted_af == AF_INET6) { /* Reset table index when changing wanted AF. */ rpool->tblidx = -1; rpool->ipv6_nexthop_af = AF_INET; goto retry_other_af_rr; } try_next: /* Reset prefer-ipv6-nexthop search to IPv6 when iterating pools. */ rpool->ipv6_nexthop_af = AF_INET6; if (TAILQ_NEXT(rpool->cur, entries) == NULL) rpool->cur = TAILQ_FIRST(&rpool->list); else rpool->cur = TAILQ_NEXT(rpool->cur, entries); try_next_ipv6_nexthop_rr: /* Reset table index when iterating pools or changing wanted AF. */ rpool->tblidx = -1; if (prefer_ipv6_nexthop) wanted_af = rpool->ipv6_nexthop_af; if (rpool->cur->addr.type == PF_ADDR_TABLE) { if (!pfr_pool_get(rpool->cur->addr.p.tbl, &rpool->tblidx, &rpool->counter, wanted_af, NULL, true)) goto get_addr; } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, &rpool->tblidx, &rpool->counter, wanted_af, pf_islinklocal, true)) goto get_addr; } else { if (rpool->cur->af == wanted_af) { raddr = &rpool->cur->addr.v.a.addr; rmask = &rpool->cur->addr.v.a.mask; pf_addrcpy(&rpool->counter, raddr, wanted_af); goto get_addr; } } if (prefer_ipv6_nexthop && (*naf) == AF_INET && wanted_af == AF_INET6) { rpool->ipv6_nexthop_af = AF_INET; goto try_next_ipv6_nexthop_rr; } if (rpool->cur != acur) goto try_next; reason = PFRES_MAPFAILED; goto done_pool_mtx; get_addr: pf_addrcpy(naddr, &rpool->counter, wanted_af); if (init_addr != NULL && PF_AZERO(init_addr, wanted_af)) pf_addrcpy(init_addr, naddr, wanted_af); pf_addr_inc(&rpool->counter, wanted_af); break; } } if (wanted_af == 0) { reason = PFRES_MAPFAILED; goto done_pool_mtx; } if (nkif) *nkif = rpool->cur->kif; (*naf) = wanted_af; done_pool_mtx: mtx_unlock(&rpool->mtx); return (reason); } u_short pf_map_addr_sn(sa_family_t saf, struct pf_krule *r, struct pf_addr *saddr, struct pf_addr *naddr, sa_family_t *naf, struct pfi_kkif **nkif, struct pf_addr *init_addr, struct pf_kpool *rpool, pf_sn_types_t sn_type) { struct pf_ksrc_node *sn = NULL; struct pf_srchash *sh = NULL; u_short reason = 0; /* * If this is a sticky-address rule, try to find an existing src_node. */ if (rpool->opts & PF_POOL_STICKYADDR && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) sn = pf_find_src_node(saddr, r, saf, &sh, sn_type, false); if (sn != NULL) { PF_SRC_NODE_LOCK_ASSERT(sn); (*naf) = sn->raf; /* If the supplied address is the same as the current one we've * been asked before, so tell the caller that there's no other * address to be had. 
*/ if (PF_AEQ(naddr, &(sn->raddr), *naf)) { printf("%s: no more addresses\n", __func__); reason = PFRES_MAPFAILED; goto done; } pf_addrcpy(naddr, &(sn->raddr), *naf); if (nkif) *nkif = sn->rkif; if (V_pf_status.debug >= PF_DEBUG_NOISY) { printf("%s: src tracking maps ", __func__); pf_print_host(saddr, 0, saf); printf(" to "); pf_print_host(naddr, 0, *naf); if (nkif) printf("@%s", (*nkif)->pfik_name); printf("\n"); } goto done; } /* * Source node has not been found. Find a new address and store it * in variables given by the caller. */ if ((reason = pf_map_addr(saf, r, saddr, naddr, nkif, naf, init_addr, rpool)) != 0) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: pf_map_addr has failed\n", __func__); goto done; } if (V_pf_status.debug >= PF_DEBUG_NOISY && (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { printf("%s: selected address ", __func__); pf_print_host(naddr, 0, *naf); if (nkif) printf("@%s", (*nkif)->pfik_name); printf("\n"); } done: if (sn != NULL) PF_SRC_NODE_UNLOCK(sn); return (reason); } u_short pf_get_translation(struct pf_test_ctx *ctx) { struct pf_krule *r = NULL; u_short transerror; PF_RULES_RASSERT(); KASSERT(ctx->sk == NULL, ("*skp not NULL")); KASSERT(ctx->nk == NULL, ("*nkp not NULL")); ctx->nr = NULL; if (ctx->pd->dir == PF_OUT) { r = pf_match_translation(PF_RULESET_BINAT, ctx); if (r == NULL) r = pf_match_translation(PF_RULESET_NAT, ctx); } else { r = pf_match_translation(PF_RULESET_RDR, ctx); if (r == NULL) r = pf_match_translation(PF_RULESET_BINAT, ctx); } if (r == NULL) return (PFRES_MAX); switch (r->action) { case PF_NONAT: case PF_NOBINAT: case PF_NORDR: return (PFRES_MAX); } transerror = pf_get_transaddr(ctx, r, r->action, &(r->rdr)); if (transerror == PFRES_MATCH) ctx->nr = r; return (transerror); } u_short pf_get_transaddr(struct pf_test_ctx *ctx, struct pf_krule *r, uint8_t nat_action, struct pf_kpool *rpool) { struct pf_pdesc *pd = ctx->pd; struct pf_addr *naddr; + int idx; uint16_t *nportp; uint16_t low, high; u_short reason; PF_RULES_RASSERT(); KASSERT(r != NULL, ("r is NULL")); KASSERT(!(r->rule_flag & PFRULE_AFTO), ("AFTO rule")); if (ctx->sk == NULL && ctx->nk == NULL) { if (pf_state_key_setup(pd, pd->nsport, pd->ndport, &ctx->sk, &ctx->nk)) return (PFRES_MEMORY); } - naddr = &ctx->nk->addr[1]; - nportp = &ctx->nk->port[1]; + switch (nat_action) { + case PF_NAT: + idx = pd->sidx; + break; + case PF_BINAT: + idx = 1; + break; + case PF_RDR: + idx = pd->didx; + break; + } + naddr = &ctx->nk->addr[idx]; + nportp = &ctx->nk->port[idx]; switch (nat_action) { case PF_NAT: if (pd->proto == IPPROTO_ICMP) { low = 1; high = 65535; } else { low = rpool->proxy_port[0]; high = rpool->proxy_port[1]; } if (rpool->mape.offset > 0) { if (pf_get_mape_sport(pd, r, naddr, nportp, &ctx->udp_mapping, rpool)) { DPFPRINTF(PF_DEBUG_MISC, "pf: MAP-E port allocation (%u/%u/%u)" " failed", rpool->mape.offset, rpool->mape.psidlen, rpool->mape.psid); reason = PFRES_MAPFAILED; goto notrans; } } else if (pf_get_sport(pd, r, naddr, nportp, low, high, rpool, &ctx->udp_mapping, PF_SN_NAT)) { DPFPRINTF(PF_DEBUG_MISC, "pf: NAT proxy port allocation (%u-%u) failed", rpool->proxy_port[0], rpool->proxy_port[1]); reason = PFRES_MAPFAILED; goto notrans; } break; case PF_BINAT: switch (pd->dir) { case PF_OUT: if (rpool->cur->addr.type == PF_ADDR_DYNIFTL){ switch (pd->af) { #ifdef INET case AF_INET: if (rpool->cur->addr.p.dyn-> pfid_acnt4 < 1) { reason = PFRES_MAPFAILED; goto notrans; } pf_poolmask(naddr, &rpool->cur->addr.p.dyn->pfid_addr4, &rpool->cur->addr.p.dyn->pfid_mask4, 
&pd->nsaddr, AF_INET); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (rpool->cur->addr.p.dyn-> pfid_acnt6 < 1) { reason = PFRES_MAPFAILED; goto notrans; } pf_poolmask(naddr, &rpool->cur->addr.p.dyn->pfid_addr6, &rpool->cur->addr.p.dyn->pfid_mask6, &pd->nsaddr, AF_INET6); break; #endif /* INET6 */ } } else pf_poolmask(naddr, &rpool->cur->addr.v.a.addr, &rpool->cur->addr.v.a.mask, &pd->nsaddr, pd->af); break; case PF_IN: if (r->src.addr.type == PF_ADDR_DYNIFTL) { switch (pd->af) { #ifdef INET case AF_INET: if (r->src.addr.p.dyn->pfid_acnt4 < 1) { reason = PFRES_MAPFAILED; goto notrans; } pf_poolmask(naddr, &r->src.addr.p.dyn->pfid_addr4, &r->src.addr.p.dyn->pfid_mask4, &pd->ndaddr, AF_INET); break; #endif /* INET */ #ifdef INET6 case AF_INET6: if (r->src.addr.p.dyn->pfid_acnt6 < 1) { reason = PFRES_MAPFAILED; goto notrans; } pf_poolmask(naddr, &r->src.addr.p.dyn->pfid_addr6, &r->src.addr.p.dyn->pfid_mask6, &pd->ndaddr, AF_INET6); break; #endif /* INET6 */ } } else pf_poolmask(naddr, &r->src.addr.v.a.addr, &r->src.addr.v.a.mask, &pd->ndaddr, pd->af); break; } break; case PF_RDR: { struct pf_state_key_cmp key; int tries; uint16_t cut, low, high, nport; reason = pf_map_addr_sn(pd->af, r, &pd->nsaddr, naddr, &(pd->naf), NULL, NULL, rpool, PF_SN_NAT); if (reason != 0) goto notrans; if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) pf_poolmask(naddr, naddr, &rpool->cur->addr.v.a.mask, &pd->ndaddr, pd->af); /* Do not change SCTP ports. */ if (pd->proto == IPPROTO_SCTP) break; if (rpool->proxy_port[1]) { uint32_t tmp_nport; uint16_t div; div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1; div = (div == 0) ? 1 : div; tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) + rpool->proxy_port[0]; /* Wrap around if necessary. */ if (tmp_nport > 65535) tmp_nport -= 65535; nport = htons((uint16_t)tmp_nport); } else if (rpool->proxy_port[0]) nport = htons(rpool->proxy_port[0]); else nport = pd->ndport; /* * Update the destination port. */ *nportp = nport; /* * Do we have a source port conflict in the stack state? Try to * modulate the source port if so. Note that this is racy since * the state lookup may not find any matches here but will once * pf_create_state() actually instantiates the state. */ bzero(&key, sizeof(key)); key.af = pd->af; key.proto = pd->proto; key.port[0] = pd->nsport; pf_addrcpy(&key.addr[0], &pd->nsaddr, key.af); key.port[1] = nport; pf_addrcpy(&key.addr[1], naddr, key.af); if (!pf_find_state_all_exists(&key, PF_OUT)) break; tries = 0; low = 50001; /* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */ high = 65535; cut = arc4random() % (1 + high - low) + low; for (uint32_t tmp = cut; tmp <= high && tmp <= UINT16_MAX && tries < V_pf_rdr_srcport_rewrite_tries; tmp++, tries++) { key.port[0] = htons(tmp); if (!pf_find_state_all_exists(&key, PF_OUT)) { /* Update the source port. */ ctx->nk->port[0] = htons(tmp); goto out; } } for (uint32_t tmp = cut - 1; tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries; tmp--, tries++) { key.port[0] = htons(tmp); if (!pf_find_state_all_exists(&key, PF_OUT)) { /* Update the source port. */ ctx->nk->port[0] = htons(tmp); goto out; } } /* * We failed to find a match. Push on ahead anyway, let * pf_state_insert() be the arbiter of whether the state * conflict is tolerable. In particular, with TCP connections * the state may be reused if the TCP state is terminal. 
*/ DPFPRINTF(PF_DEBUG_MISC, "pf: RDR source port allocation failed"); break; out: DPFPRINTF(PF_DEBUG_MISC, "pf: RDR source port allocation %u->%u", ntohs(pd->nsport), ntohs(ctx->nk->port[0])); break; } default: panic("%s: unknown action %u", __func__, r->action); } /* Return success only if translation really happened. */ if (bcmp(ctx->sk, ctx->nk, sizeof(struct pf_state_key_cmp))) { return (PFRES_MATCH); } reason = PFRES_MAX; notrans: uma_zfree(V_pf_state_key_z, ctx->nk); uma_zfree(V_pf_state_key_z, ctx->sk); ctx->sk = ctx->nk = NULL; return (reason); } int pf_get_transaddr_af(struct pf_krule *r, struct pf_pdesc *pd) { #if defined(INET) && defined(INET6) struct pf_addr ndaddr, nsaddr, naddr; u_int16_t nport = 0; int prefixlen = 96; bzero(&nsaddr, sizeof(nsaddr)); bzero(&ndaddr, sizeof(ndaddr)); if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: af-to %s %s, ", pd->naf == AF_INET ? "inet" : "inet6", TAILQ_EMPTY(&r->rdr.list) ? "nat" : "rdr"); pf_print_host(&pd->nsaddr, pd->nsport, pd->af); printf(" -> "); pf_print_host(&pd->ndaddr, pd->ndport, pd->af); printf("\n"); } if (TAILQ_EMPTY(&r->nat.list)) panic("pf_get_transaddr_af: no nat pool for source address"); /* get source address and port */ if (pf_get_sport(pd, r, &nsaddr, &nport, r->nat.proxy_port[0], r->nat.proxy_port[1], &r->nat, NULL, PF_SN_NAT)) { DPFPRINTF(PF_DEBUG_MISC, "pf: af-to NAT proxy port allocation (%u-%u) failed", r->nat.proxy_port[0], r->nat.proxy_port[1]); return (-1); } if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) { pd->ndport = ntohs(pd->ndport); if (pd->ndport == ICMP6_ECHO_REQUEST) pd->ndport = ICMP_ECHO; else if (pd->ndport == ICMP6_ECHO_REPLY) pd->ndport = ICMP_ECHOREPLY; pd->ndport = htons(pd->ndport); } else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) { pd->nsport = ntohs(pd->nsport); if (pd->ndport == ICMP_ECHO) pd->ndport = ICMP6_ECHO_REQUEST; else if (pd->ndport == ICMP_ECHOREPLY) pd->ndport = ICMP6_ECHO_REPLY; pd->nsport = htons(pd->nsport); } /* get the destination address and port */ if (! TAILQ_EMPTY(&r->rdr.list)) { if (pf_map_addr_sn(pd->naf, r, &nsaddr, &naddr, &(pd->naf), NULL, NULL, &r->rdr, PF_SN_NAT)) return (-1); if (r->rdr.proxy_port[0]) pd->ndport = htons(r->rdr.proxy_port[0]); if (pd->naf == AF_INET) { /* The prefix is the IPv4 rdr address */ prefixlen = in_mask2len( (struct in_addr *)&r->rdr.cur->addr.v.a.mask); inet_nat46(pd->naf, &pd->ndaddr, &ndaddr, &naddr, prefixlen); } else { /* The prefix is the IPv6 rdr address */ prefixlen = in6_mask2len( (struct in6_addr *)&r->rdr.cur->addr.v.a.mask, NULL); inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &naddr, prefixlen); } } else { if (pd->naf == AF_INET) { /* The prefix is the IPv6 dst address */ prefixlen = in6_mask2len( (struct in6_addr *)&r->dst.addr.v.a.mask, NULL); if (prefixlen < 32) prefixlen = 96; inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &pd->ndaddr, prefixlen); } else { /* * The prefix is the IPv6 nat address * (that was stored in pd->nsaddr) */ prefixlen = in6_mask2len( (struct in6_addr *)&r->nat.cur->addr.v.a.mask, NULL); if (prefixlen > 96) prefixlen = 96; inet_nat64(pd->naf, &pd->ndaddr, &ndaddr, &nsaddr, prefixlen); } } pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf); pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf); if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pf: af-to %s done, prefixlen %d, ", pd->naf == AF_INET ? 
"inet" : "inet6", prefixlen); pf_print_host(&pd->nsaddr, pd->nsport, pd->naf); printf(" -> "); pf_print_host(&pd->ndaddr, pd->ndport, pd->naf); printf("\n"); } return (0); #else return (-1); #endif } diff --git a/tests/sys/netpfil/pf/nat.sh b/tests/sys/netpfil/pf/nat.sh index 1ef87cee3598..0824671fa0f1 100644 --- a/tests/sys/netpfil/pf/nat.sh +++ b/tests/sys/netpfil/pf/nat.sh @@ -1,883 +1,918 @@ # # SPDX-License-Identifier: BSD-2-Clause # # Copyright (c) 2018 Kristof Provost # Copyright (c) 2025 Kajetan Staszkiewicz # Copyright (c) 2021 KUROSAWA Takahiro # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. . $(atf_get_srcdir)/utils.subr atf_test_case "exhaust" "cleanup" exhaust_head() { atf_set descr 'Test exhausting the NAT pool' atf_set require.user root } exhaust_body() { pft_init epair_nat=$(vnet_mkepair) epair_echo=$(vnet_mkepair) vnet_mkjail nat ${epair_nat}b ${epair_echo}a vnet_mkjail echo ${epair_echo}b ifconfig ${epair_nat}a 192.0.2.2/24 up route add -net 198.51.100.0/24 192.0.2.1 jexec nat ifconfig ${epair_nat}b 192.0.2.1/24 up jexec nat ifconfig ${epair_echo}a 198.51.100.1/24 up jexec nat sysctl net.inet.ip.forwarding=1 jexec echo ifconfig ${epair_echo}b 198.51.100.2/24 up jexec echo /usr/sbin/inetd -p ${PWD}/inetd-echo.pid $(atf_get_srcdir)/echo_inetd.conf # Disable checksum offload on one of the interfaces to ensure pf handles that jexec nat ifconfig ${epair_nat}a -txcsum # Enable pf! jexec nat pfctl -e pft_set_rules nat \ "nat pass on ${epair_echo}a inet from 192.0.2.0/24 to any -> (${epair_echo}a) port 30000:30001 sticky-address" # Sanity check atf_check -s exit:0 -o ignore ping -c 3 198.51.100.2 atf_check -s exit:0 -o match:foo* echo "foo" | nc -N 198.51.100.2 7 atf_check -s exit:0 -o match:foo* echo "foo" | nc -N 198.51.100.2 7 # This one will fail, but that's expected echo "foo" | nc -N 198.51.100.2 7 & sleep 1 # If the kernel is stuck in pf_get_sport() this will not succeed either. timeout 2 jexec nat pfctl -sa if [ $? 
-eq 124 ]; then # Timed out atf_fail "pfctl timeout" fi } exhaust_cleanup() { pft_cleanup } atf_test_case "nested_anchor" "cleanup" nested_anchor_head() { atf_set descr 'Test setting and retrieving nested nat anchors' atf_set require.user root } nested_anchor_body() { pft_init epair=$(vnet_mkepair) vnet_mkjail nat ${epair}a pft_set_rules nat \ "nat-anchor \"foo\"" echo "nat-anchor \"bar\"" | jexec nat pfctl -g -a foo -f - echo "nat on ${epair}a from any to any -> (${epair}a)" | jexec nat pfctl -g -a "foo/bar" -f - atf_check -s exit:0 -o inline:"nat-anchor \"foo\" all { nat-anchor \"bar\" all { nat on ${epair}a all -> (${epair}a) round-robin } } " jexec nat pfctl -sn -a "*" } endpoint_independent_setup() { pft_init filter="udp and dst port 1234" # only capture udp pings epair_client=$(vnet_mkepair) epair_nat=$(vnet_mkepair) epair_server1=$(vnet_mkepair) epair_server2=$(vnet_mkepair) bridge=$(vnet_mkbridge) vnet_mkjail nat ${epair_client}b ${epair_nat}a vnet_mkjail client ${epair_client}a vnet_mkjail server1 ${epair_server1}a vnet_mkjail server2 ${epair_server2}a ifconfig ${epair_server1}b up ifconfig ${epair_server2}b up ifconfig ${epair_nat}b up ifconfig ${bridge} \ addm ${epair_server1}b \ addm ${epair_server2}b \ addm ${epair_nat}b \ up jexec nat ifconfig ${epair_client}b 192.0.2.1/24 up jexec nat ifconfig ${epair_nat}a 198.51.100.42/24 up jexec nat sysctl net.inet.ip.forwarding=1 jexec client ifconfig ${epair_client}a 192.0.2.2/24 up jexec client route add default 192.0.2.1 jexec server1 ifconfig ${epair_server1}a 198.51.100.32/24 up jexec server2 ifconfig ${epair_server2}a 198.51.100.22/24 up } endpoint_independent_common() { # Enable pf! jexec nat pfctl -e # validate non-endpoint independent nat rule behaviour pft_set_rules nat "${1}" jexec server1 tcpdump -i ${epair_server1}a -w ${PWD}/server1.pcap \ --immediate-mode $filter & server1tcppid="$!" jexec server2 tcpdump -i ${epair_server2}a -w ${PWD}/server2.pcap \ --immediate-mode $filter & server2tcppid="$!" # send out multiple packets for i in $(seq 1 10); do echo "ping" | jexec client nc -u 198.51.100.32 1234 -p 4242 -w 0 echo "ping" | jexec client nc -u 198.51.100.22 1234 -p 4242 -w 0 done kill $server1tcppid kill $server2tcppid tuple_server1=$(tcpdump -r ${PWD}/server1.pcap | awk '{addr=$3} END {print addr}') tuple_server2=$(tcpdump -r ${PWD}/server2.pcap | awk '{addr=$3} END {print addr}') if [ -z $tuple_server1 ] then atf_fail "server1 did not receive connection from client (default)" fi if [ -z $tuple_server2 ] then atf_fail "server2 did not receive connection from client (default)" fi if [ "$tuple_server1" = "$tuple_server2" ] then echo "server1 tcpdump: $tuple_server1" echo "server2 tcpdump: $tuple_server2" atf_fail "Received same IP:port on server1 and server2 (default)" fi # validate endpoint independent nat rule behaviour pft_set_rules nat "${2}" jexec server1 tcpdump -i ${epair_server1}a -w ${PWD}/server1.pcap \ --immediate-mode $filter & server1tcppid="$!" jexec server2 tcpdump -i ${epair_server2}a -w ${PWD}/server2.pcap \ --immediate-mode $filter & server2tcppid="$!" 
# send out multiple packets, sometimes one fails to go through for i in $(seq 1 10); do echo "ping" | jexec client nc -u 198.51.100.32 1234 -p 4242 -w 0 echo "ping" | jexec client nc -u 198.51.100.22 1234 -p 4242 -w 0 done kill $server1tcppid kill $server2tcppid tuple_server1=$(tcpdump -r ${PWD}/server1.pcap | awk '{addr=$3} END {print addr}') tuple_server2=$(tcpdump -r ${PWD}/server2.pcap | awk '{addr=$3} END {print addr}') if [ -z $tuple_server1 ] then atf_fail "server1 did not receive connection from client (endpoint-independent)" fi if [ -z $tuple_server2 ] then atf_fail "server2 did not receive connection from client (endpoint-independent)" fi if [ ! "$tuple_server1" = "$tuple_server2" ] then echo "server1 tcpdump: $tuple_server1" echo "server2 tcpdump: $tuple_server2" atf_fail "Received different IP:port on server1 than server2 (endpoint-independent)" fi } atf_test_case "endpoint_independent_compat" "cleanup" endpoint_independent_compat_head() { atf_set descr 'Test that a client behind NAT gets the same external IP:port for different servers' atf_set require.user root } endpoint_independent_compat_body() { endpoint_independent_setup # Sets ${epair_…} variables endpoint_independent_common \ "nat on ${epair_nat}a inet from ! (${epair_nat}a) to any -> (${epair_nat}a)" \ "nat on ${epair_nat}a inet from ! (${epair_nat}a) to any -> (${epair_nat}a) endpoint-independent" } endpoint_independent_compat_cleanup() { pft_cleanup rm -f server1.out rm -f server2.out } atf_test_case "endpoint_independent_pass" "cleanup" endpoint_independent_pass_head() { atf_set descr 'Test that a client behind NAT gets the same external IP:port for different servers' atf_set require.user root } endpoint_independent_pass_body() { endpoint_independent_setup # Sets ${epair_…} variables endpoint_independent_common \ "pass out on ${epair_nat}a inet from ! (${epair_nat}a) to any nat-to (${epair_nat}a) keep state" \ "pass out on ${epair_nat}a inet from ! 
(${epair_nat}a) to any nat-to (${epair_nat}a) endpoint-independent keep state" } endpoint_independent_pass_cleanup() { pft_cleanup rm -f server1.out rm -f server2.out } nested_anchor_cleanup() { pft_cleanup } atf_test_case "nat6_nolinklocal" "cleanup" nat6_nolinklocal_head() { atf_set descr 'Ensure we do not use link-local addresses' atf_set require.user root } nat6_nolinklocal_body() { pft_init epair_nat=$(vnet_mkepair) epair_echo=$(vnet_mkepair) vnet_mkjail nat ${epair_nat}b ${epair_echo}a vnet_mkjail echo ${epair_echo}b ifconfig ${epair_nat}a inet6 2001:db8::2/64 no_dad up route add -6 -net 2001:db8:1::/64 2001:db8::1 jexec nat ifconfig ${epair_nat}b inet6 2001:db8::1/64 no_dad up jexec nat ifconfig ${epair_echo}a inet6 2001:db8:1::1/64 no_dad up jexec nat sysctl net.inet6.ip6.forwarding=1 jexec echo ifconfig ${epair_echo}b inet6 2001:db8:1::2/64 no_dad up # Ensure we can't reply to link-local pings jexec echo pfctl -e pft_set_rules echo \ "pass" \ "block in inet6 proto icmp6 from fe80::/10 to any icmp6-type echoreq" jexec nat pfctl -e pft_set_rules nat \ "nat pass on ${epair_echo}a inet6 from 2001:db8::/64 to any -> (${epair_echo}a)" \ "pass" # Sanity check atf_check -s exit:0 -o ignore \ ping -6 -c 1 2001:db8::1 for i in `seq 0 10` do atf_check -s exit:0 -o ignore \ ping -6 -c 1 2001:db8:1::2 done } nat6_nolinklocal_cleanup() { pft_cleanup } empty_table_common() { option=$1 pft_init epair_wan=$(vnet_mkepair) epair_lan=$(vnet_mkepair) vnet_mkjail srv ${epair_wan}a jexec srv ifconfig ${epair_wan}a 192.0.2.2/24 up vnet_mkjail rtr ${epair_wan}b ${epair_lan}a jexec rtr ifconfig ${epair_wan}b 192.0.2.1/24 up jexec rtr ifconfig ${epair_lan}a 198.51.100.1/24 up jexec rtr sysctl net.inet.ip.forwarding=1 ifconfig ${epair_lan}b 198.51.100.2/24 up route add default 198.51.100.1 jexec rtr pfctl -e pft_set_rules rtr \ "table " \ "nat on ${epair_wan}b inet from 198.51.100.0/24 -> ${option}" \ "pass" # Sanity checks atf_check -s exit:0 -o ignore \ jexec rtr ping -c 1 192.0.2.2 atf_check -s exit:0 -o ignore \ ping -c 1 198.51.100.1 atf_check -s exit:0 -o ignore \ ping -c 1 192.0.2.1 # Provoke divide by zero ping -c 1 192.0.2.2 true } atf_test_case "empty_table_source_hash" "cleanup" empty_table_source_hash_head() { atf_set descr 'Test source-hash on an empty table' atf_set require.user root } empty_table_source_hash_body() { empty_table_common "source-hash" } empty_table_source_hash_cleanup() { pft_cleanup } atf_test_case "empty_table_random" "cleanup" empty_table_random_head() { atf_set descr 'Test random on an empty table' atf_set require.user root } empty_table_random_body() { empty_table_common "random" } empty_table_random_cleanup() { pft_cleanup } no_addrs_common() { option=$1 pft_init epair_wan=$(vnet_mkepair) epair_lan=$(vnet_mkepair) vnet_mkjail srv ${epair_wan}a jexec srv ifconfig ${epair_wan}a 192.0.2.2/24 up vnet_mkjail rtr ${epair_wan}b ${epair_lan}a jexec rtr route add -net 192.0.2.0/24 -iface ${epair_wan}b jexec rtr ifconfig ${epair_lan}a 198.51.100.1/24 up jexec rtr sysctl net.inet.ip.forwarding=1 ifconfig ${epair_lan}b 198.51.100.2/24 up route add default 198.51.100.1 jexec rtr pfctl -e pft_set_rules rtr \ "nat on ${epair_wan}b inet from 198.51.100.0/24 -> (${epair_wan}b) ${option}" \ "pass" # Provoke divide by zero ping -c 1 192.0.2.2 true } atf_test_case "no_addrs_source_hash" "cleanup" no_addrs_source_hash_head() { atf_set descr 'Test source-hash on an interface with no addresses' atf_set require.user root } no_addrs_source_hash_body() { no_addrs_common "source-hash" }
no_addrs_source_hash_cleanup() { pft_cleanup } atf_test_case "no_addrs_random" "cleanup" no_addrs_random_head() { atf_set descr 'Test random on an interface with no addresses' atf_set require.user root } no_addrs_random_body() { no_addrs_common "random" } no_addrs_random_cleanup() { pft_cleanup } -atf_test_case "nat_pass" "cleanup" -nat_pass_head() +atf_test_case "nat_pass_in" "cleanup" +nat_pass_in_head() { - atf_set descr 'IPv4 NAT on pass rule' + atf_set descr 'IPv4 NAT on inbound pass rule' atf_set require.user root atf_set require.progs scapy } -nat_pass_body() +nat_pass_in_body() +{ + setup_router_server_ipv4 + # Delete the route back to make sure that the traffic has been NAT-ed + jexec server route del -net ${net_tester} ${net_server_host_router} + # Provide routing back to the NAT address + jexec server route add 203.0.113.0/24 ${net_server_host_router} + jexec router route add 203.0.113.0/24 -iface ${epair_tester}b + + pft_set_rules router \ + "block" \ + "pass in on ${epair_tester}b inet proto tcp nat-to 203.0.113.0 keep state" \ + "pass out on ${epair_server}a inet proto tcp keep state" + + ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4201 + + jexec router pfctl -qvvsr + jexec router pfctl -qvvss + jexec router ifconfig + jexec router netstat -rn +} + +nat_pass_in_cleanup() +{ + pft_cleanup +} + +nat_pass_out_head() +{ + atf_set descr 'IPv4 NAT on outbound pass rule' + atf_set require.user root + atf_set require.progs scapy +} + +nat_pass_out_body() { setup_router_server_ipv4 # Delete the route back to make sure that the traffic has been NAT-ed jexec server route del -net ${net_tester} ${net_server_host_router} pft_set_rules router \ "block" \ "pass in on ${epair_tester}b inet proto tcp keep state" \ "pass out on ${epair_server}a inet proto tcp nat-to ${epair_server}a keep state" ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4201 jexec router pfctl -qvvsr jexec router pfctl -qvvss jexec router ifconfig jexec router netstat -rn } -nat_pass_cleanup() +nat_pass_out_cleanup() { pft_cleanup } atf_test_case "nat_match" "cleanup" nat_match_head() { atf_set descr 'IPv4 NAT on match rule' atf_set require.user root atf_set require.progs scapy } nat_match_body() { setup_router_server_ipv4 # Delete the route back to make sure that the traffic has been NAT-ed jexec server route del -net ${net_tester} ${net_server_host_router} # NAT is applied during ruleset evaluation: # rules after "match" match on NAT-ed address pft_set_rules router \ "block" \ "pass in on ${epair_tester}b inet proto tcp keep state" \ "match out on ${epair_server}a inet proto tcp nat-to ${epair_server}a" \ "pass out on ${epair_server}a inet proto tcp from ${epair_server}a keep state" ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4201 jexec router pfctl -qvvsr jexec router pfctl -qvvss jexec router ifconfig jexec router netstat -rn } nat_match_cleanup() { pft_cleanup } map_e_common() { NC_TRY_COUNT=12 pft_init epair_map_e=$(vnet_mkepair) epair_echo=$(vnet_mkepair) vnet_mkjail map_e ${epair_map_e}b ${epair_echo}a vnet_mkjail echo ${epair_echo}b ifconfig ${epair_map_e}a 192.0.2.2/24 up route add -net 198.51.100.0/24 192.0.2.1 jexec map_e ifconfig ${epair_map_e}b 192.0.2.1/24 up jexec map_e ifconfig ${epair_echo}a 198.51.100.1/24 up jexec map_e sysctl net.inet.ip.forwarding=1 jexec echo ifconfig ${epair_echo}b 198.51.100.2/24 up jexec echo /usr/sbin/inetd -p ${PWD}/inetd-echo.pid $(atf_get_srcdir)/echo_inetd.conf # Enable pf! 
jexec map_e pfctl -e } atf_test_case "map_e_compat" "cleanup" map_e_compat_head() { atf_set descr 'map-e-portset test' atf_set require.user root } map_e_compat_body() { map_e_common pft_set_rules map_e \ "nat pass on ${epair_echo}a inet from 192.0.2.0/24 to any -> (${epair_echo}a) map-e-portset 2/12/0x342" # Only allow specified ports. jexec echo pfctl -e pft_set_rules echo "block return all" \ "pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 19720:19723 to (${epair_echo}b) port 7" \ "pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 36104:36107 to (${epair_echo}b) port 7" \ "pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 52488:52491 to (${epair_echo}b) port 7" \ "set skip on lo" i=0 while [ ${i} -lt ${NC_TRY_COUNT} ] do echo "foo ${i}" | timeout 2 nc -N 198.51.100.2 7 if [ $? -ne 0 ]; then atf_fail "nc failed (${i})" fi i=$((${i}+1)) done } map_e_compat_cleanup() { pft_cleanup } atf_test_case "map_e_pass" "cleanup" map_e_pass_head() { atf_set descr 'map-e-portset test' atf_set require.user root } map_e_pass_body() { map_e_common pft_set_rules map_e \ "pass out on ${epair_echo}a inet from 192.0.2.0/24 to any nat-to (${epair_echo}a) map-e-portset 2/12/0x342 keep state" jexec map_e pfctl -qvvsr # Only allow specified ports. jexec echo pfctl -e pft_set_rules echo "block return all" \ "pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 19720:19723 to (${epair_echo}b) port 7" \ "pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 36104:36107 to (${epair_echo}b) port 7" \ "pass in on ${epair_echo}b inet proto tcp from 198.51.100.1 port 52488:52491 to (${epair_echo}b) port 7" \ "set skip on lo" i=0 while [ ${i} -lt ${NC_TRY_COUNT} ] do echo "foo ${i}" | timeout 2 nc -N 198.51.100.2 7 if [ $? -ne 0 ]; then atf_fail "nc failed (${i})" fi i=$((${i}+1)) done } map_e_pass_cleanup() { pft_cleanup } atf_test_case "binat_compat" "cleanup" binat_compat_head() { atf_set descr 'IPv4 BINAT with nat ruleset' atf_set require.user root atf_set require.progs scapy } binat_compat_body() { setup_router_server_ipv4 # Delete the route back to make sure that the traffic has been NAT-ed jexec server route del -net ${net_tester} ${net_server_host_router} pft_set_rules router \ "set state-policy if-bound" \ "set ruleset-optimization none" \ "binat on ${epair_server}a inet proto tcp from ${net_tester_host_tester} to any tag sometag -> ${epair_server}a" \ "block" \ "pass in on ${epair_tester}b inet proto tcp !tagged sometag keep state" \ "pass out on ${epair_server}a inet proto tcp tagged sometag keep state" \ "pass in on ${epair_server}a inet proto tcp tagged sometag keep state" \ "pass out on ${epair_tester}b inet proto tcp tagged sometag keep state" # Test the outbound NAT part of BINAT. ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4201 states=$(mktemp) || exit 1 jexec router pfctl -qvss | normalize_pfctl_s > $states for state_regexp in \ "${epair_tester}b tcp ${net_server_host_server}:9 <- ${net_tester_host_tester}:4201 .* 3:2 pkts,.* rule 1" \ "${epair_server}a tcp ${net_server_host_router}:4201 \(${net_tester_host_tester}:4201\) -> ${net_server_host_server}:9 .* 3:2 pkts,.* rule 2" \ ; do grep -qE "${state_regexp}" $states || atf_fail "State not found for '${state_regexp}'" done # Test the inbound RDR part of BINAT. # The "tester" becomes "server" and vice versa. 
inetd_conf=$(mktemp) echo "discard stream tcp nowait root internal" > $inetd_conf inetd -p ${PWD}/inetd_tester.pid $inetd_conf atf_check -s exit:0 \ jexec server ${common_dir}/pft_ping.py \ --ping-type=tcp3way --send-sport=4202 \ --sendif ${epair_server}b \ --to ${net_server_host_router} \ --replyif ${epair_server}b states=$(mktemp) || exit 1 jexec router pfctl -qvss | normalize_pfctl_s > $states for state_regexp in \ "${epair_server}a tcp ${net_tester_host_tester}:9 \(${net_server_host_router}:9\) <- ${net_server_host_server}:4202 .* 3:2 pkts,.* rule 3" \ "${epair_tester}b tcp ${net_server_host_server}:4202 -> ${net_tester_host_tester}:9 .* 3:2 pkts,.* rule 4" \ ; do grep -qE "${state_regexp}" $states || atf_fail "State not found for '${state_regexp}'" done } binat_compat_cleanup() { pft_cleanup kill $(cat ${PWD}/inetd_tester.pid) } atf_test_case "binat_match" "cleanup" binat_match_head() { atf_set descr 'IPv4 BINAT with nat ruleset' atf_set require.user root atf_set require.progs scapy } binat_match_body() { setup_router_server_ipv4 # Delete the route back to make sure that the traffic has been NAT-ed jexec server route del -net ${net_tester} ${net_server_host_router} # The "binat-to" rule expands to 2 rules so the ""pass" rules start at 3! pft_set_rules router \ "set state-policy if-bound" \ "set ruleset-optimization none" \ "block" \ "match on ${epair_server}a inet proto tcp from ${net_tester_host_tester} to any tag sometag binat-to ${epair_server}a" \ "pass in on ${epair_tester}b inet proto tcp !tagged sometag keep state" \ "pass out on ${epair_server}a inet proto tcp tagged sometag keep state" \ "pass in on ${epair_server}a inet proto tcp tagged sometag keep state" \ "pass out on ${epair_tester}b inet proto tcp tagged sometag keep state" # Test the outbound NAT part of BINAT. ping_server_check_reply exit:0 --ping-type=tcp3way --send-sport=4201 states=$(mktemp) || exit 1 jexec router pfctl -qvss | normalize_pfctl_s > $states for state_regexp in \ "${epair_tester}b tcp ${net_server_host_server}:9 <- ${net_tester_host_tester}:4201 .* 3:2 pkts,.* rule 3" \ "${epair_server}a tcp ${net_server_host_router}:4201 \(${net_tester_host_tester}:4201\) -> ${net_server_host_server}:9 .* 3:2 pkts,.* rule 4" \ ; do grep -qE "${state_regexp}" $states || atf_fail "State not found for '${state_regexp}'" done # Test the inbound RDR part of BINAT. # The "tester" becomes "server" and vice versa. 
inetd_conf=$(mktemp) echo "discard stream tcp nowait root internal" > $inetd_conf inetd -p ${PWD}/inetd_tester.pid $inetd_conf atf_check -s exit:0 \ jexec server ${common_dir}/pft_ping.py \ --ping-type=tcp3way --send-sport=4202 \ --sendif ${epair_server}b \ --to ${net_server_host_router} \ --replyif ${epair_server}b states=$(mktemp) || exit 1 jexec router pfctl -qvss | normalize_pfctl_s > $states for state_regexp in \ "${epair_server}a tcp ${net_tester_host_tester}:9 \(${net_server_host_router}:9\) <- ${net_server_host_server}:4202 .* 3:2 pkts,.* rule 5" \ "${epair_tester}b tcp ${net_server_host_server}:4202 -> ${net_tester_host_tester}:9 .* 3:2 pkts,.* rule 6" \ ; do grep -qE "${state_regexp}" $states || atf_fail "State not found for '${state_regexp}'" done } binat_match_cleanup() { pft_cleanup kill $(cat ${PWD}/inetd_tester.pid) } atf_test_case "empty_pool" "cleanup" empty_pool_head() { atf_set descr 'NAT with empty pool' atf_set require.user root atf_set require.progs python3 scapy } empty_pool_body() { pft_init setup_router_server_ipv6 pft_set_rules router \ "block" \ "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv }" \ "pass in on ${epair_tester}b" \ "pass out on ${epair_server}a inet6 from any to ${net_server_host_server} nat-to " \ # pf_map_addr_sn() won't be able to pick a target address, because # the table used in redireciton pool is empty. Packet will not be # forwarded, error counter will be increased. ping_server_check_reply exit:1 # Ignore warnings about not-loaded ALTQ atf_check -o "match:map-failed +1 +" -x "jexec router pfctl -qvvsi 2> /dev/null" } empty_pool_cleanup() { pft_cleanup } atf_test_case "dummynet_mask" "cleanup" dummynet_mask_head() { atf_set descr 'Verify that dummynet uses the pre-nat address for masking' atf_set require.user root } dummynet_mask_body() { dummynet_init epair_srv=$(vnet_mkepair) epair_cl=$(vnet_mkepair) ifconfig ${epair_cl}b 192.0.2.2/24 up route add default 192.0.2.1 vnet_mkjail srv ${epair_srv}a jexec srv ifconfig ${epair_srv}a 198.51.100.2/24 up vnet_mkjail gw ${epair_srv}b ${epair_cl}a jexec gw ifconfig ${epair_srv}b 198.51.100.1/24 up jexec gw ifconfig ${epair_cl}a 192.0.2.1/24 up jexec gw sysctl net.inet.ip.forwarding=1 jexec gw dnctl pipe 1 config delay 100 mask src-ip 0xffffff00 jexec gw pfctl -e pft_set_rules gw \ "nat on ${epair_srv}b inet from 192.0.2.0/24 to any -> (${epair_srv}b)" \ "pass out dnpipe 1" atf_check -s exit:0 -o ignore \ ping -c 3 198.51.100.2 # Now check that dummynet looked at the correct address atf_check -s exit:0 -o match:"ip.*192.0.2.0/0" \ jexec gw dnctl pipe show } dummynet_mask_cleanup() { pft_cleanup } atf_init_test_cases() { atf_add_test_case "exhaust" atf_add_test_case "nested_anchor" atf_add_test_case "endpoint_independent_compat" atf_add_test_case "endpoint_independent_pass" atf_add_test_case "nat6_nolinklocal" atf_add_test_case "empty_table_source_hash" atf_add_test_case "no_addrs_source_hash" atf_add_test_case "empty_table_random" atf_add_test_case "no_addrs_random" atf_add_test_case "map_e_compat" atf_add_test_case "map_e_pass" - atf_add_test_case "nat_pass" + atf_add_test_case "nat_pass_in" + atf_add_test_case "nat_pass_out" atf_add_test_case "nat_match" atf_add_test_case "binat_compat" atf_add_test_case "binat_match" atf_add_test_case "empty_pool" atf_add_test_case "dummynet_mask" }
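For reference, the renamed and added NAT test cases in this patch (nat_pass_in, nat_pass_out, nat_match) can be exercised with kyua(1), the FreeBSD test runner. A minimal sketch, assuming the stock tests(7) layout with the pf tests installed under /usr/tests/sys/netpfil/pf; the path and invocation are illustrative, not part of the change:

    # Run only the NAT test cases touched by this change (assumed install path).
    cd /usr/tests/sys/netpfil/pf
    kyua test nat:nat_pass_in nat:nat_pass_out nat:nat_match
    kyua report --verbose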