Page MenuHomeFreeBSD

D11137.id143021.diff
No OneTemporary

D11137.id143021.diff

diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y
--- a/sbin/pfctl/parse.y
+++ b/sbin/pfctl/parse.y
@@ -326,6 +326,7 @@
int marker;
#define POM_TYPE 0x01
#define POM_STICKYADDRESS 0x02
+#define POM_ENDPI 0x04
u_int8_t opts;
int type;
int staticport;
@@ -512,7 +513,7 @@
%token UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL
%token DNPIPE DNQUEUE RIDENTIFIER
%token LOAD RULESET_OPTIMIZATION PRIO
-%token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE
+%token STICKYADDRESS ENDPI MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE
%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY PFLOW
%token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS
%token DIVERTTO DIVERTREPLY BRIDGE_TO
@@ -4593,6 +4594,14 @@
pool_opts.marker |= POM_STICKYADDRESS;
pool_opts.opts |= PF_POOL_STICKYADDR;
}
+ | ENDPI {
+ if (pool_opts.marker & POM_ENDPI) {
+ yyerror("endpoint-independent cannot be redefined");
+ YYERROR;
+ }
+ pool_opts.marker |= POM_ENDPI;
+ pool_opts.opts |= PF_POOL_ENDPI;
+ }
| MAPEPORTSET number '/' number '/' number {
if (pool_opts.mape.offset) {
yyerror("map-e-portset cannot be redefined");
@@ -6299,6 +6308,7 @@
{ "dnqueue", DNQUEUE},
{ "drop", DROP},
{ "dup-to", DUPTO},
+ { "endpoint-independent", ENDPI},
{ "ether", ETHER},
{ "fail-policy", FAILPOLICY},
{ "fairq", FAIRQ},
diff --git a/sbin/pfctl/pfctl_parser.c b/sbin/pfctl/pfctl_parser.c
--- a/sbin/pfctl/pfctl_parser.c
+++ b/sbin/pfctl/pfctl_parser.c
@@ -488,6 +488,8 @@
}
if (pool->opts & PF_POOL_STICKYADDR)
printf(" sticky-address");
+ if (pool->opts & PF_POOL_ENDPI)
+ printf(" endpoint-independent");
if (id == PF_NAT && p1 == 0 && p2 == 0)
printf(" static-port");
if (pool->mape.offset > 0)
diff --git a/sbin/pfctl/tests/files/pf1021.in b/sbin/pfctl/tests/files/pf1021.in
new file mode 100644
--- /dev/null
+++ b/sbin/pfctl/tests/files/pf1021.in
@@ -0,0 +1 @@
+nat on vtnet1 inet from ! (vtnet1) to any -> (vtnet1) endpoint-independent
diff --git a/sbin/pfctl/tests/files/pf1021.ok b/sbin/pfctl/tests/files/pf1021.ok
new file mode 100644
--- /dev/null
+++ b/sbin/pfctl/tests/files/pf1021.ok
@@ -0,0 +1 @@
+nat on vtnet1 inet from ! (vtnet1) to any -> (vtnet1) round-robin endpoint-independent
diff --git a/share/man/man4/pf.4 b/share/man/man4/pf.4
--- a/share/man/man4/pf.4
+++ b/share/man/man4/pf.4
@@ -26,7 +26,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd September 2, 2024
+.Dd September 6, 2024
.Dt PF 4
.Os
.Sh NAME
@@ -89,6 +89,10 @@
Default value is 32768.
.It Va net.pf.rule_tag_hashsize
Size of the hash table that stores tags.
+.It Va net.pf.udpendpoint_hashsize
+Size of hash table that stores UDP endpoint mappings.
+Should be power of 2.
+Default value is 32768.
.It Va net.pf.default_to_drop
This value overrides
.Cd "options PF_DEFAULT_TO_DROP"
diff --git a/share/man/man5/pf.conf.5 b/share/man/man5/pf.conf.5
--- a/share/man/man5/pf.conf.5
+++ b/share/man/man5/pf.conf.5
@@ -27,7 +27,7 @@
.\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd June 24, 2024
+.Dd September 4, 2024
.Dt PF.CONF 5
.Os
.Sh NAME
@@ -2278,6 +2278,16 @@
With
.Ar nat
rules, the
+.It Ar endpoint-independent
+With
+.Ar nat
+rules, the
+.Ar endpoint-independent
+option causes
+.Xr pf 4
+to always map connections from a UDP source address and port to the same
+NAT address and port.
+This feature implements "full-cone" NAT behavior.
.Ar map-e-portset
option enables the source port translation of MAP-E (RFC 7597) Customer Edge.
In order to make the host act as a MAP-E Customer Edge, setting up a tunneling
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -940,6 +940,29 @@
u_int8_t pad[1];
};
+/* Keep synced with struct pf_udp_endpoint. */
+struct pf_udp_endpoint_cmp {
+ struct pf_addr addr;
+ uint16_t port;
+ sa_family_t af;
+ uint8_t pad[1];
+};
+
+struct pf_udp_endpoint {
+ struct pf_addr addr;
+ uint16_t port;
+ sa_family_t af;
+ uint8_t pad[1];
+
+ struct pf_udp_mapping *mapping;
+ LIST_ENTRY(pf_udp_endpoint) entry;
+};
+
+struct pf_udp_mapping {
+ struct pf_udp_endpoint endpoints[2];
+ u_int refs;
+};
+
/* Keep synced with struct pf_state_key. */
struct pf_state_key_cmp {
struct pf_addr addr[2];
@@ -1069,6 +1092,7 @@
union pf_krule_ptr nat_rule;
struct pf_addr rt_addr;
struct pf_state_key *key[2]; /* addresses stack and wire */
+ struct pf_udp_mapping *udp_mapping;
struct pfi_kkif *kif;
struct pfi_kkif *orig_kif; /* The real kif, even if we're a floating state (i.e. if == V_pfi_all). */
struct pfi_kkif *rt_kif;
@@ -2124,17 +2148,28 @@
struct mtx lock;
};
+struct pf_udpendpointhash {
+ LIST_HEAD(, pf_udp_endpoint) endpoints;
+ /* refcount is synchronized on the source endpoint's row lock */
+ struct mtx lock;
+};
+
extern u_long pf_ioctl_maxcount;
VNET_DECLARE(u_long, pf_hashmask);
#define V_pf_hashmask VNET(pf_hashmask)
VNET_DECLARE(u_long, pf_srchashmask);
#define V_pf_srchashmask VNET(pf_srchashmask)
+VNET_DECLARE(u_long, pf_udpendpointhashmask);
+#define V_pf_udpendpointhashmask VNET(pf_udpendpointhashmask)
#define PF_HASHSIZ (131072)
#define PF_SRCHASHSIZ (PF_HASHSIZ/4)
+#define PF_UDPENDHASHSIZ (PF_HASHSIZ/4)
VNET_DECLARE(struct pf_keyhash *, pf_keyhash);
VNET_DECLARE(struct pf_idhash *, pf_idhash);
+VNET_DECLARE(struct pf_udpendpointhash *, pf_udpendpointhash);
#define V_pf_keyhash VNET(pf_keyhash)
#define V_pf_idhash VNET(pf_idhash)
+#define V_pf_udpendpointhash VNET(pf_udpendpointhash)
VNET_DECLARE(struct pf_srchash *, pf_srchash);
#define V_pf_srchash VNET(pf_srchash)
@@ -2209,6 +2244,8 @@
#define V_pf_state_z VNET(pf_state_z)
VNET_DECLARE(uma_zone_t, pf_state_key_z);
#define V_pf_state_key_z VNET(pf_state_key_z)
+VNET_DECLARE(uma_zone_t, pf_udp_mapping_z);
+#define V_pf_udp_mapping_z VNET(pf_udp_mapping_z)
VNET_DECLARE(uma_zone_t, pf_state_scrub_z);
#define V_pf_state_scrub_z VNET(pf_state_scrub_z)
@@ -2281,6 +2318,15 @@
extern bool pf_find_state_all_exists(
const struct pf_state_key_cmp *,
u_int);
+extern struct pf_udp_mapping *pf_udp_mapping_find(struct pf_udp_endpoint_cmp
+ *endpoint);
+extern struct pf_udp_mapping *pf_udp_mapping_create(sa_family_t af,
+ struct pf_addr *src_addr, uint16_t src_port,
+ struct pf_addr *nat_addr, uint16_t nat_port);
+extern int pf_udp_mapping_insert(struct pf_udp_mapping
+ *mapping);
+extern void pf_udp_mapping_release(struct pf_udp_mapping
+ *mapping);
extern struct pf_ksrc_node *pf_find_src_node(struct pf_addr *,
struct pf_krule *, sa_family_t,
struct pf_srchash **, bool);
@@ -2574,7 +2620,8 @@
struct pf_state_key **, struct pf_state_key **,
struct pf_addr *, struct pf_addr *,
uint16_t, uint16_t, struct pf_kanchor_stackframe *,
- struct pf_krule **);
+ struct pf_krule **,
+ struct pf_udp_mapping **udp_mapping);
struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct mbuf *, int,
struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t);
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -129,6 +129,7 @@
PF_ADDR_RANGE };
#define PF_POOL_TYPEMASK 0x0f
#define PF_POOL_STICKYADDR 0x20
+#define PF_POOL_ENDPI 0x40
#define PF_WSCALE_FLAG 0x80
#define PF_WSCALE_MASK 0x0f
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -283,6 +283,7 @@
uma_zone_t pf_mtag_z;
VNET_DEFINE(uma_zone_t, pf_state_z);
VNET_DEFINE(uma_zone_t, pf_state_key_z);
+VNET_DEFINE(uma_zone_t, pf_udp_mapping_z);
VNET_DEFINE(struct unrhdr64, pf_stateid);
@@ -330,7 +331,7 @@
struct pf_state_key *, struct mbuf *, int,
u_int16_t, u_int16_t, int *, struct pfi_kkif *,
struct pf_kstate **, int, u_int16_t, u_int16_t,
- int, struct pf_krule_slist *);
+ int, struct pf_krule_slist *, struct pf_udp_mapping *);
static int pf_state_key_addr_setup(struct pf_pdesc *, struct mbuf *,
int, struct pf_state_key_cmp *, int, struct pf_addr *,
int, struct pf_addr *, int);
@@ -493,22 +494,29 @@
VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
VNET_DEFINE(struct pf_idhash *, pf_idhash);
VNET_DEFINE(struct pf_srchash *, pf_srchash);
+VNET_DEFINE(struct pf_udpendpointhash *, pf_udpendpointhash);
+VNET_DEFINE(struct pf_udpendpointmapping *, pf_udpendpointmapping);
SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"pf(4)");
VNET_DEFINE(u_long, pf_hashmask);
VNET_DEFINE(u_long, pf_srchashmask);
+VNET_DEFINE(u_long, pf_udpendpointhashmask);
VNET_DEFINE_STATIC(u_long, pf_hashsize);
#define V_pf_hashsize VNET(pf_hashsize)
VNET_DEFINE_STATIC(u_long, pf_srchashsize);
#define V_pf_srchashsize VNET(pf_srchashsize)
+VNET_DEFINE_STATIC(u_long, pf_udpendpointhashsize);
+#define V_pf_udpendpointhashsize VNET(pf_udpendpointhashsize)
u_long pf_ioctl_maxcount = 65535;
SYSCTL_ULONG(_net_pf, OID_AUTO, states_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(pf_hashsize), 0, "Size of pf(4) states hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, source_nodes_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
&VNET_NAME(pf_srchashsize), 0, "Size of pf(4) source nodes hashtable");
+SYSCTL_ULONG(_net_pf, OID_AUTO, udpendpoint_hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
+ &VNET_NAME(pf_udpendpointhashsize), 0, "Size of pf(4) endpoint hashtable");
SYSCTL_ULONG(_net_pf, OID_AUTO, request_maxcount, CTLFLAG_RWTUN,
&pf_ioctl_maxcount, 0, "Maximum number of tables, addresses, ... in a single ioctl() call");
@@ -699,6 +707,17 @@
return (h & V_pf_srchashmask);
}
+static inline uint32_t
+pf_hashudpendpoint(struct pf_udp_endpoint *endpoint)
+{
+ uint32_t h;
+
+ h = murmur3_32_hash32((uint32_t *)endpoint,
+ sizeof(struct pf_udp_endpoint_cmp)/sizeof(uint32_t),
+ V_pf_hashseed);
+ return (h & V_pf_udpendpointhashmask);
+}
+
#ifdef ALTQ
static int
pf_state_hash(struct pf_kstate *s)
@@ -1086,12 +1105,15 @@
struct pf_keyhash *kh;
struct pf_idhash *ih;
struct pf_srchash *sh;
+ struct pf_udpendpointhash *uh;
u_int i;
if (V_pf_hashsize == 0 || !powerof2(V_pf_hashsize))
V_pf_hashsize = PF_HASHSIZ;
if (V_pf_srchashsize == 0 || !powerof2(V_pf_srchashsize))
V_pf_srchashsize = PF_SRCHASHSIZ;
+ if (V_pf_udpendpointhashsize == 0 || !powerof2(V_pf_udpendpointhashsize))
+ V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;
V_pf_hashseed = arc4random();
@@ -1154,6 +1176,30 @@
for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++)
mtx_init(&sh->lock, "pf_srchash", NULL, MTX_DEF);
+
+ /* UDP endpoint mappings. */
+ V_pf_udp_mapping_z = uma_zcreate("pf UDP mappings",
+ sizeof(struct pf_udp_mapping), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
+ sizeof(struct pf_udpendpointhash), M_PFHASH, M_NOWAIT | M_ZERO);
+ if (V_pf_udpendpointhash == NULL) {
+ printf("pf: Unable to allocate memory for "
+ "udpendpoint_hashsize %lu.\n", V_pf_udpendpointhashsize);
+
+ V_pf_udpendpointhashsize = PF_UDPENDHASHSIZ;
+ V_pf_udpendpointhash = mallocarray(V_pf_udpendpointhashsize,
+ sizeof(struct pf_udpendpointhash), M_PFHASH, M_WAITOK | M_ZERO);
+ }
+
+ V_pf_udpendpointhashmask = V_pf_udpendpointhashsize - 1;
+ for (i = 0, uh = V_pf_udpendpointhash;
+ i <= V_pf_udpendpointhashmask;
+ i++, uh++) {
+ mtx_init(&uh->lock, "pf_udpendpointhash", NULL,
+ MTX_DEF | MTX_DUPOK);
+ }
+
/* ALTQ */
TAILQ_INIT(&V_pf_altqs[0]);
TAILQ_INIT(&V_pf_altqs[1]);
@@ -1187,10 +1233,12 @@
struct pf_keyhash *kh;
struct pf_idhash *ih;
struct pf_srchash *sh;
+ struct pf_udpendpointhash *uh;
struct pf_send_entry *pfse, *next;
u_int i;
- for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= V_pf_hashmask;
+ for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash;
+ i <= V_pf_hashmask;
i++, kh++, ih++) {
KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty",
__func__));
@@ -1209,6 +1257,15 @@
}
free(V_pf_srchash, M_PFHASH);
+ for (i = 0, uh = V_pf_udpendpointhash;
+ i <= V_pf_udpendpointhashmask;
+ i++, uh++) {
+ KASSERT(LIST_EMPTY(&uh->endpoints),
+ ("%s: udp endpoint hash not empty", __func__));
+ mtx_destroy(&uh->lock);
+ }
+ free(V_pf_udpendpointhash, M_PFHASH);
+
STAILQ_FOREACH_SAFE(pfse, &V_pf_sendqueue, pfse_next, next) {
m_freem(pfse->pfse_m);
free(pfse, M_PFTEMP);
@@ -1218,6 +1275,7 @@
uma_zdestroy(V_pf_sources_z);
uma_zdestroy(V_pf_state_z);
uma_zdestroy(V_pf_state_key_z);
+ uma_zdestroy(V_pf_udp_mapping_z);
}
static int
@@ -1807,6 +1865,123 @@
return (false);
}
+struct pf_udp_mapping *
+pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port,
+ struct pf_addr *nat_addr, uint16_t nat_port)
+{
+ struct pf_udp_mapping *mapping;
+
+ mapping = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO);
+ if (mapping == NULL)
+ return (NULL);
+ PF_ACPY(&mapping->endpoints[0].addr, src_addr, af);
+ mapping->endpoints[0].port = src_port;
+ mapping->endpoints[0].af = af;
+ mapping->endpoints[0].mapping = mapping;
+ PF_ACPY(&mapping->endpoints[1].addr, nat_addr, af);
+ mapping->endpoints[1].port = nat_port;
+ mapping->endpoints[1].af = af;
+ mapping->endpoints[1].mapping = mapping;
+ refcount_init(&mapping->refs, 1);
+ return (mapping);
+}
+
+int
+pf_udp_mapping_insert(struct pf_udp_mapping *mapping)
+{
+ struct pf_udpendpointhash *h0, *h1;
+ struct pf_udp_endpoint *endpoint;
+ int ret = EEXIST;
+
+ h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
+ h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
+ if (h0 == h1) {
+ PF_HASHROW_LOCK(h0);
+ } else if (h0 < h1) {
+ PF_HASHROW_LOCK(h0);
+ PF_HASHROW_LOCK(h1);
+ } else {
+ PF_HASHROW_LOCK(h1);
+ PF_HASHROW_LOCK(h0);
+ }
+
+ LIST_FOREACH(endpoint, &h0->endpoints, entry) {
+ if (bcmp(endpoint, &mapping->endpoints[0],
+ sizeof(struct pf_udp_endpoint_cmp)) == 0)
+ break;
+ }
+ if (endpoint != NULL)
+ goto cleanup;
+ LIST_FOREACH(endpoint, &h1->endpoints, entry) {
+ if (bcmp(endpoint, &mapping->endpoints[1],
+ sizeof(struct pf_udp_endpoint_cmp)) == 0)
+ break;
+ }
+ if (endpoint != NULL)
+ goto cleanup;
+ LIST_INSERT_HEAD(&h0->endpoints, &mapping->endpoints[0], entry);
+ LIST_INSERT_HEAD(&h1->endpoints, &mapping->endpoints[1], entry);
+ ret = 0;
+
+cleanup:
+ if (h0 != h1) {
+ PF_HASHROW_UNLOCK(h0);
+ PF_HASHROW_UNLOCK(h1);
+ } else {
+ PF_HASHROW_UNLOCK(h0);
+ }
+ return (ret);
+}
+
+void
+pf_udp_mapping_release(struct pf_udp_mapping *mapping)
+{
+ /* refcount is synchronized on the source endpoint's row lock */
+ struct pf_udpendpointhash *h0, *h1;
+
+ if (mapping == NULL)
+ return;
+
+ h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
+ PF_HASHROW_LOCK(h0);
+ if (refcount_release(&mapping->refs)) {
+ LIST_REMOVE(&mapping->endpoints[0], entry);
+ PF_HASHROW_UNLOCK(h0);
+ h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
+ PF_HASHROW_LOCK(h1);
+ LIST_REMOVE(&mapping->endpoints[1], entry);
+ PF_HASHROW_UNLOCK(h1);
+
+ uma_zfree(V_pf_udp_mapping_z, mapping);
+ } else {
+ PF_HASHROW_UNLOCK(h0);
+ }
+}
+
+
+struct pf_udp_mapping *
+pf_udp_mapping_find(struct pf_udp_endpoint_cmp *key)
+{
+ struct pf_udpendpointhash *uh;
+ struct pf_udp_endpoint *endpoint;
+
+ uh = &V_pf_udpendpointhash[pf_hashudpendpoint((struct pf_udp_endpoint*)key)];
+
+ PF_HASHROW_LOCK(uh);
+ LIST_FOREACH(endpoint, &uh->endpoints, entry) {
+ if (bcmp(endpoint, key, sizeof(struct pf_udp_endpoint_cmp)) == 0 &&
+ bcmp(endpoint, &endpoint->mapping->endpoints[0],
+ sizeof(struct pf_udp_endpoint_cmp)) == 0)
+ break;
+ }
+ if (endpoint == NULL) {
+ PF_HASHROW_UNLOCK(uh);
+ return (NULL);
+ }
+ refcount_acquire(&endpoint->mapping->refs);
+ PF_HASHROW_UNLOCK(uh);
+ return (endpoint->mapping);
+}
/* END state table stuff */
static void
@@ -2423,6 +2598,9 @@
PF_HASHROW_UNLOCK(ih);
pf_detach_state(s);
+
+ pf_udp_mapping_release(s->udp_mapping);
+
/* pf_state_insert() initialises refs to 2 */
return (pf_release_staten(s, 2));
}
@@ -4686,6 +4864,7 @@
u_int16_t bproto_sum = 0, bip_sum = 0;
u_int8_t icmptype = 0, icmpcode = 0;
struct pf_kanchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE];
+ struct pf_udp_mapping *udp_mapping = NULL;
PF_RULES_RASSERT();
@@ -4760,7 +4939,7 @@
/* check packet for BINAT/NAT/RDR */
transerror = pf_get_translation(pd, m, off, kif, &nsn, &sk,
- &nk, saddr, daddr, sport, dport, anchor_stack, &nr);
+ &nk, saddr, daddr, sport, dport, anchor_stack, &nr, &udp_mapping);
switch (transerror) {
default:
/* A translation error occurred. */
@@ -5058,8 +5237,9 @@
int action;
action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off,
sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum,
- hdrlen, &match_rules);
+ hdrlen, &match_rules, udp_mapping);
if (action != PF_PASS) {
+ pf_udp_mapping_release(udp_mapping);
if (action == PF_DROP &&
(r->rule_flag & PFRULE_RETURN))
pf_return(r, nr, pd, sk, off, m, th, kif,
@@ -5075,6 +5255,7 @@
uma_zfree(V_pf_state_key_z, sk);
uma_zfree(V_pf_state_key_z, nk);
+ pf_udp_mapping_release(udp_mapping);
}
/* copy back packet headers if we performed NAT operations */
@@ -5102,6 +5283,8 @@
uma_zfree(V_pf_state_key_z, sk);
uma_zfree(V_pf_state_key_z, nk);
+ pf_udp_mapping_release(udp_mapping);
+
return (PF_DROP);
}
@@ -5111,7 +5294,7 @@
struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport,
u_int16_t dport, int *rewrite, struct pfi_kkif *kif, struct pf_kstate **sm,
int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen,
- struct pf_krule_slist *match_rules)
+ struct pf_krule_slist *match_rules, struct pf_udp_mapping *udp_mapping)
{
struct pf_kstate *s = NULL;
struct pf_ksrc_node *sn = NULL;
@@ -5328,6 +5511,8 @@
return (PF_SYNPROXY_DROP);
}
+ s->udp_mapping = udp_mapping;
+
return (PF_PASS);
csfailed:
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -62,7 +62,8 @@
uint16_t, int, struct pf_kanchor_stackframe *);
static int pf_get_sport(sa_family_t, uint8_t, struct pf_krule *,
struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *,
- uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **);
+ uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **,
+ struct pf_udp_mapping **);
#define mix(a,b,c) \
do { \
@@ -216,14 +217,47 @@
pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low,
- uint16_t high, struct pf_ksrc_node **sn)
+ uint16_t high, struct pf_ksrc_node **sn,
+ struct pf_udp_mapping **udp_mapping)
{
struct pf_state_key_cmp key;
struct pf_addr init_addr;
+ struct pf_srchash *sh = NULL;
bzero(&init_addr, sizeof(init_addr));
+
+ MPASS(*udp_mapping == NULL);
+
+ /*
+ * If we are UDP and have an existing mapping we can get source port
+ * from the mapping. In this case we have to look up the src_node as
+ * pf_map_addr would.
+ */
+ if (proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) {
+ struct pf_udp_endpoint_cmp udp_source;
+
+ bzero(&udp_source, sizeof(udp_source));
+ udp_source.af = af;
+ PF_ACPY(&udp_source.addr, saddr, af);
+ udp_source.port = sport;
+ *udp_mapping = pf_udp_mapping_find(&udp_source);
+ if (*udp_mapping) {
+ PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af);
+ *nport = (*udp_mapping)->endpoints[1].port;
+ /* Try to find a src_node as per pf_map_addr(). */
+ if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
+ (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
+ *sn = pf_find_src_node(saddr, r, af, &sh, 0);
+ return (0);
+ } else {
+ *udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0);
+ if (*udp_mapping == NULL)
+ return (1);
+ }
+ }
+
if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
- return (1);
+ goto failed;
if (proto == IPPROTO_ICMP) {
if (*nport == htons(ICMP_ECHO)) {
@@ -250,6 +284,8 @@
do {
PF_ACPY(&key.addr[1], naddr, key.af);
+ if (*udp_mapping)
+ PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, af);
/*
* port search; start random, step;
@@ -277,8 +313,16 @@
} else if (low == high) {
key.port[1] = htons(low);
if (!pf_find_state_all_exists(&key, PF_IN)) {
- *nport = htons(low);
- return (0);
+ if (*udp_mapping != NULL) {
+ (*udp_mapping)->endpoints[1].port = htons(low);
+ if (pf_udp_mapping_insert(*udp_mapping) == 0) {
+ *nport = htons(low);
+ return (0);
+ }
+ } else {
+ *nport = htons(low);
+ return (0);
+ }
}
} else {
uint32_t tmp;
@@ -293,18 +337,35 @@
cut = arc4random() % (1 + high - low) + low;
/* low <= cut <= high */
for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
- key.port[1] = htons(tmp);
- if (!pf_find_state_all_exists(&key, PF_IN)) {
- *nport = htons(tmp);
- return (0);
+ if (*udp_mapping != NULL) {
+ (*udp_mapping)->endpoints[1].port = htons(tmp);
+ if (pf_udp_mapping_insert(*udp_mapping) == 0) {
+ *nport = htons(tmp);
+ return (0);
+ }
+ } else {
+ key.port[1] = htons(tmp);
+ if (!pf_find_state_all_exists(&key, PF_IN)) {
+ *nport = htons(tmp);
+ return (0);
+ }
}
}
tmp = cut;
for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
- key.port[1] = htons(tmp);
- if (!pf_find_state_all_exists(&key, PF_IN)) {
- *nport = htons(tmp);
- return (0);
+ if (proto == IPPROTO_UDP &&
+ (r->rpool.opts & PF_POOL_ENDPI)) {
+ (*udp_mapping)->endpoints[1].port = htons(tmp);
+ if (pf_udp_mapping_insert(*udp_mapping) == 0) {
+ *nport = htons(tmp);
+ return (0);
+ }
+ } else {
+ key.port[1] = htons(tmp);
+ if (!pf_find_state_all_exists(&key, PF_IN)) {
+ *nport = htons(tmp);
+ return (0);
+ }
}
}
}
@@ -326,6 +387,10 @@
return (1);
}
} while (! PF_AEQ(&init_addr, naddr, af) );
+
+failed:
+ uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
+ *udp_mapping = NULL;
return (1); /* none available */
}
@@ -333,7 +398,7 @@
pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
uint16_t dport, struct pf_addr *naddr, uint16_t *nport,
- struct pf_ksrc_node **sn)
+ struct pf_ksrc_node **sn, struct pf_udp_mapping **udp_mapping)
{
uint16_t psmask, low, highmask;
uint16_t i, ahigh, cut;
@@ -353,13 +418,13 @@
for (i = cut; i <= ahigh; i++) {
low = (i << ashift) | psmask;
if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
- naddr, nport, low, low | highmask, sn))
+ naddr, nport, low, low | highmask, sn, udp_mapping))
return (0);
}
for (i = cut - 1; i > 0; i--) {
low = (i << ashift) | psmask;
if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
- naddr, nport, low, low | highmask, sn))
+ naddr, nport, low, low | highmask, sn, udp_mapping))
return (0);
}
return (1);
@@ -597,7 +662,8 @@
struct pf_state_key **skp, struct pf_state_key **nkp,
struct pf_addr *saddr, struct pf_addr *daddr,
uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack,
- struct pf_krule **rp)
+ struct pf_krule **rp,
+ struct pf_udp_mapping **udp_mapping)
{
struct pf_krule *r = NULL;
struct pf_addr *naddr;
@@ -661,7 +727,7 @@
}
if (r->rpool.mape.offset > 0) {
if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
- sport, daddr, dport, naddr, nportp, sn)) {
+ sport, daddr, dport, naddr, nportp, sn, udp_mapping)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: MAP-E port allocation (%u/%u/%u)"
" failed\n",
@@ -672,7 +738,7 @@
goto notrans;
}
} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
- daddr, dport, naddr, nportp, low, high, sn)) {
+ daddr, dport, naddr, nportp, low, high, sn, udp_mapping)) {
DPFPRINTF(PF_DEBUG_MISC,
("pf: NAT proxy port allocation (%u-%u) failed\n",
r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
diff --git a/tests/sys/netpfil/pf/nat.sh b/tests/sys/netpfil/pf/nat.sh
--- a/tests/sys/netpfil/pf/nat.sh
+++ b/tests/sys/netpfil/pf/nat.sh
@@ -112,6 +112,139 @@
}
+atf_test_case "endpoint_independent" "cleanup"
+endpoint_independent_head()
+{
+ atf_set descr 'Test that a client behind NAT gets the same external IP:port for different servers'
+ atf_set require.user root
+}
+
+endpoint_independent_body()
+{
+ pft_init
+ filter="udp and dst port 1234" # only capture udp pings
+
+ epair_client=$(vnet_mkepair)
+ epair_nat=$(vnet_mkepair)
+ epair_server1=$(vnet_mkepair)
+ epair_server2=$(vnet_mkepair)
+ bridge=$(vnet_mkbridge)
+
+ vnet_mkjail nat ${epair_client}b ${epair_nat}a
+ vnet_mkjail client ${epair_client}a
+ vnet_mkjail server1 ${epair_server1}a
+ vnet_mkjail server2 ${epair_server2}a
+
+ ifconfig ${epair_server1}b up
+ ifconfig ${epair_server2}b up
+ ifconfig ${epair_nat}b up
+ ifconfig ${bridge} \
+ addm ${epair_server1}b \
+ addm ${epair_server2}b \
+ addm ${epair_nat}b \
+ up
+
+ jexec nat ifconfig ${epair_client}b 192.0.2.1/24 up
+ jexec nat ifconfig ${epair_nat}a 198.51.100.42/24 up
+ jexec nat sysctl net.inet.ip.forwarding=1
+
+ jexec client ifconfig ${epair_client}a 192.0.2.2/24 up
+ jexec client route add default 192.0.2.1
+
+ jexec server1 ifconfig ${epair_server1}a 198.51.100.32/24 up
+ jexec server2 ifconfig ${epair_server2}a 198.51.100.22/24 up
+
+ # Enable pf!
+ jexec nat pfctl -e
+
+ # validate non-endpoint independent nat rule behaviour
+ pft_set_rules nat \
+ "nat on ${epair_nat}a inet from ! (${epair_nat}a) to any -> (${epair_nat}a)"
+
+ jexec server1 tcpdump -i ${epair_server1}a -w ${PWD}/server1.pcap \
+ --immediate-mode $filter &
+ server1tcppid="$!"
+ jexec server2 tcpdump -i ${epair_server2}a -w ${PWD}/server2.pcap \
+ --immediate-mode $filter &
+ server2tcppid="$!"
+
+ # send out multiple packets
+ for i in $(seq 1 10); do
+ echo "ping" | jexec client nc -u 198.51.100.32 1234 -p 4242 -w 0
+ echo "ping" | jexec client nc -u 198.51.100.22 1234 -p 4242 -w 0
+ done
+
+ kill $server1tcppid
+ kill $server2tcppid
+
+ tuple_server1=$(tcpdump -r ${PWD}/server1.pcap | awk '{addr=$3} END {print addr}')
+ tuple_server2=$(tcpdump -r ${PWD}/server2.pcap | awk '{addr=$3} END {print addr}')
+
+ if [ -z $tuple_server1 ]
+ then
+ atf_fail "server1 did not receive connection from client (default)"
+ fi
+
+ if [ -z $tuple_server2 ]
+ then
+ atf_fail "server2 did not receive connection from client (default)"
+ fi
+
+ if [ "$tuple_server1" = "$tuple_server2" ]
+ then
+ echo "server1 tcpdump: $tuple_server1"
+ echo "server2 tcpdump: $tuple_server2"
+ atf_fail "Received same IP:port on server1 and server2 (default)"
+ fi
+
+ # validate endpoint independent nat rule behaviour
+ pft_set_rules nat \
+ "nat on ${epair_nat}a inet from ! (${epair_nat}a) to any -> (${epair_nat}a) endpoint-independent"
+
+ jexec server1 tcpdump -i ${epair_server1}a -w ${PWD}/server1.pcap \
+ --immediate-mode $filter &
+ server1tcppid="$!"
+ jexec server2 tcpdump -i ${epair_server2}a -w ${PWD}/server2.pcap \
+ --immediate-mode $filter &
+ server2tcppid="$!"
+
+ # send out multiple packets, sometimes one fails to go through
+ for i in $(seq 1 10); do
+ echo "ping" | jexec client nc -u 198.51.100.32 1234 -p 4242 -w 0
+ echo "ping" | jexec client nc -u 198.51.100.22 1234 -p 4242 -w 0
+ done
+
+ kill $server1tcppid
+ kill $server2tcppid
+
+ tuple_server1=$(tcpdump -r ${PWD}/server1.pcap | awk '{addr=$3} END {print addr}')
+ tuple_server2=$(tcpdump -r ${PWD}/server2.pcap | awk '{addr=$3} END {print addr}')
+
+ if [ -z $tuple_server1 ]
+ then
+ atf_fail "server1 did not receive connection from client (endpoint-independent)"
+ fi
+
+ if [ -z $tuple_server2 ]
+ then
+ atf_fail "server2 did not receive connection from client (endpoint-independent)"
+ fi
+
+ if [ ! "$tuple_server1" = "$tuple_server2" ]
+ then
+ echo "server1 tcpdump: $tuple_server1"
+ echo "server2 tcpdump: $tuple_server2"
+ atf_fail "Received different IP:port on server1 than server2 (endpoint-independent)"
+ fi
+}
+
+endpoint_independent_cleanup()
+{
+ pft_cleanup
+ rm -f server1.pcap # capture files written by endpoint_independent_body
+ rm -f server2.pcap
+}
+
nested_anchor_cleanup()
{
pft_cleanup
@@ -121,4 +254,5 @@
{
atf_add_test_case "exhaust"
atf_add_test_case "nested_anchor"
+ atf_add_test_case "endpoint_independent"
}

File Metadata

Mime Type
text/plain
Expires
Wed, Nov 19, 8:18 AM (8 h, 14 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25612390
Default Alt Text
D11137.id143021.diff (28 KB)

Event Timeline