Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F140955797
D11137.id142155.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
14 KB
Referenced Files
None
Subscribers
None
D11137.id142155.diff
View Options
diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y
--- a/sbin/pfctl/parse.y
+++ b/sbin/pfctl/parse.y
@@ -326,6 +326,7 @@
int marker;
#define POM_TYPE 0x01
#define POM_STICKYADDRESS 0x02
+#define POM_ENDPI 0x04
u_int8_t opts;
int type;
int staticport;
@@ -512,7 +513,7 @@
%token UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL
%token DNPIPE DNQUEUE RIDENTIFIER
%token LOAD RULESET_OPTIMIZATION PRIO
-%token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE
+%token STICKYADDRESS ENDPI MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE
%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY PFLOW
%token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS
%token DIVERTTO DIVERTREPLY BRIDGE_TO
@@ -4593,6 +4594,15 @@
pool_opts.marker |= POM_STICKYADDRESS;
pool_opts.opts |= PF_POOL_STICKYADDR;
}
+		| ENDPI {
+			if (pool_opts.marker & POM_ENDPI) {
+				yyerror("endpoint-independent cannot be redefined");
+				YYERROR;
+			}
+			pool_opts.marker |= POM_ENDPI;
+			/* UDP NAT reuses one external addr:port per internal source. */
+			pool_opts.opts |= PF_POOL_ENDPI;
+		}
| MAPEPORTSET number '/' number '/' number {
if (pool_opts.mape.offset) {
yyerror("map-e-portset cannot be redefined");
@@ -6295,6 +6305,7 @@
{ "dnqueue", DNQUEUE},
{ "drop", DROP},
{ "dup-to", DUPTO},
+ { "endpoint-independent", ENDPI},
{ "ether", ETHER},
{ "fail-policy", FAILPOLICY},
{ "fairq", FAIRQ},
diff --git a/sbin/pfctl/pfctl_parser.c b/sbin/pfctl/pfctl_parser.c
--- a/sbin/pfctl/pfctl_parser.c
+++ b/sbin/pfctl/pfctl_parser.c
@@ -488,6 +488,9 @@
}
if (pool->opts & PF_POOL_STICKYADDR)
printf(" sticky-address");
+	/* Keyword must match the "endpoint-independent" token in parse.y. */
+	if (pool->opts & PF_POOL_ENDPI)
+		printf(" endpoint-independent");
if (id == PF_NAT && p1 == 0 && p2 == 0)
printf(" static-port");
if (pool->mape.offset > 0)
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -940,6 +940,29 @@
u_int8_t pad[1];
};
+/*
+ * Comparison key of a UDP endpoint: the leading fields of
+ * struct pf_udp_endpoint.  Lookups bcmp() an endpoint against a key over
+ * sizeof(struct pf_udp_endpoint_cmp), so every byte (pad included) must be
+ * zeroed before the key is filled in.
+ * Keep synced with struct pf_udp_endpoint.
+ */
+struct pf_udp_endpoint_cmp {
+ struct pf_addr addr;
+ uint16_t port;
+ sa_family_t af;
+ uint8_t pad[1];
+};
+
+/*
+ * One side of a UDP mapping as linked into a pf_udpendpointhash row.
+ * The leading fields (addr, port, af, pad) must match
+ * struct pf_udp_endpoint_cmp because they are compared byte-wise.
+ */
+struct pf_udp_endpoint {
+ struct pf_addr addr;
+ uint16_t port;
+ sa_family_t af;
+ uint8_t pad[1];
+
+ struct pf_udp_mapping *mapping; /* mapping that embeds this endpoint */
+ LIST_ENTRY(pf_udp_endpoint) entry; /* linkage within the hash row */
+};
+
+/*
+ * Pairs a source endpoint (endpoints[0]) with the NAT endpoint
+ * (endpoints[1]) it is translated to.  refs is managed with the
+ * refcount_init()/refcount_acquire()/refcount_release() primitives.
+ */
+struct pf_udp_mapping {
+ struct pf_udp_endpoint endpoints[2];
+ u_int refs;
+};
+
/* Keep synced with struct pf_state_key. */
struct pf_state_key_cmp {
struct pf_addr addr[2];
@@ -2281,6 +2304,15 @@
extern bool pf_find_state_all_exists(
const struct pf_state_key_cmp *,
u_int);
+/* UDP endpoint mapping table: lookup, allocation, linking and release. */
+extern struct pf_udp_mapping *pf_udp_mapping_find(struct pf_udp_endpoint_cmp
+ *endpoint);
+extern struct pf_udp_mapping *pf_udp_mapping_create(sa_family_t af,
+ struct pf_addr *src_addr, uint16_t src_port,
+ struct pf_addr *nat_addr, uint16_t nat_port);
+extern int pf_udp_mapping_insert(struct pf_udp_mapping
+ *mapping);
+extern void pf_udp_mapping_release(struct pf_udp_mapping
+ *mapping);
extern struct pf_ksrc_node *pf_find_src_node(struct pf_addr *,
struct pf_krule *, sa_family_t,
struct pf_srchash **, bool);
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -129,6 +129,7 @@
PF_ADDR_RANGE };
#define PF_POOL_TYPEMASK 0x0f
#define PF_POOL_STICKYADDR 0x20
+#define PF_POOL_ENDPI 0x40
#define PF_WSCALE_FLAG 0x80
#define PF_WSCALE_MASK 0x0f
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -487,6 +487,7 @@
MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
MALLOC_DEFINE(M_PF_RULE_ITEM, "pf_krule_item", "pf(4) rule items");
+VNET_DEFINE(struct pf_udpendpointhash *, pf_udpendpointhash);
VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
VNET_DEFINE(struct pf_idhash *, pf_idhash);
VNET_DEFINE(struct pf_srchash *, pf_srchash);
@@ -1126,6 +1127,8 @@
i++, kh++, ih++) {
mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK);
mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
+ mtx_init(&uh->lock, "pf_udpendpointhash", NULL,
+ MTX_DEF | MTX_DUPOK);
}
/* Source nodes. */
@@ -1743,6 +1746,120 @@
return (false);
}
+/*
+ * Allocate a UDP mapping describing the translation of
+ * (src_addr, src_port) to (nat_addr, nat_port) for address family af.
+ *
+ * The allocation is zero-filled and non-sleeping.  The returned mapping
+ * holds a single reference and is not linked into the endpoint hash.
+ * Returns NULL if the allocation fails.
+ */
+struct pf_udp_mapping *
+pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port,
+    struct pf_addr *nat_addr, uint16_t nat_port)
+{
+	struct pf_udp_mapping *m;
+	struct pf_udp_endpoint *src, *nat;
+
+	m = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO);
+	if (m == NULL)
+		return (NULL);
+
+	/* Source side of the translation. */
+	src = &m->endpoints[0];
+	PF_ACPY(&src->addr, src_addr, af);
+	src->port = src_port;
+	src->af = af;
+	src->mapping = m;
+
+	/* NAT side of the translation. */
+	nat = &m->endpoints[1];
+	PF_ACPY(&nat->addr, nat_addr, af);
+	nat->port = nat_port;
+	nat->af = af;
+	nat->mapping = m;
+
+	refcount_init(&m->refs, 1);
+	return (m);
+}
+
+/*
+ * Link both endpoints of mapping into the UDP endpoint hash.
+ *
+ * Returns 0 when both endpoints were linked.  Returns 1, leaving the hash
+ * untouched, when an entry with the same key as either endpoint already
+ * exists in the corresponding row.
+ */
+int
+pf_udp_mapping_insert(struct pf_udp_mapping *mapping)
+{
+ struct pf_udpendpointhash *h0, *h1;
+ struct pf_udp_endpoint *endpoint;
+ /* Assume a collision until both endpoints have been linked. */
+ int ret = 1;
+
+ h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
+ h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
+ /*
+  * Lock both rows, lower row address first, so every caller takes the
+  * pair in the same order.  A shared row is locked only once.
+  */
+ if (h0 == h1) {
+ PF_HASHROW_LOCK(h0);
+ } else if (h0 < h1) {
+ PF_HASHROW_LOCK(h0);
+ PF_HASHROW_LOCK(h1);
+ } else {
+ PF_HASHROW_LOCK(h1);
+ PF_HASHROW_LOCK(h0);
+ }
+
+ /*
+  * Reject the mapping if the source key is already present in its row.
+  * The row holds source and NAT endpoints alike; either kind counts.
+  * LIST_FOREACH leaves endpoint NULL when no entry matched.
+  */
+ LIST_FOREACH(endpoint, &h0->endpoints, entry) {
+ if (bcmp(endpoint, &mapping->endpoints[0],
+ sizeof(struct pf_udp_endpoint_cmp)) == 0)
+ break;
+ }
+ if (endpoint != NULL)
+ goto cleanup;
+ /* Same check for the NAT key in its row. */
+ LIST_FOREACH(endpoint, &h1->endpoints, entry) {
+ if (bcmp(endpoint, &mapping->endpoints[1],
+ sizeof(struct pf_udp_endpoint_cmp)) == 0)
+ break;
+ }
+ if (endpoint != NULL)
+ goto cleanup;
+ /* Both keys are unused: link the two endpoints under the held locks. */
+ LIST_INSERT_HEAD(&h0->endpoints, &mapping->endpoints[0], entry);
+ LIST_INSERT_HEAD(&h1->endpoints, &mapping->endpoints[1], entry);
+ ret = 0;
+
+cleanup:
+ if (h0 != h1) {
+ PF_HASHROW_UNLOCK(h0);
+ PF_HASHROW_UNLOCK(h1);
+ } else {
+ PF_HASHROW_UNLOCK(h0);
+ }
+ return (ret);
+}
+
+/*
+ * Drop one reference on a UDP mapping.  When the last reference goes away
+ * both endpoints are unlinked from the endpoint hash and the mapping is
+ * handed back to V_pf_udp_mapping_z.  A NULL mapping is ignored so callers
+ * can release unconditionally.
+ *
+ * The mapping must have been linked by pf_udp_mapping_insert(): the final
+ * release LIST_REMOVE()s both endpoints without checking whether they are
+ * on a list.
+ */
+void
+pf_udp_mapping_release(struct pf_udp_mapping *mapping)
+{
+	/* refcount is synchronized on the source endpoint's row lock */
+	struct pf_udpendpointhash *h0, *h1;
+
+	if (mapping == NULL)
+		return;
+
+	h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
+	PF_HASHROW_LOCK(h0);
+	if (refcount_release(&mapping->refs)) {
+		/*
+		 * Unlink the source endpoint first, under the lock that
+		 * pf_udp_mapping_find() takes references with, so no new
+		 * reference can be obtained once the count reached zero.
+		 */
+		LIST_REMOVE(&mapping->endpoints[0], entry);
+		PF_HASHROW_UNLOCK(h0);
+		h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
+		PF_HASHROW_LOCK(h1);
+		LIST_REMOVE(&mapping->endpoints[1], entry);
+		PF_HASHROW_UNLOCK(h1);
+
+		uma_zfree(V_pf_udp_mapping_z, mapping);
+	} else {
+		PF_HASHROW_UNLOCK(h0);
+	}
+}
+
+
+/*
+ * Look up the mapping whose source endpoint equals key.
+ *
+ * A row entry is accepted only if its key also equals the key of
+ * endpoints[0] of the mapping it belongs to, i.e. it describes that
+ * mapping's source side.  On success a reference is acquired under the row
+ * lock before the mapping is returned; NULL is returned when the row holds
+ * no such entry.
+ */
+struct pf_udp_mapping *
+pf_udp_mapping_find(struct pf_udp_endpoint_cmp *key)
+{
+	struct pf_udpendpointhash *row;
+	struct pf_udp_endpoint *ep;
+	struct pf_udp_mapping *found;
+
+	row = &V_pf_udpendpointhash[pf_hashudpendpoint((struct pf_udp_endpoint*)key)];
+
+	PF_HASHROW_LOCK(row);
+	LIST_FOREACH(ep, &row->endpoints, entry) {
+		if (bcmp(ep, key, sizeof(struct pf_udp_endpoint_cmp)) != 0)
+			continue;
+		if (bcmp(ep, &ep->mapping->endpoints[0],
+		    sizeof(struct pf_udp_endpoint_cmp)) != 0)
+			continue;
+
+		found = ep->mapping;
+		refcount_acquire(&found->refs);
+		PF_HASHROW_UNLOCK(row);
+		return (found);
+	}
+	PF_HASHROW_UNLOCK(row);
+	return (NULL);
+}
/* END state table stuff */
static void
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -220,8 +220,38 @@
{
struct pf_state_key_cmp key;
struct pf_addr init_addr;
+ struct pf_srchash *sh = NULL;
bzero(&init_addr, sizeof(init_addr));
+
+ /*
+ * If we are UDP and have an existing mapping we can get source port
+ * from the mapping. In this case we have to look up the src_node as
+ * pf_map_addr would.
+ */
+ if (proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) {
+ struct pf_udp_endpoint_cmp udp_source;
+
+ bzero(&udp_source, sizeof(udp_source));
+ udp_source.af = af;
+ PF_ACPY(&udp_source.addr, saddr, af);
+ udp_source.port = sport;
+ *udp_mapping = pf_udp_mapping_find(&udp_source);
+ if (*udp_mapping) {
+ PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af);
+ *nport = (*udp_mapping)->endpoints[1].port;
+ /* Try to find a src_node as per pf_map_addr(). */
+ if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
+ (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
+ *sn = pf_find_src_node(saddr, r, af, &sh, 0);
+ return (0);
+ } else {
+ *udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0);
+ if (*udp_mapping == NULL)
+ return (1);
+ }
+ }
+
if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
return (1);
@@ -277,8 +307,17 @@
} else if (low == high) {
key.port[1] = htons(low);
if (!pf_find_state_all_exists(&key, PF_IN)) {
- *nport = htons(low);
- return (0);
+ if (proto == IPPROTO_UDP &&
+ (r->rpool.opts & PF_POOL_ENDPI)) {
+ (*udp_mapping)->endpoints[1].port = htons(low);
+ if (pf_udp_mapping_insert(*udp_mapping) == 0) {
+ *nport = htons(low);
+ return (0);
+ }
+ } else {
+ *nport = htons(low);
+ return (0);
+ }
}
} else {
uint32_t tmp;
@@ -293,18 +332,36 @@
cut = arc4random() % (1 + high - low) + low;
/* low <= cut <= high */
for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
- key.port[1] = htons(tmp);
- if (!pf_find_state_all_exists(&key, PF_IN)) {
- *nport = htons(tmp);
- return (0);
+ if (proto == IPPROTO_UDP &&
+ (r->rpool.opts & PF_POOL_ENDPI)) {
+ (*udp_mapping)->endpoints[1].port = htons(tmp);
+ if (pf_udp_mapping_insert(*udp_mapping) == 0) {
+ *nport = htons(tmp);
+ return (0);
+ }
+ } else {
+ key.port[1] = htons(tmp);
+ if (!pf_find_state_all_exists(&key, PF_IN)) {
+ *nport = htons(tmp);
+ return (0);
+ }
}
}
tmp = cut;
for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
- key.port[1] = htons(tmp);
- if (!pf_find_state_all_exists(&key, PF_IN)) {
- *nport = htons(tmp);
- return (0);
+ if (proto == IPPROTO_UDP &&
+ (r->rpool.opts & PF_POOL_ENDPI)) {
+ (*udp_mapping)->endpoints[1].port = htons(tmp);
+ if (pf_udp_mapping_insert(*udp_mapping) == 0) {
+ *nport = htons(tmp);
+ return (0);
+ }
+ } else {
+ key.port[1] = htons(tmp);
+ if (!pf_find_state_all_exists(&key, PF_IN)) {
+ *nport = htons(tmp);
+ return (0);
+ }
}
}
}
diff --git a/tests/sys/netpfil/pf/nat.sh b/tests/sys/netpfil/pf/nat.sh
--- a/tests/sys/netpfil/pf/nat.sh
+++ b/tests/sys/netpfil/pf/nat.sh
@@ -112,6 +112,126 @@
}
+# UDP endpoint-independent NAT test; registered with a cleanup routine.
+atf_test_case "endpoint_independent" "cleanup"
+endpoint_independent_head()
+{
+	atf_set "descr" "Test that a client behind NAT gets the same external IP:port for different servers"
+	atf_set "require.user" "root"
+}
+
+# Sends UDP from one client socket (source port 4242) to two servers through
+# the NAT jail and compares the "Connection" line each nc listener logs:
+# with a plain nat rule the two must differ, with "endpoint-independent"
+# they must be equal.
+endpoint_independent_body()
+{
+	pft_init
+
+	# Topology: client -- nat -- bridge -- { server1, server2 }
+	epair_client=$(vnet_mkepair)
+	epair_nat=$(vnet_mkepair)
+	epair_server1=$(vnet_mkepair)
+	epair_server2=$(vnet_mkepair)
+	bridge=$(vnet_mkbridge)
+
+	vnet_mkjail nat ${epair_client}b ${epair_nat}a
+	vnet_mkjail client ${epair_client}a
+	vnet_mkjail server1 ${epair_server1}a
+	vnet_mkjail server2 ${epair_server2}a
+
+	# Host side: join the servers and the NAT's outside leg on one bridge.
+	ifconfig ${epair_server1}b up
+	ifconfig ${epair_server2}b up
+	ifconfig ${epair_nat}b up
+	ifconfig ${bridge} \
+	    addm ${epair_server1}b \
+	    addm ${epair_server2}b \
+	    addm ${epair_nat}b \
+	    up
+
+	# NAT jail: inside address, outside address, forwarding.
+	jexec nat ifconfig ${epair_client}b 192.0.2.1/24 up
+	jexec nat ifconfig ${epair_nat}a 10.42.42.42/8 up
+	jexec nat sysctl net.inet.ip.forwarding=1
+
+	# Client routes everything through the NAT jail.
+	jexec client ifconfig ${epair_client}a 192.0.2.2/24 up
+	jexec client route add default 192.0.2.1
+
+	jexec server1 ifconfig ${epair_server1}a 10.32.32.32/8 up
+	jexec server2 ifconfig ${epair_server2}a 10.22.22.22/8 up
+
+	jexec nat pfctl -e
+
+	#
+	# Phase 1: ordinary NAT rule, the servers must see different sources.
+	#
+	pft_set_rules nat \
+	    "nat on ${epair_nat}a inet from ! (${epair_nat}a) to any -> (${epair_nat}a)"
+
+	jexec server1 nc -u -l 1234 -v 2> server1.out &
+	server1pid=$!
+	jexec server2 nc -u -l 1234 -v 2> server2.out &
+	server2pid=$!
+
+	# A single datagram is occasionally lost, so send three to each server.
+	for i in 1 2 3; do
+		echo "ping" | jexec client nc -u 10.32.32.32 1234 -p 4242 -w 0
+		echo "ping" | jexec client nc -u 10.22.22.22 1234 -p 4242 -w 0
+	done
+
+	ipport_server1=$(grep Connection server1.out)
+	ipport_server2=$(grep Connection server2.out)
+
+	if [ -z "$ipport_server1" ]; then
+		atf_fail "server1 did not receive connection from client (default)"
+	fi
+
+	if [ -z "$ipport_server2" ]; then
+		atf_fail "server2 did not receive connection from client (default)"
+	fi
+
+	if [ "$ipport_server1" = "$ipport_server2" ]; then
+		echo "server1: $ipport_server1"
+		echo "server2: $ipport_server2"
+		atf_fail "Received same IP:port on server1 and server2 (default)"
+	fi
+	kill $server1pid
+	kill $server2pid
+
+	#
+	# Phase 2: endpoint-independent rule, the servers must see one source.
+	#
+	pft_set_rules nat \
+	    "nat on ${epair_nat}a inet from ! (${epair_nat}a) to any -> (${epair_nat}a) endpoint-independent"
+
+	jexec server1 nc -u -l 1234 -v 2> server1.out &
+	server1pid=$!
+	jexec server2 nc -u -l 1234 -v 2> server2.out &
+	server2pid=$!
+
+	# A single datagram is occasionally lost, so send three to each server.
+	for i in 1 2 3; do
+		echo "ping" | jexec client nc -u 10.32.32.32 1234 -p 4242 -w 0
+		echo "ping" | jexec client nc -u 10.22.22.22 1234 -p 4242 -w 0
+	done
+
+	ipport_server1=$(grep Connection server1.out)
+	ipport_server2=$(grep Connection server2.out)
+
+	if [ -z "$ipport_server1" ]; then
+		atf_fail "server1 did not receive connection from client (endpoint-independent)"
+	fi
+
+	if [ -z "$ipport_server2" ]; then
+		atf_fail "server2 did not receive connection from client (endpoint-independent)"
+	fi
+
+	if [ "$ipport_server1" != "$ipport_server2" ]; then
+		echo "server1: $ipport_server1"
+		echo "server2: $ipport_server2"
+		atf_fail "Received different IP:port on server1 than server2 (endpoint-independent)"
+	fi
+	kill $server1pid
+	kill $server2pid
+}
+
+# Tear down the test network and remove the captured nc logs.
+endpoint_independent_cleanup()
+{
+	pft_cleanup
+	for log in server1.out server2.out; do
+		rm -f "$log"
+	done
+}
+
nested_anchor_cleanup()
{
pft_cleanup
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 31, 2:11 AM (7 h, 44 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27399636
Default Alt Text
D11137.id142155.diff (14 KB)
Attached To
Mode
D11137: pf: Add support for endpoint independent NAT bindings for UDP
Attached
Detach File
Event Timeline
Log In to Comment