Page MenuHomeFreeBSD

D11137.id142155.diff
No OneTemporary

D11137.id142155.diff

diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y
--- a/sbin/pfctl/parse.y
+++ b/sbin/pfctl/parse.y
@@ -326,6 +326,7 @@
int marker;
#define POM_TYPE 0x01
#define POM_STICKYADDRESS 0x02
+#define POM_ENDPI 0x04
u_int8_t opts;
int type;
int staticport;
@@ -512,7 +513,7 @@
%token UPPERLIMIT QUEUE PRIORITY QLIMIT HOGS BUCKETS RTABLE TARGET INTERVAL
%token DNPIPE DNQUEUE RIDENTIFIER
%token LOAD RULESET_OPTIMIZATION PRIO
-%token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE
+%token STICKYADDRESS ENDPI MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE
%token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY PFLOW
%token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE SETTOS
%token DIVERTTO DIVERTREPLY BRIDGE_TO
@@ -4593,6 +4594,15 @@
pool_opts.marker |= POM_STICKYADDRESS;
pool_opts.opts |= PF_POOL_STICKYADDR;
}
+		| ENDPI {
+			/* "endpoint-independent" may appear only once per pool option list. */
+			if (pool_opts.marker & POM_ENDPI) {
+				yyerror("endpoint-independent cannot be redefined");
+				YYERROR;
+			}
+			pool_opts.marker |= POM_ENDPI;
+			/*
+			 * PF_POOL_ENDPI is defined next to PF_POOL_STICKYADDR in
+			 * netpfil/pf/pf.h, so it is not redefined locally here.
+			 */
+			pool_opts.opts |= PF_POOL_ENDPI;
+		}
| MAPEPORTSET number '/' number '/' number {
if (pool_opts.mape.offset) {
yyerror("map-e-portset cannot be redefined");
@@ -6295,6 +6305,7 @@
{ "dnqueue", DNQUEUE},
{ "drop", DROP},
{ "dup-to", DUPTO},
+ { "endpoint-independent", ENDPI},
{ "ether", ETHER},
{ "fail-policy", FAILPOLICY},
{ "fairq", FAIRQ},
diff --git a/sbin/pfctl/pfctl_parser.c b/sbin/pfctl/pfctl_parser.c
--- a/sbin/pfctl/pfctl_parser.c
+++ b/sbin/pfctl/pfctl_parser.c
@@ -488,6 +488,9 @@
}
if (pool->opts & PF_POOL_STICKYADDR)
printf(" sticky-address");
+	/*
+	 * PF_POOL_ENDPI is defined next to PF_POOL_STICKYADDR in
+	 * netpfil/pf/pf.h, so it is not redefined locally here.
+	 */
+	if (pool->opts & PF_POOL_ENDPI)
+		printf(" endpoint-independent");
if (id == PF_NAT && p1 == 0 && p2 == 0)
printf(" static-port");
if (pool->mape.offset > 0)
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -940,6 +940,29 @@
u_int8_t pad[1];
};
+/*
+ * Keep synced with struct pf_udp_endpoint: lookups bcmp() the leading
+ * sizeof(struct pf_udp_endpoint_cmp) bytes of a struct pf_udp_endpoint
+ * against this key, so field order and padding must match.
+ */
+struct pf_udp_endpoint_cmp {
+ struct pf_addr addr; /* endpoint address */
+ uint16_t port; /* endpoint UDP port */
+ sa_family_t af; /* address family of addr */
+ uint8_t pad[1]; /* explicit padding; keys are zeroed before use so bcmp() sees stable bytes */
+};
+
+struct pf_udp_endpoint { /* one side of a UDP mapping; leading fields mirror struct pf_udp_endpoint_cmp */
+ struct pf_addr addr; /* endpoint address */
+ uint16_t port; /* endpoint UDP port */
+ sa_family_t af; /* address family of addr */
+ uint8_t pad[1]; /* explicit padding, part of the compared prefix */
+
+ struct pf_udp_mapping *mapping; /* mapping that embeds this endpoint */
+ LIST_ENTRY(pf_udp_endpoint) entry; /* linkage in a UDP endpoint hash row's list */
+};
+
+struct pf_udp_mapping { /* ties a source endpoint to the NAT endpoint chosen for it */
+ struct pf_udp_endpoint endpoints[2]; /* [0] = original source, [1] = translated (NAT) side */
+ u_int refs; /* reference count; updated with refcount_init/acquire/release */
+};
+
/* Keep synced with struct pf_state_key. */
struct pf_state_key_cmp {
struct pf_addr addr[2];
@@ -2281,6 +2304,15 @@
extern bool pf_find_state_all_exists(
const struct pf_state_key_cmp *,
u_int);
+/*
+ * UDP endpoint mapping API (used for endpoint-independent NAT).
+ * _find returns a referenced mapping or NULL; _create returns a new,
+ * not yet inserted mapping or NULL; _insert returns 0 on success and
+ * non-zero if an endpoint is already present; _release drops a reference.
+ */
+extern struct pf_udp_mapping	*pf_udp_mapping_find(
+				    struct pf_udp_endpoint_cmp *key);
+extern struct pf_udp_mapping	*pf_udp_mapping_create(sa_family_t af,
+				    struct pf_addr *src_addr, uint16_t src_port,
+				    struct pf_addr *nat_addr, uint16_t nat_port);
+extern int			 pf_udp_mapping_insert(
+				    struct pf_udp_mapping *mapping);
+extern void			 pf_udp_mapping_release(
+				    struct pf_udp_mapping *mapping);
extern struct pf_ksrc_node *pf_find_src_node(struct pf_addr *,
struct pf_krule *, sa_family_t,
struct pf_srchash **, bool);
diff --git a/sys/netpfil/pf/pf.h b/sys/netpfil/pf/pf.h
--- a/sys/netpfil/pf/pf.h
+++ b/sys/netpfil/pf/pf.h
@@ -129,6 +129,7 @@
PF_ADDR_RANGE };
#define PF_POOL_TYPEMASK 0x0f
#define PF_POOL_STICKYADDR 0x20
+#define PF_POOL_ENDPI 0x40
#define PF_WSCALE_FLAG 0x80
#define PF_WSCALE_MASK 0x0f
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -487,6 +487,7 @@
MALLOC_DEFINE(M_PFHASH, "pf_hash", "pf(4) hash header structures");
MALLOC_DEFINE(M_PF_RULE_ITEM, "pf_krule_item", "pf(4) rule items");
+VNET_DEFINE(struct pf_udpendpointhash *, pf_udpendpointhash);
VNET_DEFINE(struct pf_keyhash *, pf_keyhash);
VNET_DEFINE(struct pf_idhash *, pf_idhash);
VNET_DEFINE(struct pf_srchash *, pf_srchash);
@@ -1126,6 +1127,8 @@
i++, kh++, ih++) {
mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK);
mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF);
+ mtx_init(&uh->lock, "pf_udpendpointhash", NULL,
+ MTX_DEF | MTX_DUPOK);
}
/* Source nodes. */
@@ -1743,6 +1746,120 @@
return (false);
}
+/*
+ * Allocate and fill in a UDP endpoint mapping.
+ *
+ * endpoints[0] receives the source address/port, endpoints[1] the NAT
+ * address/port; both carry the same address family and point back at the
+ * mapping.  The allocation is M_NOWAIT, so NULL is returned when no memory
+ * is available.  The returned mapping holds one reference and has not been
+ * linked into the endpoint hash by this function.
+ */
+struct pf_udp_mapping *
+pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port,
+    struct pf_addr *nat_addr, uint16_t nat_port)
+{
+	struct pf_udp_mapping *m;
+	struct pf_udp_endpoint *src, *nat;
+
+	m = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO);
+	if (m == NULL)
+		return (NULL);
+
+	/* Source side. */
+	src = &m->endpoints[0];
+	PF_ACPY(&src->addr, src_addr, af);
+	src->port = src_port;
+	src->af = af;
+	src->mapping = m;
+
+	/* Translated (NAT) side. */
+	nat = &m->endpoints[1];
+	PF_ACPY(&nat->addr, nat_addr, af);
+	nat->port = nat_port;
+	nat->af = af;
+	nat->mapping = m;
+
+	refcount_init(&m->refs, 1);
+	return (m);
+}
+
+/*
+ * Link both endpoints of a mapping into the UDP endpoint hash.
+ *
+ * The rows for the source and NAT endpoints are locked for the whole
+ * operation; when they differ, the lower-addressed row is taken first
+ * (presumably to keep a single lock order between concurrent inserts).
+ * Nothing is linked if either endpoint already has an identical entry
+ * (compared over sizeof(struct pf_udp_endpoint_cmp) bytes) in its row.
+ *
+ * Returns 0 when both endpoints were inserted, 1 otherwise.
+ */
+int
+pf_udp_mapping_insert(struct pf_udp_mapping *mapping)
+{
+	struct pf_udpendpointhash *src_row, *nat_row, *first, *second;
+	struct pf_udp_endpoint *src, *nat, *ep;
+	int error;
+
+	src = &mapping->endpoints[0];
+	nat = &mapping->endpoints[1];
+	src_row = &V_pf_udpendpointhash[pf_hashudpendpoint(src)];
+	nat_row = &V_pf_udpendpointhash[pf_hashudpendpoint(nat)];
+
+	/* Lower address first; a row shared by both endpoints is locked once. */
+	if (nat_row < src_row) {
+		first = nat_row;
+		second = src_row;
+	} else {
+		first = src_row;
+		second = nat_row;
+	}
+	PF_HASHROW_LOCK(first);
+	if (second != first)
+		PF_HASHROW_LOCK(second);
+
+	error = 1;
+
+	/* Refuse to insert if the source endpoint is already present... */
+	LIST_FOREACH(ep, &src_row->endpoints, entry) {
+		if (bcmp(ep, src, sizeof(struct pf_udp_endpoint_cmp)) == 0)
+			goto unlock;
+	}
+	/* ...or if the NAT endpoint is. */
+	LIST_FOREACH(ep, &nat_row->endpoints, entry) {
+		if (bcmp(ep, nat, sizeof(struct pf_udp_endpoint_cmp)) == 0)
+			goto unlock;
+	}
+
+	LIST_INSERT_HEAD(&src_row->endpoints, src, entry);
+	LIST_INSERT_HEAD(&nat_row->endpoints, nat, entry);
+	error = 0;
+
+unlock:
+	PF_HASHROW_UNLOCK(src_row);
+	if (nat_row != src_row)
+		PF_HASHROW_UNLOCK(nat_row);
+	return (error);
+}
+
+/*
+ * Drop one reference on a UDP endpoint mapping.
+ *
+ * When the last reference goes away, the source endpoint is unlinked under
+ * its row lock, then the NAT endpoint under its own row lock, and the
+ * mapping is returned to its UMA zone.  A NULL mapping is ignored so that
+ * callers without a mapping can call this unconditionally.
+ */
+void
+pf_udp_mapping_release(struct pf_udp_mapping *mapping)
+{
+	/* refcount is synchronized on the source endpoint's row lock */
+	struct pf_udpendpointhash *h0, *h1;
+
+	if (mapping == NULL)
+		return;
+
+	h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])];
+	PF_HASHROW_LOCK(h0);
+	if (refcount_release(&mapping->refs)) {
+		/* Last reference: unlink each endpoint under its own row lock. */
+		LIST_REMOVE(&mapping->endpoints[0], entry);
+		PF_HASHROW_UNLOCK(h0);
+		h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])];
+		PF_HASHROW_LOCK(h1);
+		LIST_REMOVE(&mapping->endpoints[1], entry);
+		PF_HASHROW_UNLOCK(h1);
+
+		uma_zfree(V_pf_udp_mapping_z, mapping);
+	} else {
+		PF_HASHROW_UNLOCK(h0);
+	}
+}
+
+
+/*
+ * Look up the mapping whose source endpoint equals *key.
+ *
+ * Only entries that are their mapping's endpoints[0] are accepted, so a
+ * key that happens to equal a NAT-side endpoint does not match.  On
+ * success a reference is acquired while the row lock is held and the
+ * mapping is returned; NULL is returned when nothing matches.
+ */
+struct pf_udp_mapping *
+pf_udp_mapping_find(struct pf_udp_endpoint_cmp *key)
+{
+	struct pf_udpendpointhash *row;
+	struct pf_udp_endpoint *ep;
+	struct pf_udp_mapping *found;
+
+	row = &V_pf_udpendpointhash[pf_hashudpendpoint((struct pf_udp_endpoint*)key)];
+	found = NULL;
+
+	PF_HASHROW_LOCK(row);
+	LIST_FOREACH(ep, &row->endpoints, entry) {
+		/* Must match the requested address/port/family... */
+		if (bcmp(ep, key, sizeof(struct pf_udp_endpoint_cmp)) != 0)
+			continue;
+		/* ...and be the source side of its mapping. */
+		if (bcmp(ep, &ep->mapping->endpoints[0],
+		    sizeof(struct pf_udp_endpoint_cmp)) != 0)
+			continue;
+		found = ep->mapping;
+		refcount_acquire(&found->refs);
+		break;
+	}
+	PF_HASHROW_UNLOCK(row);
+	return (found);
+}
/* END state table stuff */
static void
diff --git a/sys/netpfil/pf/pf_lb.c b/sys/netpfil/pf/pf_lb.c
--- a/sys/netpfil/pf/pf_lb.c
+++ b/sys/netpfil/pf/pf_lb.c
@@ -220,8 +220,38 @@
{
struct pf_state_key_cmp key;
struct pf_addr init_addr;
+ struct pf_srchash *sh = NULL;
bzero(&init_addr, sizeof(init_addr));
+
+ /*
+ * If we are UDP and have an existing mapping we can get source port
+ * from the mapping. In this case we have to look up the src_node as
+ * pf_map_addr would.
+ */
+ if (proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) {
+ struct pf_udp_endpoint_cmp udp_source;
+
+ bzero(&udp_source, sizeof(udp_source));
+ udp_source.af = af;
+ PF_ACPY(&udp_source.addr, saddr, af);
+ udp_source.port = sport;
+ *udp_mapping = pf_udp_mapping_find(&udp_source);
+ if (*udp_mapping) {
+ PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af);
+ *nport = (*udp_mapping)->endpoints[1].port;
+ /* Try to find a src_node as per pf_map_addr(). */
+ if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
+ (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
+ *sn = pf_find_src_node(saddr, r, af, &sh, 0);
+ return (0);
+ } else {
+ *udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0);
+ if (*udp_mapping == NULL)
+ return (1);
+ }
+ }
+
if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
return (1);
@@ -277,8 +307,17 @@
} else if (low == high) {
key.port[1] = htons(low);
if (!pf_find_state_all_exists(&key, PF_IN)) {
- *nport = htons(low);
- return (0);
+ if (proto == IPPROTO_UDP &&
+ (r->rpool.opts & PF_POOL_ENDPI)) {
+ (*udp_mapping)->endpoints[1].port = htons(low);
+ if (pf_udp_mapping_insert(*udp_mapping) == 0) {
+ *nport = htons(low);
+ return (0);
+ }
+ } else {
+ *nport = htons(low);
+ return (0);
+ }
}
} else {
uint32_t tmp;
@@ -293,18 +332,36 @@
cut = arc4random() % (1 + high - low) + low;
/* low <= cut <= high */
for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
- key.port[1] = htons(tmp);
- if (!pf_find_state_all_exists(&key, PF_IN)) {
- *nport = htons(tmp);
- return (0);
+ if (proto == IPPROTO_UDP &&
+ (r->rpool.opts & PF_POOL_ENDPI)) {
+ (*udp_mapping)->endpoints[1].port = htons(tmp);
+ if (pf_udp_mapping_insert(*udp_mapping) == 0) {
+ *nport = htons(tmp);
+ return (0);
+ }
+ } else {
+ key.port[1] = htons(tmp);
+ if (!pf_find_state_all_exists(&key, PF_IN)) {
+ *nport = htons(tmp);
+ return (0);
+ }
}
}
tmp = cut;
for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
- key.port[1] = htons(tmp);
- if (!pf_find_state_all_exists(&key, PF_IN)) {
- *nport = htons(tmp);
- return (0);
+ if (proto == IPPROTO_UDP &&
+ (r->rpool.opts & PF_POOL_ENDPI)) {
+ (*udp_mapping)->endpoints[1].port = htons(tmp);
+ if (pf_udp_mapping_insert(*udp_mapping) == 0) {
+ *nport = htons(tmp);
+ return (0);
+ }
+ } else {
+ key.port[1] = htons(tmp);
+ if (!pf_find_state_all_exists(&key, PF_IN)) {
+ *nport = htons(tmp);
+ return (0);
+ }
}
}
}
diff --git a/tests/sys/netpfil/pf/nat.sh b/tests/sys/netpfil/pf/nat.sh
--- a/tests/sys/netpfil/pf/nat.sh
+++ b/tests/sys/netpfil/pf/nat.sh
@@ -112,6 +112,126 @@
}
+# Register the endpoint-independent NAT test; it has a cleanup routine.
+atf_test_case "endpoint_independent" "cleanup"
+endpoint_independent_head()
+{
+	# Test metadata: description, and the test must run as root.
+	atf_set "descr" "Test that a client behind NAT gets the same external IP:port for different servers"
+	atf_set "require.user" "root"
+}
+
+# Topology: client -- nat -- bridge -- {server1, server2}.
+# The client sends UDP from a fixed source port (4242) to both servers.
+# With a plain nat rule the two servers must see different IP:port pairs;
+# with "endpoint-independent" they must see the same one.
+endpoint_independent_body()
+{
+	pft_init
+
+	epair_client=$(vnet_mkepair)
+	epair_nat=$(vnet_mkepair)
+	epair_server1=$(vnet_mkepair)
+	epair_server2=$(vnet_mkepair)
+	bridge=$(vnet_mkbridge)
+
+	vnet_mkjail nat ${epair_client}b ${epair_nat}a
+	vnet_mkjail client ${epair_client}a
+	vnet_mkjail server1 ${epair_server1}a
+	vnet_mkjail server2 ${epair_server2}a
+
+	# The host-side ends of the server and nat epairs share one bridge.
+	ifconfig ${epair_server1}b up
+	ifconfig ${epair_server2}b up
+	ifconfig ${epair_nat}b up
+	ifconfig ${bridge} \
+		addm ${epair_server1}b \
+		addm ${epair_server2}b \
+		addm ${epair_nat}b \
+		up
+
+	jexec nat ifconfig ${epair_client}b 192.0.2.1/24 up
+	jexec nat ifconfig ${epair_nat}a 10.42.42.42/8 up
+	jexec nat sysctl net.inet.ip.forwarding=1
+
+	jexec client ifconfig ${epair_client}a 192.0.2.2/24 up
+	jexec client route add default 192.0.2.1
+
+	jexec server1 ifconfig ${epair_server1}a 10.32.32.32/8 up
+	jexec server2 ifconfig ${epair_server2}a 10.22.22.22/8 up
+
+	# Enable pf!
+	jexec nat pfctl -e
+
+	# validate non-endpoint independent nat rule behaviour
+	pft_set_rules nat \
+		"nat on ${epair_nat}a inet from ! (${epair_nat}a) to any -> (${epair_nat}a)"
+
+	# nc -v reports the peer address on stderr; capture it per server.
+	jexec server1 nc -u -l 1234 -v 2> server1.out &
+	server1pid="$!"
+	jexec server2 nc -u -l 1234 -v 2> server2.out &
+	server2pid="$!"
+
+	# send out three packets because sometimes one fails to go through
+	for i in $(seq 1 3); do
+		echo "ping" | jexec client nc -u 10.32.32.32 1234 -p 4242 -w 0
+		echo "ping" | jexec client nc -u 10.22.22.22 1234 -p 4242 -w 0
+	done
+
+	ipport_server1=$(grep Connection server1.out)
+	ipport_server2=$(grep Connection server2.out)
+
+	if [ -z "$ipport_server1" ]; then
+		atf_fail "server1 did not receive connection from client (default)"
+	fi
+
+	if [ -z "$ipport_server2" ]; then
+		atf_fail "server2 did not receive connection from client (default)"
+	fi
+
+	if [ "$ipport_server1" = "$ipport_server2" ]; then
+		echo "server1: $ipport_server1"
+		echo "server2: $ipport_server2"
+		atf_fail "Received same IP:port on server1 and server2 (default)"
+	fi
+	kill "$server1pid"
+	kill "$server2pid"
+
+	# validate endpoint independent nat rule behaviour
+	pft_set_rules nat \
+		"nat on ${epair_nat}a inet from ! (${epair_nat}a) to any -> (${epair_nat}a) endpoint-independent"
+
+	jexec server1 nc -u -l 1234 -v 2> server1.out &
+	server1pid="$!"
+	jexec server2 nc -u -l 1234 -v 2> server2.out &
+	server2pid="$!"
+
+	# send out three packets because sometimes one fails to go through
+	for i in $(seq 1 3); do
+		echo "ping" | jexec client nc -u 10.32.32.32 1234 -p 4242 -w 0
+		echo "ping" | jexec client nc -u 10.22.22.22 1234 -p 4242 -w 0
+	done
+
+	ipport_server1=$(grep Connection server1.out)
+	ipport_server2=$(grep Connection server2.out)
+
+	if [ -z "$ipport_server1" ]; then
+		atf_fail "server1 did not receive connection from client (endpoint-independent)"
+	fi
+
+	if [ -z "$ipport_server2" ]; then
+		atf_fail "server2 did not receive connection from client (endpoint-independent)"
+	fi
+
+	if [ "$ipport_server1" != "$ipport_server2" ]; then
+		echo "server1: $ipport_server1"
+		echo "server2: $ipport_server2"
+		atf_fail "Received different IP:port on server1 than server2 (endpoint-independent)"
+	fi
+	kill "$server1pid"
+	kill "$server2pid"
+}
+
+# Tear down the test environment and remove the captured nc output files.
+endpoint_independent_cleanup()
+{
+	pft_cleanup
+	rm -f server1.out server2.out
+}
+
nested_anchor_cleanup()
{
pft_cleanup

File Metadata

Mime Type
text/plain
Expires
Wed, Dec 31, 2:11 AM (7 h, 44 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27399636
Default Alt Text
D11137.id142155.diff (14 KB)

Event Timeline