Index: sys/net/pfvar.h =================================================================== --- sys/net/pfvar.h +++ sys/net/pfvar.h @@ -683,6 +683,29 @@ u_int8_t pad[1]; }; +/* Keep synced with struct pf_udp_endpoint. */ +struct pf_udp_endpoint_cmp { + struct pf_addr addr; + uint16_t port; + sa_family_t af; + uint8_t pad[1]; +}; + +struct pf_udp_endpoint { + struct pf_addr addr; + uint16_t port; + sa_family_t af; + uint8_t pad[1]; + + struct pf_udp_mapping *mapping; + LIST_ENTRY(pf_udp_endpoint) entry; +}; + +struct pf_udp_mapping { + struct pf_udp_endpoint endpoints[2]; + u_int refs; +}; + /* Keep synced with struct pf_state_key. */ struct pf_state_key_cmp { struct pf_addr addr[2]; @@ -728,6 +751,7 @@ union pf_rule_ptr nat_rule; struct pf_addr rt_addr; struct pf_state_key *key[2]; /* addresses stack and wire */ + struct pf_udp_mapping *udp_mapping; struct pfi_kif *kif; struct pfi_kif *rt_kif; struct pf_src_node *src_node; @@ -1452,6 +1476,11 @@ struct mtx lock; }; +struct pf_udpendpointhash { + LIST_HEAD(, pf_udp_endpoint) endpoints; + struct mtx lock; +}; + struct pf_keyhash { LIST_HEAD(, pf_state_key) keys; struct mtx lock; @@ -1465,8 +1494,10 @@ extern u_long pf_hashmask; extern u_long pf_srchashmask; #define PF_HASHSIZ (32768) +VNET_DECLARE(struct pf_udpendpointhash *, pf_udpendpointhash); VNET_DECLARE(struct pf_keyhash *, pf_keyhash); VNET_DECLARE(struct pf_idhash *, pf_idhash); +#define V_pf_udpendpointhash VNET(pf_udpendpointhash) #define V_pf_keyhash VNET(pf_keyhash) #define V_pf_idhash VNET(pf_idhash) VNET_DECLARE(struct pf_srchash *, pf_srchash); @@ -1517,6 +1548,8 @@ #define V_pf_state_z VNET(pf_state_z) VNET_DECLARE(uma_zone_t, pf_state_key_z); #define V_pf_state_key_z VNET(pf_state_key_z) +VNET_DECLARE(uma_zone_t, pf_udp_mapping_z); +#define V_pf_udp_mapping_z VNET(pf_udp_mapping_z) VNET_DECLARE(uma_zone_t, pf_state_scrub_z); #define V_pf_state_scrub_z VNET(pf_state_scrub_z) @@ -1555,6 +1588,14 @@ extern struct pf_state *pf_find_state_byid(uint64_t, uint32_t); extern struct pf_state *pf_find_state_all(struct pf_state_key_cmp *, u_int, int *); +extern struct pf_udp_mapping *pf_udp_mapping_find(struct pf_udp_endpoint_cmp *endpoint); +extern struct pf_udp_mapping *pf_udp_mapping_create(sa_family_t af, + struct pf_addr *src_addr, uint16_t src_port, + struct pf_addr *nat_addr, uint16_t nat_port); +extern int pf_udp_mapping_insert(struct pf_udp_mapping *mapping); +extern void pf_udp_mapping_release(struct pf_udp_mapping *mapping); + + extern struct pf_src_node *pf_find_src_node(struct pf_addr *, struct pf_rule *, sa_family_t, int); extern void pf_unlink_src_node(struct pf_src_node *); @@ -1749,7 +1790,8 @@ int, int, struct pfi_kif *, struct pf_src_node **, struct pf_state_key **, struct pf_state_key **, struct pf_addr *, struct pf_addr *, - uint16_t, uint16_t, struct pf_anchor_stackframe *); + uint16_t, uint16_t, struct pf_anchor_stackframe *, + struct pf_udp_mapping **udp_mapping); struct pf_state_key *pf_state_key_setup(struct pf_pdesc *, struct pf_addr *, struct pf_addr *, u_int16_t, u_int16_t); Index: sys/netpfil/pf/pf.c =================================================================== --- sys/netpfil/pf/pf.c +++ sys/netpfil/pf/pf.c @@ -194,6 +194,7 @@ uma_zone_t pf_mtag_z; VNET_DEFINE(uma_zone_t, pf_state_z); VNET_DEFINE(uma_zone_t, pf_state_key_z); +VNET_DEFINE(uma_zone_t, pf_udp_mapping_z); VNET_DEFINE(uint64_t, pf_stateid[MAXCPU]); #define PFID_CPUBITS 8 @@ -241,7 +242,7 @@ struct pf_state_key *, struct mbuf *, int, u_int16_t, u_int16_t, int *, struct pfi_kif *, struct pf_state **, int, u_int16_t, u_int16_t, - int); + int, struct pf_udp_mapping *); static int pf_test_fragment(struct pf_rule **, int, struct pfi_kif *, struct mbuf *, void *, struct pf_pdesc *, struct pf_rule **, @@ -356,6 +357,7 @@ VNET_DEFINE(struct pf_keyhash *, pf_keyhash); VNET_DEFINE(struct pf_idhash *, pf_idhash); VNET_DEFINE(struct pf_srchash *, pf_srchash); +VNET_DEFINE(struct pf_udpendpointhash *, pf_udpendpointhash); SYSCTL_NODE(_net, OID_AUTO, pf, CTLFLAG_RW, 0, "pf(4)"); @@ -425,6 +427,18 @@ return (h & pf_hashmask); } +static inline uint32_t +pf_hashudpendpoint(struct pf_udp_endpoint *endpoint) +{ + uint32_t h; + + h = murmur3_32_hash32((uint32_t *)endpoint, + sizeof(struct pf_udp_endpoint_cmp)/sizeof(uint32_t), + V_pf_hashseed); + + return (h & pf_hashmask); +} + static __inline uint32_t pf_hashsrc(struct pf_addr *addr, sa_family_t af) { @@ -779,6 +793,7 @@ { struct pf_keyhash *kh; struct pf_idhash *ih; + struct pf_udpendpointhash *uh; struct pf_srchash *sh; u_int i; @@ -799,15 +814,20 @@ V_pf_state_key_z = uma_zcreate("pf state keys", sizeof(struct pf_state_key), pf_state_key_ctor, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + V_pf_udp_mapping_z = uma_zcreate("pf UDP mappings", + sizeof(struct pf_udp_mapping), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); V_pf_keyhash = malloc(pf_hashsize * sizeof(struct pf_keyhash), M_PFHASH, M_WAITOK | M_ZERO); V_pf_idhash = malloc(pf_hashsize * sizeof(struct pf_idhash), M_PFHASH, M_WAITOK | M_ZERO); + V_pf_udpendpointhash = malloc(pf_hashsize * sizeof(struct pf_udpendpointhash), + M_PFHASH, M_WAITOK | M_ZERO); pf_hashmask = pf_hashsize - 1; - for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask; - i++, kh++, ih++) { + for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash, uh=V_pf_udpendpointhash; i <= pf_hashmask; + i++, kh++, ih++, uh++) { mtx_init(&kh->lock, "pf_keyhash", NULL, MTX_DEF | MTX_DUPOK); mtx_init(&ih->lock, "pf_idhash", NULL, MTX_DEF); + mtx_init(&uh->lock, "pf_udpendpointhash", NULL, MTX_DEF | MTX_DUPOK); } /* Source nodes. */ @@ -851,21 +871,26 @@ { struct pf_keyhash *kh; struct pf_idhash *ih; + struct pf_udpendpointhash *uh; struct pf_srchash *sh; struct pf_send_entry *pfse, *next; u_int i; - for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash; i <= pf_hashmask; - i++, kh++, ih++) { + for (i = 0, kh = V_pf_keyhash, ih = V_pf_idhash, uh=V_pf_udpendpointhash; i <= pf_hashmask; + i++, kh++, ih++, uh++) { KASSERT(LIST_EMPTY(&kh->keys), ("%s: key hash not empty", __func__)); KASSERT(LIST_EMPTY(&ih->states), ("%s: id hash not empty", __func__)); + KASSERT(LIST_EMPTY(&uh->endpoints), ("%s: udpendpoint hash not empty", + __func__)); mtx_destroy(&kh->lock); mtx_destroy(&ih->lock); + mtx_destroy(&uh->lock); } free(V_pf_keyhash, M_PFHASH); free(V_pf_idhash, M_PFHASH); + free(V_pf_udpendpointhash, M_PFHASH); for (i = 0, sh = V_pf_srchash; i <= pf_srchashmask; i++, sh++) { KASSERT(LIST_EMPTY(&sh->nodes), @@ -882,6 +907,7 @@ uma_zdestroy(V_pf_sources_z); uma_zdestroy(V_pf_state_z); uma_zdestroy(V_pf_state_key_z); + uma_zdestroy(V_pf_udp_mapping_z); } static int @@ -1369,6 +1395,114 @@ return (ret); } +struct pf_udp_mapping* +pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, uint16_t src_port, + struct pf_addr *nat_addr, uint16_t nat_port) +{ + struct pf_udp_mapping *mapping; + + mapping = uma_zalloc(V_pf_udp_mapping_z, M_NOWAIT | M_ZERO); + if (mapping == NULL) + return NULL; + PF_ACPY(&mapping->endpoints[0].addr, src_addr, af); + mapping->endpoints[0].port = src_port; + mapping->endpoints[0].af = af; + mapping->endpoints[0].mapping = mapping; + PF_ACPY(&mapping->endpoints[1].addr, nat_addr, af); + mapping->endpoints[1].port = nat_port; + mapping->endpoints[1].af = af; + mapping->endpoints[1].mapping = mapping; + refcount_init(&mapping->refs, 1); + return (mapping); +} + +int +pf_udp_mapping_insert(struct pf_udp_mapping *mapping) +{ + struct pf_udpendpointhash *h0, *h1; + struct pf_udp_endpoint *endpoint; + int ret = 1; + + h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])]; + h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])]; + if (h0 == h1) { + PF_HASHROW_LOCK(h0); + } else if (h0 < h1) { + PF_HASHROW_LOCK(h0); + PF_HASHROW_LOCK(h1); + } else { + PF_HASHROW_LOCK(h1); + PF_HASHROW_LOCK(h0); + } + + LIST_FOREACH(endpoint, &h0->endpoints, entry) + if (bcmp(endpoint, &mapping->endpoints[0], sizeof(struct pf_udp_endpoint_cmp)) == 0) + break; + if (endpoint != NULL) + goto cleanup; + LIST_FOREACH(endpoint, &h1->endpoints, entry) + if (bcmp(endpoint, &mapping->endpoints[1], sizeof(struct pf_udp_endpoint_cmp)) == 0) + break; + if (endpoint != NULL) + goto cleanup; + LIST_INSERT_HEAD(&h0->endpoints, &mapping->endpoints[0], entry); + LIST_INSERT_HEAD(&h1->endpoints, &mapping->endpoints[1], entry); + ret = 0; + +cleanup: + if (h0 != h1) { + PF_HASHROW_UNLOCK(h0); + PF_HASHROW_UNLOCK(h1); + } else { + PF_HASHROW_UNLOCK(h0); + } + return (ret); +} + +void +pf_udp_mapping_release(struct pf_udp_mapping *mapping) +{ + /* refcount is synchronized on the source endpoint's row lock */ + struct pf_udpendpointhash *h0, *h1; + + h0 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[0])]; + PF_HASHROW_LOCK(h0); + if (refcount_release(&mapping->refs)) { + LIST_REMOVE(&mapping->endpoints[0], entry); + PF_HASHROW_UNLOCK(h0); + h1 = &V_pf_udpendpointhash[pf_hashudpendpoint(&mapping->endpoints[1])]; + PF_HASHROW_LOCK(h1); + LIST_REMOVE(&mapping->endpoints[1], entry); + PF_HASHROW_UNLOCK(h1); + + uma_zfree(V_pf_udp_mapping_z, mapping); + } else { + PF_HASHROW_UNLOCK(h0); + } +} + +struct pf_udp_mapping * +pf_udp_mapping_find(struct pf_udp_endpoint_cmp *key) +{ + struct pf_udpendpointhash *uh; + struct pf_udp_endpoint *endpoint; + + uh = &V_pf_udpendpointhash[pf_hashudpendpoint((struct pf_udp_endpoint*)key)]; + + PF_HASHROW_LOCK(uh); + LIST_FOREACH(endpoint, &uh->endpoints, entry) + if (bcmp(endpoint, key, sizeof(struct pf_udp_endpoint_cmp)) == 0 && + bcmp(endpoint, &endpoint->mapping->endpoints[0], sizeof(struct pf_udp_endpoint_cmp)) == 0) + break; + if (endpoint == NULL) { + PF_HASHROW_UNLOCK(uh); + return NULL; + } + refcount_acquire(&endpoint->mapping->refs); + PF_HASHROW_UNLOCK(uh); + return (endpoint->mapping); +} + /* END state table stuff */ static void @@ -1650,6 +1784,9 @@ pf_detach_state(s); refcount_release(&s->refs); + if (s->udp_mapping) + pf_udp_mapping_release(s->udp_mapping); + return (pf_release_state(s)); } @@ -3136,6 +3273,7 @@ u_int16_t bproto_sum = 0, bip_sum = 0; u_int8_t icmptype = 0, icmpcode = 0; struct pf_anchor_stackframe anchor_stack[PF_ANCHOR_STACKSIZE]; + struct pf_udp_mapping *udp_mapping = NULL; PF_RULES_RASSERT(); @@ -3199,7 +3337,7 @@ /* check packet for BINAT/NAT/RDR */ if ((nr = pf_get_translation(pd, m, off, direction, kif, &nsn, &sk, - &nk, saddr, daddr, sport, dport, anchor_stack)) != NULL) { + &nk, saddr, daddr, sport, dport, anchor_stack, &udp_mapping)) != NULL) { KASSERT(sk != NULL, ("%s: null sk", __func__)); KASSERT(nk != NULL, ("%s: null nk", __func__)); @@ -3506,14 +3644,19 @@ int action; action = pf_create_state(r, nr, a, pd, nsn, nk, sk, m, off, sport, dport, &rewrite, kif, sm, tag, bproto_sum, bip_sum, - hdrlen); - if (action != PF_PASS) + hdrlen, udp_mapping); + if (action != PF_PASS) { + if (udp_mapping != NULL) + pf_udp_mapping_release(udp_mapping); return (action); + } } else { if (sk != NULL) uma_zfree(V_pf_state_key_z, sk); if (nk != NULL) uma_zfree(V_pf_state_key_z, nk); + if (udp_mapping != NULL) + pf_udp_mapping_release(udp_mapping); } /* copy back packet headers if we performed NAT operations */ @@ -3538,6 +3681,8 @@ uma_zfree(V_pf_state_key_z, sk); if (nk != NULL) uma_zfree(V_pf_state_key_z, nk); + if (udp_mapping != NULL) + pf_udp_mapping_release(udp_mapping); return (PF_DROP); } @@ -3546,7 +3691,8 @@ struct pf_pdesc *pd, struct pf_src_node *nsn, struct pf_state_key *nk, struct pf_state_key *sk, struct mbuf *m, int off, u_int16_t sport, u_int16_t dport, int *rewrite, struct pfi_kif *kif, struct pf_state **sm, - int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen) + int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen, + struct pf_udp_mapping *udp_mapping) { struct pf_state *s = NULL; struct pf_src_node *sn = NULL; @@ -3752,6 +3898,7 @@ REASON_SET(&reason, PFRES_SYNPROXY); return (PF_SYNPROXY_DROP); } + s->udp_mapping = udp_mapping; return (PF_PASS); Index: sys/netpfil/pf/pf_lb.c =================================================================== --- sys/netpfil/pf/pf_lb.c +++ sys/netpfil/pf/pf_lb.c @@ -63,7 +63,8 @@ uint16_t, int, struct pf_anchor_stackframe *); static int pf_get_sport(sa_family_t, uint8_t, struct pf_rule *, struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *, - uint16_t *, uint16_t, uint16_t, struct pf_src_node **); + uint16_t *, uint16_t, uint16_t, struct pf_src_node **, + struct pf_udp_mapping**); #define mix(a,b,c) \ do { \ @@ -214,14 +215,37 @@ pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr, uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low, - uint16_t high, struct pf_src_node **sn) + uint16_t high, struct pf_src_node **sn, struct pf_udp_mapping **udp_mapping) { struct pf_state_key_cmp key; struct pf_addr init_addr; + if (proto == IPPROTO_UDP) { + struct pf_udp_endpoint_cmp udp_source; + + bzero(&udp_source, sizeof(udp_source)); + udp_source.af = af; + PF_ACPY(&udp_source.addr, saddr, af); + udp_source.port = sport; + *udp_mapping = pf_udp_mapping_find(&udp_source); + if (*udp_mapping) { + PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af); + *nport = (*udp_mapping)->endpoints[1].port; + /* as per pf_map_addr(): */ + if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR && + (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) + *sn = pf_find_src_node(saddr, r, af, 0); + return (0); + } else { + *udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0); + if (*udp_mapping == NULL) + return (1); + } + } + bzero(&init_addr, sizeof(init_addr)); if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn)) - return (1); + goto failed; if (proto == IPPROTO_ICMP) { low = 1; @@ -236,6 +260,8 @@ do { PF_ACPY(&key.addr[1], naddr, key.af); + if ((*udp_mapping)) + PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, af); /* * port search; start random, step; @@ -255,8 +281,16 @@ } else if (low == high) { key.port[1] = htons(low); if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { - *nport = htons(low); - return (0); + if (proto == IPPROTO_UDP) { + (*udp_mapping)->endpoints[1].port = htons(low); + if (pf_udp_mapping_insert(*udp_mapping) == 0) { + *nport = htons(low); + return (0); + } + } else { + *nport = htons(low); + return (0); + } } } else { uint16_t tmp, cut; @@ -270,19 +304,35 @@ cut = arc4random() % (1 + high - low) + low; /* low <= cut <= high */ for (tmp = cut; tmp <= high; ++(tmp)) { - key.port[1] = htons(tmp); - if (pf_find_state_all(&key, PF_IN, NULL) == - NULL) { - *nport = htons(tmp); - return (0); + if (proto == IPPROTO_UDP) { + (*udp_mapping)->endpoints[1].port = htons(tmp); + if (pf_udp_mapping_insert(*udp_mapping) == 0) { + *nport = htons(tmp); + return (0); + } + } else { + key.port[1] = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == + NULL) { + *nport = htons(tmp); + return (0); + } } } for (tmp = cut - 1; tmp >= low; --(tmp)) { - key.port[1] = htons(tmp); - if (pf_find_state_all(&key, PF_IN, NULL) == - NULL) { - *nport = htons(tmp); - return (0); + if (proto == IPPROTO_UDP) { + (*udp_mapping)->endpoints[1].port = htons(tmp); + if (pf_udp_mapping_insert(*udp_mapping) == 0) { + *nport = htons(tmp); + return (0); + } + } else { + key.port[1] = htons(tmp); + if (pf_find_state_all(&key, PF_IN, NULL) == + NULL) { + *nport = htons(tmp); + return (0); + } } } } @@ -300,7 +350,13 @@ return (1); } } while (! PF_AEQ(&init_addr, naddr, af) ); - return (1); /* none available */ + /* none available */ +failed: + if (*udp_mapping) { + uma_zfree(V_pf_udp_mapping_z, *udp_mapping); + *udp_mapping = NULL; + } + return (1); } int @@ -512,7 +568,8 @@ struct pfi_kif *kif, struct pf_src_node **sn, struct pf_state_key **skp, struct pf_state_key **nkp, struct pf_addr *saddr, struct pf_addr *daddr, - uint16_t sport, uint16_t dport, struct pf_anchor_stackframe *anchor_stack) + uint16_t sport, uint16_t dport, struct pf_anchor_stackframe *anchor_stack, + struct pf_udp_mapping **udp_mapping) { struct pf_rule *r = NULL; struct pf_addr *naddr; @@ -566,7 +623,7 @@ case PF_NAT: if (pf_get_sport(pd->af, pd->proto, r, saddr, sport, daddr, dport, naddr, nport, r->rpool.proxy_port[0], - r->rpool.proxy_port[1], sn)) { + r->rpool.proxy_port[1], sn, udp_mapping)) { DPFPRINTF(PF_DEBUG_MISC, ("pf: NAT proxy port allocation (%u-%u) failed\n", r->rpool.proxy_port[0], r->rpool.proxy_port[1]));