Index: head/sbin/ipfw/Makefile =================================================================== --- head/sbin/ipfw/Makefile +++ head/sbin/ipfw/Makefile @@ -5,7 +5,7 @@ PACKAGE=ipfw PROG= ipfw SRCS= ipfw2.c dummynet.c ipv6.c main.c nat.c tables.c -SRCS+= nptv6.c +SRCS+= nat64lsn.c nat64stl.c nptv6.c WARNS?= 2 .if ${MK_PF} != "no" Index: head/sbin/ipfw/ipfw.8 =================================================================== --- head/sbin/ipfw/ipfw.8 +++ head/sbin/ipfw/ipfw.8 @@ -1,7 +1,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 19, 2016 +.Dd August 13, 2016 .Dt IPFW 8 .Os .Sh NAME @@ -113,6 +113,37 @@ .Oc .Oc .Ar pathname +.Ss STATEFUL IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION +.Nm +.Oo Cm set Ar N Oc Cm nat64lsn Ar name Cm create Ar create-options +.Nm +.Oo Cm set Ar N Oc Cm nat64lsn Ar name Cm config Ar config-options +.Nm +.Oo Cm set Ar N Oc Cm nat64lsn +.Brq Ar name | all +.Brq Cm list | show +.Op Cm states +.Nm +.Oo Cm set Ar N Oc Cm nat64lsn +.Brq Ar name | all +.Cm destroy +.Nm +.Oo Cm set Ar N Oc Cm nat64lsn Ar name Cm stats Op Cm reset +.Ss STATELESS IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION +.Nm +.Oo Cm set Ar N Oc Cm nat64stl Ar name Cm create Ar create-options +.Nm +.Oo Cm set Ar N Oc Cm nat64stl Ar name Cm config Ar config-options +.Nm +.Oo Cm set Ar N Oc Cm nat64stl +.Brq Ar name | all +.Brq Cm list | show +.Nm +.Oo Cm set Ar N Oc Cm nat64stl +.Brq Ar name | all +.Cm destroy +.Nm +.Oo Cm set Ar N Oc Cm nat64stl Ar name Cm stats Op Cm reset .Ss IPv6-to-IPv6 NETWORK PREFIX TRANSLATION .Nm .Oo Cm set Ar N Oc Cm nptv6 Ar name Cm create Ar create-options @@ -837,6 +868,16 @@ see the .Sx NETWORK ADDRESS TRANSLATION (NAT) Section for further information. +.It Cm nat64lsn Ar name +Pass packet to a stateful NAT64 instance (for IPv6/IPv4 network address and +protocol translation): see the +.Sx IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION +Section for further information. +.It Cm nat64stl Ar name +Pass packet to a stateless NAT64 instance (for IPv6/IPv4 network address and +protocol translation): see the +.Sx IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION +Section for further information. .It Cm nptv6 Ar name Pass packet to a NPTv6 instance (for IPv6-to-IPv6 network prefix translation): see the @@ -2927,9 +2968,189 @@ See .Sx SYSCTL VARIABLES for more info. +.Sh IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION +.Nm +supports in-kernel IPv6/IPv4 network address and protocol translation. +Stateful NAT64 translation allows IPv6-only clients to contact IPv4 servers +using unicast TCP, UDP or ICMP protocols. +One or more IPv4 addresses assigned to a stateful NAT64 translator are shared +among several IPv6-only clients. +When stateful NAT64 is used in conjunction with DNS64, no changes are usually +required in the IPv6 client or the IPv4 server. +The kernel module +.Cm ipfw_nat64 +should be loaded or the kernel should be built with +.Cm options IPFIREWALL_NAT64 +to be able to use the stateful NAT64 translator. +.Pp +Stateful NAT64 allocates memory for several types of objects. +When an IPv6 client initiates a connection, the NAT64 translator creates a host entry +in the states table. +Each host entry has a number of ports group entries allocated on demand. +Ports group entries contain connection state entries. +There are several options to control the limits and lifetimes of these objects. +.Pp +The NAT64 translator follows RFC7915 when doing ICMPv6/ICMP translation; +unsupported message types are silently dropped. 
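As a rough illustration of the userland syntax documented above (a sketch only: the instance name NAT64, the rule numbers and the addresses are hypothetical, and the exact ruleset layout depends on the local setup), a minimal stateful NAT64 configuration might look like:

	# create an instance sharing a small IPv4 pool among IPv6 clients
	ipfw nat64lsn NAT64 create prefix4 198.51.100.0/28
	# keep neighbor solicitation/advertisement out of the translator
	ipfw add 100 allow ipv6-icmp from any to any icmp6types 135,136
	# IPv6 clients reach IPv4 hosts via the Well-Known Prefix
	ipfw add 200 nat64lsn NAT64 ip6 from 2001:db8::/64 to 64:ff9b::/96
	# return IPv4 traffic destined to the pool goes back through the instance
	ipfw add 300 nat64lsn NAT64 ip from any to 198.51.100.0/28
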
+IPv6 needs several ICMPv6 message types to be explicitly allowed for correct +operation. +Make sure that ND6 neighbor solicitation (ICMPv6 type 135) and neighbor +advertisement (ICMPv6 type 136) messages are not handled by translation +rules. +.Pp +After translation the NAT64 translator sends packets through the corresponding netisr +queue. +Thus the translator host should be configured as both an IPv4 and an IPv6 router. +.Pp +Currently both the stateful and stateless NAT64 translators use the Well-Known IPv6 +Prefix +.Ar 64:ff9b::/96 +to represent IPv4 addresses as IPv6 addresses. +Thus the DNS64 service and routing should be configured to use the Well-Known IPv6 +Prefix. +.Pp +The stateful NAT64 configuration command is the following: +.Bd -ragged -offset indent +.Bk -words +.Cm nat64lsn +.Ar name +.Cm create +.Ar create-options +.Ek +.Ed +.Pp +The following parameters can be configured: +.Bl -tag -width indent +.It Cm prefix4 Ar ipv4_prefix/mask +The IPv4 prefix with mask defines the pool of IPv4 addresses used as +the source address after translation. +The stateful NAT64 module translates the IPv6 source address of a client to one +IPv4 address from this pool. +Note that incoming IPv4 packets that do not have a corresponding state entry +in the states table will be dropped by the translator. +Make sure that translation rules handle packets destined to the configured prefix. +.It Cm max_ports Ar number +The maximum number of ports reserved for upper level protocols for one IPv6 client. +All reserved ports are divided into chunks among the supported protocols. +The number of connections from one IPv6 client is limited by this option. +Note that closed TCP connections still remain in the list of connections until the +.Cm tcp_close_age +interval expires. +Default value is +.Ar 2048 . +.It Cm host_del_age Ar seconds +The number of seconds until the host entry for an IPv6 client is deleted +and all its resources are released due to inactivity. +Default value is +.Ar 3600 . +.It Cm pg_del_age Ar seconds +The number of seconds until a ports group with unused state entries is +released. +Default value is +.Ar 900 . +.It Cm tcp_syn_age Ar seconds +The number of seconds a state entry for a TCP connection with only a SYN +sent is kept. +If the TCP connection is not established within this time, +the state entry is deleted. +Default value is +.Ar 10 . +.It Cm tcp_est_age Ar seconds +The number of seconds a state entry for an established TCP connection +is kept. +Default value is +.Ar 7200 . +.It Cm tcp_close_age Ar seconds +The number of seconds a state entry for a closed TCP connection +is kept. +Keeping state entries for closed connections is needed because IPv4 servers +typically keep closed connections in a TIME_WAIT state for several minutes. +Since the translator's IPv4 addresses are shared among all IPv6 clients, +new connections from the same addresses and ports may be rejected by the server +while those connections are still in a TIME_WAIT state. +Keeping them in the translator's state table protects against such rejections. +Default value is +.Ar 180 . +.It Cm udp_age Ar seconds +The number of seconds the translator keeps a state entry while waiting for a +reply to the sent UDP datagram. +Default value is +.Ar 120 . +.It Cm icmp_age Ar seconds +The number of seconds the translator keeps a state entry while waiting for a +reply to the sent ICMP message. +Default value is +.Ar 60 . +.It Cm log +Turn on logging of all handled packets via BPF through the +.Ar ipfwlog0 +interface. 
+.Ar ipfwlog0 +is a pseudo interface that can be created manually after boot with the +.Cm ifconfig +command. +Note that it serves a different purpose than the +.Ar ipfw0 +interface. +The translator sends additional information to BPF with each packet. +With +.Cm tcpdump +you can see each handled packet before and after translation. +.It Cm -log +Turn off logging of all handled packets via BPF. +.El +.Pp +To inspect the states table of a stateful NAT64 instance, the following command can be used: +.Bd -ragged -offset indent +.Bk -words +.Cm nat64lsn +.Ar name +.Cm show Cm states +.Ek +.Ed +.Pp +.Pp +The stateless NAT64 translator does not use a states table for translation +and converts IPv4 addresses to IPv6 and vice versa solely based on the +mappings taken from the configured lookup tables. +Since a states table is not used by the stateless translator, +it can be configured to pass IPv4 clients to IPv6-only servers. +.Pp +The stateless NAT64 configuration command is the following: +.Bd -ragged -offset indent +.Bk -words +.Cm nat64stl +.Ar name +.Cm create +.Ar create-options +.Ek +.Ed +.Pp +The following parameters can be configured: +.Bl -tag -width indent +.It Cm table4 Ar table46 +The lookup table +.Ar table46 +contains the mapping of IPv4 addresses to the corresponding IPv6 addresses. +.It Cm table6 Ar table64 +The lookup table +.Ar table64 +contains the mapping of IPv6 addresses to the corresponding IPv4 addresses. +.It Cm log +Turn on logging of all handled packets via BPF through the +.Ar ipfwlog0 +interface. +.It Cm -log +Turn off logging of all handled packets via BPF. +.El +.Pp +Note that the behavior of the stateless translator with respect to unmatched +packets differs from that of the stateful translator: +if the corresponding addresses are not found in the lookup tables, the packet +is not dropped and the rule search continues. .Sh IPv6-to-IPv6 NETWORK PREFIX TRANSLATION (NPTv6) .Nm -support in-kernel IPv6-to-IPv6 network prefix translation as described +supports in-kernel IPv6-to-IPv6 network prefix translation as described in RFC6296. 
The kernel module .Cm ipfw_nptv6 Index: head/sbin/ipfw/ipfw2.h =================================================================== --- head/sbin/ipfw/ipfw2.h +++ head/sbin/ipfw/ipfw2.h @@ -254,7 +254,30 @@ TOK_UNLOCK, TOK_VLIST, TOK_OLIST, + + /* NAT64 tokens */ + TOK_NAT64STL, + TOK_NAT64LSN, TOK_STATS, + TOK_STATES, + TOK_CONFIG, + TOK_TABLE4, + TOK_TABLE6, + TOK_PREFIX4, + TOK_PREFIX6, + TOK_AGG_LEN, + TOK_AGG_COUNT, + TOK_MAX_PORTS, + TOK_JMAXLEN, + TOK_PORT_RANGE, + TOK_HOST_DEL_AGE, + TOK_PG_DEL_AGE, + TOK_TCP_SYN_AGE, + TOK_TCP_CLOSE_AGE, + TOK_TCP_EST_AGE, + TOK_UDP_AGE, + TOK_ICMP_AGE, + TOK_LOGOFF, /* NPTv6 tokens */ TOK_NPTV6, @@ -347,6 +370,8 @@ void ipfw_zero(int ac, char *av[], int optname); void ipfw_list(int ac, char *av[], int show_counters); void ipfw_internal_handler(int ac, char *av[]); +void ipfw_nat64lsn_handler(int ac, char *av[]); +void ipfw_nat64stl_handler(int ac, char *av[]); void ipfw_nptv6_handler(int ac, char *av[]); int ipfw_check_object_name(const char *name); @@ -384,7 +409,10 @@ /* tables.c */ struct _ipfw_obj_ctlv; +struct _ipfw_obj_ntlv; int table_check_name(const char *tablename); void ipfw_list_ta(int ac, char *av[]); void ipfw_list_values(int ac, char *av[]); +void table_fill_ntlv(struct _ipfw_obj_ntlv *ntlv, const char *name, + uint8_t set, uint16_t uidx); Index: head/sbin/ipfw/ipfw2.c =================================================================== --- head/sbin/ipfw/ipfw2.c +++ head/sbin/ipfw/ipfw2.c @@ -235,6 +235,8 @@ }; static struct _s_x rule_eactions[] = { + { "nat64lsn", TOK_NAT64LSN }, + { "nat64stl", TOK_NAT64STL }, { "nptv6", TOK_NPTV6 }, { NULL, 0 } /* terminator */ }; Index: head/sbin/ipfw/main.c =================================================================== --- head/sbin/ipfw/main.c +++ head/sbin/ipfw/main.c @@ -425,6 +425,10 @@ if (co.use_set || try_next) { if (_substrcmp(*av, "delete") == 0) ipfw_delete(av); + else if (!strncmp(*av, "nat64stl", strlen(*av))) + ipfw_nat64stl_handler(ac, av); + else if (!strncmp(*av, "nat64lsn", strlen(*av))) + ipfw_nat64lsn_handler(ac, av); else if (!strncmp(*av, "nptv6", strlen(*av))) ipfw_nptv6_handler(ac, av); else if (_substrcmp(*av, "flush") == 0) Index: head/sbin/ipfw/nat64lsn.c =================================================================== --- head/sbin/ipfw/nat64lsn.c +++ head/sbin/ipfw/nat64lsn.c @@ -0,0 +1,854 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Alexander V. Chernikov + * Copyright (c) 2015-2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include "ipfw2.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static void nat64lsn_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, + uint8_t set); +typedef int (nat64lsn_cb_t)(ipfw_nat64lsn_cfg *cfg, const char *name, + uint8_t set); +static int nat64lsn_foreach(nat64lsn_cb_t *f, const char *name, uint8_t set, + int sort); + +static void nat64lsn_create(const char *name, uint8_t set, int ac, char **av); +static void nat64lsn_config(const char *name, uint8_t set, int ac, char **av); +static void nat64lsn_destroy(const char *name, uint8_t set); +static void nat64lsn_stats(const char *name, uint8_t set); +static void nat64lsn_reset_stats(const char *name, uint8_t set); +static int nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name, + uint8_t set); +static int nat64lsn_destroy_cb(ipfw_nat64lsn_cfg *cfg, const char *name, + uint8_t set); +static int nat64lsn_states_cb(ipfw_nat64lsn_cfg *cfg, const char *name, + uint8_t set); + +static struct _s_x nat64cmds[] = { + { "create", TOK_CREATE }, + { "config", TOK_CONFIG }, + { "destroy", TOK_DESTROY }, + { "list", TOK_LIST }, + { "show", TOK_LIST }, + { "stats", TOK_STATS }, + { NULL, 0 } +}; + +static uint64_t +nat64lsn_print_states(void *buf) +{ + char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], f[INET_ADDRSTRLEN]; + char sflags[4], *sf, *proto; + ipfw_obj_header *oh; + ipfw_obj_data *od; + ipfw_nat64lsn_stg *stg; + ipfw_nat64lsn_state *ste; + uint64_t next_idx; + int i, sz; + + oh = (ipfw_obj_header *)buf; + od = (ipfw_obj_data *)(oh + 1); + stg = (ipfw_nat64lsn_stg *)(od + 1); + sz = od->head.length - sizeof(*od); + next_idx = 0; + while (sz > 0 && next_idx != 0xFF) { + next_idx = stg->next_idx; + sz -= sizeof(*stg); + if (stg->count == 0) { + stg++; + continue; + } + switch (stg->proto) { + case IPPROTO_TCP: + proto = "TCP"; + break; + case IPPROTO_UDP: + proto = "UDP"; + break; + case IPPROTO_ICMPV6: + proto = "ICMPv6"; + break; + } + inet_ntop(AF_INET6, &stg->host6, s, sizeof(s)); + inet_ntop(AF_INET, &stg->alias4, a, sizeof(a)); + ste = (ipfw_nat64lsn_state *)(stg + 1); + for (i = 0; i < stg->count && sz > 0; i++) { + sf = sflags; + inet_ntop(AF_INET, &ste->daddr, f, sizeof(f)); + if (stg->proto == IPPROTO_TCP) { + if (ste->flags & 0x02) + *sf++ = 'S'; + if (ste->flags & 0x04) + *sf++ = 'E'; + if (ste->flags & 0x01) + *sf++ = 'F'; + } + *sf = '\0'; + switch (stg->proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + printf("%s:%d\t%s:%d\t%s\t%s\t%d\t%s:%d\n", + s, ste->sport, a, ste->aport, proto, + sflags, ste->idle, f, ste->dport); + break; + case IPPROTO_ICMPV6: + printf("%s\t%s\t%s\t\t%d\t%s\n", + s, a, proto, ste->idle, f); + break; + default: + printf("%s\t%s\t%d\t\t%d\t%s\n", + s, a, stg->proto, ste->idle, f); + } + ste++; + sz -= sizeof(*ste); + } + stg = (ipfw_nat64lsn_stg *)ste; + } + return (next_idx); +} + +static int +nat64lsn_states_cb(ipfw_nat64lsn_cfg *cfg, 
const char *name, uint8_t set) +{ + ipfw_obj_header *oh; + ipfw_obj_data *od; + void *buf; + uint64_t next_idx; + size_t sz; + + if (name != NULL && strcmp(cfg->name, name) != 0) + return (ESRCH); + + if (set != 0 && cfg->set != set) + return (ESRCH); + + next_idx = 0; + sz = 4096; + if ((buf = calloc(1, sz)) == NULL) + err(EX_OSERR, NULL); + do { + oh = (ipfw_obj_header *)buf; + od = (ipfw_obj_data *)(oh + 1); + nat64lsn_fill_ntlv(&oh->ntlv, cfg->name, set); + od->head.type = IPFW_TLV_OBJDATA; + od->head.length = sizeof(*od) + sizeof(next_idx); + *((uint64_t *)(od + 1)) = next_idx; + if (do_get3(IP_FW_NAT64LSN_LIST_STATES, &oh->opheader, &sz)) + err(EX_OSERR, "Error reading nat64lsn states"); + next_idx = nat64lsn_print_states(buf); + sz = 4096; + memset(buf, 0, sz); + } while (next_idx != 0xFF); + + free(buf); + return (0); +} + +static struct _s_x nat64statscmds[] = { + { "reset", TOK_RESET }, + { NULL, 0 } +}; + +static void +ipfw_nat64lsn_stats_handler(const char *name, uint8_t set, int ac, char *av[]) +{ + int tcmd; + + if (ac == 0) { + nat64lsn_stats(name, set); + return; + } + NEED1("nat64lsn stats needs command"); + tcmd = get_token(nat64statscmds, *av, "nat64lsn stats command"); + switch (tcmd) { + case TOK_RESET: + nat64lsn_reset_stats(name, set); + } +} + +static struct _s_x nat64listcmds[] = { + { "states", TOK_STATES }, + { "config", TOK_CONFIG }, + { NULL, 0 } +}; + +static void +ipfw_nat64lsn_list_handler(const char *name, uint8_t set, int ac, char *av[]) +{ + int tcmd; + + if (ac == 0) { + nat64lsn_foreach(nat64lsn_show_cb, name, set, 1); + return; + } + NEED1("nat64lsn list needs command"); + tcmd = get_token(nat64listcmds, *av, "nat64lsn list command"); + switch (tcmd) { + case TOK_STATES: + nat64lsn_foreach(nat64lsn_states_cb, name, set, 1); + break; + case TOK_CONFIG: + nat64lsn_foreach(nat64lsn_show_cb, name, set, 1); + } +} + +/* + * This one handles all nat64lsn-related commands + * ipfw [set N] nat64lsn NAME {create | config} ... 
+ * ipfw [set N] nat64lsn NAME stats + * ipfw [set N] nat64lsn {NAME | all} destroy + * ipfw [set N] nat64lsn {NAME | all} {list | show} [config | states] + */ +#define nat64lsn_check_name table_check_name +void +ipfw_nat64lsn_handler(int ac, char *av[]) +{ + const char *name; + int tcmd; + uint8_t set; + + if (co.use_set != 0) + set = co.use_set - 1; + else + set = 0; + ac--; av++; + + NEED1("nat64lsn needs instance name"); + name = *av; + if (nat64lsn_check_name(name) != 0) { + if (strcmp(name, "all") == 0) + name = NULL; + else + errx(EX_USAGE, "nat64lsn instance name %s is invalid", + name); + } + ac--; av++; + NEED1("nat64lsn needs command"); + + tcmd = get_token(nat64cmds, *av, "nat64lsn command"); + if (name == NULL && tcmd != TOK_DESTROY && tcmd != TOK_LIST) + errx(EX_USAGE, "nat64lsn instance name required"); + switch (tcmd) { + case TOK_CREATE: + ac--; av++; + nat64lsn_create(name, set, ac, av); + break; + case TOK_CONFIG: + ac--; av++; + nat64lsn_config(name, set, ac, av); + break; + case TOK_LIST: + ac--; av++; + ipfw_nat64lsn_list_handler(name, set, ac, av); + break; + case TOK_DESTROY: + if (name == NULL) + nat64lsn_foreach(nat64lsn_destroy_cb, NULL, set, 0); + else + nat64lsn_destroy(name, set); + break; + case TOK_STATS: + ac--; av++; + ipfw_nat64lsn_stats_handler(name, set, ac, av); + } +} + +static void +nat64lsn_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint8_t set) +{ + + ntlv->head.type = IPFW_TLV_EACTION_NAME(1); /* it doesn't matter */ + ntlv->head.length = sizeof(ipfw_obj_ntlv); + ntlv->idx = 1; + ntlv->set = set; + strlcpy(ntlv->name, name, sizeof(ntlv->name)); +} + +static void +nat64lsn_apply_mask(int af, void *prefix, uint16_t plen) +{ + struct in6_addr mask6, *p6; + struct in_addr mask4, *p4; + + if (af == AF_INET) { + p4 = (struct in_addr *)prefix; + mask4.s_addr = htonl(~((1 << (32 - plen)) - 1)); + p4->s_addr &= mask4.s_addr; + } else if (af == AF_INET6) { + p6 = (struct in6_addr *)prefix; + n2mask(&mask6, plen); + APPLY_MASK(p6, &mask6); + } +} + +static void +nat64lsn_parse_prefix(const char *arg, int af, void *prefix, uint16_t *plen) +{ + char *p, *l; + + p = strdup(arg); + if (p == NULL) + err(EX_OSERR, NULL); + if ((l = strchr(p, '/')) != NULL) + *l++ = '\0'; + if (l == NULL) + errx(EX_USAGE, "Prefix length required"); + if (inet_pton(af, p, prefix) != 1) + errx(EX_USAGE, "Bad prefix: %s", p); + *plen = (uint16_t)strtol(l, &l, 10); + if (*l != '\0' || *plen == 0 || (af == AF_INET && *plen > 32) || + (af == AF_INET6 && *plen > 96)) + errx(EX_USAGE, "Bad prefix length: %s", arg); + nat64lsn_apply_mask(af, prefix, *plen); + free(p); +} + +static uint32_t +nat64lsn_parse_int(const char *arg, const char *desc) +{ + char *p; + uint32_t val; + + val = (uint32_t)strtol(arg, &p, 10); + if (*p != '\0') + errx(EX_USAGE, "Invalid %s value: %s\n", desc, arg); + return (val); +} + +static struct _s_x nat64newcmds[] = { + { "prefix6", TOK_PREFIX6 }, + { "agg_len", TOK_AGG_LEN }, /* not yet */ + { "agg_count", TOK_AGG_COUNT }, /* not yet */ + { "port_range", TOK_PORT_RANGE }, /* not yet */ + { "jmaxlen", TOK_JMAXLEN }, + { "prefix4", TOK_PREFIX4 }, + { "max_ports", TOK_MAX_PORTS }, + { "host_del_age", TOK_HOST_DEL_AGE }, + { "pg_del_age", TOK_PG_DEL_AGE }, + { "tcp_syn_age", TOK_TCP_SYN_AGE }, + { "tcp_close_age",TOK_TCP_CLOSE_AGE }, + { "tcp_est_age", TOK_TCP_EST_AGE }, + { "udp_age", TOK_UDP_AGE }, + { "icmp_age", TOK_ICMP_AGE }, + { "log", TOK_LOG }, + { "-log", TOK_LOGOFF }, + { NULL, 0 } +}; + +/* + * Creates new nat64lsn instance + * ipfw nat64lsn create + 
* [ max_ports ] + * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ] + */ +#define NAT64LSN_HAS_PREFIX4 0x01 +#define NAT64LSN_HAS_PREFIX6 0x02 +static void +nat64lsn_create(const char *name, uint8_t set, int ac, char **av) +{ + char buf[sizeof(ipfw_obj_lheader) + sizeof(ipfw_nat64lsn_cfg)]; + ipfw_nat64lsn_cfg *cfg; + ipfw_obj_lheader *olh; + int tcmd, flags; + char *opt; + + memset(&buf, 0, sizeof(buf)); + olh = (ipfw_obj_lheader *)buf; + cfg = (ipfw_nat64lsn_cfg *)(olh + 1); + + /* Some reasonable defaults */ + inet_pton(AF_INET6, "64:ff9b::", &cfg->prefix6); + cfg->plen6 = 96; + cfg->set = set; + cfg->max_ports = NAT64LSN_MAX_PORTS; + cfg->jmaxlen = NAT64LSN_JMAXLEN; + cfg->nh_delete_delay = NAT64LSN_HOST_AGE; + cfg->pg_delete_delay = NAT64LSN_PG_AGE; + cfg->st_syn_ttl = NAT64LSN_TCP_SYN_AGE; + cfg->st_estab_ttl = NAT64LSN_TCP_EST_AGE; + cfg->st_close_ttl = NAT64LSN_TCP_FIN_AGE; + cfg->st_udp_ttl = NAT64LSN_UDP_AGE; + cfg->st_icmp_ttl = NAT64LSN_ICMP_AGE; + flags = NAT64LSN_HAS_PREFIX6; + while (ac > 0) { + tcmd = get_token(nat64newcmds, *av, "option"); + opt = *av; + ac--; av++; + + switch (tcmd) { + case TOK_PREFIX4: + NEED1("IPv4 prefix required"); + nat64lsn_parse_prefix(*av, AF_INET, &cfg->prefix4, + &cfg->plen4); + flags |= NAT64LSN_HAS_PREFIX4; + ac--; av++; + break; +#if 0 + case TOK_PREFIX6: + NEED1("IPv6 prefix required"); + nat64lsn_parse_prefix(*av, AF_INET6, &cfg->prefix6, + &cfg->plen6); + ac--; av++; + break; + case TOK_AGG_LEN: + NEED1("Aggregation prefix len required"); + cfg->agg_prefix_len = nat64lsn_parse_int(*av, opt); + ac--; av++; + break; + case TOK_AGG_COUNT: + NEED1("Max per-prefix count required"); + cfg->agg_prefix_max = nat64lsn_parse_int(*av, opt); + ac--; av++; + break; + case TOK_PORT_RANGE: + NEED1("port range x[:y] required"); + if ((p = strchr(*av, ':')) == NULL) + cfg->min_port = (uint16_t)nat64lsn_parse_int( + *av, opt); + else { + *p++ = '\0'; + cfg->min_port = (uint16_t)nat64lsn_parse_int( + *av, opt); + cfg->max_port = (uint16_t)nat64lsn_parse_int( + p, opt); + } + ac--; av++; + break; + case TOK_JMAXLEN: + NEED1("job queue length required"); + cfg->jmaxlen = nat64lsn_parse_int(*av, opt); + ac--; av++; + break; +#endif + case TOK_MAX_PORTS: + NEED1("Max per-user ports required"); + cfg->max_ports = nat64lsn_parse_int(*av, opt); + ac--; av++; + break; + case TOK_HOST_DEL_AGE: + NEED1("host delete delay required"); + cfg->nh_delete_delay = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_PG_DEL_AGE: + NEED1("portgroup delete delay required"); + cfg->pg_delete_delay = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_TCP_SYN_AGE: + NEED1("tcp syn age required"); + cfg->st_syn_ttl = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_TCP_CLOSE_AGE: + NEED1("tcp close age required"); + cfg->st_close_ttl = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_TCP_EST_AGE: + NEED1("tcp est age required"); + cfg->st_estab_ttl = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_UDP_AGE: + NEED1("udp age required"); + cfg->st_udp_ttl = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_ICMP_AGE: + NEED1("icmp age required"); + cfg->st_icmp_ttl = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_LOG: + cfg->flags |= NAT64_LOG; + break; + case TOK_LOGOFF: + cfg->flags &= ~NAT64_LOG; + break; + } + } + + /* Check validness */ + if ((flags & NAT64LSN_HAS_PREFIX4) != 
NAT64LSN_HAS_PREFIX4) + errx(EX_USAGE, "prefix4 required"); + + olh->count = 1; + olh->objsize = sizeof(*cfg); + olh->size = sizeof(buf); + strlcpy(cfg->name, name, sizeof(cfg->name)); + if (do_set3(IP_FW_NAT64LSN_CREATE, &olh->opheader, sizeof(buf)) != 0) + err(EX_OSERR, "nat64lsn instance creation failed"); +} + +/* + * Configures existing nat64lsn instance + * ipfw nat64lsn config + * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ] + */ +static void +nat64lsn_config(const char *name, uint8_t set, int ac, char **av) +{ + char buf[sizeof(ipfw_obj_header) + sizeof(ipfw_nat64lsn_cfg)]; + ipfw_nat64lsn_cfg *cfg; + ipfw_obj_header *oh; + size_t sz; + char *opt; + int tcmd; + + if (ac == 0) + errx(EX_USAGE, "config options required"); + memset(&buf, 0, sizeof(buf)); + oh = (ipfw_obj_header *)buf; + cfg = (ipfw_nat64lsn_cfg *)(oh + 1); + sz = sizeof(buf); + + nat64lsn_fill_ntlv(&oh->ntlv, name, set); + if (do_get3(IP_FW_NAT64LSN_CONFIG, &oh->opheader, &sz) != 0) + err(EX_OSERR, "failed to get config for instance %s", name); + + while (ac > 0) { + tcmd = get_token(nat64newcmds, *av, "option"); + opt = *av; + ac--; av++; + + switch (tcmd) { + case TOK_MAX_PORTS: + NEED1("Max per-user ports required"); + cfg->max_ports = nat64lsn_parse_int(*av, opt); + ac--; av++; + break; + case TOK_JMAXLEN: + NEED1("job queue length required"); + cfg->jmaxlen = nat64lsn_parse_int(*av, opt); + ac--; av++; + break; + case TOK_HOST_DEL_AGE: + NEED1("host delete delay required"); + cfg->nh_delete_delay = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_PG_DEL_AGE: + NEED1("portgroup delete delay required"); + cfg->pg_delete_delay = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_TCP_SYN_AGE: + NEED1("tcp syn age required"); + cfg->st_syn_ttl = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_TCP_CLOSE_AGE: + NEED1("tcp close age required"); + cfg->st_close_ttl = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_TCP_EST_AGE: + NEED1("tcp est age required"); + cfg->st_estab_ttl = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_UDP_AGE: + NEED1("udp age required"); + cfg->st_udp_ttl = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_ICMP_AGE: + NEED1("icmp age required"); + cfg->st_icmp_ttl = (uint16_t)nat64lsn_parse_int( + *av, opt); + ac--; av++; + break; + case TOK_LOG: + cfg->flags |= NAT64_LOG; + break; + case TOK_LOGOFF: + cfg->flags &= ~NAT64_LOG; + break; + default: + errx(EX_USAGE, "Can't change %s option", opt); + } + } + + if (do_set3(IP_FW_NAT64LSN_CONFIG, &oh->opheader, sizeof(buf)) != 0) + err(EX_OSERR, "nat64lsn instance configuration failed"); +} + +/* + * Reset nat64lsn instance statistics specified by @oh->ntlv. + * Request: [ ipfw_obj_header ] + */ +static void +nat64lsn_reset_stats(const char *name, uint8_t set) +{ + ipfw_obj_header oh; + + memset(&oh, 0, sizeof(oh)); + nat64lsn_fill_ntlv(&oh.ntlv, name, set); + if (do_set3(IP_FW_NAT64LSN_RESET_STATS, &oh.opheader, sizeof(oh)) != 0) + err(EX_OSERR, "failed to reset stats for instance %s", name); +} + +/* + * Destroys nat64lsn instance specified by @oh->ntlv. 
+ * Request: [ ipfw_obj_header ] + */ +static void +nat64lsn_destroy(const char *name, uint8_t set) +{ + ipfw_obj_header oh; + + memset(&oh, 0, sizeof(oh)); + nat64lsn_fill_ntlv(&oh.ntlv, name, set); + if (do_set3(IP_FW_NAT64LSN_DESTROY, &oh.opheader, sizeof(oh)) != 0) + err(EX_OSERR, "failed to destroy nat instance %s", name); +} + +/* + * Get nat64lsn instance statistics. + * Request: [ ipfw_obj_header ] + * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ] ] + */ +static int +nat64lsn_get_stats(const char *name, uint8_t set, + struct ipfw_nat64lsn_stats *stats) +{ + ipfw_obj_header *oh; + ipfw_obj_ctlv *oc; + size_t sz; + + sz = sizeof(*oh) + sizeof(*oc) + sizeof(*stats); + oh = calloc(1, sz); + nat64lsn_fill_ntlv(&oh->ntlv, name, set); + if (do_get3(IP_FW_NAT64LSN_STATS, &oh->opheader, &sz) == 0) { + oc = (ipfw_obj_ctlv *)(oh + 1); + memcpy(stats, oc + 1, sizeof(*stats)); + free(oh); + return (0); + } + free(oh); + return (-1); +} + +#define _P_STAT(_s, _f) printf("%8s:\t%lu\n", #_f, _s._f) +static void +nat64lsn_stats(const char *name, uint8_t set) +{ + struct ipfw_nat64lsn_stats stats; + + if (nat64lsn_get_stats(name, set, &stats) != 0) + err(EX_OSERR, "Error retrieving stats"); + + _P_STAT(stats, opcnt64); + _P_STAT(stats, opcnt46); + _P_STAT(stats, ofrags); + _P_STAT(stats, ifrags); + _P_STAT(stats, oerrors); + _P_STAT(stats, noroute4); + _P_STAT(stats, noroute6); + _P_STAT(stats, noproto); + _P_STAT(stats, nomem); + _P_STAT(stats, dropped); + + _P_STAT(stats, hostcount); + _P_STAT(stats, tcpchunks); + _P_STAT(stats, udpchunks); + _P_STAT(stats, icmpchunks); + _P_STAT(stats, jcalls); + _P_STAT(stats, jrequests); + _P_STAT(stats, jhostsreq); + _P_STAT(stats, jportreq); + _P_STAT(stats, jhostfails); + _P_STAT(stats, jportfails); + _P_STAT(stats, jreinjected); + _P_STAT(stats, jmaxlen); + _P_STAT(stats, jnomem); + _P_STAT(stats, screated); + _P_STAT(stats, sdeleted); + _P_STAT(stats, spgcreated); + _P_STAT(stats, spgdeleted); +} + +static int +nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set) +{ + char abuf[INET6_ADDRSTRLEN]; + + if (name != NULL && strcmp(cfg->name, name) != 0) + return (ESRCH); + + if (co.use_set != 0 && cfg->set != set) + return (ESRCH); + + if (co.use_set != 0 || cfg->set != 0) + printf("set %u ", cfg->set); + inet_ntop(AF_INET, &cfg->prefix4, abuf, sizeof(abuf)); + printf("nat64lsn %s prefix4 %s/%u ", cfg->name, abuf, cfg->plen4); +#if 0 + inet_ntop(AF_INET6, &cfg->prefix6, abuf, sizeof(abuf)); + printf("prefix6 %s/%u", abuf, cfg->plen6); + printf("agg_len %u agg_count %u ", cfg->agg_prefix_len, + cfg->agg_prefix_max); + if (cfg->min_port != NAT64LSN_PORT_MIN || + cfg->max_port != NAT64LSN_PORT_MAX) + printf(" port_range %u:%u", cfg->min_port, cfg->max_port); + if (cfg->jmaxlen != NAT64LSN_JMAXLEN) + printf(" jmaxlen %u ", cfg->jmaxlen); +#endif + if (cfg->max_ports != NAT64LSN_MAX_PORTS) + printf(" max_ports %u", cfg->max_ports); + if (cfg->nh_delete_delay != NAT64LSN_HOST_AGE) + printf(" host_del_age %u", cfg->nh_delete_delay); + if (cfg->pg_delete_delay != NAT64LSN_PG_AGE) + printf(" pg_del_age %u ", cfg->pg_delete_delay); + if (cfg->st_syn_ttl != NAT64LSN_TCP_SYN_AGE) + printf(" tcp_syn_age %u", cfg->st_syn_ttl); + if (cfg->st_close_ttl != NAT64LSN_TCP_FIN_AGE) + printf(" tcp_close_age %u", cfg->st_close_ttl); + if (cfg->st_estab_ttl != NAT64LSN_TCP_EST_AGE) + printf(" tcp_est_age %u", cfg->st_estab_ttl); + if (cfg->st_udp_ttl != NAT64LSN_UDP_AGE) + printf(" udp_age %u", cfg->st_udp_ttl); + if (cfg->st_icmp_ttl != 
NAT64LSN_ICMP_AGE) + printf(" icmp_age %u", cfg->st_icmp_ttl); + if (cfg->flags & NAT64_LOG) + printf(" log"); + printf("\n"); + return (0); +} + +static int +nat64lsn_destroy_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set) +{ + + if (co.use_set != 0 && cfg->set != set) + return (ESRCH); + + nat64lsn_destroy(cfg->name, cfg->set); + return (0); +} + + +/* + * Compare nat64lsn instances names. + * Honor number comparison. + */ +static int +nat64name_cmp(const void *a, const void *b) +{ + ipfw_nat64lsn_cfg *ca, *cb; + + ca = (ipfw_nat64lsn_cfg *)a; + cb = (ipfw_nat64lsn_cfg *)b; + + if (ca->set > cb->set) + return (1); + else if (ca->set < cb->set) + return (-1); + return (stringnum_cmp(ca->name, cb->name)); +} + +/* + * Retrieves nat64lsn instance list from kernel, + * optionally sorts it and calls requested function for each instance. + * + * Request: [ ipfw_obj_lheader ] + * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ] + */ +static int +nat64lsn_foreach(nat64lsn_cb_t *f, const char *name, uint8_t set, int sort) +{ + ipfw_obj_lheader *olh; + ipfw_nat64lsn_cfg *cfg; + size_t sz; + int i, error; + + /* Start with reasonable default */ + sz = sizeof(*olh) + 16 * sizeof(ipfw_nat64lsn_cfg); + + for (;;) { + if ((olh = calloc(1, sz)) == NULL) + return (ENOMEM); + + olh->size = sz; + if (do_get3(IP_FW_NAT64LSN_LIST, &olh->opheader, &sz) != 0) { + sz = olh->size; + free(olh); + if (errno != ENOMEM) + return (errno); + continue; + } + + if (sort != 0) + qsort(olh + 1, olh->count, olh->objsize, + nat64name_cmp); + + cfg = (ipfw_nat64lsn_cfg *)(olh + 1); + for (i = 0; i < olh->count; i++) { + error = f(cfg, name, set); /* Ignore errors for now */ + cfg = (ipfw_nat64lsn_cfg *)((caddr_t)cfg + + olh->objsize); + } + free(olh); + break; + } + return (0); +} + Index: head/sbin/ipfw/nat64stl.c =================================================================== --- head/sbin/ipfw/nat64stl.c +++ head/sbin/ipfw/nat64stl.c @@ -0,0 +1,521 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include "ipfw2.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static int nat64stl_check_prefix(struct in6_addr *prefix, int length); +typedef int (nat64stl_cb_t)(ipfw_nat64stl_cfg *i, const char *name, + uint8_t set); +static int nat64stl_foreach(nat64stl_cb_t *f, const char *name, uint8_t set, + int sort); + +static void nat64stl_create(const char *name, uint8_t set, int ac, char **av); +static void nat64stl_config(const char *name, uint8_t set, int ac, char **av); +static void nat64stl_destroy(const char *name, uint8_t set); +static void nat64stl_stats(const char *name, uint8_t set); +static void nat64stl_reset_stats(const char *name, uint8_t set); +static int nat64stl_show_cb(ipfw_nat64stl_cfg *cfg, const char *name, + uint8_t set); +static int nat64stl_destroy_cb(ipfw_nat64stl_cfg *cfg, const char *name, + uint8_t set); + +static struct _s_x nat64cmds[] = { + { "create", TOK_CREATE }, + { "config", TOK_CONFIG }, + { "destroy", TOK_DESTROY }, + { "list", TOK_LIST }, + { "show", TOK_LIST }, + { "stats", TOK_STATS }, + { NULL, 0 } +}; + +#define IPV6_ADDR_INT32_WKPFX htonl(0x64ff9b) +#define IN6_IS_ADDR_WKPFX(a) \ + ((a)->__u6_addr.__u6_addr32[0] == IPV6_ADDR_INT32_WKPFX && \ + (a)->__u6_addr.__u6_addr32[1] == 0 && \ + (a)->__u6_addr.__u6_addr32[2] == 0) +static int +nat64stl_check_prefix(struct in6_addr *prefix, int length) +{ + + if (IN6_IS_ADDR_WKPFX(prefix) && length == 96) + return (0); +#if 0 + switch (length) { + case 32: + case 40: + case 48: + case 56: + case 64: + /* Well-known prefix has 96 prefix length */ + if (IN6_IS_ADDR_WKPFX(prefix)) + return (1); + /* FALLTHROUGH */ + case 96: + /* Bits 64 to 71 must be set to zero */ + if (prefix->__u6_addr.__u6_addr8[8] != 0) + return (1); + /* XXX: looks incorrect */ + if (IN6_IS_ADDR_MULTICAST(prefix) || + IN6_IS_ADDR_UNSPECIFIED(prefix) || + IN6_IS_ADDR_LOOPBACK(prefix)) + return (1); + return (0); + } +#endif + return (1); +} + +static struct _s_x nat64statscmds[] = { + { "reset", TOK_RESET }, + { NULL, 0 } +}; + +/* + * This one handles all nat64stl-related commands + * ipfw [set N] nat64stl NAME {create | config} ... 
+ * ipfw [set N] nat64stl NAME stats [reset] + * ipfw [set N] nat64stl {NAME | all} destroy + * ipfw [set N] nat64stl {NAME | all} {list | show} + */ +#define nat64stl_check_name table_check_name +void +ipfw_nat64stl_handler(int ac, char *av[]) +{ + const char *name; + int tcmd; + uint8_t set; + + if (co.use_set != 0) + set = co.use_set - 1; + else + set = 0; + ac--; av++; + + NEED1("nat64stl needs instance name"); + name = *av; + if (nat64stl_check_name(name) != 0) { + if (strcmp(name, "all") == 0) + name = NULL; + else + errx(EX_USAGE, "nat64stl instance name %s is invalid", + name); + } + ac--; av++; + NEED1("nat64stl needs command"); + + tcmd = get_token(nat64cmds, *av, "nat64stl command"); + if (name == NULL && tcmd != TOK_DESTROY && tcmd != TOK_LIST) + errx(EX_USAGE, "nat64stl instance name required"); + switch (tcmd) { + case TOK_CREATE: + ac--; av++; + nat64stl_create(name, set, ac, av); + break; + case TOK_CONFIG: + ac--; av++; + nat64stl_config(name, set, ac, av); + break; + case TOK_LIST: + nat64stl_foreach(nat64stl_show_cb, name, set, 1); + break; + case TOK_DESTROY: + if (name == NULL) + nat64stl_foreach(nat64stl_destroy_cb, NULL, set, 0); + else + nat64stl_destroy(name, set); + break; + case TOK_STATS: + ac--; av++; + if (ac == 0) { + nat64stl_stats(name, set); + break; + } + tcmd = get_token(nat64statscmds, *av, "stats command"); + if (tcmd == TOK_RESET) + nat64stl_reset_stats(name, set); + } +} + + +static void +nat64stl_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint8_t set) +{ + + ntlv->head.type = IPFW_TLV_EACTION_NAME(1); /* it doesn't matter */ + ntlv->head.length = sizeof(ipfw_obj_ntlv); + ntlv->idx = 1; + ntlv->set = set; + strlcpy(ntlv->name, name, sizeof(ntlv->name)); +} + +static struct _s_x nat64newcmds[] = { + { "table4", TOK_TABLE4 }, + { "table6", TOK_TABLE6 }, + { "prefix6", TOK_PREFIX6 }, + { "log", TOK_LOG }, + { "-log", TOK_LOGOFF }, + { NULL, 0 } +}; + +/* + * Creates new nat64stl instance + * ipfw nat64stl create table4 table6 [ prefix6 ] + * Request: [ ipfw_obj_lheader ipfw_nat64stl_cfg ] + */ +#define NAT64STL_HAS_TABLE4 0x01 +#define NAT64STL_HAS_TABLE6 0x02 +#define NAT64STL_HAS_PREFIX6 0x04 +static void +nat64stl_create(const char *name, uint8_t set, int ac, char *av[]) +{ + char buf[sizeof(ipfw_obj_lheader) + sizeof(ipfw_nat64stl_cfg)]; + ipfw_nat64stl_cfg *cfg; + ipfw_obj_lheader *olh; + int tcmd, flags; + char *p; + + memset(buf, 0, sizeof(buf)); + olh = (ipfw_obj_lheader *)buf; + cfg = (ipfw_nat64stl_cfg *)(olh + 1); + + /* Some reasonable defaults */ + inet_pton(AF_INET6, "64:ff9b::", &cfg->prefix6); + cfg->plen6 = 96; + cfg->set = set; + flags = NAT64STL_HAS_PREFIX6; + while (ac > 0) { + tcmd = get_token(nat64newcmds, *av, "option"); + ac--; av++; + + switch (tcmd) { + case TOK_TABLE4: + NEED1("table name required"); + table_fill_ntlv(&cfg->ntlv4, *av, set, 4); + flags |= NAT64STL_HAS_TABLE4; + ac--; av++; + break; + case TOK_TABLE6: + NEED1("table name required"); + table_fill_ntlv(&cfg->ntlv6, *av, set, 6); + flags |= NAT64STL_HAS_TABLE6; + ac--; av++; + break; + case TOK_PREFIX6: + NEED1("IPv6 prefix6 required"); + if ((p = strchr(*av, '/')) != NULL) + *p++ = '\0'; + if (inet_pton(AF_INET6, *av, &cfg->prefix6) != 1) + errx(EX_USAGE, + "Bad prefix: %s", *av); + cfg->plen6 = strtol(p, NULL, 10); + if (nat64stl_check_prefix(&cfg->prefix6, + cfg->plen6) != 0) + errx(EX_USAGE, + "Bad prefix length: %s", p); + flags |= NAT64STL_HAS_PREFIX6; + ac--; av++; + break; + case TOK_LOG: + cfg->flags |= NAT64_LOG; + break; + case TOK_LOGOFF: + 
cfg->flags &= ~NAT64_LOG; + break; + } + } + + /* Check validness */ + if ((flags & NAT64STL_HAS_TABLE4) != NAT64STL_HAS_TABLE4) + errx(EX_USAGE, "table4 required"); + if ((flags & NAT64STL_HAS_TABLE6) != NAT64STL_HAS_TABLE6) + errx(EX_USAGE, "table6 required"); + if ((flags & NAT64STL_HAS_PREFIX6) != NAT64STL_HAS_PREFIX6) + errx(EX_USAGE, "prefix6 required"); + + olh->count = 1; + olh->objsize = sizeof(*cfg); + olh->size = sizeof(buf); + strlcpy(cfg->name, name, sizeof(cfg->name)); + if (do_set3(IP_FW_NAT64STL_CREATE, &olh->opheader, sizeof(buf)) != 0) + err(EX_OSERR, "nat64stl instance creation failed"); +} + +/* + * Configures existing nat64stl instance + * ipfw nat64stl config + * Request: [ ipfw_obj_header ipfw_nat64stl_cfg ] + */ +static void +nat64stl_config(const char *name, uint8_t set, int ac, char **av) +{ + char buf[sizeof(ipfw_obj_header) + sizeof(ipfw_nat64stl_cfg)]; + ipfw_nat64stl_cfg *cfg; + ipfw_obj_header *oh; + char *opt; + size_t sz; + int tcmd; + + if (ac == 0) + errx(EX_USAGE, "config options required"); + memset(&buf, 0, sizeof(buf)); + oh = (ipfw_obj_header *)buf; + cfg = (ipfw_nat64stl_cfg *)(oh + 1); + sz = sizeof(buf); + + nat64stl_fill_ntlv(&oh->ntlv, name, set); + if (do_get3(IP_FW_NAT64STL_CONFIG, &oh->opheader, &sz) != 0) + err(EX_OSERR, "failed to get config for instance %s", name); + + while (ac > 0) { + tcmd = get_token(nat64newcmds, *av, "option"); + opt = *av; + ac--; av++; + + switch (tcmd) { +#if 0 + case TOK_TABLE4: + NEED1("table name required"); + table_fill_ntlv(&cfg->ntlv4, *av, set, 4); + ac--; av++; + break; + case TOK_TABLE6: + NEED1("table name required"); + table_fill_ntlv(&cfg->ntlv6, *av, set, 6); + ac--; av++; + break; +#endif + case TOK_LOG: + cfg->flags |= NAT64_LOG; + break; + case TOK_LOGOFF: + cfg->flags &= ~NAT64_LOG; + break; + default: + errx(EX_USAGE, "Can't change %s option", opt); + } + } + + if (do_set3(IP_FW_NAT64STL_CONFIG, &oh->opheader, sizeof(buf)) != 0) + err(EX_OSERR, "nat64stl instance configuration failed"); +} + +/* + * Destroys nat64stl instance. + * Request: [ ipfw_obj_header ] + */ +static void +nat64stl_destroy(const char *name, uint8_t set) +{ + ipfw_obj_header oh; + + memset(&oh, 0, sizeof(oh)); + nat64stl_fill_ntlv(&oh.ntlv, name, set); + if (do_set3(IP_FW_NAT64STL_DESTROY, &oh.opheader, sizeof(oh)) != 0) + err(EX_OSERR, "failed to destroy nat instance %s", name); +} + +/* + * Get nat64stl instance statistics. 
+ * Request: [ ipfw_obj_header ] + * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ] ] + */ +static int +nat64stl_get_stats(const char *name, uint8_t set, + struct ipfw_nat64stl_stats *stats) +{ + ipfw_obj_header *oh; + ipfw_obj_ctlv *oc; + size_t sz; + + sz = sizeof(*oh) + sizeof(*oc) + sizeof(*stats); + oh = calloc(1, sz); + nat64stl_fill_ntlv(&oh->ntlv, name, set); + if (do_get3(IP_FW_NAT64STL_STATS, &oh->opheader, &sz) == 0) { + oc = (ipfw_obj_ctlv *)(oh + 1); + memcpy(stats, oc + 1, sizeof(*stats)); + free(oh); + return (0); + } + free(oh); + return (-1); +} + +#define _P_STAT(_s, _f) printf("%8s:\t%lu\n", #_f, _s._f) +static void +nat64stl_stats(const char *name, uint8_t set) +{ + struct ipfw_nat64stl_stats stats; + + if (nat64stl_get_stats(name, set, &stats) != 0) + err(EX_OSERR, "Error retrieving stats"); + + _P_STAT(stats, opcnt64); + _P_STAT(stats, opcnt46); + _P_STAT(stats, ofrags); + _P_STAT(stats, ifrags); + _P_STAT(stats, oerrors); + _P_STAT(stats, noroute4); + _P_STAT(stats, noroute6); + _P_STAT(stats, noproto); + _P_STAT(stats, nomem); + _P_STAT(stats, dropped); +} + +/* + * Reset nat64stl instance statistics specified by @oh->ntlv. + * Request: [ ipfw_obj_header ] + */ +static void +nat64stl_reset_stats(const char *name, uint8_t set) +{ + ipfw_obj_header oh; + + memset(&oh, 0, sizeof(oh)); + nat64stl_fill_ntlv(&oh.ntlv, name, set); + if (do_set3(IP_FW_NAT64STL_RESET_STATS, &oh.opheader, sizeof(oh)) != 0) + err(EX_OSERR, "failed to reset stats for instance %s", name); +} + +static int +nat64stl_show_cb(ipfw_nat64stl_cfg *cfg, const char *name, uint8_t set) +{ + + if (name != NULL && strcmp(cfg->name, name) != 0) + return (ESRCH); + + if (co.use_set != 0 && cfg->set != set) + return (ESRCH); + + if (co.use_set != 0 || cfg->set != 0) + printf("set %u ", cfg->set); + printf("nat64stl %s table4 %s table6 %s", + cfg->name, cfg->ntlv4.name, cfg->ntlv6.name); + if (cfg->flags & NAT64_LOG) + printf(" log"); + printf("\n"); + return (0); +} + +static int +nat64stl_destroy_cb(ipfw_nat64stl_cfg *cfg, const char *name, uint8_t set) +{ + + if (co.use_set != 0 && cfg->set != set) + return (ESRCH); + + nat64stl_destroy(cfg->name, cfg->set); + return (0); +} + + +/* + * Compare nat64stl instances names. + * Honor number comparison. + */ +static int +nat64name_cmp(const void *a, const void *b) +{ + ipfw_nat64stl_cfg *ca, *cb; + + ca = (ipfw_nat64stl_cfg *)a; + cb = (ipfw_nat64stl_cfg *)b; + + if (ca->set > cb->set) + return (1); + else if (ca->set < cb->set) + return (-1); + return (stringnum_cmp(ca->name, cb->name)); +} + +/* + * Retrieves nat64stl instance list from kernel, + * optionally sorts it and calls requested function for each instance. 
+ * + * Request: [ ipfw_obj_lheader ] + * Reply: [ ipfw_obj_lheader ipfw_nat64stl_cfg x N ] + */ +static int +nat64stl_foreach(nat64stl_cb_t *f, const char *name, uint8_t set, int sort) +{ + ipfw_obj_lheader *olh; + ipfw_nat64stl_cfg *cfg; + size_t sz; + int i, error; + + /* Start with reasonable default */ + sz = sizeof(*olh) + 16 * sizeof(*cfg); + for (;;) { + if ((olh = calloc(1, sz)) == NULL) + return (ENOMEM); + + olh->size = sz; + if (do_get3(IP_FW_NAT64STL_LIST, &olh->opheader, &sz) != 0) { + sz = olh->size; + free(olh); + if (errno != ENOMEM) + return (errno); + continue; + } + + if (sort != 0) + qsort(olh + 1, olh->count, olh->objsize, + nat64name_cmp); + + cfg = (ipfw_nat64stl_cfg *)(olh + 1); + for (i = 0; i < olh->count; i++) { + error = f(cfg, name, set); /* Ignore errors for now */ + cfg = (ipfw_nat64stl_cfg *)((caddr_t)cfg + + olh->objsize); + } + free(olh); + break; + } + return (0); +} + Index: head/sbin/ipfw/tables.c =================================================================== --- head/sbin/ipfw/tables.c +++ head/sbin/ipfw/tables.c @@ -53,8 +53,6 @@ static int table_swap(ipfw_obj_header *oh, char *second); static int table_get_info(ipfw_obj_header *oh, ipfw_xtable_info *i); static int table_show_info(ipfw_xtable_info *i, void *arg); -static void table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, - uint32_t set, uint16_t uidx); static int table_flush_one(ipfw_xtable_info *i, void *arg); static int table_show_one(ipfw_xtable_info *i, void *arg); @@ -155,7 +153,7 @@ ipfw_xtable_info i; ipfw_obj_header oh; char *tablename; - uint32_t set; + uint8_t set; void *arg; memset(&oh, 0, sizeof(oh)); @@ -292,8 +290,8 @@ } } -static void -table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint32_t set, +void +table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint8_t set, uint16_t uidx) { Index: head/sys/conf/NOTES =================================================================== --- head/sys/conf/NOTES +++ head/sys/conf/NOTES @@ -976,6 +976,8 @@ # IPFIREWALL_NAT adds support for in kernel nat in ipfw, and it requires # LIBALIAS. # +# IPFIREWALL_NAT64 adds support for in kernel NAT64 in ipfw. +# # IPFIREWALL_NPTV6 adds support for in kernel NPTv6 in ipfw. 
# # IPSTEALTH enables code to support stealth forwarding (i.e., forwarding @@ -999,6 +1001,7 @@ options IPFIREWALL_VERBOSE_LIMIT=100 #limit verbosity options IPFIREWALL_DEFAULT_TO_ACCEPT #allow everything by default options IPFIREWALL_NAT #ipfw kernel nat support +options IPFIREWALL_NAT64 #ipfw kernel NAT64 support options IPFIREWALL_NPTV6 #ipfw kernel IPv6 NPT support options IPDIVERT #divert sockets options IPFILTER #ipfilter support Index: head/sys/conf/files =================================================================== --- head/sys/conf/files +++ head/sys/conf/files @@ -3883,6 +3883,18 @@ netpfil/ipfw/ip_fw_table_value.c optional inet ipfirewall netpfil/ipfw/ip_fw_iface.c optional inet ipfirewall netpfil/ipfw/ip_fw_nat.c optional inet ipfirewall_nat +netpfil/ipfw/nat64/ip_fw_nat64.c optional inet inet6 ipfirewall \ + ipfirewall_nat64 +netpfil/ipfw/nat64/nat64lsn.c optional inet inet6 ipfirewall \ + ipfirewall_nat64 +netpfil/ipfw/nat64/nat64lsn_control.c optional inet inet6 ipfirewall \ + ipfirewall_nat64 +netpfil/ipfw/nat64/nat64stl.c optional inet inet6 ipfirewall \ + ipfirewall_nat64 +netpfil/ipfw/nat64/nat64stl_control.c optional inet inet6 ipfirewall \ + ipfirewall_nat64 +netpfil/ipfw/nat64/nat64_translate.c optional inet inet6 ipfirewall \ + ipfirewall_nat64 netpfil/ipfw/nptv6/ip_fw_nptv6.c optional inet inet6 ipfirewall \ ipfirewall_nptv6 netpfil/ipfw/nptv6/nptv6.c optional inet inet6 ipfirewall \ Index: head/sys/conf/options =================================================================== --- head/sys/conf/options +++ head/sys/conf/options @@ -418,6 +418,7 @@ IPFIREWALL opt_ipfw.h IPFIREWALL_DEFAULT_TO_ACCEPT opt_ipfw.h IPFIREWALL_NAT opt_ipfw.h +IPFIREWALL_NAT64_DIRECT_OUTPUT opt_ipfw.h IPFIREWALL_NPTV6 opt_ipfw.h IPFIREWALL_VERBOSE opt_ipfw.h IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h Index: head/sys/modules/Makefile =================================================================== --- head/sys/modules/Makefile +++ head/sys/modules/Makefile @@ -167,6 +167,7 @@ ${_ipfilter} \ ${_ipfw} \ ipfw_nat \ + ${_ipfw_nat64} \ ${_ipfw_nptv6} \ ${_ipmi} \ ip6_mroute_mod \ @@ -459,6 +460,9 @@ _if_me= if_me _ipdivert= ipdivert _ipfw= ipfw +.if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES) +_ipfw_nat64= ipfw_nat64 +.endif .endif .if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES) Index: head/sys/modules/ipfw_nat64/Makefile =================================================================== --- head/sys/modules/ipfw_nat64/Makefile +++ head/sys/modules/ipfw_nat64/Makefile @@ -0,0 +1,11 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../netpfil/ipfw/nat64 + +KMOD= ipfw_nat64 +SRCS= ip_fw_nat64.c nat64_translate.c +SRCS+= nat64lsn.c nat64lsn_control.c +SRCS+= nat64stl.c nat64stl_control.c +SRCS+= opt_ipfw.h + +.include Index: head/sys/netinet/ip_fw.h =================================================================== --- head/sys/netinet/ip_fw.h +++ head/sys/netinet/ip_fw.h @@ -110,6 +110,21 @@ #define IP_FW_DUMP_SOPTCODES 116 /* Dump available sopts/versions */ #define IP_FW_DUMP_SRVOBJECTS 117 /* Dump existing named objects */ +#define IP_FW_NAT64STL_CREATE 130 /* Create stateless NAT64 instance */ +#define IP_FW_NAT64STL_DESTROY 131 /* Destroy stateless NAT64 instance */ +#define IP_FW_NAT64STL_CONFIG 132 /* Modify stateless NAT64 instance */ +#define IP_FW_NAT64STL_LIST 133 /* List stateless NAT64 instances */ +#define IP_FW_NAT64STL_STATS 134 /* Get NAT64STL instance statistics */ +#define IP_FW_NAT64STL_RESET_STATS 135 /* Reset NAT64STL instance statistics */ + +#define 
IP_FW_NAT64LSN_CREATE 140 /* Create stateful NAT64 instance */ +#define IP_FW_NAT64LSN_DESTROY 141 /* Destroy stateful NAT64 instance */ +#define IP_FW_NAT64LSN_CONFIG 142 /* Modify stateful NAT64 instance */ +#define IP_FW_NAT64LSN_LIST 143 /* List stateful NAT64 instances */ +#define IP_FW_NAT64LSN_STATS 144 /* Get NAT64LSN instance statistics */ +#define IP_FW_NAT64LSN_LIST_STATES 145 /* Get stateful NAT64 states */ +#define IP_FW_NAT64LSN_RESET_STATS 146 /* Reset NAT64LSN instance statistics */ + #define IP_FW_NPTV6_CREATE 150 /* Create NPTv6 instance */ #define IP_FW_NPTV6_DESTROY 151 /* Destroy NPTv6 instance */ #define IP_FW_NPTV6_CONFIG 152 /* Modify NPTv6 instance */ @@ -792,11 +807,17 @@ #define IPFW_TLV_RANGE 9 #define IPFW_TLV_EACTION 10 #define IPFW_TLV_COUNTERS 11 +#define IPFW_TLV_OBJDATA 12 #define IPFW_TLV_STATE_NAME 14 #define IPFW_TLV_EACTION_BASE 1000 #define IPFW_TLV_EACTION_NAME(arg) (IPFW_TLV_EACTION_BASE + (arg)) +typedef struct _ipfw_obj_data { + ipfw_obj_tlv head; + void *data[0]; +} ipfw_obj_data; + /* Object name TLV */ typedef struct _ipfw_obj_ntlv { ipfw_obj_tlv head; /* TLV header */ Index: head/sys/netinet6/ip_fw_nat64.h =================================================================== --- head/sys/netinet6/ip_fw_nat64.h +++ head/sys/netinet6/ip_fw_nat64.h @@ -0,0 +1,154 @@ +/*- + * Copyright (c) 2015 Yandex LLC + * Copyright (c) 2015 Alexander V. Chernikov + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _NETINET6_IP_FW_NAT64_H_ +#define _NETINET6_IP_FW_NAT64_H_ + +struct ipfw_nat64stl_stats { + uint64_t opcnt64; /* 6to4 of packets translated */ + uint64_t opcnt46; /* 4to6 of packets translated */ + uint64_t ofrags; /* number of fragments generated */ + uint64_t ifrags; /* number of fragments received */ + uint64_t oerrors; /* number of output errors */ + uint64_t noroute4; + uint64_t noroute6; + uint64_t noproto; /* Protocol not supported */ + uint64_t nomem; /* mbuf allocation filed */ + uint64_t dropped; /* dropped due to some errors */ +}; + +struct ipfw_nat64lsn_stats { + uint64_t opcnt64; /* 6to4 of packets translated */ + uint64_t opcnt46; /* 4to6 of packets translated */ + uint64_t ofrags; /* number of fragments generated */ + uint64_t ifrags; /* number of fragments received */ + uint64_t oerrors; /* number of output errors */ + uint64_t noroute4; + uint64_t noroute6; + uint64_t noproto; /* Protocol not supported */ + uint64_t nomem; /* mbuf allocation filed */ + uint64_t dropped; /* dropped due to some errors */ + + uint64_t nomatch4; /* No addr/port match */ + uint64_t jcalls; /* Number of job handler calls */ + uint64_t jrequests; /* Number of job requests */ + uint64_t jhostsreq; /* Number of job host requests */ + uint64_t jportreq; /* Number of portgroup requests */ + uint64_t jhostfails; /* Number of failed host allocs */ + uint64_t jportfails; /* Number of failed portgroup allocs */ + uint64_t jreinjected; /* Number of packets reinjected to q */ + uint64_t jmaxlen; /* Max queue length reached */ + uint64_t jnomem; /* No memory to alloc queue item */ + + uint64_t screated; /* Number of states created */ + uint64_t sdeleted; /* Number of states deleted */ + uint64_t spgcreated; /* Number of portgroups created */ + uint64_t spgdeleted; /* Number of portgroups deleted */ + uint64_t hostcount; /* Number of hosts */ + uint64_t tcpchunks; /* Number of TCP chunks */ + uint64_t udpchunks; /* Number of UDP chunks */ + uint64_t icmpchunks; /* Number of ICMP chunks */ + + uint64_t _reserved[4]; +}; + +#define NAT64_LOG 0x0001 /* Enable logging via BPF */ + +typedef struct _ipfw_nat64stl_cfg { + char name[64]; /* NAT name */ + ipfw_obj_ntlv ntlv6; /* object name tlv */ + ipfw_obj_ntlv ntlv4; /* object name tlv */ + struct in6_addr prefix6; /* NAT64 prefix */ + uint8_t plen6; /* Prefix length */ + uint8_t set; /* Named instance set [0..31] */ + uint8_t spare[2]; + uint32_t flags; +} ipfw_nat64stl_cfg; + +/* + * NAT64LSN default configuration values + */ +#define NAT64LSN_MAX_PORTS 2048 /* Max number of ports per host */ +#define NAT64LSN_JMAXLEN 2048 /* Max outstanding requests. */ +#define NAT64LSN_TCP_SYN_AGE 10 /* State's TTL after SYN received. 
*/ +#define NAT64LSN_TCP_EST_AGE (2 * 3600) /* TTL for established connection */ +#define NAT64LSN_TCP_FIN_AGE 180 /* State's TTL after FIN/RST received */ +#define NAT64LSN_UDP_AGE 120 /* TTL for UDP states */ +#define NAT64LSN_ICMP_AGE 60 /* TTL for ICMP states */ +#define NAT64LSN_HOST_AGE 3600 /* TTL for stale host entry */ +#define NAT64LSN_PG_AGE 900 /* TTL for stale ports groups */ + +typedef struct _ipfw_nat64lsn_cfg { + char name[64]; /* NAT name */ + uint32_t flags; + uint32_t max_ports; /* Max ports per client */ + uint32_t agg_prefix_len; /* Prefix length to count */ + uint32_t agg_prefix_max; /* Max hosts per agg prefix */ + struct in_addr prefix4; + uint16_t plen4; /* Prefix length */ + uint16_t plen6; /* Prefix length */ + struct in6_addr prefix6; /* NAT64 prefix */ + uint32_t jmaxlen; /* Max jobqueue length */ + uint16_t min_port; /* Min port group # to use */ + uint16_t max_port; /* Max port group # to use */ + uint16_t nh_delete_delay;/* Stale host delete delay */ + uint16_t pg_delete_delay;/* Stale portgroup delete delay */ + uint16_t st_syn_ttl; /* TCP syn expire */ + uint16_t st_close_ttl; /* TCP fin expire */ + uint16_t st_estab_ttl; /* TCP established expire */ + uint16_t st_udp_ttl; /* UDP expire */ + uint16_t st_icmp_ttl; /* ICMP expire */ + uint8_t set; /* Named instance set [0..31] */ + uint8_t spare; +} ipfw_nat64lsn_cfg; + +typedef struct _ipfw_nat64lsn_state { + struct in_addr daddr; /* Remote IPv4 address */ + uint16_t dport; /* Remote destination port */ + uint16_t aport; /* Local alias port */ + uint16_t sport; /* Source port */ + uint8_t flags; /* State flags */ + uint8_t spare[3]; + uint16_t idle; /* Last used time */ +} ipfw_nat64lsn_state; + +typedef struct _ipfw_nat64lsn_stg { + uint64_t next_idx; /* next state index */ + struct in_addr alias4; /* IPv4 alias address */ + uint8_t proto; /* protocol */ + uint8_t flags; + uint16_t spare; + struct in6_addr host6; /* Bound IPv6 host */ + uint32_t count; /* Number of states */ + uint32_t spare2; +} ipfw_nat64lsn_stg; + +#endif /* _NETINET6_IP_FW_NAT64_H_ */ + Index: head/sys/netpfil/ipfw/nat64/ip_fw_nat64.h =================================================================== --- head/sys/netpfil/ipfw/nat64/ip_fw_nat64.h +++ head/sys/netpfil/ipfw/nat64/ip_fw_nat64.h @@ -0,0 +1,117 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IP_FW_NAT64_H_ +#define _IP_FW_NAT64_H_ + +#define DPRINTF(mask, fmt, ...) \ + if (nat64_debug & (mask)) \ + printf("NAT64: %s: " fmt "\n", __func__, ## __VA_ARGS__) +#define DP_GENERIC 0x0001 +#define DP_OBJ 0x0002 +#define DP_JQUEUE 0x0004 +#define DP_STATE 0x0008 +#define DP_DROPS 0x0010 +#define DP_ALL 0xFFFF +extern int nat64_debug; + +#if 0 +#define NAT64NOINLINE __noinline +#else +#define NAT64NOINLINE +#endif + +int nat64stl_init(struct ip_fw_chain *ch, int first); +void nat64stl_uninit(struct ip_fw_chain *ch, int last); +int nat64lsn_init(struct ip_fw_chain *ch, int first); +void nat64lsn_uninit(struct ip_fw_chain *ch, int last); + +struct ip_fw_nat64_stats { + counter_u64_t opcnt64; /* 6to4 of packets translated */ + counter_u64_t opcnt46; /* 4to6 of packets translated */ + counter_u64_t ofrags; /* number of fragments generated */ + counter_u64_t ifrags; /* number of fragments received */ + counter_u64_t oerrors; /* number of output errors */ + counter_u64_t noroute4; + counter_u64_t noroute6; + counter_u64_t nomatch4; /* No addr/port match */ + counter_u64_t noproto; /* Protocol not supported */ + counter_u64_t nomem; /* mbufs allocation failed */ + counter_u64_t dropped; /* number of packets silently + * dropped due to some errors/ + * unsupported/etc. 
+ */ + + counter_u64_t jrequests; /* number of jobs requests queued */ + counter_u64_t jcalls; /* number of jobs handler calls */ + counter_u64_t jhostsreq; /* number of hosts requests */ + counter_u64_t jportreq; + counter_u64_t jhostfails; + counter_u64_t jportfails; + counter_u64_t jmaxlen; + counter_u64_t jnomem; + counter_u64_t jreinjected; + + counter_u64_t screated; + counter_u64_t sdeleted; + counter_u64_t spgcreated; + counter_u64_t spgdeleted; +}; + +#define IPFW_NAT64_VERSION 1 +#define NAT64STATS (sizeof(struct ip_fw_nat64_stats) / sizeof(uint64_t)) +typedef struct _nat64_stats_block { + counter_u64_t stats[NAT64STATS]; +} nat64_stats_block; +#define NAT64STAT_ADD(s, f, v) \ + counter_u64_add((s)->stats[ \ + offsetof(struct ip_fw_nat64_stats, f) / sizeof(uint64_t)], (v)) +#define NAT64STAT_INC(s, f) NAT64STAT_ADD(s, f, 1) +#define NAT64STAT_FETCH(s, f) \ + counter_u64_fetch((s)->stats[ \ + offsetof(struct ip_fw_nat64_stats, f) / sizeof(uint64_t)]) + +#define L3HDR(_ip, _t) ((_t)((u_int32_t *)(_ip) + (_ip)->ip_hl)) +#define TCP(p) ((struct tcphdr *)(p)) +#define UDP(p) ((struct udphdr *)(p)) +#define ICMP(p) ((struct icmphdr *)(p)) +#define ICMP6(p) ((struct icmp6_hdr *)(p)) + +#define NAT64SKIP 0 +#define NAT64RETURN 1 +#define NAT64MFREE -1 + +/* Well-known prefix 64:ff9b::/96 */ +#define IPV6_ADDR_INT32_WKPFX htonl(0x64ff9b) +#define IN6_IS_ADDR_WKPFX(a) \ + ((a)->s6_addr32[0] == IPV6_ADDR_INT32_WKPFX && \ + (a)->s6_addr32[1] == 0 && (a)->s6_addr32[2] == 0) + +#endif + Index: head/sys/netpfil/ipfw/nat64/ip_fw_nat64.c =================================================================== --- head/sys/netpfil/ipfw/nat64/ip_fw_nat64.c +++ head/sys/netpfil/ipfw/nat64/ip_fw_nat64.c @@ -0,0 +1,129 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include +#include + + +int nat64_debug = 0; +SYSCTL_DECL(_net_inet_ip_fw); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_debug, CTLFLAG_RW, + &nat64_debug, 0, "Debug level for NAT64 module"); + +int nat64_allow_private = 0; +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_allow_private, CTLFLAG_RW, + &nat64_allow_private, 0, + "Allow use of non-global IPv4 addresses with NAT64"); + +static int +vnet_ipfw_nat64_init(const void *arg __unused) +{ + struct ip_fw_chain *ch; + int first, error; + + ch = &V_layer3_chain; + first = IS_DEFAULT_VNET(curvnet) ? 1: 0; + error = nat64stl_init(ch, first); + if (error != 0) + return (error); + error = nat64lsn_init(ch, first); + if (error != 0) { + nat64stl_uninit(ch, first); + return (error); + } + return (0); +} + +static int +vnet_ipfw_nat64_uninit(const void *arg __unused) +{ + struct ip_fw_chain *ch; + int last; + + ch = &V_layer3_chain; + last = IS_DEFAULT_VNET(curvnet) ? 1: 0; + nat64stl_uninit(ch, last); + nat64lsn_uninit(ch, last); + return (0); +} + +static int +ipfw_nat64_modevent(module_t mod, int type, void *unused) +{ + + switch (type) { + case MOD_LOAD: + case MOD_UNLOAD: + break; + default: + return (EOPNOTSUPP); + } + return (0); +} + +static moduledata_t ipfw_nat64_mod = { + "ipfw_nat64", + ipfw_nat64_modevent, + 0 +}; + +/* Define startup order. */ +#define IPFW_NAT64_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN +#define IPFW_NAT64_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */ +#define IPFW_NAT64_MODULE_ORDER (IPFW_NAT64_MODEVENT_ORDER + 1) +#define IPFW_NAT64_VNET_ORDER (IPFW_NAT64_MODEVENT_ORDER + 2) + +DECLARE_MODULE(ipfw_nat64, ipfw_nat64_mod, IPFW_NAT64_SI_SUB_FIREWALL, + SI_ORDER_ANY); +MODULE_DEPEND(ipfw_nat64, ipfw, 3, 3, 3); +MODULE_VERSION(ipfw_nat64, 1); + +VNET_SYSINIT(vnet_ipfw_nat64_init, IPFW_NAT64_SI_SUB_FIREWALL, + IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_init, NULL); +VNET_SYSUNINIT(vnet_ipfw_nat64_uninit, IPFW_NAT64_SI_SUB_FIREWALL, + IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_uninit, NULL); Index: head/sys/netpfil/ipfw/nat64/nat64_translate.h =================================================================== --- head/sys/netpfil/ipfw/nat64/nat64_translate.h +++ head/sys/netpfil/ipfw/nat64/nat64_translate.h @@ -0,0 +1,116 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IP_FW_NAT64_TRANSLATE_H_ +#define _IP_FW_NAT64_TRANSLATE_H_ + +#ifdef RTALLOC_NOLOCK +#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_fib_nolock((ro), 0, (fib)) +#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc_nolock((ro), (fib)) +#define FREE_ROUTE(ro) +#else +#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_ign_fib((ro), 0, (fib)) +#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc((ro), (fib)) +#define FREE_ROUTE(ro) RO_RTFREE((ro)) +#endif + +static inline int +nat64_check_ip6(struct in6_addr *addr) +{ + + /* XXX: We should really check /8 */ + if (addr->s6_addr16[0] == 0 || /* 0000::/8 Reserved by IETF */ + IN6_IS_ADDR_MULTICAST(addr) || IN6_IS_ADDR_LINKLOCAL(addr)) + return (1); + return (0); +} + +extern int nat64_allow_private; +static inline int +nat64_check_private_ip4(in_addr_t ia) +{ + + if (nat64_allow_private) + return (0); + /* WKPFX must not be used to represent non-global IPv4 addresses */ +// if (cfg->flags & NAT64_WKPFX) { + /* IN_PRIVATE */ + if ((ia & htonl(0xff000000)) == htonl(0x0a000000) || + (ia & htonl(0xfff00000)) == htonl(0xac100000) || + (ia & htonl(0xffff0000)) == htonl(0xc0a80000)) + return (1); + /* + * RFC 5735: + * 192.0.0.0/24 - reserved for IETF protocol assignments + * 192.88.99.0/24 - for use as 6to4 relay anycast addresses + * 198.18.0.0/15 - for use in benchmark tests + * 192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use + * in documentation and example code + */ + if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) || + (ia & htonl(0xffffff00)) == htonl(0xc0586300) || + (ia & htonl(0xfffffe00)) == htonl(0xc6120000) || + (ia & htonl(0xffffff00)) == htonl(0xc0000200) || + (ia & htonl(0xfffffe00)) == htonl(0xc6336400) || + (ia & htonl(0xffffff00)) == htonl(0xcb007100)) + return (1); +// } + return (0); +} + +static inline int +nat64_check_ip4(in_addr_t ia) +{ + + /* IN_LOOPBACK */ + if ((ia & htonl(0xff000000)) == htonl(0x7f000000)) + return (1); + /* IN_LINKLOCAL */ + if ((ia & htonl(0xffff0000)) == htonl(0xa9fe0000)) + return (1); + /* IN_MULTICAST & IN_EXPERIMENTAL */ + if ((ia & htonl(0xe0000000)) == htonl(0xe0000000)) + return (1); + return (0); +} + +#define nat64_get_ip4(_ip6) ((_ip6)->s6_addr32[3]) +#define nat64_set_ip4(_ip6, _ip4) (_ip6)->s6_addr32[3] = (_ip4) + +int nat64_getlasthdr(struct mbuf *m, int *offset); +int nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, + struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats, + void *logdata); +int nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, + nat64_stats_block *stats, void *logdata); +int nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, + nat64_stats_block *stats, void *logdata); + +#endif + Index: head/sys/netpfil/ipfw/nat64/nat64_translate.c =================================================================== --- head/sys/netpfil/ipfw/nat64/nat64_translate.c +++ head/sys/netpfil/ipfw/nat64/nat64_translate.c @@ -0,0 +1,1573 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 
Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "opt_ipfw.h" + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static void +nat64_log(struct pfloghdr *logdata, struct mbuf *m, sa_family_t family) +{ + + logdata->dir = PF_OUT; + logdata->af = family; + ipfw_bpf_mtap2(logdata, PFLOG_HDRLEN, m); +} +#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT +static __noinline struct sockaddr* nat64_find_route4(struct route *ro, + in_addr_t dest, struct mbuf *m); +static __noinline struct sockaddr* nat64_find_route6(struct route_in6 *ro, + struct in6_addr *dest, struct mbuf *m); + +static __noinline int +nat64_output(struct ifnet *ifp, struct mbuf *m, + struct sockaddr *dst, struct route *ro, nat64_stats_block *stats, + void *logdata) +{ + int error; + + if (logdata != NULL) + nat64_log(logdata, m, dst->sa_family); + error = (*ifp->if_output)(ifp, m, dst, ro); + if (error != 0) + NAT64STAT_INC(stats, oerrors); + return (error); +} + +static __noinline int +nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata) +{ + struct route_in6 ro6; + struct route ro4, *ro; + struct sockaddr *dst; + struct ifnet *ifp; + struct ip6_hdr *ip6; + struct ip *ip4; + int error; + + ip4 = mtod(m, struct ip *); + switch (ip4->ip_v) { + case IPVERSION: + ro = &ro4; + dst = nat64_find_route4(&ro4, ip4->ip_dst.s_addr, m); + if (dst == NULL) + NAT64STAT_INC(stats, noroute4); + break; + case (IPV6_VERSION >> 4): + ip6 = (struct ip6_hdr *)ip4; + ro = (struct route *)&ro6; + dst = nat64_find_route6(&ro6, &ip6->ip6_dst, m); + if (dst == NULL) + NAT64STAT_INC(stats, noroute6); + break; + default: + m_freem(m); + NAT64STAT_INC(stats, dropped); + DPRINTF(DP_DROPS, "dropped due to unknown IP version"); + return (EAFNOSUPPORT); + } + if (dst == NULL) { + FREE_ROUTE(ro); + m_freem(m); + return (EHOSTUNREACH); + } + if (logdata != NULL) + nat64_log(logdata, m, dst->sa_family); + ifp = ro->ro_rt->rt_ifp; + error = (*ifp->if_output)(ifp, m, dst, ro); 
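+ /* + * Note (summary of the two output modes, inferred from the code in this + * file): in this IPFIREWALL_NAT64_DIRECT_OUTPUT build the translated + * packet is pushed straight to the egress interface found by the route + * lookup above, and the TTL/Hop Limit is decremented at translation time + * rather than left to the forwarding code. + * The default build (#else branch below) re-queues the translated packet + * to netisr instead and lets the normal IPv4/IPv6 forwarding path do the + * decrement. + */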
+ if (error != 0) + NAT64STAT_INC(stats, oerrors); + FREE_ROUTE(ro); + return (error); +} +#else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ +static __noinline int +nat64_output(struct ifnet *ifp, struct mbuf *m, + struct sockaddr *dst, struct route *ro, nat64_stats_block *stats, + void *logdata) +{ + struct ip *ip4; + int ret, af; + + ip4 = mtod(m, struct ip *); + switch (ip4->ip_v) { + case IPVERSION: + af = AF_INET; + ret = NETISR_IP; + break; + case (IPV6_VERSION >> 4): + af = AF_INET6; + ret = NETISR_IPV6; + break; + default: + m_freem(m); + NAT64STAT_INC(stats, dropped); + DPRINTF(DP_DROPS, "unknown IP version"); + return (EAFNOSUPPORT); + } + if (logdata != NULL) + nat64_log(logdata, m, af); + ret = netisr_queue(ret, m); + if (ret != 0) + NAT64STAT_INC(stats, oerrors); + return (ret); +} + +static __noinline int +nat64_output_one(struct mbuf *m, nat64_stats_block *stats, void *logdata) +{ + + return (nat64_output(NULL, m, NULL, NULL, stats, logdata)); +} +#endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */ + + +#if 0 +void print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize); + +void +print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize) +{ + char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf)); + inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf)); + snprintf(buf, bufsize, "%s -> %s %d", sbuf, dbuf, ip6->ip6_nxt); +} + + +static __noinline int +nat64_embed_ip4(struct nat64_cfg *cfg, in_addr_t ia, struct in6_addr *ip6) +{ + + /* assume the prefix is properly filled with zeros */ + bcopy(&cfg->prefix, ip6, sizeof(*ip6)); + switch (cfg->plen) { + case 32: + case 96: + ip6->s6_addr32[cfg->plen / 32] = ia; + break; + case 40: + case 48: + case 56: +#if BYTE_ORDER == BIG_ENDIAN + ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] | + (ia >> (cfg->plen % 32)); + ip6->s6_addr32[2] = ia << (24 - cfg->plen % 32); +#elif BYTE_ORDER == LITTLE_ENDIAN + ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] | + (ia << (cfg->plen % 32)); + ip6->s6_addr32[2] = ia >> (24 - cfg->plen % 32); +#endif + break; + case 64: +#if BYTE_ORDER == BIG_ENDIAN + ip6->s6_addr32[2] = ia >> 8; + ip6->s6_addr32[3] = ia << 24; +#elif BYTE_ORDER == LITTLE_ENDIAN + ip6->s6_addr32[2] = ia << 8; + ip6->s6_addr32[3] = ia >> 24; +#endif + break; + default: + return (0); + }; + ip6->s6_addr8[8] = 0; + return (1); +} + +static __noinline in_addr_t +nat64_extract_ip4(struct in6_addr *ip6, int plen) +{ + in_addr_t ia; + + /* + * According to RFC 6052 p2.2: + * IPv4-embedded IPv6 addresses are composed of a variable-length + * prefix, the embedded IPv4 address, and a variable length suffix. + * The suffix bits are reserved for future extensions and SHOULD + * be set to zero. 
+ */ + switch (plen) { + case 32: + if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0) + goto badip6; + break; + case 40: + if (ip6->s6_addr32[3] != 0 || + (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0) + goto badip6; + break; + case 48: + if (ip6->s6_addr32[3] != 0 || + (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0) + goto badip6; + break; + case 56: + if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0) + goto badip6; + break; + case 64: + if (ip6->s6_addr8[8] != 0 || + (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0) + goto badip6; + }; + switch (plen) { + case 32: + case 96: + ia = ip6->s6_addr32[plen / 32]; + break; + case 40: + case 48: + case 56: +#if BYTE_ORDER == BIG_ENDIAN + ia = (ip6->s6_addr32[1] << (plen % 32)) | + (ip6->s6_addr32[2] >> (24 - plen % 32)); +#elif BYTE_ORDER == LITTLE_ENDIAN + ia = (ip6->s6_addr32[1] >> (plen % 32)) | + (ip6->s6_addr32[2] << (24 - plen % 32)); +#endif + break; + case 64: +#if BYTE_ORDER == BIG_ENDIAN + ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24); +#elif BYTE_ORDER == LITTLE_ENDIAN + ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24); +#endif + break; + default: + return (0); + }; + if (nat64_check_ip4(ia) != 0 || + nat64_check_private_ip4(ia) != 0) + goto badip4; + + return (ia); +badip4: + DPRINTF(DP_GENERIC, "invalid destination address: %08x", ia); + return (0); +badip6: + DPRINTF(DP_GENERIC, "invalid IPv4-embedded IPv6 address"); + return (0); +} +#endif + +/* + * According to RFC 1624 the equation for incremental checksum update is: + * HC' = ~(~HC + ~m + m') -- [Eqn. 3] + * HC' = HC - ~m - m' -- [Eqn. 4] + * So, when we are replacing IPv4 addresses to IPv6, we + * can assume, that new bytes previously were zeros, and vise versa - + * when we replacing IPv6 addresses to IPv4, now unused bytes become + * zeros. The payload length in pseudo header has bigger size, but one + * half of it should be zero. Using the equation 4 we get: + * HC' = HC - (~m0 + m0') -- m0 is first changed word + * HC' = (HC - (~m0 + m0')) - (~m1 + m1') -- m1 is second changed word + * HC' = HC - ~m0 - m0' - ~m1 - m1' - ... = + * = HC - sum(~m[i] + m'[i]) + * + * The function result should be used as follows: + * IPv6 to IPv4: HC' = cksum_add(HC, result) + * IPv4 to IPv6: HC' = cksum_add(HC, ~result) + */ +static __noinline uint16_t +nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip) +{ + uint32_t sum; + uint16_t *p; + + sum = ~ip->ip_src.s_addr >> 16; + sum += ~ip->ip_src.s_addr & 0xffff; + sum += ~ip->ip_dst.s_addr >> 16; + sum += ~ip->ip_dst.s_addr & 0xffff; + + for (p = (uint16_t *)&ip6->ip6_src; + p < (uint16_t *)(&ip6->ip6_src + 2); p++) + sum += *p; + + while (sum >> 16) + sum = (sum & 0xffff) + (sum >> 16); + return (sum); +} + +#if __FreeBSD_version < 1100000 +#define ip_fillid(ip) (ip)->ip_id = ip_newid() +#endif +static __noinline void +nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag, + uint16_t plen, uint8_t proto, struct ip *ip) +{ + + /* assume addresses are already initialized */ + ip->ip_v = IPVERSION; + ip->ip_hl = sizeof(*ip) >> 2; + ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; + ip->ip_len = htons(sizeof(*ip) + plen); +#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT + ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC; +#else + /* Forwarding code will decrement TTL. */ + ip->ip_ttl = ip6->ip6_hlim; +#endif + ip->ip_sum = 0; + ip->ip_p = (proto == IPPROTO_ICMPV6) ? 
IPPROTO_ICMP: proto; + ip_fillid(ip); + if (frag != NULL) { + ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3); + if (frag->ip6f_offlg & IP6F_MORE_FRAG) + ip->ip_off |= htons(IP_MF); + } else { + ip->ip_off = htons(IP_DF); + } + ip->ip_sum = in_cksum_hdr(ip); +} + +#define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag)) +static __noinline int +nat64_fragment6(nat64_stats_block *stats, struct ip6_hdr *ip6, struct mbufq *mq, + struct mbuf *m, uint32_t mtu, uint16_t ip_id, uint16_t ip_off) +{ + struct ip6_frag ip6f; + struct mbuf *n; + uint16_t hlen, len, offset; + int plen; + + plen = ntohs(ip6->ip6_plen); + hlen = sizeof(struct ip6_hdr); + + /* Fragmentation isn't needed */ + if (ip_off == 0 && plen <= mtu - hlen) { + M_PREPEND(m, hlen, M_NOWAIT); + if (m == NULL) { + NAT64STAT_INC(stats, nomem); + return (ENOMEM); + } + bcopy(ip6, mtod(m, void *), hlen); + if (mbufq_enqueue(mq, m) != 0) { + m_freem(m); + NAT64STAT_INC(stats, dropped); + DPRINTF(DP_DROPS, "dropped due to mbufq overflow"); + return (ENOBUFS); + } + return (0); + } + + hlen += sizeof(struct ip6_frag); + ip6f.ip6f_reserved = 0; + ip6f.ip6f_nxt = ip6->ip6_nxt; + ip6->ip6_nxt = IPPROTO_FRAGMENT; + if (ip_off != 0) { + /* + * We have got an IPv4 fragment. + * Use offset value and ip_id from original fragment. + */ + ip6f.ip6f_ident = htonl(ntohs(ip_id)); + offset = (ntohs(ip_off) & IP_OFFMASK) << 3; + NAT64STAT_INC(stats, ifrags); + } else { + /* The packet size exceeds interface MTU */ + ip6f.ip6f_ident = htonl(ip6_randomid()); + offset = 0; /* First fragment*/ + } + while (plen > 0 && m != NULL) { + n = NULL; + len = FRAGSZ(mtu) & ~7; + if (len > plen) + len = plen; + ip6->ip6_plen = htons(len + sizeof(ip6f)); + ip6f.ip6f_offlg = ntohs(offset); + if (len < plen || (ip_off & htons(IP_MF)) != 0) + ip6f.ip6f_offlg |= IP6F_MORE_FRAG; + offset += len; + plen -= len; + if (plen > 0) { + n = m_split(m, len, M_NOWAIT); + if (n == NULL) + goto fail; + } + M_PREPEND(m, hlen, M_NOWAIT); + if (m == NULL) + goto fail; + bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr)); + bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)), + sizeof(struct ip6_frag)); + if (mbufq_enqueue(mq, m) != 0) + goto fail; + m = n; + } + NAT64STAT_ADD(stats, ofrags, mbufq_len(mq)); + return (0); +fail: + if (m != NULL) + m_freem(m); + if (n != NULL) + m_freem(n); + mbufq_drain(mq); + NAT64STAT_INC(stats, nomem); + return (ENOMEM); +} + +#if __FreeBSD_version < 1100000 +#define rt_expire rt_rmx.rmx_expire +#define rt_mtu rt_rmx.rmx_mtu +#endif +static __noinline struct sockaddr* +nat64_find_route6(struct route_in6 *ro, struct in6_addr *dest, struct mbuf *m) +{ + struct sockaddr_in6 *dst; + struct rtentry *rt; + + bzero(ro, sizeof(*ro)); + dst = (struct sockaddr_in6 *)&ro->ro_dst; + dst->sin6_family = AF_INET6; + dst->sin6_len = sizeof(*dst); + dst->sin6_addr = *dest; + IN6_LOOKUP_ROUTE(ro, M_GETFIB(m)); + rt = ro->ro_rt; + if (rt && (rt->rt_flags & RTF_UP) && + (rt->rt_ifp->if_flags & IFF_UP) && + (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { + if (rt->rt_flags & RTF_GATEWAY) + dst = (struct sockaddr_in6 *)rt->rt_gateway; + } else + return (NULL); + if (((rt->rt_flags & RTF_REJECT) && + (rt->rt_expire == 0 || + time_uptime < rt->rt_expire)) || + rt->rt_ifp->if_link_state == LINK_STATE_DOWN) + return (NULL); + return ((struct sockaddr *)dst); +} + +#define NAT64_ICMP6_PLEN 64 +static __noinline void +nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu, + nat64_stats_block *stats, void *logdata) +{ + struct icmp6_hdr *icmp6; 
+ struct ip6_hdr *ip6, *oip6; + struct mbuf *n; + int len, plen; + + len = 0; + plen = nat64_getlasthdr(m, &len); + if (plen < 0) { + DPRINTF(DP_DROPS, "mbuf isn't contigious"); + goto freeit; + } + /* + * Do not send ICMPv6 in reply to ICMPv6 errors. + */ + if (plen == IPPROTO_ICMPV6) { + if (m->m_len < len + sizeof(*icmp6)) { + DPRINTF(DP_DROPS, "mbuf isn't contigious"); + goto freeit; + } + icmp6 = mtodo(m, len); + if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST || + icmp6->icmp6_type == ND_REDIRECT) { + DPRINTF(DP_DROPS, "do not send ICMPv6 in reply to " + "ICMPv6 errors"); + goto freeit; + } + } + /* + if (icmp6_ratelimit(&ip6->ip6_src, type, code)) + goto freeit; + */ + ip6 = mtod(m, struct ip6_hdr *); + switch (type) { + case ICMP6_DST_UNREACH: + case ICMP6_PACKET_TOO_BIG: + case ICMP6_TIME_EXCEEDED: + case ICMP6_PARAM_PROB: + break; + default: + goto freeit; + } + /* Calculate length of ICMPv6 payload */ + len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN: + m->m_pkthdr.len; + + /* Create new ICMPv6 datagram */ + plen = len + sizeof(struct icmp6_hdr); + n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT, + MT_HEADER, M_PKTHDR); + if (n == NULL) { + NAT64STAT_INC(stats, nomem); + m_freem(m); + return; + } + /* + * Move pkthdr from original mbuf. We should have initialized some + * fields, because we can reinject this mbuf to netisr and it will + * go trough input path (it requires at least rcvif should be set). + * Also do M_ALIGN() to reduce chances of need to allocate new mbuf + * in the chain, when we will do M_PREPEND() or make some type of + * tunneling. + */ + m_move_pkthdr(n, m); + M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr); + + n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen; + oip6 = mtod(n, struct ip6_hdr *); + oip6->ip6_src = ip6->ip6_dst; + oip6->ip6_dst = ip6->ip6_src; + oip6->ip6_nxt = IPPROTO_ICMPV6; + oip6->ip6_flow = 0; + oip6->ip6_vfc |= IPV6_VERSION; + oip6->ip6_hlim = V_ip6_defhlim; + oip6->ip6_plen = htons(plen); + + icmp6 = mtodo(n, sizeof(struct ip6_hdr)); + icmp6->icmp6_cksum = 0; + icmp6->icmp6_type = type; + icmp6->icmp6_code = code; + icmp6->icmp6_mtu = htonl(mtu); + + m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) + + sizeof(struct icmp6_hdr))); + icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6, + sizeof(struct ip6_hdr), plen); + m_freem(m); + nat64_output_one(n, stats, logdata); + return; +freeit: + NAT64STAT_INC(stats, dropped); + m_freem(m); +} + +static __noinline struct sockaddr* +nat64_find_route4(struct route *ro, in_addr_t dest, struct mbuf *m) +{ + struct sockaddr_in *dst; + struct rtentry *rt; + + bzero(ro, sizeof(*ro)); + dst = (struct sockaddr_in *)&ro->ro_dst; + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr.s_addr = dest; + IN_LOOKUP_ROUTE(ro, M_GETFIB(m)); + rt = ro->ro_rt; + if (rt && (rt->rt_flags & RTF_UP) && + (rt->rt_ifp->if_flags & IFF_UP) && + (rt->rt_ifp->if_drv_flags & IFF_DRV_RUNNING)) { + if (rt->rt_flags & RTF_GATEWAY) + dst = (struct sockaddr_in *)rt->rt_gateway; + } else + return (NULL); + if (((rt->rt_flags & RTF_REJECT) && + (rt->rt_expire == 0 || + time_uptime < rt->rt_expire)) || + rt->rt_ifp->if_link_state == LINK_STATE_DOWN) + return (NULL); + return ((struct sockaddr *)dst); +} + +#define NAT64_ICMP_PLEN 64 +static __noinline void +nat64_icmp_reflect(struct mbuf *m, uint8_t type, + uint8_t code, uint16_t mtu, nat64_stats_block *stats, void *logdata) +{ + struct icmp *icmp; + struct ip *ip, *oip; + struct mbuf *n; + int len, plen; + + ip = mtod(m, 
struct ip *); + /* Do not send ICMP error if packet is not the first fragment */ + if (ip->ip_off & ~ntohs(IP_MF|IP_DF)) { + DPRINTF(DP_DROPS, "not first fragment"); + goto freeit; + } + /* Do not send ICMP in reply to ICMP errors */ + if (ip->ip_p == IPPROTO_ICMP) { + if (m->m_len < (ip->ip_hl << 2)) { + DPRINTF(DP_DROPS, "mbuf isn't contigious"); + goto freeit; + } + icmp = mtodo(m, ip->ip_hl << 2); + if (!ICMP_INFOTYPE(icmp->icmp_type)) { + DPRINTF(DP_DROPS, "do not send ICMP in reply to " + "ICMP errors"); + goto freeit; + } + } + switch (type) { + case ICMP_UNREACH: + case ICMP_TIMXCEED: + case ICMP_PARAMPROB: + break; + default: + goto freeit; + } + /* Calculate length of ICMP payload */ + len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8: + m->m_pkthdr.len; + + /* Create new ICMPv4 datagram */ + plen = len + sizeof(struct icmphdr) + sizeof(uint32_t); + n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT, + MT_HEADER, M_PKTHDR); + if (n == NULL) { + NAT64STAT_INC(stats, nomem); + m_freem(m); + return; + } + m_move_pkthdr(n, m); + M_ALIGN(n, sizeof(struct ip) + plen + max_hdr); + + n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen; + oip = mtod(n, struct ip *); + oip->ip_v = IPVERSION; + oip->ip_hl = sizeof(struct ip) >> 2; + oip->ip_tos = 0; + oip->ip_len = htons(n->m_pkthdr.len); + oip->ip_ttl = V_ip_defttl; + oip->ip_p = IPPROTO_ICMP; + ip_fillid(oip); + oip->ip_off = htons(IP_DF); + oip->ip_src = ip->ip_dst; + oip->ip_dst = ip->ip_src; + oip->ip_sum = 0; + oip->ip_sum = in_cksum_hdr(oip); + + icmp = mtodo(n, sizeof(struct ip)); + icmp->icmp_type = type; + icmp->icmp_code = code; + icmp->icmp_cksum = 0; + icmp->icmp_pmvoid = 0; + icmp->icmp_nextmtu = htons(mtu); + m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) + + sizeof(struct icmphdr) + sizeof(uint32_t))); + icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen, + sizeof(struct ip)); + m_freem(m); + nat64_output_one(n, stats, logdata); + return; +freeit: + NAT64STAT_INC(stats, dropped); + m_freem(m); +} + +/* Translate ICMP echo request/reply into ICMPv6 */ +static void +nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6, + uint16_t id, uint8_t type) +{ + uint16_t old; + + old = *(uint16_t *)icmp6; /* save type+code in one word */ + icmp6->icmp6_type = type; + /* Reflect ICMPv6 -> ICMPv4 type translation in the cksum */ + icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, + old, *(uint16_t *)icmp6); + if (id != 0) { + old = icmp6->icmp6_id; + icmp6->icmp6_id = id; + /* Reflect ICMP id translation in the cksum */ + icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum, + old, id); + } + /* Reflect IPv6 pseudo header in the cksum */ + icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen), + IPPROTO_ICMPV6, ~icmp6->icmp6_cksum); +} + +static __noinline struct mbuf * +nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid, + int offset, nat64_stats_block *stats) +{ + struct ip ip; + struct icmp *icmp; + struct tcphdr *tcp; + struct udphdr *udp; + struct ip6_hdr *eip6; + struct mbuf *n; + uint32_t mtu; + int len, hlen, plen; + uint8_t type, code; + + if (m->m_len < offset + ICMP_MINLEN) + m = m_pullup(m, offset + ICMP_MINLEN); + if (m == NULL) { + NAT64STAT_INC(stats, nomem); + return (m); + } + mtu = 0; + icmp = mtodo(m, offset); + /* RFC 7915 p4.2 */ + switch (icmp->icmp_type) { + case ICMP_ECHOREPLY: + type = ICMP6_ECHO_REPLY; + code = 0; + break; + case ICMP_UNREACH: + type = ICMP6_DST_UNREACH; + switch (icmp->icmp_code) { + case ICMP_UNREACH_NET: + 
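/* + * This and the following "unreachable" codes have no exact ICMPv6 + * counterpart; RFC 7915 maps them all to the single + * ICMP6_DST_UNREACH_NOROUTE code used below. + */ +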
case ICMP_UNREACH_HOST: + case ICMP_UNREACH_SRCFAIL: + case ICMP_UNREACH_NET_UNKNOWN: + case ICMP_UNREACH_HOST_UNKNOWN: + case ICMP_UNREACH_TOSNET: + case ICMP_UNREACH_TOSHOST: + code = ICMP6_DST_UNREACH_NOROUTE; + break; + case ICMP_UNREACH_PROTOCOL: + type = ICMP6_PARAM_PROB; + code = ICMP6_PARAMPROB_NEXTHEADER; + break; + case ICMP_UNREACH_PORT: + code = ICMP6_DST_UNREACH_NOPORT; + break; + case ICMP_UNREACH_NEEDFRAG: + type = ICMP6_PACKET_TOO_BIG; + code = 0; + /* XXX: needs an additional look */ + mtu = max(IPV6_MMTU, ntohs(icmp->icmp_nextmtu) + 20); + break; + case ICMP_UNREACH_NET_PROHIB: + case ICMP_UNREACH_HOST_PROHIB: + case ICMP_UNREACH_FILTER_PROHIB: + case ICMP_UNREACH_PRECEDENCE_CUTOFF: + code = ICMP6_DST_UNREACH_ADMIN; + break; + default: + DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", + icmp->icmp_type, icmp->icmp_code); + goto freeit; + } + break; + case ICMP_TIMXCEED: + type = ICMP6_TIME_EXCEEDED; + code = icmp->icmp_code; + break; + case ICMP_ECHO: + type = ICMP6_ECHO_REQUEST; + code = 0; + break; + case ICMP_PARAMPROB: + type = ICMP6_PARAM_PROB; + switch (icmp->icmp_code) { + case ICMP_PARAMPROB_ERRATPTR: + case ICMP_PARAMPROB_LENGTH: + code = ICMP6_PARAMPROB_HEADER; + switch (icmp->icmp_pptr) { + case 0: /* Version/IHL */ + case 1: /* Type Of Service */ + mtu = icmp->icmp_pptr; + break; + case 2: /* Total Length */ + case 3: mtu = 4; /* Payload Length */ + break; + case 8: /* Time to Live */ + mtu = 7; /* Hop Limit */ + break; + case 9: /* Protocol */ + mtu = 6; /* Next Header */ + break; + case 12: /* Source address */ + case 13: + case 14: + case 15: + mtu = 8; + break; + case 16: /* Destination address */ + case 17: + case 18: + case 19: + mtu = 24; + break; + default: /* Silently drop */ + DPRINTF(DP_DROPS, "Unsupported ICMP type %d," + " code %d, pptr %d", icmp->icmp_type, + icmp->icmp_code, icmp->icmp_pptr); + goto freeit; + } + break; + default: + DPRINTF(DP_DROPS, "Unsupported ICMP type %d," + " code %d, pptr %d", icmp->icmp_type, + icmp->icmp_code, icmp->icmp_pptr); + goto freeit; + } + break; + default: + DPRINTF(DP_DROPS, "Unsupported ICMP type %d, code %d", + icmp->icmp_type, icmp->icmp_code); + goto freeit; + } + /* + * For echo request/reply we can use original payload, + * but we need adjust icmp_cksum, because ICMPv6 cksum covers + * IPv6 pseudo header and ICMPv6 types differs from ICMPv4. + */ + if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) { + nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type); + return (m); + } + /* + * For other types of ICMP messages we need to translate inner + * IPv4 header to IPv6 header. + * Assume ICMP src is the same as payload dst + * E.g. we have ( GWsrc1 , NATIP1 ) in outer header + * and ( NATIP1, Hostdst1 ) in ICMP copy header. + * In that case, we already have map for NATIP1 and GWsrc1. + * The only thing we need is to copy IPv6 map prefix to + * Hostdst1. 
+ */ + hlen = offset + ICMP_MINLEN; + if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) { + DPRINTF(DP_DROPS, "Message is too short %d", + m->m_pkthdr.len); + goto freeit; + } + m_copydata(m, hlen, sizeof(struct ip), (char *)&ip); + if (ip.ip_v != IPVERSION) { + DPRINTF(DP_DROPS, "Wrong IP version %d", ip.ip_v); + goto freeit; + } + hlen += ip.ip_hl << 2; /* Skip inner IP header */ + if (nat64_check_ip4(ip.ip_src.s_addr) != 0 || + nat64_check_ip4(ip.ip_dst.s_addr) != 0 || + nat64_check_private_ip4(ip.ip_src.s_addr) != 0 || + nat64_check_private_ip4(ip.ip_dst.s_addr) != 0) { + DPRINTF(DP_DROPS, "IP addresses checks failed %04x -> %04x", + ntohl(ip.ip_src.s_addr), ntohl(ip.ip_dst.s_addr)); + goto freeit; + } + if (m->m_pkthdr.len < hlen + ICMP_MINLEN) { + DPRINTF(DP_DROPS, "Message is too short %d", + m->m_pkthdr.len); + goto freeit; + } +#if 0 + /* + * Check that inner source matches the outer destination. + * XXX: We need some method to convert IPv4 into IPv6 address here, + * and compare IPv6 addresses. + */ + if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) { + DPRINTF(DP_GENERIC, "Inner source doesn't match destination ", + "%04x vs %04x", ip.ip_src.s_addr, + nat64_get_ip4(&ip6->ip6_dst)); + goto freeit; + } +#endif + /* + * Create new mbuf for ICMPv6 datagram. + * NOTE: len is data length just after inner IP header. + */ + len = m->m_pkthdr.len - hlen; + if (sizeof(struct ip6_hdr) + + sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN) + len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) - + sizeof(struct ip6_hdr); + plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len; + n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR); + if (n == NULL) { + NAT64STAT_INC(stats, nomem); + m_freem(m); + return (NULL); + } + m_move_pkthdr(n, m); + M_ALIGN(n, offset + plen + max_hdr); + n->m_len = n->m_pkthdr.len = offset + plen; + /* Adjust ip6_plen in outer header */ + ip6->ip6_plen = htons(plen); + /* Construct new inner IPv6 header */ + eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr)); + eip6->ip6_src = ip6->ip6_dst; + /* Use the fact that we have single /96 prefix for IPv4 map */ + eip6->ip6_dst = ip6->ip6_src; + nat64_set_ip4(&eip6->ip6_dst, ip.ip_dst.s_addr); + + eip6->ip6_flow = htonl(ip.ip_tos << 20); + eip6->ip6_vfc |= IPV6_VERSION; + eip6->ip6_hlim = ip.ip_ttl; + eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2)); + eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p; + m_copydata(m, hlen, len, (char *)(eip6 + 1)); + /* + * We need to translate source port in the inner ULP header, + * and adjust ULP checksum. + */ + switch (ip.ip_p) { + case IPPROTO_TCP: + if (len < offsetof(struct tcphdr, th_sum)) + break; + tcp = TCP(eip6 + 1); + if (icmpid != 0) { + tcp->th_sum = cksum_adjust(tcp->th_sum, + tcp->th_sport, icmpid); + tcp->th_sport = icmpid; + } + tcp->th_sum = cksum_add(tcp->th_sum, + ~nat64_cksum_convert(eip6, &ip)); + break; + case IPPROTO_UDP: + if (len < offsetof(struct udphdr, uh_sum)) + break; + udp = UDP(eip6 + 1); + if (icmpid != 0) { + udp->uh_sum = cksum_adjust(udp->uh_sum, + udp->uh_sport, icmpid); + udp->uh_sport = icmpid; + } + udp->uh_sum = cksum_add(udp->uh_sum, + ~nat64_cksum_convert(eip6, &ip)); + break; + case IPPROTO_ICMP: + /* + * Check if this is an ICMP error message for echo request + * that we sent. I.e. ULP in the data containing invoking + * packet is IPPROTO_ICMP and its type is ICMP_ECHO. 
+ */ + icmp = (struct icmp *)(eip6 + 1); + if (icmp->icmp_type != ICMP_ECHO) { + m_freem(n); + goto freeit; + } + /* + * For our client this original datagram should looks + * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST. + * Thus we need adjust icmp_cksum and convert type from + * ICMP_ECHO to ICMP6_ECHO_REQUEST. + */ + nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid, + ICMP6_ECHO_REQUEST); + } + m_freem(m); + /* Convert ICMPv4 into ICMPv6 header */ + icmp = mtodo(n, offset); + ICMP6(icmp)->icmp6_type = type; + ICMP6(icmp)->icmp6_code = code; + ICMP6(icmp)->icmp6_mtu = htonl(mtu); + ICMP6(icmp)->icmp6_cksum = 0; + ICMP6(icmp)->icmp6_cksum = cksum_add( + ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0), + in_cksum_skip(n, n->m_pkthdr.len, offset)); + return (n); +freeit: + m_freem(m); + NAT64STAT_INC(stats, dropped); + return (NULL); +} + +int +nat64_getlasthdr(struct mbuf *m, int *offset) +{ + struct ip6_hdr *ip6; + struct ip6_hbh *hbh; + int proto, hlen; + + if (offset != NULL) + hlen = *offset; + else + hlen = 0; + + if (m->m_len < hlen + sizeof(*ip6)) + return (-1); + + ip6 = mtodo(m, hlen); + hlen += sizeof(*ip6); + proto = ip6->ip6_nxt; + /* Skip extension headers */ + while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING || + proto == IPPROTO_DSTOPTS) { + hbh = mtodo(m, hlen); + /* + * We expect mbuf has contigious data up to + * upper level header. + */ + if (m->m_len < hlen) + return (-1); + /* + * We doesn't support Jumbo payload option, + * so return error. + */ + if (proto == IPPROTO_HOPOPTS && ip6->ip6_plen == 0) + return (-1); + proto = hbh->ip6h_nxt; + hlen += hbh->ip6h_len << 3; + } + if (offset != NULL) + *offset = hlen; + return (proto); +} + +int +nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr, + struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats, + void *logdata) +{ + struct route_in6 ro; + struct ip6_hdr ip6; + struct ifnet *ifp; + struct ip *ip; + struct mbufq mq; + struct sockaddr *dst; + uint32_t mtu; + uint16_t ip_id, ip_off; + uint16_t *csum; + int plen, hlen; + uint8_t proto; + + ip = mtod(m, struct ip*); + + if (ip->ip_ttl <= IPTTLDEC) { + nat64_icmp_reflect(m, ICMP_TIMXCEED, + ICMP_TIMXCEED_INTRANS, 0, stats, logdata); + return (NAT64RETURN); + } + + ip6.ip6_dst = *daddr; + ip6.ip6_src = *saddr; + + hlen = ip->ip_hl << 2; + plen = ntohs(ip->ip_len) - hlen; + proto = ip->ip_p; + + /* Save ip_id and ip_off, both are in network byte order */ + ip_id = ip->ip_id; + ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF); + + /* Fragment length must be multiple of 8 octets */ + if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) { + nat64_icmp_reflect(m, ICMP_PARAMPROB, + ICMP_PARAMPROB_LENGTH, 0, stats, logdata); + return (NAT64RETURN); + } + /* Fragmented ICMP is unsupported */ + if (proto == IPPROTO_ICMP && ip_off != 0) { + DPRINTF(DP_DROPS, "dropped due to fragmented ICMP"); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + + dst = nat64_find_route6(&ro, &ip6.ip6_dst, m); + if (dst == NULL) { + FREE_ROUTE(&ro); + NAT64STAT_INC(stats, noroute6); + nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, + stats, logdata); + return (NAT64RETURN); + } + ifp = ro.ro_rt->rt_ifp; + if (ro.ro_rt->rt_mtu != 0) + mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu); + else + mtu = ifp->if_mtu; + if (mtu < plen + sizeof(ip6) && (ip->ip_off & htons(IP_DF)) != 0) { + FREE_ROUTE(&ro); + nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, + FRAGSZ(mtu) + sizeof(struct ip), stats, logdata); + return (NAT64RETURN); + } + + 
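/* + * Rough sketch of the 4->6 translation performed below: the IPv4 TOS + * byte becomes the Traffic Class bits of ip6_flow, ip_ttl becomes the + * Hop Limit, the IPv4 payload length becomes ip6_plen, and IPPROTO_ICMP + * is rewritten to IPPROTO_ICMPV6. + * The TCP/UDP checksum is then patched incrementally (RFC 1624, see + * nat64_cksum_convert()): only the pseudo-header addresses change, so + * adding the delta between the new IPv6 and the old IPv4 address sums is + * enough and no payload walk is needed; ICMP payloads are translated + * separately in nat64_icmp_translate(). + */ +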
ip6.ip6_flow = htonl(ip->ip_tos << 20); + ip6.ip6_vfc |= IPV6_VERSION; +#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT + ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC; +#else + /* Forwarding code will decrement HLIM. */ + ip6.ip6_hlim = ip->ip_ttl; +#endif + ip6.ip6_plen = htons(plen); + ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto; + /* Convert checksums. */ + switch (proto) { + case IPPROTO_TCP: + csum = &TCP(mtodo(m, hlen))->th_sum; + if (lport != 0) { + struct tcphdr *tcp = TCP(mtodo(m, hlen)); + *csum = cksum_adjust(*csum, tcp->th_dport, lport); + tcp->th_dport = lport; + } + *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); + break; + case IPPROTO_UDP: + csum = &UDP(mtodo(m, hlen))->uh_sum; + if (lport != 0) { + struct udphdr *udp = UDP(mtodo(m, hlen)); + *csum = cksum_adjust(*csum, udp->uh_dport, lport); + udp->uh_dport = lport; + } + *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip)); + break; + case IPPROTO_ICMP: + m = nat64_icmp_translate(m, &ip6, lport, hlen, stats); + if (m == NULL) { + FREE_ROUTE(&ro); + /* stats already accounted */ + return (NAT64RETURN); + } + } + + m_adj(m, hlen); + mbufq_init(&mq, 255); + nat64_fragment6(stats, &ip6, &mq, m, mtu, ip_id, ip_off); + while ((m = mbufq_dequeue(&mq)) != NULL) { + if (nat64_output(ifp, m, dst, (struct route *)&ro, stats, + logdata) != 0) + break; + NAT64STAT_INC(stats, opcnt46); + } + mbufq_drain(&mq); + FREE_ROUTE(&ro); + return (NAT64RETURN); +} + +int +nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport, + nat64_stats_block *stats, void *logdata) +{ + struct ip ip; + struct icmp6_hdr *icmp6; + struct ip6_frag *ip6f; + struct ip6_hdr *ip6, *ip6i; + uint32_t mtu; + int plen, proto; + uint8_t type, code; + + if (hlen == 0) { + ip6 = mtod(m, struct ip6_hdr *); + if (nat64_check_ip6(&ip6->ip6_src) != 0 || + nat64_check_ip6(&ip6->ip6_dst) != 0) + return (NAT64SKIP); + + proto = nat64_getlasthdr(m, &hlen); + if (proto != IPPROTO_ICMPV6) { + DPRINTF(DP_DROPS, + "dropped due to mbuf isn't contigious"); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + } + + /* + * Translate ICMPv6 type and code to ICMPv4 (RFC7915). + * NOTE: ICMPv6 echo handled by nat64_do_handle_ip6(). + */ + icmp6 = mtodo(m, hlen); + mtu = 0; + switch (icmp6->icmp6_type) { + case ICMP6_DST_UNREACH: + type = ICMP_UNREACH; + switch (icmp6->icmp6_code) { + case ICMP6_DST_UNREACH_NOROUTE: + case ICMP6_DST_UNREACH_BEYONDSCOPE: + case ICMP6_DST_UNREACH_ADDR: + code = ICMP_UNREACH_HOST; + break; + case ICMP6_DST_UNREACH_ADMIN: + code = ICMP_UNREACH_HOST_PROHIB; + break; + case ICMP6_DST_UNREACH_NOPORT: + code = ICMP_UNREACH_PORT; + break; + default: + DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," + " code %d", icmp6->icmp6_type, + icmp6->icmp6_code); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + break; + case ICMP6_PACKET_TOO_BIG: + type = ICMP_UNREACH; + code = ICMP_UNREACH_NEEDFRAG; + mtu = ntohl(icmp6->icmp6_mtu); + if (mtu < IPV6_MMTU) { + DPRINTF(DP_DROPS, "Wrong MTU %d in ICMPv6 type %d," + " code %d", mtu, icmp6->icmp6_type, + icmp6->icmp6_code); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + /* + * Adjust MTU to reflect difference between + * IPv6 an IPv4 headers. 
+ */ + mtu -= sizeof(struct ip6_hdr) - sizeof(struct ip); + break; + case ICMP6_TIME_EXCEED_TRANSIT: + type = ICMP_TIMXCEED; + code = ICMP_TIMXCEED_INTRANS; + break; + case ICMP6_PARAM_PROB: + switch (icmp6->icmp6_code) { + case ICMP6_PARAMPROB_HEADER: + type = ICMP_PARAMPROB; + code = ICMP_PARAMPROB_ERRATPTR; + mtu = ntohl(icmp6->icmp6_pptr); + switch (mtu) { + case 0: /* Version/Traffic Class */ + case 1: /* Traffic Class/Flow Label */ + break; + case 4: /* Payload Length */ + case 5: + mtu = 2; + break; + case 6: /* Next Header */ + mtu = 9; + break; + case 7: /* Hop Limit */ + mtu = 8; + break; + default: + if (mtu >= 8 && mtu <= 23) { + mtu = 12; /* Source address */ + break; + } + if (mtu >= 24 && mtu <= 39) { + mtu = 16; /* Destination address */ + break; + } + DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," + " code %d, pptr %d", icmp6->icmp6_type, + icmp6->icmp6_code, mtu); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + case ICMP6_PARAMPROB_NEXTHEADER: + type = ICMP_UNREACH; + code = ICMP_UNREACH_PROTOCOL; + break; + default: + DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d," + " code %d, pptr %d", icmp6->icmp6_type, + icmp6->icmp6_code, ntohl(icmp6->icmp6_pptr)); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + break; + default: + DPRINTF(DP_DROPS, "Unsupported ICMPv6 type %d, code %d", + icmp6->icmp6_type, icmp6->icmp6_code); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + + hlen += sizeof(struct icmp6_hdr); + if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { + NAT64STAT_INC(stats, dropped); + DPRINTF(DP_DROPS, "Message is too short %d", + m->m_pkthdr.len); + return (NAT64MFREE); + } + /* + * We need at least ICMP_MINLEN bytes of original datagram payload + * to generate ICMP message. It is nice that ICMP_MINLEN is equal + * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment + * header we will not have to do m_pullup() again. + * + * What we have here: + * Outer header: (IPv6iGW, v4mapPRefix+v4exthost) + * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport] + * We need to translate it to: + * + * Outer header: (alias_host, v4exthost) + * Inner header: (v4exthost, alias_host) [sport, alias_port] + * + * Assume caller function has checked if v4mapPRefix+v4host + * matches configured prefix. + * The only two things we should be provided with are mapping between + * IPv6iHost <> alias_host and between dport and alias_port. 
+ */ + if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) + m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); + if (m == NULL) { + NAT64STAT_INC(stats, nomem); + return (NAT64RETURN); + } + ip6 = mtod(m, struct ip6_hdr *); + ip6i = mtodo(m, hlen); + ip6f = NULL; + proto = ip6i->ip6_nxt; + plen = ntohs(ip6i->ip6_plen); + hlen += sizeof(struct ip6_hdr); + if (proto == IPPROTO_FRAGMENT) { + if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) + + ICMP_MINLEN) + goto fail; + ip6f = mtodo(m, hlen); + proto = ip6f->ip6f_nxt; + plen -= sizeof(struct ip6_frag); + hlen += sizeof(struct ip6_frag); + /* Ajust MTU to reflect frag header size */ + if (type == ICMP_UNREACH && code == ICMP_UNREACH_NEEDFRAG) + mtu -= sizeof(struct ip6_frag); + } + if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { + DPRINTF(DP_DROPS, "Unsupported proto %d in the inner header", + proto); + goto fail; + } + if (nat64_check_ip6(&ip6i->ip6_src) != 0 || + nat64_check_ip6(&ip6i->ip6_dst) != 0) { + DPRINTF(DP_DROPS, "Inner addresses do not passes the check"); + goto fail; + } + /* Check if outer dst is the same as inner src */ + if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src)) { + DPRINTF(DP_DROPS, "Inner src doesn't match outer dst"); + goto fail; + } + + /* Now we need to make a fake IPv4 packet to generate ICMP message */ + ip.ip_dst.s_addr = aaddr; + ip.ip_src.s_addr = nat64_get_ip4(&ip6i->ip6_src); + /* XXX: Make fake ulp header */ +#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT + ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */ +#endif + nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip); + m_adj(m, hlen - sizeof(struct ip)); + bcopy(&ip, mtod(m, void *), sizeof(ip)); + nat64_icmp_reflect(m, type, code, (uint16_t)mtu, stats, logdata); + return (NAT64RETURN); +fail: + /* + * We must call m_freem() because mbuf pointer could be + * changed with m_pullup(). + */ + m_freem(m); + NAT64STAT_INC(stats, dropped); + return (NAT64RETURN); +} + +int +nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport, + nat64_stats_block *stats, void *logdata) +{ + struct route ro; + struct ip ip; + struct ifnet *ifp; + struct ip6_frag *frag; + struct ip6_hdr *ip6; + struct icmp6_hdr *icmp6; + struct sockaddr *dst; + uint16_t *csum; + uint32_t mtu; + int plen, hlen; + uint8_t proto; + + /* + * XXX: we expect ipfw_chk() did m_pullup() up to upper level + * protocol's headers. Also we skip some checks, that ip6_input(), + * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did. + */ + ip6 = mtod(m, struct ip6_hdr *); + if (nat64_check_ip6(&ip6->ip6_src) != 0 || + nat64_check_ip6(&ip6->ip6_dst) != 0) { + return (NAT64SKIP); + } + + /* Starting from this point we must not return zero */ + ip.ip_src.s_addr = aaddr; + if (nat64_check_ip4(ip.ip_src.s_addr) != 0) { + DPRINTF(DP_GENERIC, "invalid source address: %08x", + ip.ip_src.s_addr); + /* XXX: stats? */ + return (NAT64MFREE); + } + + ip.ip_dst.s_addr = nat64_get_ip4(&ip6->ip6_dst); + if (ip.ip_dst.s_addr == 0) { + /* XXX: stats? 
*/ + return (NAT64MFREE); + } + + if (ip6->ip6_hlim <= IPV6_HLIMDEC) { + nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED, + ICMP6_TIME_EXCEED_TRANSIT, 0, stats, logdata); + return (NAT64RETURN); + } + + hlen = 0; + plen = ntohs(ip6->ip6_plen); + proto = nat64_getlasthdr(m, &hlen); + if (proto < 0) { + DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + frag = NULL; + if (proto == IPPROTO_FRAGMENT) { + /* ipfw_chk should m_pullup up to frag header */ + if (m->m_len < hlen + sizeof(*frag)) { + DPRINTF(DP_DROPS, + "dropped due to mbuf isn't contigious"); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + frag = mtodo(m, hlen); + proto = frag->ip6f_nxt; + hlen += sizeof(*frag); + /* Fragmented ICMPv6 is unsupported */ + if (proto == IPPROTO_ICMPV6) { + DPRINTF(DP_DROPS, "dropped due to fragmented ICMPv6"); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + /* Fragment length must be multiple of 8 octets */ + if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 && + ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) { + nat64_icmp6_reflect(m, ICMP6_PARAM_PROB, + ICMP6_PARAMPROB_HEADER, + offsetof(struct ip6_hdr, ip6_plen), stats, + logdata); + return (NAT64RETURN); + } + } + plen -= hlen - sizeof(struct ip6_hdr); + if (plen < 0 || m->m_pkthdr.len < plen + hlen) { + DPRINTF(DP_DROPS, "plen %d, pkthdr.len %d, hlen %d", + plen, m->m_pkthdr.len, hlen); + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + + icmp6 = NULL; /* Make gcc happy */ + if (proto == IPPROTO_ICMPV6) { + icmp6 = mtodo(m, hlen); + if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST && + icmp6->icmp6_type != ICMP6_ECHO_REPLY) + return (nat64_handle_icmp6(m, hlen, aaddr, aport, + stats, logdata)); + } + dst = nat64_find_route4(&ro, ip.ip_dst.s_addr, m); + if (dst == NULL) { + FREE_ROUTE(&ro); + NAT64STAT_INC(stats, noroute4); + nat64_icmp6_reflect(m, ICMP6_DST_UNREACH, + ICMP6_DST_UNREACH_NOROUTE, 0, stats, logdata); + return (NAT64RETURN); + } + + ifp = ro.ro_rt->rt_ifp; + if (ro.ro_rt->rt_mtu != 0) + mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu); + else + mtu = ifp->if_mtu; + if (mtu < plen + sizeof(ip)) { + FREE_ROUTE(&ro); + nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, mtu, stats, + logdata); + return (NAT64RETURN); + } + nat64_init_ip4hdr(ip6, frag, plen, proto, &ip); + /* Convert checksums. 
*/ + switch (proto) { + case IPPROTO_TCP: + csum = &TCP(mtodo(m, hlen))->th_sum; + if (aport != 0) { + struct tcphdr *tcp = TCP(mtodo(m, hlen)); + *csum = cksum_adjust(*csum, tcp->th_sport, aport); + tcp->th_sport = aport; + } + *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); + break; + case IPPROTO_UDP: + csum = &UDP(mtodo(m, hlen))->uh_sum; + if (aport != 0) { + struct udphdr *udp = UDP(mtodo(m, hlen)); + *csum = cksum_adjust(*csum, udp->uh_sport, aport); + udp->uh_sport = aport; + } + *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip)); + break; + case IPPROTO_ICMPV6: + /* Checksum in ICMPv6 covers pseudo header */ + csum = &icmp6->icmp6_cksum; + *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen, + IPPROTO_ICMPV6, 0)); + /* Convert ICMPv6 types to ICMP */ + mtu = *(uint16_t *)icmp6; /* save old word for cksum_adjust */ + if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST) + icmp6->icmp6_type = ICMP_ECHO; + else /* ICMP6_ECHO_REPLY */ + icmp6->icmp6_type = ICMP_ECHOREPLY; + *csum = cksum_adjust(*csum, (uint16_t)mtu, *(uint16_t *)icmp6); + if (aport != 0) { + uint16_t old_id = icmp6->icmp6_id; + icmp6->icmp6_id = aport; + *csum = cksum_adjust(*csum, old_id, aport); + } + break; + }; + + m_adj(m, hlen - sizeof(ip)); + bcopy(&ip, mtod(m, void *), sizeof(ip)); + if (nat64_output(ifp, m, dst, &ro, stats, logdata) == 0) + NAT64STAT_INC(stats, opcnt64); + FREE_ROUTE(&ro); + return (NAT64RETURN); +} + Index: head/sys/netpfil/ipfw/nat64/nat64lsn.h =================================================================== --- head/sys/netpfil/ipfw/nat64/nat64lsn.h +++ head/sys/netpfil/ipfw/nat64/nat64lsn.h @@ -0,0 +1,351 @@ +/*- + * Copyright (c) 2015 Yandex LLC + * Copyright (c) 2015 Alexander V. Chernikov + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IP_FW_NAT64LSN_H_ +#define _IP_FW_NAT64LSN_H_ + +#define NAT64_CHUNK_SIZE_BITS 6 /* 64 ports */ +#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS) + +#define NAT64_MIN_PORT 1024 +#define NAT64_MIN_CHUNK (NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS) + +struct st_ptr { + uint8_t idx; /* index in nh->pg_ptr array. + * NOTE: it starts from 1. 
+ */ + uint8_t off; +}; +#define NAT64LSN_MAXPGPTR ((1 << (sizeof(uint8_t) * NBBY)) - 1) +#define NAT64LSN_PGPTRMASKBITS (sizeof(uint64_t) * NBBY) +#define NAT64LSN_PGPTRNMASK (roundup(NAT64LSN_MAXPGPTR, \ + NAT64LSN_PGPTRMASKBITS) / NAT64LSN_PGPTRMASKBITS) + +struct nat64lsn_portgroup; +/* sizeof(struct nat64lsn_host) = 64 + 64x2 + 8x8 = 256 bytes */ +struct nat64lsn_host { + struct rwlock h_lock; /* Host states lock */ + + struct in6_addr addr; + struct nat64lsn_host *next; + uint16_t timestamp; /* Last altered */ + uint16_t hsize; /* ports hash size */ + uint16_t pg_used; /* Number of portgroups used */ +#define NAT64LSN_REMAININGPG 8 /* Number of remaining PG before + * requesting of new chunk of indexes. + */ + uint16_t pg_allocated; /* Number of portgroups indexes + * allocated. + */ +#define NAT64LSN_HSIZE 64 + struct st_ptr phash[NAT64LSN_HSIZE]; /* XXX: hardcoded size */ + /* + * PG indexes are stored in chunks with 32 elements. + * The maximum count is limited to 255 due to st_ptr->idx is uint8_t. + */ +#define NAT64LSN_PGIDX_CHUNK 32 +#define NAT64LSN_PGNIDX (roundup(NAT64LSN_MAXPGPTR, \ + NAT64LSN_PGIDX_CHUNK) / NAT64LSN_PGIDX_CHUNK) + struct nat64lsn_portgroup **pg_ptr[NAT64LSN_PGNIDX]; /* PG indexes */ +}; + +#define NAT64_RLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_RLOCKED) +#define NAT64_WLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_WLOCKED) + +#define NAT64_RLOCK(h) rw_rlock(&(h)->h_lock) +#define NAT64_RUNLOCK(h) rw_runlock(&(h)->h_lock) +#define NAT64_WLOCK(h) rw_wlock(&(h)->h_lock) +#define NAT64_WUNLOCK(h) rw_wunlock(&(h)->h_lock) +#define NAT64_LOCK(h) NAT64_WLOCK(h) +#define NAT64_UNLOCK(h) NAT64_WUNLOCK(h) +#define NAT64_LOCK_INIT(h) do { \ + rw_init(&(h)->h_lock, "NAT64 host lock"); \ + } while (0) + +#define NAT64_LOCK_DESTROY(h) do { \ + rw_destroy(&(h)->h_lock); \ + } while (0) + +/* Internal proto index */ +#define NAT_PROTO_TCP 1 +#define NAT_PROTO_UDP 2 +#define NAT_PROTO_ICMP 3 + +#define NAT_MAX_PROTO 4 +extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO]; + +VNET_DECLARE(uint16_t, nat64lsn_eid); +#define V_nat64lsn_eid VNET(nat64lsn_eid) +#define IPFW_TLV_NAT64LSN_NAME IPFW_TLV_EACTION_NAME(V_nat64lsn_eid) + +/* Timestamp macro */ +#define _CT ((int)time_uptime % 65536) +#define SET_AGE(x) (x) = _CT +#define GET_AGE(x) ((_CT >= (x)) ? 
_CT - (x) : \ + (int)65536 + _CT - (x)) + +#ifdef __LP64__ +/* ffsl() is capable of checking 64-bit ints */ +#define _FFS64 +#endif + +/* 16 bytes */ +struct nat64lsn_state { + union { + struct { + in_addr_t faddr; /* Remote IPv4 address */ + uint16_t fport; /* Remote IPv4 port */ + uint16_t lport; /* Local IPv6 port */ + }s; + uint64_t hkey; + } u; + uint8_t nat_proto; + uint8_t flags; + uint16_t timestamp; + struct st_ptr cur; /* Index of portgroup in nat64lsn_host */ + struct st_ptr next; /* Next entry index */ +}; + +/* + * 1024+32 bytes per 64 states, used to store state + * AND for outside-in state lookup + */ +struct nat64lsn_portgroup { + struct nat64lsn_host *host; /* IPv6 source host info */ + in_addr_t aaddr; /* Alias addr, network format */ + uint16_t aport; /* Base port */ + uint16_t timestamp; + uint8_t nat_proto; + uint8_t spare[3]; + uint32_t idx; +#ifdef _FFS64 + uint64_t freemask; /* Mask of free entries */ +#else + uint32_t freemask[2]; /* Mask of free entries */ +#endif + struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */ +}; +#ifdef _FFS64 +#define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((uint64_t)1<<(_idx)) +#define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((uint64_t)1<<(_idx)) +#define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((uint64_t)1<<(_idx))) +#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0) +#define PG_GET_FREE_IDX(_pg) (ffsll((_pg)->freemask)) +#define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0) +#else +#define PG_MARK_BUSY_IDX(_pg, _idx) \ + (_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32)) +#define PG_MARK_FREE_IDX(_pg, _idx) \ + (_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32)) +#define PG_IS_FREE_IDX(_pg, _idx) \ + ((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32))) +#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0) +#define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg) +#define PG_IS_EMPTY(_pg) \ + ((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0)) + +static inline int +_pg_get_free_idx(const struct nat64lsn_portgroup *pg) +{ + int i; + + if ((i = ffsl(pg->freemask[0])) != 0) + return (i); + if ((i = ffsl(pg->freemask[1])) != 0) + return (i + 32); + return (0); +} + +#endif + +TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item); + +#define NAT64LSN_FLAGSMASK (NAT64_LOG) +struct nat64lsn_cfg { + struct named_object no; + //struct nat64_exthost *ex; /* Pointer to external addr array */ + struct nat64lsn_portgroup **pg; /* XXX: array of pointers */ + struct nat64lsn_host **ih; /* Host hash */ + uint32_t prefix4; /* IPv4 prefix */ + uint32_t pmask4; /* IPv4 prefix mask */ + uint32_t ihsize; /* IPv6 host hash size */ + uint8_t plen4; + uint8_t plen6; + uint8_t nomatch_verdict;/* What to return to ipfw on no-match */ + uint8_t nomatch_final; /* Exit outer loop? 
*/ + struct in6_addr prefix6; /* IPv6 prefix to embed IPv4 hosts */ + + uint32_t ihcount; /* Number of items in host hash */ + int max_chunks; /* Max chunks per client */ + int agg_prefix_len; /* Prefix length to count */ + int agg_prefix_max; /* Max hosts per agg prefix */ + uint32_t jmaxlen; /* Max jobqueue length */ + uint32_t flags; + uint16_t min_chunk; /* Min port group # to use */ + uint16_t max_chunk; /* Max port group # to use */ + uint16_t nh_delete_delay; /* Stale host delete delay */ + uint16_t pg_delete_delay; /* Stale portgroup del delay */ + uint16_t st_syn_ttl; /* TCP syn expire */ + uint16_t st_close_ttl; /* TCP fin expire */ + uint16_t st_estab_ttl; /* TCP established expire */ + uint16_t st_udp_ttl; /* UDP expire */ + uint16_t st_icmp_ttl; /* ICMP expire */ + uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */ + + struct callout periodic; + struct callout jcallout; + struct ip_fw_chain *ch; + struct vnet *vp; + struct nat64lsn_job_head jhead; + int jlen; + char name[64]; /* Nat instance name */ + nat64_stats_block stats; +}; + +struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch, + size_t numaddr); +void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg); +void nat64lsn_start_instance(struct nat64lsn_cfg *cfg); +void nat64lsn_init_internal(void); +void nat64lsn_uninit_internal(void); +int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args, + ipfw_insn *cmd, int *done); + +void +nat64lsn_dump_state(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st, + const char *px, int off); +/* + * Portgroup layout + * addr x nat_proto x port_off + * + */ + +#define _ADDR_PG_PROTO_COUNT (65536 >> NAT64_CHUNK_SIZE_BITS) +#define _ADDR_PG_COUNT (_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO) + +#define GET_ADDR_IDX(_cfg, _addr) ((_addr) - ((_cfg)->prefix4)) +#define __GET_PORTGROUP_IDX(_proto, _port) \ + ((_proto - 1) * _ADDR_PG_PROTO_COUNT + \ + ((_port) >> NAT64_CHUNK_SIZE_BITS)) + +#define _GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port) \ + GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT + \ + __GET_PORTGROUP_IDX(_proto, _port) +#define GET_PORTGROUP(_cfg, _addr, _proto, _port) \ + ((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)]) + +#define PORTGROUP_CHUNK(_nh, _idx) \ + ((_nh)->pg_ptr[(_idx)]) +#define PORTGROUP_BYSIDX(_cfg, _nh, _idx) \ + (PORTGROUP_CHUNK(_nh, (_idx - 1) / NAT64LSN_PGIDX_CHUNK) \ + [((_idx) - 1) % NAT64LSN_PGIDX_CHUNK]) + + +/* Chained hash table */ +#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do { \ + unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ + _PX##lock(_ph, _buck); \ + _x = _PX##first(_ph, _buck); \ + for ( ; _x != NULL; _x = _PX##next(_x)) { \ + if (_PX##cmp(_key, _PX##val(_x))) \ + break; \ + } \ + if (_x == NULL) \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_UNLOCK_BUCK(_ph, _PX, _buck) \ + _PX##unlock(_ph, _buck); + +#define CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do { \ + unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do { \ + unsigned int _buck = _PX##hash(_PX##val(_i)) & (_hsize - 1); \ + _PX##lock(_ph, _buck); \ + _PX##next(_i) = _PX##first(_ph, _buck); \ + _PX##first(_ph, _buck) = _i; \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do { \ + unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \ + _PX##lock(_ph, _buck); \ + _x = _PX##first(_ph, _buck); \ + _tmp = NULL; \ + for ( ; _x != NULL; _tmp 
= _x, _x = _PX##next(_x)) { \ + if (_PX##cmp(_key, _PX##val(_x))) \ + break; \ + } \ + if (_x != NULL) { \ + if (_tmp == NULL) \ + _PX##first(_ph, _buck) = _PX##next(_x); \ + else \ + _PX##next(_tmp) = _PX##next(_x); \ + } \ + _PX##unlock(_ph, _buck); \ +} while(0) + +#define CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do { \ + for (unsigned int _i = 0; _i < _hsize; _i++) { \ + _PX##lock(_ph, _i); \ + _x = _PX##first(_ph, _i); \ + _tmp = NULL; \ + for (; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \ + if (_cb(_x, _arg) == 0) \ + continue; \ + if (_tmp == NULL) \ + _PX##first(_ph, _i) = _PX##next(_x); \ + else \ + _tmp = _PX##next(_x); \ + } \ + _PX##unlock(_ph, _i); \ + } \ +} while(0) + +#define CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do { \ + unsigned int _buck; \ + for (unsigned int _i = 0; _i < _hsize; _i++) { \ + _x = _PX##first(_ph, _i); \ + _y = _x; \ + while (_y != NULL) { \ + _buck = _PX##hash(_PX##val(_x)) & (_nhsize - 1);\ + _y = _PX##next(_x); \ + _PX##next(_x) = _PX##first(_nph, _buck); \ + _PX##first(_nph, _buck) = _x; \ + } \ + } \ +} while(0) + +#endif /* _IP_FW_NAT64LSN_H_ */ + Index: head/sys/netpfil/ipfw/nat64/nat64lsn.c =================================================================== --- head/sys/netpfil/ipfw/nat64/nat64lsn.c +++ head/sys/netpfil/ipfw/nat64/nat64lsn.c @@ -0,0 +1,1770 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015 Alexander V. Chernikov + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +MALLOC_DEFINE(M_NAT64LSN, "NAT64LSN", "NAT64LSN"); + +static void nat64lsn_periodic(void *data); +#define PERIODIC_DELAY 4 +static uint8_t nat64lsn_proto_map[256]; +uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO]; + +#define NAT64_FLAG_FIN 0x01 /* FIN was seen */ +#define NAT64_FLAG_SYN 0x02 /* First syn in->out */ +#define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */ +#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN) + +#define NAT64_FLAG_RDR 0x80 /* Port redirect */ +#define NAT64_LOOKUP(chain, cmd) \ + (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1) +/* + * Delayed job queue, used to create new hosts + * and new portgroups + */ +enum nat64lsn_jtype { + JTYPE_NEWHOST = 1, + JTYPE_NEWPORTGROUP, + JTYPE_DELPORTGROUP, +}; + +struct nat64lsn_job_item { + TAILQ_ENTRY(nat64lsn_job_item) next; + enum nat64lsn_jtype jtype; + struct nat64lsn_host *nh; + struct nat64lsn_portgroup *pg; + void *spare_idx; + struct in6_addr haddr; + uint8_t nat_proto; + uint8_t done; + int needs_idx; + int delcount; + unsigned int fhash; /* Flow hash */ + uint32_t aaddr; /* Last used address (net) */ + struct mbuf *m; + struct ipfw_flow_id f_id; + uint64_t delmask[NAT64LSN_PGPTRNMASK]; +}; + +static struct mtx jmtx; +#define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF) +#define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx) +#define JQUEUE_LOCK() mtx_lock(&jmtx) +#define JQUEUE_UNLOCK() mtx_unlock(&jmtx) + +static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, + struct nat64lsn_job_item *ji); +static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg, + struct nat64lsn_job_head *jhead, int jlen); + +static struct nat64lsn_job_item *nat64lsn_create_job(struct nat64lsn_cfg *cfg, + const struct ipfw_flow_id *f_id, int jtype); +static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg, + const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr, + int needs_idx); +static int nat64lsn_request_host(struct nat64lsn_cfg *cfg, + const struct ipfw_flow_id *f_id, struct mbuf **pm); +static int nat64lsn_translate4(struct nat64lsn_cfg *cfg, + const struct ipfw_flow_id *f_id, struct mbuf **pm); +static int nat64lsn_translate6(struct nat64lsn_cfg *cfg, + struct ipfw_flow_id *f_id, struct mbuf **pm); + +static int alloc_portgroup(struct nat64lsn_job_item *ji); +static void destroy_portgroup(struct nat64lsn_portgroup *pg); +static void destroy_host6(struct nat64lsn_host *nh); +static int alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji); + +static int attach_portgroup(struct nat64lsn_cfg *cfg, + struct nat64lsn_job_item *ji); +static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji); + + +/* XXX tmp */ +static uma_zone_t nat64lsn_host_zone; +static uma_zone_t nat64lsn_pg_zone; +static uma_zone_t nat64lsn_pgidx_zone; + +static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, + struct nat64lsn_host *nh); + +#define I6_hash(x) (djb_hash((const unsigned char *)(x), 16)) +#define I6_first(_ph, h) (_ph)[h] +#define I6_next(x) (x)->next +#define I6_val(x) (&(x)->addr) +#define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b) +#define 
I6_lock(a, b) +#define I6_unlock(a, b) + +#define I6HASH_FIND(_cfg, _res, _a) \ + CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a) +#define I6HASH_INSERT(_cfg, _i) \ + CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i) +#define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \ + CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a) + +#define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \ + CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg) + +#define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8) + +static unsigned +djb_hash(const unsigned char *h, const int len) +{ + unsigned int result = 0; + int i; + + for (i = 0; i < len; i++) + result = 33 * result ^ h[i]; + + return (result); +} + +/* +static size_t +bitmask_size(size_t num, int *level) +{ + size_t x; + int c; + + for (c = 0, x = num; num > 1; num /= 64, c++) + ; + + return (x); +} + +static void +bitmask_prepare(uint64_t *pmask, size_t bufsize, int level) +{ + size_t x, z; + + memset(pmask, 0xFF, bufsize); + for (x = 0, z = 1; level > 1; x += z, z *= 64, level--) + ; + pmask[x] ~= 0x01; +} +*/ + +static void +nat64lsn_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family, + uint32_t n, uint32_t sn) +{ + + memset(plog, 0, sizeof(plog)); + plog->length = PFLOG_REAL_HDRLEN; + plog->af = family; + plog->action = PF_NAT; + plog->dir = PF_IN; + plog->rulenr = htonl(n); + plog->subrulenr = htonl(sn); + plog->ruleset[0] = '\0'; + strlcpy(plog->ifname, "NAT64LSN", sizeof(plog->ifname)); + ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m); +} +/* + * Inspects icmp packets to see if the message contains different + * packet header so we need to alter @addr and @port. + */ +static int +inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr, + uint16_t *port) +{ + struct ip *ip; + struct tcphdr *tcp; + struct udphdr *udp; + struct icmphdr *icmp; + int off; + uint8_t proto; + + ip = mtod(*m, struct ip *); /* Outer IP header */ + off = (ip->ip_hl << 2) + ICMP_MINLEN; + if ((*m)->m_len < off) + *m = m_pullup(*m, off); + if (*m == NULL) + return (ENOMEM); + + ip = mtod(*m, struct ip *); /* Outer IP header */ + icmp = L3HDR(ip, struct icmphdr *); + switch (icmp->icmp_type) { + case ICMP_ECHO: + case ICMP_ECHOREPLY: + /* Use icmp ID as distinguisher */ + *port = ntohs(*((uint16_t *)(icmp + 1))); + return (0); + case ICMP_UNREACH: + case ICMP_TIMXCEED: + break; + default: + return (EOPNOTSUPP); + } + /* + * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits + * of ULP header. + */ + if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN) + return (EINVAL); + if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN) + *m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN); + if (*m == NULL) + return (ENOMEM); + ip = mtodo(*m, off); /* Inner IP header */ + proto = ip->ip_p; + off += ip->ip_hl << 2; /* Skip inner IP header */ + *addr = ntohl(ip->ip_src.s_addr); + if ((*m)->m_len < off + ICMP_MINLEN) + *m = m_pullup(*m, off + ICMP_MINLEN); + if (*m == NULL) + return (ENOMEM); + switch (proto) { + case IPPROTO_TCP: + tcp = mtodo(*m, off); + *nat_proto = NAT_PROTO_TCP; + *port = ntohs(tcp->th_sport); + return (0); + case IPPROTO_UDP: + udp = mtodo(*m, off); + *nat_proto = NAT_PROTO_UDP; + *port = ntohs(udp->uh_sport); + return (0); + case IPPROTO_ICMP: + /* + * We will translate only ICMP errors for our ICMP + * echo requests. 
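+		 * The embedded echo id is used as the port value, so the
+		 * error is matched against the same state as the original
+		 * echo request.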
+ */ + icmp = mtodo(*m, off); + if (icmp->icmp_type != ICMP_ECHO) + return (EOPNOTSUPP); + *port = ntohs(*((uint16_t *)(icmp + 1))); + return (0); + }; + return (EOPNOTSUPP); +} + +static inline uint8_t +convert_tcp_flags(uint8_t flags) +{ + uint8_t result; + + result = flags & (TH_FIN|TH_SYN); + result |= (flags & TH_RST) >> 2; /* Treat RST as FIN */ + result |= (flags & TH_ACK) >> 2; /* Treat ACK as estab */ + + return (result); +} + +static NAT64NOINLINE int +nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id, + struct mbuf **pm) +{ + struct pfloghdr loghdr, *logdata; + struct in6_addr src6; + struct nat64lsn_portgroup *pg; + struct nat64lsn_host *nh; + struct nat64lsn_state *st; + struct ip *ip; + uint32_t addr; + uint16_t state_flags, state_ts; + uint16_t port, lport; + uint8_t nat_proto; + int ret; + + addr = f_id->dst_ip; + port = f_id->dst_port; + if (addr < cfg->prefix4 || addr > cfg->pmask4) { + NAT64STAT_INC(&cfg->stats, nomatch4); + return (cfg->nomatch_verdict); + } + + /* Check if protocol is supported and get its short id */ + nat_proto = nat64lsn_proto_map[f_id->proto]; + if (nat_proto == 0) { + NAT64STAT_INC(&cfg->stats, noproto); + return (cfg->nomatch_verdict); + } + + /* We might need to handle icmp differently */ + if (nat_proto == NAT_PROTO_ICMP) { + ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port); + if (ret != 0) { + if (ret == ENOMEM) + NAT64STAT_INC(&cfg->stats, nomem); + else + NAT64STAT_INC(&cfg->stats, noproto); + return (cfg->nomatch_verdict); + } + /* XXX: Check addr for validity */ + if (addr < cfg->prefix4 || addr > cfg->pmask4) { + NAT64STAT_INC(&cfg->stats, nomatch4); + return (cfg->nomatch_verdict); + } + } + + /* Calc portgroup offset w.r.t protocol */ + pg = GET_PORTGROUP(cfg, addr, nat_proto, port); + + /* Check if this port is occupied by any portgroup */ + if (pg == NULL) { + NAT64STAT_INC(&cfg->stats, nomatch4); +#if 0 + DPRINTF(DP_STATE, "NOMATCH %u %d %d (%d)", addr, nat_proto, port, + _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port)); +#endif + return (cfg->nomatch_verdict); + } + + /* TODO: Check flags to see if we need to do some static mapping */ + nh = pg->host; + + /* Prepare some fields we might need to update */ + SET_AGE(state_ts); + ip = mtod(*pm, struct ip *); + if (ip->ip_p == IPPROTO_TCP) + state_flags = convert_tcp_flags( + L3HDR(ip, struct tcphdr *)->th_flags); + else + state_flags = 0; + + /* Lock host and get port mapping */ + NAT64_LOCK(nh); + + st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)]; + if (st->timestamp != state_ts) + st->timestamp = state_ts; + if ((st->flags & state_flags) != state_flags) + st->flags |= state_flags; + lport = htons(st->u.s.lport); + + NAT64_UNLOCK(nh); + + if (cfg->flags & NAT64_LOG) { + logdata = &loghdr; + nat64lsn_log(logdata, *pm, AF_INET, pg->idx, st->cur.off); + } else + logdata = NULL; + + src6.s6_addr32[0] = cfg->prefix6.s6_addr32[0]; + src6.s6_addr32[1] = cfg->prefix6.s6_addr32[1]; + src6.s6_addr32[2] = cfg->prefix6.s6_addr32[2]; + src6.s6_addr32[3] = htonl(f_id->src_ip); + + ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport, + &cfg->stats, logdata); + + if (ret == NAT64SKIP) + return (IP_FW_PASS); + if (ret == NAT64MFREE) + m_freem(*pm); + *pm = NULL; + + return (IP_FW_DENY); +} + +void +nat64lsn_dump_state(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st, + const char *px, int off) +{ + char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], d[INET_ADDRSTRLEN]; + + if ((nat64_debug & DP_STATE) == 0) + return; + 
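/* Render the IPv6 client, alias and foreign addresses as text. */
+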
inet_ntop(AF_INET6, &pg->host->addr, s, sizeof(s)); + inet_ntop(AF_INET, &pg->aaddr, a, sizeof(a)); + inet_ntop(AF_INET, &st->u.s.faddr, d, sizeof(d)); + + DPRINTF(DP_STATE, "%s: PG %d ST [%p|%d]: %s:%d/%d <%s:%d> " + "%s:%d AGE %d", px, pg->idx, st, off, + s, st->u.s.lport, pg->nat_proto, a, pg->aport + off, + d, st->u.s.fport, GET_AGE(st->timestamp)); +} + +/* + * Check if particular TCP state is stale and should be deleted. + * Return 1 if true, 0 otherwise. + */ +static int +nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_state *st, int age) +{ + int ttl; + + if (st->flags & NAT64_FLAG_FIN) + ttl = cfg->st_close_ttl; + else if (st->flags & NAT64_FLAG_ESTAB) + ttl = cfg->st_estab_ttl; + else if (st->flags & NAT64_FLAG_SYN) + ttl = cfg->st_syn_ttl; + else + ttl = cfg->st_syn_ttl; + + if (age > ttl) + return (1); + return (0); +} + +/* + * Check if nat state @st is stale and should be deleted. + * Return 1 if true, 0 otherwise. + */ +static NAT64NOINLINE int +nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg, + const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st) +{ + int age, delete; + + age = GET_AGE(st->timestamp); + delete = 0; + + /* Skip immutable records */ + if (st->flags & NAT64_FLAG_RDR) + return (0); + + switch (pg->nat_proto) { + case NAT_PROTO_TCP: + delete = nat64lsn_periodic_check_tcp(cfg, st, age); + break; + case NAT_PROTO_UDP: + if (age > cfg->st_udp_ttl) + delete = 1; + break; + case NAT_PROTO_ICMP: + if (age > cfg->st_icmp_ttl) + delete = 1; + break; + } + + return (delete); +} + + +/* + * The following structures and functions + * are used to perform SLIST_FOREACH_SAFE() + * analog for states identified by struct st_ptr. + */ + +struct st_idx { + struct nat64lsn_portgroup *pg; + struct nat64lsn_state *st; + struct st_ptr sidx_next; +}; + +static struct st_idx * +st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh, + struct st_ptr *sidx, struct st_idx *si) +{ + struct nat64lsn_portgroup *pg; + struct nat64lsn_state *st; + + if (sidx->idx == 0) { + memset(si, 0, sizeof(*si)); + return (si); + } + + pg = PORTGROUP_BYSIDX(cfg, nh, sidx->idx); + st = &pg->states[sidx->off]; + + si->pg = pg; + si->st = st; + si->sidx_next = st->next; + + return (si); +} + +static struct st_idx * +st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh, + struct st_idx *si) +{ + struct st_ptr sidx; + struct nat64lsn_portgroup *pg; + struct nat64lsn_state *st; + + sidx = si->sidx_next; + if (sidx.idx == 0) { + memset(si, 0, sizeof(*si)); + si->st = NULL; + si->pg = NULL; + return (si); + } + + pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); + st = &pg->states[sidx.off]; + + si->pg = pg; + si->st = st; + si->sidx_next = st->next; + + return (si); +} + +static struct st_idx * +st_save_cond(struct st_idx *si_dst, struct st_idx *si) +{ + if (si->st != NULL) + *si_dst = *si; + + return (si_dst); +} + +unsigned int +nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh) +{ + struct st_idx si, si_prev; + int i; + unsigned int delcount; + + delcount = 0; + for (i = 0; i < nh->hsize; i++) { + memset(&si_prev, 0, sizeof(si_prev)); + for (st_first(cfg, nh, &nh->phash[i], &si); + si.st != NULL; + st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) { + if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0) + continue; + nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE", + si.st->cur.off); + /* Unlink from hash */ + if (si_prev.st != NULL) + si_prev.st->next = si.st->next; + else + 
nh->phash[i] = si.st->next; + /* Delete state and free its data */ + PG_MARK_FREE_IDX(si.pg, si.st->cur.off); + memset(si.st, 0, sizeof(struct nat64lsn_state)); + si.st = NULL; + delcount++; + + /* Update portgroup timestamp */ + SET_AGE(si.pg->timestamp); + } + } + NAT64STAT_ADD(&cfg->stats, sdeleted, delcount); + return (delcount); +} + +/* + * Checks if portgroup is not used and can be deleted, + * Returns 1 if stale, 0 otherwise + */ +static int +stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg) +{ + + if (!PG_IS_EMPTY(pg)) + return (0); + if (GET_AGE(pg->timestamp) < cfg->pg_delete_delay) + return (0); + return (1); +} + +/* + * Checks if host record is not used and can be deleted, + * Returns 1 if stale, 0 otherwise + */ +static int +stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh) +{ + + if (nh->pg_used != 0) + return (0); + if (GET_AGE(nh->timestamp) < cfg->nh_delete_delay) + return (0); + return (1); +} + +struct nat64lsn_periodic_data { + struct nat64lsn_cfg *cfg; + struct nat64lsn_job_head jhead; + int jlen; +}; + +static NAT64NOINLINE int +nat64lsn_periodic_chkhost(struct nat64lsn_host *nh, + struct nat64lsn_periodic_data *d) +{ + char a[INET6_ADDRSTRLEN]; + struct nat64lsn_portgroup *pg; + struct nat64lsn_job_item *ji; + uint64_t delmask[NAT64LSN_PGPTRNMASK]; + int delcount, i; + + delcount = 0; + memset(delmask, 0, sizeof(delmask)); + + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_JQUEUE, "Checking %s host %s on cpu %d", + stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu); + if (!stale_nh(d->cfg, nh)) { + /* Non-stale host. Inspect internals */ + NAT64_LOCK(nh); + + /* Stage 1: Check&expire states */ + if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0) + SET_AGE(nh->timestamp); + + /* Stage 2: Check if we need to expire */ + for (i = 0; i < nh->pg_used; i++) { + pg = PORTGROUP_BYSIDX(d->cfg, nh, i + 1); + if (pg == NULL) + continue; + + /* Check if we can delete portgroup */ + if (stale_pg(d->cfg, pg) == 0) + continue; + + DPRINTF(DP_JQUEUE, "Check PG %d", i); + delmask[i / 64] |= ((uint64_t)1 << (i % 64)); + delcount++; + } + + NAT64_UNLOCK(nh); + if (delcount == 0) + return (0); + } + + DPRINTF(DP_JQUEUE, "Queueing %d portgroups for deleting", delcount); + /* We have something to delete - add it to queue */ + ji = nat64lsn_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP); + if (ji == NULL) + return (0); + + ji->haddr = nh->addr; + ji->delcount = delcount; + memcpy(ji->delmask, delmask, sizeof(ji->delmask)); + + TAILQ_INSERT_TAIL(&d->jhead, ji, next); + d->jlen++; + return (0); +} + +/* + * This procedure is used to perform various maintance + * on dynamic hash list. Currently it is called every second. 
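+ * State entries are expired in place under the host lock, while
+ * portgroup and host removal is deferred to the job queue.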
+ */ +static void +nat64lsn_periodic(void *data) +{ + struct ip_fw_chain *ch; + IPFW_RLOCK_TRACKER; + struct nat64lsn_cfg *cfg; + struct nat64lsn_periodic_data d; + struct nat64lsn_host *nh, *tmp; + + cfg = (struct nat64lsn_cfg *) data; + ch = cfg->ch; + CURVNET_SET(cfg->vp); + + memset(&d, 0, sizeof(d)); + d.cfg = cfg; + TAILQ_INIT(&d.jhead); + + IPFW_RLOCK(ch); + + /* Stage 1: foreach host, check all its portgroups */ + I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d); + + /* Enqueue everything we have requested */ + nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen); + + callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY); + + IPFW_RUNLOCK(ch); + + CURVNET_RESTORE(); +} + +static NAT64NOINLINE void +reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + + if (ji->m == NULL) + return; + + /* Request has failed or packet type is wrong */ + if (ji->f_id.addr_type != 6 || ji->done == 0) { + m_freem(ji->m); + ji->m = NULL; + NAT64STAT_INC(&cfg->stats, dropped); + DPRINTF(DP_DROPS, "mbuf dropped: type %d, done %d", + ji->jtype, ji->done); + return; + } + + /* + * XXX: Limit recursion level + */ + + NAT64STAT_INC(&cfg->stats, jreinjected); + DPRINTF(DP_JQUEUE, "Reinject mbuf"); + nat64lsn_translate6(cfg, &ji->f_id, &ji->m); +} + +static void +destroy_portgroup(struct nat64lsn_portgroup *pg) +{ + + DPRINTF(DP_OBJ, "DESTROY PORTGROUP %d %p", pg->idx, pg); + uma_zfree(nat64lsn_pg_zone, pg); +} + +static NAT64NOINLINE int +alloc_portgroup(struct nat64lsn_job_item *ji) +{ + struct nat64lsn_portgroup *pg; + + pg = uma_zalloc(nat64lsn_pg_zone, M_NOWAIT); + if (pg == NULL) + return (1); + + if (ji->needs_idx != 0) { + ji->spare_idx = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT); + /* Failed alloc isn't always fatal, so don't check */ + } + memset(&pg->freemask, 0xFF, sizeof(pg->freemask)); + pg->nat_proto = ji->nat_proto; + ji->pg = pg; + return (0); + +} + +static void +destroy_host6(struct nat64lsn_host *nh) +{ + char a[INET6_ADDRSTRLEN]; + int i; + + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "DESTROY HOST %s %p (pg used %d)", a, nh, + nh->pg_used); + NAT64_LOCK_DESTROY(nh); + for (i = 0; i < nh->pg_allocated / NAT64LSN_PGIDX_CHUNK; i++) + uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, i)); + uma_zfree(nat64lsn_host_zone, nh); +} + +static NAT64NOINLINE int +alloc_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + struct nat64lsn_host *nh; + char a[INET6_ADDRSTRLEN]; + + nh = uma_zalloc(nat64lsn_host_zone, M_NOWAIT); + if (nh == NULL) + return (1); + PORTGROUP_CHUNK(nh, 0) = uma_zalloc(nat64lsn_pgidx_zone, M_NOWAIT); + if (PORTGROUP_CHUNK(nh, 0) == NULL) { + uma_zfree(nat64lsn_host_zone, nh); + return (2); + } + if (alloc_portgroup(ji) != 0) { + NAT64STAT_INC(&cfg->stats, jportfails); + uma_zfree(nat64lsn_pgidx_zone, PORTGROUP_CHUNK(nh, 0)); + uma_zfree(nat64lsn_host_zone, nh); + return (3); + } + + NAT64_LOCK_INIT(nh); + nh->addr = ji->haddr; + nh->hsize = NAT64LSN_HSIZE; /* XXX: hardcoded size */ + nh->pg_allocated = NAT64LSN_PGIDX_CHUNK; + nh->pg_used = 0; + ji->nh = nh; + + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "ALLOC HOST %s %p", a, ji->nh); + return (0); +} + +/* + * Finds free @pg index inside @nh + */ +static NAT64NOINLINE int +find_nh_pg_idx(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, int *idx) +{ + int i; + + for (i = 0; i < nh->pg_allocated; i++) { + if (PORTGROUP_BYSIDX(cfg, nh, i + 1) == NULL) { + *idx = i; + return (0); + } + } + return (1); +} + +static NAT64NOINLINE int 
+attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + char a[INET6_ADDRSTRLEN]; + struct nat64lsn_host *nh; + + I6HASH_FIND(cfg, nh, &ji->haddr); + if (nh == NULL) { + /* Add new host to list */ + nh = ji->nh; + I6HASH_INSERT(cfg, nh); + cfg->ihcount++; + ji->nh = NULL; + + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "ATTACH HOST %s %p", a, nh); + /* + * Try to add portgroup. + * Note it will automatically set + * 'done' on ji if successful. + */ + if (attach_portgroup(cfg, ji) != 0) { + DPRINTF(DP_DROPS, "%s %p failed to attach PG", + a, nh); + NAT64STAT_INC(&cfg->stats, jportfails); + return (1); + } + return (0); + } + + /* + * nh isn't NULL. This probably means we had several simultaneous + * host requests. The previous one request has already attached + * this host. Requeue attached mbuf and mark job as done, but + * leave nh and pg pointers not changed, so nat64lsn_do_request() + * will release all allocated resources. + */ + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ, "%s %p is already attached as %p", + a, ji->nh, nh); + ji->done = 1; + return (0); +} + +static NAT64NOINLINE int +find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off, + int nat_proto, uint16_t *aport, int *ppg_idx) +{ + int j, pg_idx; + + pg_idx = addr_off * _ADDR_PG_COUNT + + (nat_proto - 1) * _ADDR_PG_PROTO_COUNT; + + for (j = NAT64_MIN_CHUNK; j < _ADDR_PG_PROTO_COUNT; j++) { + if (cfg->pg[pg_idx + j] != NULL) + continue; + + *aport = j * NAT64_CHUNK_SIZE; + *ppg_idx = pg_idx + j; + return (1); + } + + return (0); +} + +/* + * XXX: This function needs to be rewritten to + * use free bitmask for faster pg finding, + * additionally, it should take into consideration + * a) randomization and + * b) previous addresses allocated to given nat instance + * + */ +static NAT64NOINLINE int +find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji, + uint32_t *aaddr, uint16_t *aport, int *ppg_idx) +{ + int i, nat_proto; + + /* + * XXX: Use bitmask index to be able to find/check if IP address + * has some spare pg's + */ + nat_proto = ji->nat_proto; + + /* First, try to use same address */ + if (ji->aaddr != 0) { + i = ntohl(ji->aaddr) - cfg->prefix4; + if (find_pg_place_addr(cfg, i, nat_proto, aport, + ppg_idx) != 0){ + /* Found! */ + *aaddr = htonl(cfg->prefix4 + i); + return (0); + } + } + + /* Next, try to use random address based on flow hash */ + i = ji->fhash % (1 << (32 - cfg->plen4)); + if (find_pg_place_addr(cfg, i, nat_proto, aport, ppg_idx) != 0) { + /* Found! */ + *aaddr = htonl(cfg->prefix4 + i); + return (0); + } + + + /* Last one: simply find ANY available */ + for (i = 0; i < (1 << (32 - cfg->plen4)); i++) { + if (find_pg_place_addr(cfg, i, nat_proto, aport, + ppg_idx) != 0){ + /* Found! 
*/ + *aaddr = htonl(cfg->prefix4 + i); + return (0); + } + } + + return (1); +} + +static NAT64NOINLINE int +attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + char a[INET6_ADDRSTRLEN]; + struct nat64lsn_portgroup *pg; + struct nat64lsn_host *nh; + uint32_t aaddr; + uint16_t aport; + int nh_pg_idx, pg_idx; + + pg = ji->pg; + + /* + * Find source host and bind: we can't rely on + * pg->host + */ + I6HASH_FIND(cfg, nh, &ji->haddr); + if (nh == NULL) + return (1); + + /* Find spare port chunk */ + if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0) { + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ | DP_DROPS, "empty PG not found for %s", a); + return (2); + } + + /* Expand PG indexes if needed */ + if (nh->pg_allocated < cfg->max_chunks && ji->spare_idx != NULL) { + PORTGROUP_CHUNK(nh, nh->pg_allocated / NAT64LSN_PGIDX_CHUNK) = + ji->spare_idx; + nh->pg_allocated += NAT64LSN_PGIDX_CHUNK; + ji->spare_idx = NULL; + } + + /* Find empty index to store PG in the @nh */ + if (find_nh_pg_idx(cfg, nh, &nh_pg_idx) != 0) { + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_OBJ | DP_DROPS, "free PG index not found for %s", + a); + return (3); + } + + cfg->pg[pg_idx] = pg; + cfg->protochunks[pg->nat_proto]++; + NAT64STAT_INC(&cfg->stats, spgcreated); + + pg->aaddr = aaddr; + pg->aport = aport; + pg->host = nh; + pg->idx = pg_idx; + SET_AGE(pg->timestamp); + + PORTGROUP_BYSIDX(cfg, nh, nh_pg_idx + 1) = pg; + if (nh->pg_used == nh_pg_idx) + nh->pg_used++; + SET_AGE(nh->timestamp); + + ji->pg = NULL; + ji->done = 1; + + return (0); +} + +static NAT64NOINLINE void +consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + struct nat64lsn_host *nh, *nh_tmp; + struct nat64lsn_portgroup *pg, *pg_list[256]; + int i, pg_lidx, idx; + + /* Find source host */ + I6HASH_FIND(cfg, nh, &ji->haddr); + if (nh == NULL || nh->pg_used == 0) + return; + + memset(pg_list, 0, sizeof(pg_list)); + pg_lidx = 0; + + NAT64_LOCK(nh); + + for (i = nh->pg_used - 1; i >= 0; i--) { + if ((ji->delmask[i / 64] & ((uint64_t)1 << (i % 64))) == 0) + continue; + pg = PORTGROUP_BYSIDX(cfg, nh, i + 1); + + /* Check that PG isn't busy. */ + if (stale_pg(cfg, pg) == 0) + continue; + + /* DO delete */ + pg_list[pg_lidx++] = pg; + PORTGROUP_BYSIDX(cfg, nh, i + 1) = NULL; + + idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto, + pg->aport); + KASSERT(cfg->pg[idx] == pg, ("Non matched pg")); + cfg->pg[idx] = NULL; + cfg->protochunks[pg->nat_proto]--; + NAT64STAT_INC(&cfg->stats, spgdeleted); + + /* Decrease pg_used */ + while (nh->pg_used > 0 && + PORTGROUP_BYSIDX(cfg, nh, nh->pg_used) == NULL) + nh->pg_used--; + + /* Check if on-stack buffer has ended */ + if (pg_lidx == nitems(pg_list)) + break; + } + + NAT64_UNLOCK(nh); + + if (stale_nh(cfg, nh)) { + I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr); + KASSERT(nh != NULL, ("Unable to find address")); + cfg->ihcount--; + ji->nh = nh; + I6HASH_FIND(cfg, nh, &ji->haddr); + KASSERT(nh == NULL, ("Failed to delete address")); + } + + /* TODO: Delay freeing portgroups */ + while (pg_lidx > 0) { + pg_lidx--; + NAT64STAT_INC(&cfg->stats, spgdeleted); + destroy_portgroup(pg_list[pg_lidx]); + } +} + +/* + * Main request handler. + * Responsible for handling jqueue, e.g. + * creating new hosts, addind/deleting portgroups. 
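+ * Requests are pre-allocated without locks, applied to the runtime
+ * state in a single batch under the chain write locks, and any unused
+ * allocations are freed afterwards.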
+ */ +static NAT64NOINLINE void +nat64lsn_do_request(void *data) +{ + IPFW_RLOCK_TRACKER; + struct nat64lsn_job_head jhead; + struct nat64lsn_job_item *ji; + int jcount, nhsize; + struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data; + struct ip_fw_chain *ch; + int delcount; + + CURVNET_SET(cfg->vp); + + TAILQ_INIT(&jhead); + + /* XXX: We're running unlocked here */ + + ch = cfg->ch; + delcount = 0; + IPFW_RLOCK(ch); + + /* Grab queue */ + JQUEUE_LOCK(); + TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next); + jcount = cfg->jlen; + cfg->jlen = 0; + JQUEUE_UNLOCK(); + + /* check if we need to resize hash */ + nhsize = 0; + if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) { + nhsize = cfg->ihsize; + for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2) + ; + } else if (cfg->ihcount < cfg->ihsize * 4) { + nhsize = cfg->ihsize; + for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2) + ; + } + + IPFW_RUNLOCK(ch); + + if (TAILQ_EMPTY(&jhead)) { + CURVNET_RESTORE(); + return; + } + + NAT64STAT_INC(&cfg->stats, jcalls); + DPRINTF(DP_JQUEUE, "count=%d", jcount); + + /* + * TODO: + * What we should do here is to build a hash + * to ensure we don't have lots of duplicate requests. + * Skip this for now. + * + * TODO: Limit per-call number of items + */ + + /* Pre-allocate everything for entire chain */ + TAILQ_FOREACH(ji, &jhead, next) { + switch (ji->jtype) { + case JTYPE_NEWHOST: + if (alloc_host6(cfg, ji) != 0) + NAT64STAT_INC(&cfg->stats, jhostfails); + break; + case JTYPE_NEWPORTGROUP: + if (alloc_portgroup(ji) != 0) + NAT64STAT_INC(&cfg->stats, jportfails); + break; + case JTYPE_DELPORTGROUP: + delcount += ji->delcount; + break; + default: + break; + } + } + + /* + * TODO: Alloc hew hash + */ + nhsize = 0; + if (nhsize > 0) { + /* XXX: */ + } + + /* Apply all changes in batch */ + IPFW_UH_WLOCK(ch); + IPFW_WLOCK(ch); + + TAILQ_FOREACH(ji, &jhead, next) { + switch (ji->jtype) { + case JTYPE_NEWHOST: + if (ji->nh != NULL) + attach_host6(cfg, ji); + break; + case JTYPE_NEWPORTGROUP: + if (ji->pg != NULL && + attach_portgroup(cfg, ji) != 0) + NAT64STAT_INC(&cfg->stats, jportfails); + break; + case JTYPE_DELPORTGROUP: + consider_del_portgroup(cfg, ji); + break; + } + } + + if (nhsize > 0) { + /* XXX: Move everything to new hash */ + } + + IPFW_WUNLOCK(ch); + IPFW_UH_WUNLOCK(ch); + + /* Flush unused entries */ + while (!TAILQ_EMPTY(&jhead)) { + ji = TAILQ_FIRST(&jhead); + TAILQ_REMOVE(&jhead, ji, next); + if (ji->nh != NULL) + destroy_host6(ji->nh); + if (ji->pg != NULL) + destroy_portgroup(ji->pg); + if (ji->m != NULL) + reinject_mbuf(cfg, ji); + if (ji->spare_idx != NULL) + uma_zfree(nat64lsn_pgidx_zone, ji->spare_idx); + free(ji, M_IPFW); + } + CURVNET_RESTORE(); +} + +static NAT64NOINLINE struct nat64lsn_job_item * +nat64lsn_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id, + int jtype) +{ + struct nat64lsn_job_item *ji; + struct in6_addr haddr; + uint8_t nat_proto; + + /* + * Do not try to lock possibly contested mutex if we're near the limit. + * Drop packet instead. 
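+	 * Note that jlen is read here without the queue mutex, so the
+	 * limit is only a soft bound.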
+ */ + if (cfg->jlen >= cfg->jmaxlen) { + NAT64STAT_INC(&cfg->stats, jmaxlen); + return (NULL); + } + + memset(&haddr, 0, sizeof(haddr)); + nat_proto = 0; + if (f_id != NULL) { + haddr = f_id->src_ip6; + nat_proto = nat64lsn_proto_map[f_id->proto]; + + DPRINTF(DP_JQUEUE, "REQUEST pg nat_proto %d on proto %d", + nat_proto, f_id->proto); + + if (nat_proto == 0) + return (NULL); + } + + ji = malloc(sizeof(struct nat64lsn_job_item), M_IPFW, + M_NOWAIT | M_ZERO); + + if (ji == NULL) { + NAT64STAT_INC(&cfg->stats, jnomem); + return (NULL); + } + + ji->jtype = jtype; + + if (f_id != NULL) { + ji->f_id = *f_id; + ji->haddr = haddr; + ji->nat_proto = nat_proto; + } + + return (ji); +} + +static NAT64NOINLINE void +nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji) +{ + + if (ji == NULL) + return; + + JQUEUE_LOCK(); + TAILQ_INSERT_TAIL(&cfg->jhead, ji, next); + cfg->jlen++; + NAT64STAT_INC(&cfg->stats, jrequests); + + if (callout_pending(&cfg->jcallout) == 0) + callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg); + JQUEUE_UNLOCK(); +} + +static NAT64NOINLINE void +nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg, + struct nat64lsn_job_head *jhead, int jlen) +{ + + if (TAILQ_EMPTY(jhead)) + return; + + /* Attach current queue to execution one */ + JQUEUE_LOCK(); + TAILQ_CONCAT(&cfg->jhead, jhead, next); + cfg->jlen += jlen; + NAT64STAT_ADD(&cfg->stats, jrequests, jlen); + + if (callout_pending(&cfg->jcallout) == 0) + callout_reset(&cfg->jcallout, 1, nat64lsn_do_request, cfg); + JQUEUE_UNLOCK(); +} + +static unsigned int +flow6_hash(const struct ipfw_flow_id *f_id) +{ + unsigned char hbuf[36]; + + memcpy(hbuf, &f_id->dst_ip6, 16); + memcpy(&hbuf[16], &f_id->src_ip6, 16); + memcpy(&hbuf[32], &f_id->dst_port, 2); + memcpy(&hbuf[32], &f_id->src_port, 2); + + return (djb_hash(hbuf, sizeof(hbuf))); +} + +static NAT64NOINLINE int +nat64lsn_request_host(struct nat64lsn_cfg *cfg, + const struct ipfw_flow_id *f_id, struct mbuf **pm) +{ + struct nat64lsn_job_item *ji; + struct mbuf *m; + + m = *pm; + *pm = NULL; + + ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWHOST); + if (ji == NULL) { + m_freem(m); + NAT64STAT_INC(&cfg->stats, dropped); + DPRINTF(DP_DROPS, "failed to create job"); + } else { + ji->m = m; + /* Provide pseudo-random value based on flow */ + ji->fhash = flow6_hash(f_id); + nat64lsn_enqueue_job(cfg, ji); + NAT64STAT_INC(&cfg->stats, jhostsreq); + } + + return (IP_FW_PASS); +} + +static NAT64NOINLINE int +nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg, + const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr, + int needs_idx) +{ + struct nat64lsn_job_item *ji; + struct mbuf *m; + + m = *pm; + *pm = NULL; + + ji = nat64lsn_create_job(cfg, f_id, JTYPE_NEWPORTGROUP); + if (ji == NULL) { + m_freem(m); + NAT64STAT_INC(&cfg->stats, dropped); + DPRINTF(DP_DROPS, "failed to create job"); + } else { + ji->m = m; + /* Provide pseudo-random value based on flow */ + ji->fhash = flow6_hash(f_id); + ji->aaddr = aaddr; + ji->needs_idx = needs_idx; + nat64lsn_enqueue_job(cfg, ji); + NAT64STAT_INC(&cfg->stats, jportreq); + } + + return (IP_FW_PASS); +} + +static NAT64NOINLINE struct nat64lsn_state * +nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh, + int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr) +{ + struct nat64lsn_portgroup *pg; + struct nat64lsn_state *st; + int i, hval, off; + + /* XXX: create additional bitmask for selecting proper portgroup */ + for (i = 0; i < nh->pg_used; i++) { + pg = PORTGROUP_BYSIDX(cfg, nh, i + 1); + 
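/* Skip index slots with no portgroup attached. */
+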
if (pg == NULL) + continue; + if (*aaddr == 0) + *aaddr = pg->aaddr; + if (pg->nat_proto != nat_proto) + continue; + + off = PG_GET_FREE_IDX(pg); + if (off != 0) { + /* We have found spare state. Use it */ + off--; + PG_MARK_BUSY_IDX(pg, off); + st = &pg->states[off]; + + /* + * Fill in new info. Assume state was zeroed. + * Timestamp and flags will be filled by caller. + */ + st->u.s = kst->u.s; + st->cur.idx = i + 1; + st->cur.off = off; + + /* Insert into host hash table */ + hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1); + st->next = nh->phash[hval]; + nh->phash[hval] = st->cur; + + nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off); + + NAT64STAT_INC(&cfg->stats, screated); + + return (st); + } + /* Saev last used alias affress */ + *aaddr = pg->aaddr; + } + + return (NULL); +} + +static NAT64NOINLINE int +nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id, + struct mbuf **pm) +{ + struct pfloghdr loghdr, *logdata; + char a[INET6_ADDRSTRLEN]; + struct nat64lsn_host *nh; + struct st_ptr sidx; + struct nat64lsn_state *st, kst; + struct nat64lsn_portgroup *pg; + struct icmp6_hdr *icmp6; + uint32_t aaddr; + int action, hval, nat_proto, proto; + uint16_t aport, state_ts, state_flags; + + /* Check if af/protocol is supported and get it short id */ + nat_proto = nat64lsn_proto_map[f_id->proto]; + if (nat_proto == 0) { + /* + * Since we can be called from jobs handler, we need + * to free mbuf by self, do not leave this task to + * ipfw_check_packet(). + */ + NAT64STAT_INC(&cfg->stats, noproto); + m_freem(*pm); + *pm = NULL; + return (IP_FW_DENY); + } + + /* Try to find host first */ + I6HASH_FIND(cfg, nh, &f_id->src_ip6); + + if (nh == NULL) + return (nat64lsn_request_host(cfg, f_id, pm)); + + /* Fill-in on-stack state structure */ + kst.u.s.faddr = f_id->dst_ip6.s6_addr32[3]; + kst.u.s.fport = f_id->dst_port; + kst.u.s.lport = f_id->src_port; + + /* Prepare some fields we might need to update */ + hval = 0; + proto = nat64_getlasthdr(*pm, &hval); + if (proto < 0) { + NAT64STAT_INC(&cfg->stats, dropped); + DPRINTF(DP_DROPS, "dropped due to mbuf isn't contigious"); + m_freem(*pm); + *pm = NULL; + return (IP_FW_DENY); + } + + SET_AGE(state_ts); + if (proto == IPPROTO_TCP) + state_flags = convert_tcp_flags( + TCP(mtodo(*pm, hval))->th_flags); + else + state_flags = 0; + if (proto == IPPROTO_ICMPV6) { + /* Alter local port data */ + icmp6 = mtodo(*pm, hval); + if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST || + icmp6->icmp6_type == ICMP6_ECHO_REPLY) + kst.u.s.lport = ntohs(icmp6->icmp6_id); + } + + hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1); + pg = NULL; + st = NULL; + + /* OK, let's find state in host hash */ + NAT64_LOCK(nh); + sidx = nh->phash[hval]; + int k = 0; + while (sidx.idx != 0) { + pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); + st = &pg->states[sidx.off]; + //DPRINTF("SISX: %d/%d next: %d/%d", sidx.idx, sidx.off, + //st->next.idx, st->next.off); + if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto) + break; + if (k++ > 1000) { + DPRINTF(DP_ALL, "XXX: too long %d/%d %d/%d\n", + sidx.idx, sidx.off, st->next.idx, st->next.off); + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_GENERIC, "TR host %s %p on cpu %d", + a, nh, curcpu); + k = 0; + } + sidx = st->next; + } + + if (sidx.idx == 0) { + aaddr = 0; + st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr); + if (st == NULL) { + /* No free states. 
Request more if we can */ + if (nh->pg_used >= cfg->max_chunks) { + /* Limit reached */ + NAT64STAT_INC(&cfg->stats, dropped); + inet_ntop(AF_INET6, &nh->addr, a, sizeof(a)); + DPRINTF(DP_DROPS, "PG limit reached " + " for host %s (used %u, allocated %u, " + "limit %u)", a, + nh->pg_used * NAT64_CHUNK_SIZE, + nh->pg_allocated * NAT64_CHUNK_SIZE, + cfg->max_chunks * NAT64_CHUNK_SIZE); + m_freem(*pm); + *pm = NULL; + NAT64_UNLOCK(nh); + return (IP_FW_DENY); + } + if ((nh->pg_allocated <= + nh->pg_used + NAT64LSN_REMAININGPG) && + nh->pg_allocated < cfg->max_chunks) + action = 1; /* Request new indexes */ + else + action = 0; + NAT64_UNLOCK(nh); + //DPRINTF("No state, unlock for %p", nh); + return (nat64lsn_request_portgroup(cfg, f_id, + pm, aaddr, action)); + } + + /* We've got new state. */ + sidx = st->cur; + pg = PORTGROUP_BYSIDX(cfg, nh, sidx.idx); + } + + /* Okay, state found */ + + /* Update necessary fileds */ + if (st->timestamp != state_ts) + st->timestamp = state_ts; + if ((st->flags & state_flags) != 0) + st->flags |= state_flags; + + /* Copy needed state data */ + aaddr = pg->aaddr; + aport = htons(pg->aport + sidx.off); + + NAT64_UNLOCK(nh); + + if (cfg->flags & NAT64_LOG) { + logdata = &loghdr; + nat64lsn_log(logdata, *pm, AF_INET6, pg->idx, st->cur.off); + } else + logdata = NULL; + + action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->stats, logdata); + if (action == NAT64SKIP) + return (IP_FW_PASS); + if (action == NAT64MFREE) + m_freem(*pm); + *pm = NULL; /* mark mbuf as consumed */ + return (IP_FW_DENY); +} + +/* + * Main dataplane entry point. + */ +int +ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args, + ipfw_insn *cmd, int *done) +{ + ipfw_insn *icmd; + struct nat64lsn_cfg *cfg; + int ret; + + IPFW_RLOCK_ASSERT(ch); + + *done = 1; /* terminate the search */ + icmd = cmd + 1; + if (cmd->opcode != O_EXTERNAL_ACTION || + cmd->arg1 != V_nat64lsn_eid || + icmd->opcode != O_EXTERNAL_INSTANCE || + (cfg = NAT64_LOOKUP(ch, icmd)) == NULL) + return (0); + + switch (args->f_id.addr_type) { + case 4: + ret = nat64lsn_translate4(cfg, &args->f_id, &args->m); + break; + case 6: + ret = nat64lsn_translate6(cfg, &args->f_id, &args->m); + break; + default: + return (0); + } + return (ret); +} + +static int +nat64lsn_ctor_host(void *mem, int size, void *arg, int flags) +{ + struct nat64lsn_host *nh; + + nh = (struct nat64lsn_host *)mem; + memset(nh->pg_ptr, 0, sizeof(nh->pg_ptr)); + memset(nh->phash, 0, sizeof(nh->phash)); + return (0); +} + +static int +nat64lsn_ctor_pgidx(void *mem, int size, void *arg, int flags) +{ + + memset(mem, 0, size); + return (0); +} + +void +nat64lsn_init_internal(void) +{ + + memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map)); + /* Set up supported protocol map */ + nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP; + nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP; + nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP; + nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP; + /* Fill in reverse proto map */ + memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map)); + nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP; + nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP; + nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6; + + JQUEUE_LOCK_INIT(); + nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone", + sizeof(struct nat64lsn_host), nat64lsn_ctor_host, NULL, + NULL, NULL, UMA_ALIGN_PTR, 0); + nat64lsn_pg_zone = uma_zcreate("NAT64 portgroups zone", + sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + 
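/* Each index chunk holds NAT64LSN_PGIDX_CHUNK (32) portgroup pointers. */
+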
nat64lsn_pgidx_zone = uma_zcreate("NAT64 portgroup indexes zone", + sizeof(struct nat64lsn_portgroup *) * NAT64LSN_PGIDX_CHUNK, + nat64lsn_ctor_pgidx, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); +} + +void +nat64lsn_uninit_internal(void) +{ + + JQUEUE_LOCK_DESTROY(); + uma_zdestroy(nat64lsn_host_zone); + uma_zdestroy(nat64lsn_pg_zone); + uma_zdestroy(nat64lsn_pgidx_zone); +} + +void +nat64lsn_start_instance(struct nat64lsn_cfg *cfg) +{ + + callout_reset(&cfg->periodic, hz * PERIODIC_DELAY, + nat64lsn_periodic, cfg); +} + +struct nat64lsn_cfg * +nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr) +{ + struct nat64lsn_cfg *cfg; + + cfg = malloc(sizeof(struct nat64lsn_cfg), M_IPFW, M_WAITOK | M_ZERO); + TAILQ_INIT(&cfg->jhead); + cfg->vp = curvnet; + cfg->ch = ch; + COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK); + + cfg->ihsize = NAT64LSN_HSIZE; + cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW, + M_WAITOK | M_ZERO); + + cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW, + M_WAITOK | M_ZERO); + + callout_init(&cfg->periodic, CALLOUT_MPSAFE); + callout_init(&cfg->jcallout, CALLOUT_MPSAFE); + + return (cfg); +} + +/* + * Destroy all hosts callback. + * Called on module unload when all activity already finished, so + * can work without any locks. + */ +static NAT64NOINLINE int +nat64lsn_destroy_host(struct nat64lsn_host *nh, struct nat64lsn_cfg *cfg) +{ + struct nat64lsn_portgroup *pg; + int i; + + for (i = nh->pg_used; i > 0; i--) { + pg = PORTGROUP_BYSIDX(cfg, nh, i); + if (pg == NULL) + continue; + cfg->pg[pg->idx] = NULL; + destroy_portgroup(pg); + nh->pg_used--; + } + destroy_host6(nh); + cfg->ihcount--; + return (0); +} + +void +nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg) +{ + struct nat64lsn_host *nh, *tmp; + + JQUEUE_LOCK(); + callout_drain(&cfg->jcallout); + JQUEUE_UNLOCK(); + + callout_drain(&cfg->periodic); + I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_destroy_host, cfg); + DPRINTF(DP_OBJ, "instance %s: hosts %d", cfg->name, cfg->ihcount); + + COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS); + free(cfg->ih, M_IPFW); + free(cfg->pg, M_IPFW); + free(cfg, M_IPFW); +} + Index: head/sys/netpfil/ipfw/nat64/nat64lsn_control.c =================================================================== --- head/sys/netpfil/ipfw/nat64/nat64lsn_control.c +++ head/sys/netpfil/ipfw/nat64/nat64lsn_control.c @@ -0,0 +1,917 @@ +/*- + * Copyright (c) 2015 Yandex LLC + * Copyright (c) 2015 Alexander V. Chernikov + * Copyright (c) 2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +VNET_DEFINE(uint16_t, nat64lsn_eid) = 0; + +static struct nat64lsn_cfg * +nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set) +{ + struct nat64lsn_cfg *cfg; + + cfg = (struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set, + IPFW_TLV_NAT64LSN_NAME, name); + + return (cfg); +} + +static void +nat64lsn_default_config(ipfw_nat64lsn_cfg *uc) +{ + + if (uc->max_ports == 0) + uc->max_ports = NAT64LSN_MAX_PORTS; + else + uc->max_ports = roundup(uc->max_ports, NAT64_CHUNK_SIZE); + if (uc->max_ports > NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR) + uc->max_ports = NAT64_CHUNK_SIZE * NAT64LSN_MAXPGPTR; + if (uc->jmaxlen == 0) + uc->jmaxlen = NAT64LSN_JMAXLEN; + if (uc->jmaxlen > 65536) + uc->jmaxlen = 65536; + if (uc->nh_delete_delay == 0) + uc->nh_delete_delay = NAT64LSN_HOST_AGE; + if (uc->pg_delete_delay == 0) + uc->pg_delete_delay = NAT64LSN_PG_AGE; + if (uc->st_syn_ttl == 0) + uc->st_syn_ttl = NAT64LSN_TCP_SYN_AGE; + if (uc->st_close_ttl == 0) + uc->st_close_ttl = NAT64LSN_TCP_FIN_AGE; + if (uc->st_estab_ttl == 0) + uc->st_estab_ttl = NAT64LSN_TCP_EST_AGE; + if (uc->st_udp_ttl == 0) + uc->st_udp_ttl = NAT64LSN_UDP_AGE; + if (uc->st_icmp_ttl == 0) + uc->st_icmp_ttl = NAT64LSN_ICMP_AGE; +} + +/* + * Creates new nat64lsn instance. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ] + * + * Returns 0 on success + */ +static int +nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_lheader *olh; + ipfw_nat64lsn_cfg *uc; + struct nat64lsn_cfg *cfg; + struct namedobj_instance *ni; + uint32_t addr4, mask4; + + if (sd->valsize != sizeof(*olh) + sizeof(*uc)) + return (EINVAL); + + olh = (ipfw_obj_lheader *)sd->kbuf; + uc = (ipfw_nat64lsn_cfg *)(olh + 1); + + if (ipfw_check_object_name_generic(uc->name) != 0) + return (EINVAL); + + if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS) + return (EINVAL); + + if (uc->plen4 > 32) + return (EINVAL); + if (uc->plen6 > 128 || ((uc->plen6 % 8) != 0)) + return (EINVAL); + + /* XXX: Check prefix4 to be global */ + addr4 = ntohl(uc->prefix4.s_addr); + mask4 = ~((1 << (32 - uc->plen4)) - 1); + if ((addr4 & mask4) != addr4) + return (EINVAL); + + /* XXX: Check prefix6 */ + if (uc->min_port == 0) + uc->min_port = NAT64_MIN_PORT; + if (uc->max_port == 0) + uc->max_port = 65535; + if (uc->min_port > uc->max_port) + return (EINVAL); + uc->min_port = roundup(uc->min_port, NAT64_CHUNK_SIZE); + uc->max_port = roundup(uc->max_port, NAT64_CHUNK_SIZE); + + nat64lsn_default_config(uc); + + ni = CHAIN_TO_SRV(ch); + IPFW_UH_RLOCK(ch); + if (nat64lsn_find(ni, uc->name, uc->set) != NULL) { + IPFW_UH_RUNLOCK(ch); + return (EEXIST); + } + IPFW_UH_RUNLOCK(ch); + + cfg = nat64lsn_init_instance(ch, 1 << (32 - uc->plen4)); + strlcpy(cfg->name, uc->name, sizeof(cfg->name)); + cfg->no.name = cfg->name; + cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME; + cfg->no.set = uc->set; + + cfg->prefix4 = addr4; + cfg->pmask4 = addr4 | ~mask4; + /* XXX: Copy 96 bits */ + cfg->plen6 = 96; + memcpy(&cfg->prefix6, &uc->prefix6, cfg->plen6 / 8); + cfg->plen4 = uc->plen4; + cfg->flags = uc->flags & NAT64LSN_FLAGSMASK; + cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE; + cfg->agg_prefix_len = uc->agg_prefix_len; + cfg->agg_prefix_max = uc->agg_prefix_max; + + cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE; + cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE; + + cfg->jmaxlen = uc->jmaxlen; + cfg->nh_delete_delay = uc->nh_delete_delay; + cfg->pg_delete_delay = uc->pg_delete_delay; + cfg->st_syn_ttl = uc->st_syn_ttl; + cfg->st_close_ttl = uc->st_close_ttl; + cfg->st_estab_ttl = uc->st_estab_ttl; + cfg->st_udp_ttl = uc->st_udp_ttl; + cfg->st_icmp_ttl = uc->st_icmp_ttl; + + cfg->nomatch_verdict = IP_FW_DENY; + cfg->nomatch_final = 1; /* Exit outer loop by default */ + + IPFW_UH_WLOCK(ch); + + if (nat64lsn_find(ni, uc->name, uc->set) != NULL) { + IPFW_UH_WUNLOCK(ch); + nat64lsn_destroy_instance(cfg); + return (EEXIST); + } + + if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) { + IPFW_UH_WUNLOCK(ch); + nat64lsn_destroy_instance(cfg); + return (ENOSPC); + } + ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no); + + /* Okay, let's link data */ + IPFW_WLOCK(ch); + SRV_OBJECT(ch, cfg->no.kidx) = cfg; + IPFW_WUNLOCK(ch); + + nat64lsn_start_instance(cfg); + + IPFW_UH_WUNLOCK(ch); + return (0); +} + +static void +nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg) +{ + + IPFW_UH_WLOCK_ASSERT(ch); + + ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); + ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx); +} + +/* + * Destroys nat64 instance. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ +static int +nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + struct nat64lsn_cfg *cfg; + ipfw_obj_header *oh; + + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + + oh = (ipfw_obj_header *)op3; + + IPFW_UH_WLOCK(ch); + cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + + if (cfg->no.refcnt > 0) { + IPFW_UH_WUNLOCK(ch); + return (EBUSY); + } + + IPFW_WLOCK(ch); + SRV_OBJECT(ch, cfg->no.kidx) = NULL; + IPFW_WUNLOCK(ch); + + nat64lsn_detach_config(ch, cfg); + IPFW_UH_WUNLOCK(ch); + + nat64lsn_destroy_instance(cfg); + return (0); +} + +#define __COPY_STAT_FIELD(_cfg, _stats, _field) \ + (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field) +static void +export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, + struct ipfw_nat64lsn_stats *stats) +{ + + __COPY_STAT_FIELD(cfg, stats, opcnt64); + __COPY_STAT_FIELD(cfg, stats, opcnt46); + __COPY_STAT_FIELD(cfg, stats, ofrags); + __COPY_STAT_FIELD(cfg, stats, ifrags); + __COPY_STAT_FIELD(cfg, stats, oerrors); + __COPY_STAT_FIELD(cfg, stats, noroute4); + __COPY_STAT_FIELD(cfg, stats, noroute6); + __COPY_STAT_FIELD(cfg, stats, nomatch4); + __COPY_STAT_FIELD(cfg, stats, noproto); + __COPY_STAT_FIELD(cfg, stats, nomem); + __COPY_STAT_FIELD(cfg, stats, dropped); + + __COPY_STAT_FIELD(cfg, stats, jcalls); + __COPY_STAT_FIELD(cfg, stats, jrequests); + __COPY_STAT_FIELD(cfg, stats, jhostsreq); + __COPY_STAT_FIELD(cfg, stats, jportreq); + __COPY_STAT_FIELD(cfg, stats, jhostfails); + __COPY_STAT_FIELD(cfg, stats, jportfails); + __COPY_STAT_FIELD(cfg, stats, jmaxlen); + __COPY_STAT_FIELD(cfg, stats, jnomem); + __COPY_STAT_FIELD(cfg, stats, jreinjected); + __COPY_STAT_FIELD(cfg, stats, screated); + __COPY_STAT_FIELD(cfg, stats, sdeleted); + __COPY_STAT_FIELD(cfg, stats, spgcreated); + __COPY_STAT_FIELD(cfg, stats, spgdeleted); + + stats->hostcount = cfg->ihcount; + stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP]; + stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP]; + stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP]; +} +#undef __COPY_STAT_FIELD + +static void +nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg, + ipfw_nat64lsn_cfg *uc) +{ + + uc->flags = cfg->flags & NAT64LSN_FLAGSMASK; + uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE; + uc->agg_prefix_len = cfg->agg_prefix_len; + uc->agg_prefix_max = cfg->agg_prefix_max; + + uc->jmaxlen = cfg->jmaxlen; + uc->nh_delete_delay = cfg->nh_delete_delay; + uc->pg_delete_delay = cfg->pg_delete_delay; + uc->st_syn_ttl = cfg->st_syn_ttl; + uc->st_close_ttl = cfg->st_close_ttl; + uc->st_estab_ttl = cfg->st_estab_ttl; + uc->st_udp_ttl = cfg->st_udp_ttl; + uc->st_icmp_ttl = cfg->st_icmp_ttl; + uc->prefix4.s_addr = htonl(cfg->prefix4); + uc->prefix6 = cfg->prefix6; + uc->plen4 = cfg->plen4; + uc->plen6 = cfg->plen6; + uc->set = cfg->no.set; + strlcpy(uc->name, cfg->no.name, sizeof(uc->name)); +} + +struct nat64_dump_arg { + struct ip_fw_chain *ch; + struct sockopt_data *sd; +}; + +static int +export_config_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg; + ipfw_nat64lsn_cfg *uc; + + uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd, + sizeof(*uc)); + nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc); + return (0); +} + +/* + * Lists all nat64 
lsn instances currently available in kernel. + * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ] + * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ] + * + * Returns 0 on success + */ +static int +nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_lheader *olh; + struct nat64_dump_arg da; + + /* Check minimum header size */ + if (sd->valsize < sizeof(ipfw_obj_lheader)) + return (EINVAL); + + olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); + + IPFW_UH_RLOCK(ch); + olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), + IPFW_TLV_NAT64LSN_NAME); + olh->objsize = sizeof(ipfw_nat64lsn_cfg); + olh->size = sizeof(*olh) + olh->count * olh->objsize; + + if (sd->valsize < olh->size) { + IPFW_UH_RUNLOCK(ch); + return (ENOMEM); + } + memset(&da, 0, sizeof(da)); + da.ch = ch; + da.sd = sd; + ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da, + IPFW_TLV_NAT64LSN_NAME); + IPFW_UH_RUNLOCK(ch); + + return (0); +} + +/* + * Change existing nat64lsn instance configuration. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_nat64lsn_cfg ] + * Reply: [ ipfw_obj_header ipfw_nat64lsn_cfg ] + * + * Returns 0 on success + */ +static int +nat64lsn_config(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + ipfw_obj_header *oh; + ipfw_nat64lsn_cfg *uc; + struct nat64lsn_cfg *cfg; + struct namedobj_instance *ni; + + if (sd->valsize != sizeof(*oh) + sizeof(*uc)) + return (EINVAL); + + oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, + sizeof(*oh) + sizeof(*uc)); + uc = (ipfw_nat64lsn_cfg *)(oh + 1); + + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || + oh->ntlv.set >= IPFW_MAX_SETS) + return (EINVAL); + + ni = CHAIN_TO_SRV(ch); + if (sd->sopt->sopt_dir == SOPT_GET) { + IPFW_UH_RLOCK(ch); + cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_RUNLOCK(ch); + return (EEXIST); + } + nat64lsn_export_config(ch, cfg, uc); + IPFW_UH_RUNLOCK(ch); + return (0); + } + + nat64lsn_default_config(uc); + + IPFW_UH_WLOCK(ch); + cfg = nat64lsn_find(ni, oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (EEXIST); + } + + /* + * For now allow to change only following values: + * jmaxlen, nh_del_age, pg_del_age, tcp_syn_age, tcp_close_age, + * tcp_est_age, udp_age, icmp_age, flags, max_ports. + */ + + cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE; + cfg->jmaxlen = uc->jmaxlen; + cfg->nh_delete_delay = uc->nh_delete_delay; + cfg->pg_delete_delay = uc->pg_delete_delay; + cfg->st_syn_ttl = uc->st_syn_ttl; + cfg->st_close_ttl = uc->st_close_ttl; + cfg->st_estab_ttl = uc->st_estab_ttl; + cfg->st_udp_ttl = uc->st_udp_ttl; + cfg->st_icmp_ttl = uc->st_icmp_ttl; + cfg->flags = uc->flags & NAT64LSN_FLAGSMASK; + + IPFW_UH_WUNLOCK(ch); + + return (0); +} + +/* + * Get nat64lsn statistics. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * Reply: [ ipfw_obj_header ipfw_counter_tlv ] + * + * Returns 0 on success + */ +static int +nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + struct ipfw_nat64lsn_stats stats; + struct nat64lsn_cfg *cfg; + ipfw_obj_header *oh; + ipfw_obj_ctlv *ctlv; + size_t sz; + + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); + if (sd->valsize % sizeof(uint64_t)) + return (EINVAL); + if (sd->valsize < sz) + return (ENOMEM); + oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); + if (oh == NULL) + return (EINVAL); + memset(&stats, 0, sizeof(stats)); + + IPFW_UH_RLOCK(ch); + cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_RUNLOCK(ch); + return (ESRCH); + } + + export_stats(ch, cfg, &stats); + IPFW_UH_RUNLOCK(ch); + + ctlv = (ipfw_obj_ctlv *)(oh + 1); + memset(ctlv, 0, sizeof(*ctlv)); + ctlv->head.type = IPFW_TLV_COUNTERS; + ctlv->head.length = sz - sizeof(ipfw_obj_header); + ctlv->count = sizeof(stats) / sizeof(uint64_t); + ctlv->objsize = sizeof(uint64_t); + ctlv->version = IPFW_NAT64_VERSION; + memcpy(ctlv + 1, &stats, sizeof(stats)); + return (0); +} + +/* + * Reset nat64lsn statistics. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ +static int +nat64lsn_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + struct nat64lsn_cfg *cfg; + ipfw_obj_header *oh; + + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + oh = (ipfw_obj_header *)sd->kbuf; + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || + oh->ntlv.set >= IPFW_MAX_SETS) + return (EINVAL); + + IPFW_UH_WLOCK(ch); + cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS); + IPFW_UH_WUNLOCK(ch); + return (0); +} + +/* + * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg + * ipfw_nat64lsn_state x count, ... 
] ] + */ +static int +export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg, + ipfw_nat64lsn_stg *stg, struct sockopt_data *sd) +{ + ipfw_nat64lsn_state *ste; + struct nat64lsn_state *st; + int i, count; + + NAT64_LOCK(pg->host); + count = 0; + for (i = 0; i < 64; i++) { + if (PG_IS_BUSY_IDX(pg, i)) + count++; + } + DPRINTF(DP_STATE, "EXPORT PG %d, count %d", pg->idx, count); + + if (count == 0) { + stg->count = 0; + NAT64_UNLOCK(pg->host); + return (0); + } + ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd, + count * sizeof(ipfw_nat64lsn_state)); + if (ste == NULL) { + NAT64_UNLOCK(pg->host); + return (1); + } + + stg->alias4.s_addr = pg->aaddr; + stg->proto = nat64lsn_rproto_map[pg->nat_proto]; + stg->flags = 0; + stg->host6 = pg->host->addr; + stg->count = count; + for (i = 0; i < 64; i++) { + if (PG_IS_FREE_IDX(pg, i)) + continue; + st = &pg->states[i]; + ste->daddr.s_addr = st->u.s.faddr; + ste->dport = st->u.s.fport; + ste->aport = pg->aport + i; + ste->sport = st->u.s.lport; + ste->flags = st->flags; /* XXX filter flags */ + ste->idle = GET_AGE(st->timestamp); + ste++; + } + NAT64_UNLOCK(pg->host); + + return (0); +} + +static int +get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, + uint16_t *port) +{ + + if (*port < 65536 - NAT64_CHUNK_SIZE) { + *port += NAT64_CHUNK_SIZE; + return (0); + } + *port = 0; + + if (*nat_proto < NAT_MAX_PROTO - 1) { + *nat_proto += 1; + return (0); + } + *nat_proto = 1; + + if (*addr < cfg->pmask4) { + *addr += 1; + return (0); + } + + /* End of space. */ + return (1); +} + +#define PACK_IDX(addr, proto, port) \ + ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8) +#define UNPACK_IDX(idx, addr, proto, port) \ + (addr) = (uint32_t)((idx) >> 32); \ + (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \ + (proto) = (uint8_t)(((idx) >> 8) & 0xFF) + +static struct nat64lsn_portgroup * +get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, + uint16_t *port) +{ + struct nat64lsn_portgroup *pg; + uint64_t pre_pack, post_pack; + + pg = NULL; + pre_pack = PACK_IDX(*addr, *nat_proto, *port); + for (;;) { + if (get_next_idx(cfg, addr, nat_proto, port) != 0) { + /* End of states */ + return (pg); + } + + pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port); + if (pg != NULL) + break; + } + + post_pack = PACK_IDX(*addr, *nat_proto, *port); + if (pre_pack == post_pack) + DPRINTF(DP_STATE, "XXX: PACK_IDX %u %d %d", + *addr, *nat_proto, *port); + return (pg); +} + +static __noinline struct nat64lsn_portgroup * +get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto, + uint16_t *port) +{ + struct nat64lsn_portgroup *pg; + + pg = GET_PORTGROUP(cfg, *addr, *nat_proto, *port); + if (pg == NULL) + pg = get_next_pg(cfg, addr, nat_proto, port); + + return (pg); +} + +/* + * Lists nat64lsn states. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]] + * Reply: [ ipfw_obj_header ipfw_obj_data [ + * ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ] + * + * Returns 0 on success + */ +static int +nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_header *oh; + ipfw_obj_data *od; + ipfw_nat64lsn_stg *stg; + struct nat64lsn_cfg *cfg; + struct nat64lsn_portgroup *pg, *pg_next; + uint64_t next_idx; + size_t sz; + uint32_t addr, states; + uint16_t port; + uint8_t nat_proto; + + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + + sizeof(uint64_t); + /* Check minimum header size */ + if (sd->valsize < sz) + return (EINVAL); + + oh = (ipfw_obj_header *)sd->kbuf; + od = (ipfw_obj_data *)(oh + 1); + if (od->head.type != IPFW_TLV_OBJDATA || + od->head.length != sz - sizeof(ipfw_obj_header)) + return (EINVAL); + + next_idx = *(uint64_t *)(od + 1); + /* Translate index to the request position to start from */ + UNPACK_IDX(next_idx, addr, nat_proto, port); + if (nat_proto >= NAT_MAX_PROTO) + return (EINVAL); + if (nat_proto == 0 && addr != 0) + return (EINVAL); + + IPFW_UH_RLOCK(ch); + cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_RUNLOCK(ch); + return (ESRCH); + } + /* Fill in starting point */ + if (addr == 0) { + addr = cfg->prefix4; + nat_proto = 1; + port = 0; + } + if (addr < cfg->prefix4 || addr > cfg->pmask4) { + IPFW_UH_RUNLOCK(ch); + DPRINTF(DP_GENERIC | DP_STATE, "XXX: %lu %u %u", + next_idx, addr, cfg->pmask4); + return (EINVAL); + } + + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) + + sizeof(ipfw_nat64lsn_stg); + if (sd->valsize < sz) + return (ENOMEM); + oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz); + od = (ipfw_obj_data *)(oh + 1); + od->head.type = IPFW_TLV_OBJDATA; + od->head.length = sz - sizeof(ipfw_obj_header); + stg = (ipfw_nat64lsn_stg *)(od + 1); + + pg = get_first_pg(cfg, &addr, &nat_proto, &port); + if (pg == NULL) { + /* No states */ + stg->next_idx = 0xFF; + stg->count = 0; + IPFW_UH_RUNLOCK(ch); + return (0); + } + states = 0; + pg_next = NULL; + while (pg != NULL) { + pg_next = get_next_pg(cfg, &addr, &nat_proto, &port); + if (pg_next == NULL) + stg->next_idx = 0xFF; + else + stg->next_idx = PACK_IDX(addr, nat_proto, port); + + if (export_pg_states(cfg, pg, stg, sd) != 0) { + IPFW_UH_RUNLOCK(ch); + return (states == 0 ? 
ENOMEM: 0); + } + states += stg->count; + od->head.length += stg->count * sizeof(ipfw_nat64lsn_state); + sz += stg->count * sizeof(ipfw_nat64lsn_state); + if (pg_next != NULL) { + sz += sizeof(ipfw_nat64lsn_stg); + if (sd->valsize < sz) + break; + stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd, + sizeof(ipfw_nat64lsn_stg)); + } + pg = pg_next; + } + IPFW_UH_RUNLOCK(ch); + return (0); +} + +static struct ipfw_sopt_handler scodes[] = { + { IP_FW_NAT64LSN_CREATE, 0, HDIR_BOTH, nat64lsn_create }, + { IP_FW_NAT64LSN_DESTROY,0, HDIR_SET, nat64lsn_destroy }, + { IP_FW_NAT64LSN_CONFIG, 0, HDIR_BOTH, nat64lsn_config }, + { IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list }, + { IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats }, + { IP_FW_NAT64LSN_RESET_STATS,0, HDIR_SET, nat64lsn_reset_stats }, + { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states }, +}; + +static int +nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) +{ + ipfw_insn *icmd; + + icmd = cmd - 1; + if (icmd->opcode != O_EXTERNAL_ACTION || + icmd->arg1 != V_nat64lsn_eid) + return (1); + + *puidx = cmd->arg1; + *ptype = 0; + return (0); +} + +static void +nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx) +{ + + cmd->arg1 = idx; +} + +static int +nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, + struct named_object **pno) +{ + int err; + + err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, + IPFW_TLV_NAT64LSN_NAME, pno); + return (err); +} + +static struct named_object * +nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx) +{ + struct namedobj_instance *ni; + struct named_object *no; + + IPFW_UH_WLOCK_ASSERT(ch); + ni = CHAIN_TO_SRV(ch); + no = ipfw_objhash_lookup_kidx(ni, idx); + KASSERT(no != NULL, ("NAT64LSN with index %d not found", idx)); + + return (no); +} + +static int +nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, + enum ipfw_sets_cmd cmd) +{ + + return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64LSN_NAME, + set, new_set, cmd)); +} + +static struct opcode_obj_rewrite opcodes[] = { + { + .opcode = O_EXTERNAL_INSTANCE, + .etlv = IPFW_TLV_EACTION /* just show it isn't table */, + .classifier = nat64lsn_classify, + .update = nat64lsn_update_arg1, + .find_byname = nat64lsn_findbyname, + .find_bykidx = nat64lsn_findbykidx, + .manage_sets = nat64lsn_manage_sets, + }, +}; + +static int +destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct nat64lsn_cfg *cfg; + struct ip_fw_chain *ch; + + ch = (struct ip_fw_chain *)arg; + cfg = (struct nat64lsn_cfg *)SRV_OBJECT(ch, no->kidx); + SRV_OBJECT(ch, no->kidx) = NULL; + nat64lsn_detach_config(ch, cfg); + nat64lsn_destroy_instance(cfg); + return (0); +} + +int +nat64lsn_init(struct ip_fw_chain *ch, int first) +{ + + if (first != 0) + nat64lsn_init_internal(); + V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn"); + if (V_nat64lsn_eid == 0) + return (ENXIO); + IPFW_ADD_SOPT_HANDLER(first, scodes); + IPFW_ADD_OBJ_REWRITER(first, opcodes); + return (0); +} + +void +nat64lsn_uninit(struct ip_fw_chain *ch, int last) +{ + + IPFW_DEL_OBJ_REWRITER(last, opcodes); + IPFW_DEL_SOPT_HANDLER(last, scodes); + ipfw_del_eaction(ch, V_nat64lsn_eid); + /* + * Since we already have deregistered external action, + * our named objects become unaccessible via rules, because + * all rules were truncated by ipfw_del_eaction(). + * So, we can unlink and destroy our named objects without holding + * IPFW_WLOCK(). 
+ */ + IPFW_UH_WLOCK(ch); + ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch, + IPFW_TLV_NAT64LSN_NAME); + V_nat64lsn_eid = 0; + IPFW_UH_WUNLOCK(ch); + if (last != 0) + nat64lsn_uninit_internal(); +} + Index: head/sys/netpfil/ipfw/nat64/nat64stl.h =================================================================== --- head/sys/netpfil/ipfw/nat64/nat64stl.h +++ head/sys/netpfil/ipfw/nat64/nat64stl.h @@ -0,0 +1,58 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _IP_FW_NAT64STL_H_ +#define _IP_FW_NAT64STL_H_ + +struct nat64stl_cfg { + struct named_object no; + + uint16_t map64; /* table with 6to4 mapping */ + uint16_t map46; /* table with 4to6 mapping */ + + struct in6_addr prefix6;/* IPv6 prefix */ + uint8_t plen6; /* prefix length */ + uint8_t flags; /* flags for internal use */ +#define NAT64STL_KIDX 0x0100 +#define NAT64STL_46T 0x0200 +#define NAT64STL_64T 0x0400 +#define NAT64STL_FLAGSMASK (NAT64_LOG) /* flags to pass to userland */ + char name[64]; + nat64_stats_block stats; +}; + +VNET_DECLARE(uint16_t, nat64stl_eid); +#define V_nat64stl_eid VNET(nat64stl_eid) +#define IPFW_TLV_NAT64STL_NAME IPFW_TLV_EACTION_NAME(V_nat64stl_eid) + +int ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args, + ipfw_insn *cmd, int *done); + +#endif + Index: head/sys/netpfil/ipfw/nat64/nat64stl.c =================================================================== --- head/sys/netpfil/ipfw/nat64/nat64stl.c +++ head/sys/netpfil/ipfw/nat64/nat64stl.c @@ -0,0 +1,260 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#define NAT64_LOOKUP(chain, cmd) \
+ (struct nat64stl_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
+
+static void
+nat64stl_log(struct pfloghdr *plog, struct mbuf *m, sa_family_t family,
+ uint32_t kidx)
+{
+ static uint32_t pktid = 0;
+
+ memset(plog, 0, sizeof(*plog));
+ plog->length = PFLOG_REAL_HDRLEN;
+ plog->af = family;
+ plog->action = PF_NAT;
+ plog->dir = PF_IN;
+ plog->rulenr = htonl(kidx);
+ plog->subrulenr = htonl(++pktid);
+ plog->ruleset[0] = '\0';
+ strlcpy(plog->ifname, "NAT64STL", sizeof(plog->ifname));
+ ipfw_bpf_mtap2(plog, PFLOG_HDRLEN, m);
+}
+
+static int
+nat64stl_handle_ip4(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+ struct mbuf *m, uint32_t tablearg)
+{
+ struct pfloghdr loghdr, *logdata;
+ struct in6_addr saddr, daddr;
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+ if (nat64_check_ip4(ip->ip_src.s_addr) != 0 ||
+ nat64_check_ip4(ip->ip_dst.s_addr) != 0 ||
+ nat64_check_private_ip4(ip->ip_src.s_addr) != 0 ||
+ nat64_check_private_ip4(ip->ip_dst.s_addr) != 0)
+ return (NAT64SKIP);
+
+ daddr = TARG_VAL(chain, tablearg, nh6);
+ if (nat64_check_ip6(&daddr) != 0)
+ return (NAT64MFREE);
+ saddr = cfg->prefix6;
+ nat64_set_ip4(&saddr, ip->ip_src.s_addr);
+
+ if (cfg->flags & NAT64_LOG) {
+ logdata = &loghdr;
+ nat64stl_log(logdata, m, AF_INET, cfg->no.kidx);
+ } else
+ logdata = NULL;
+ return (nat64_do_handle_ip4(m, &saddr, &daddr, 0, &cfg->stats,
+ logdata));
+}
+
+static int
+nat64stl_handle_ip6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+ struct mbuf *m, uint32_t tablearg)
+{
+ struct pfloghdr loghdr, *logdata;
+ struct ip6_hdr *ip6;
+ uint32_t aaddr;
+
+ aaddr = htonl(TARG_VAL(chain, tablearg, nh4));
+
+ /*
+ * NOTE: we expect that ipfw_chk() has already done m_pullup() up to
+ * the upper level protocol headers. We also skip some checks that
+ * ip6_input(), ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
+ */ + ip6 = mtod(m, struct ip6_hdr *); + /* Check ip6_dst matches configured prefix */ + if (bcmp(&ip6->ip6_dst, &cfg->prefix6, cfg->plen6 / 8) != 0) + return (NAT64SKIP); + + if (cfg->flags & NAT64_LOG) { + logdata = &loghdr; + nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx); + } else + logdata = NULL; + return (nat64_do_handle_ip6(m, aaddr, 0, &cfg->stats, logdata)); +} + +static int +nat64stl_handle_icmp6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg, + struct mbuf *m) +{ + struct pfloghdr loghdr, *logdata; + nat64_stats_block *stats; + struct ip6_hdr *ip6i; + struct icmp6_hdr *icmp6; + uint32_t tablearg; + int hlen, proto; + + hlen = 0; + stats = &cfg->stats; + proto = nat64_getlasthdr(m, &hlen); + if (proto != IPPROTO_ICMPV6) { + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + icmp6 = mtodo(m, hlen); + switch (icmp6->icmp6_type) { + case ICMP6_DST_UNREACH: + case ICMP6_PACKET_TOO_BIG: + case ICMP6_TIME_EXCEED_TRANSIT: + case ICMP6_PARAM_PROB: + break; + default: + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + hlen += sizeof(struct icmp6_hdr); + if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) { + NAT64STAT_INC(stats, dropped); + return (NAT64MFREE); + } + if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) + m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN); + if (m == NULL) { + NAT64STAT_INC(stats, nomem); + return (NAT64RETURN); + } + /* + * Use destination address from inner IPv6 header to determine + * IPv4 mapped address. + */ + ip6i = mtodo(m, hlen); + if (ipfw_lookup_table_extended(chain, cfg->map64, + sizeof(struct in6_addr), &ip6i->ip6_dst, &tablearg) == 0) { + m_freem(m); + return (NAT64RETURN); + } + if (cfg->flags & NAT64_LOG) { + logdata = &loghdr; + nat64stl_log(logdata, m, AF_INET6, cfg->no.kidx); + } else + logdata = NULL; + return (nat64_handle_icmp6(m, 0, + htonl(TARG_VAL(chain, tablearg, nh4)), 0, stats, logdata)); +} + +int +ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args, + ipfw_insn *cmd, int *done) +{ + ipfw_insn *icmd; + struct nat64stl_cfg *cfg; + uint32_t tablearg; + int ret; + + IPFW_RLOCK_ASSERT(chain); + + *done = 0; /* try next rule if not matched */ + icmd = cmd + 1; + if (cmd->opcode != O_EXTERNAL_ACTION || + cmd->arg1 != V_nat64stl_eid || + icmd->opcode != O_EXTERNAL_INSTANCE || + (cfg = NAT64_LOOKUP(chain, icmd)) == NULL) + return (0); + + switch (args->f_id.addr_type) { + case 4: + ret = ipfw_lookup_table(chain, cfg->map46, + htonl(args->f_id.dst_ip), &tablearg); + break; + case 6: + ret = ipfw_lookup_table_extended(chain, cfg->map64, + sizeof(struct in6_addr), &args->f_id.src_ip6, &tablearg); + break; + default: + return (0); + } + if (ret == 0) { + /* + * In case when packet is ICMPv6 message from an intermediate + * router, the source address of message will not match the + * addresses from our map64 table. 
+ */ + if (args->f_id.proto != IPPROTO_ICMPV6) + return (0); + + ret = nat64stl_handle_icmp6(chain, cfg, args->m); + } else { + if (args->f_id.addr_type == 4) + ret = nat64stl_handle_ip4(chain, cfg, args->m, + tablearg); + else + ret = nat64stl_handle_ip6(chain, cfg, args->m, + tablearg); + } + if (ret == NAT64SKIP) + return (0); + + *done = 1; /* terminate the search */ + if (ret == NAT64MFREE) + m_freem(args->m); + args->m = NULL; + return (IP_FW_DENY); +} + + Index: head/sys/netpfil/ipfw/nat64/nat64stl_control.c =================================================================== --- head/sys/netpfil/ipfw/nat64/nat64stl_control.c +++ head/sys/netpfil/ipfw/nat64/nat64stl_control.c @@ -0,0 +1,621 @@ +/*- + * Copyright (c) 2015-2016 Yandex LLC + * Copyright (c) 2015-2016 Andrey V. Elsukov + * Copyright (c) 2015 Alexander V. Chernikov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +VNET_DEFINE(uint16_t, nat64stl_eid) = 0; + +static struct nat64stl_cfg *nat64stl_alloc_config(const char *name, uint8_t set); +static void nat64stl_free_config(struct nat64stl_cfg *cfg); +static struct nat64stl_cfg *nat64stl_find(struct namedobj_instance *ni, + const char *name, uint8_t set); + +static struct nat64stl_cfg * +nat64stl_alloc_config(const char *name, uint8_t set) +{ + struct nat64stl_cfg *cfg; + + cfg = malloc(sizeof(struct nat64stl_cfg), M_IPFW, M_WAITOK | M_ZERO); + COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK); + cfg->no.name = cfg->name; + cfg->no.etlv = IPFW_TLV_NAT64STL_NAME; + cfg->no.set = set; + strlcpy(cfg->name, name, sizeof(cfg->name)); + return (cfg); +} + +static void +nat64stl_free_config(struct nat64stl_cfg *cfg) +{ + + COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS); + free(cfg, M_IPFW); +} + +static void +nat64stl_export_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, + ipfw_nat64stl_cfg *uc) +{ + struct named_object *no; + + uc->prefix6 = cfg->prefix6; + uc->plen6 = cfg->plen6; + uc->flags = cfg->flags & NAT64STL_FLAGSMASK; + uc->set = cfg->no.set; + strlcpy(uc->name, cfg->no.name, sizeof(uc->name)); + + no = ipfw_objhash_lookup_table_kidx(ch, cfg->map64); + ipfw_export_obj_ntlv(no, &uc->ntlv6); + no = ipfw_objhash_lookup_table_kidx(ch, cfg->map46); + ipfw_export_obj_ntlv(no, &uc->ntlv4); +} + +struct nat64stl_dump_arg { + struct ip_fw_chain *ch; + struct sockopt_data *sd; +}; + +static int +export_config_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct nat64stl_dump_arg *da = (struct nat64stl_dump_arg *)arg; + ipfw_nat64stl_cfg *uc; + + uc = (ipfw_nat64stl_cfg *)ipfw_get_sopt_space(da->sd, sizeof(*uc)); + nat64stl_export_config(da->ch, (struct nat64stl_cfg *)no, uc); + return (0); +} + +static struct nat64stl_cfg * +nat64stl_find(struct namedobj_instance *ni, const char *name, uint8_t set) +{ + struct nat64stl_cfg *cfg; + + cfg = (struct nat64stl_cfg *)ipfw_objhash_lookup_name_type(ni, set, + IPFW_TLV_NAT64STL_NAME, name); + + return (cfg); +} + + +static int +nat64stl_create_internal(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, + ipfw_nat64stl_cfg *i) +{ + + IPFW_UH_WLOCK_ASSERT(ch); + + if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) + return (ENOSPC); + cfg->flags |= NAT64STL_KIDX; + + if (ipfw_ref_table(ch, &i->ntlv4, &cfg->map46) != 0) + return (EINVAL); + cfg->flags |= NAT64STL_46T; + + if (ipfw_ref_table(ch, &i->ntlv6, &cfg->map64) != 0) + return (EINVAL); + cfg->flags |= NAT64STL_64T; + + ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no); + + return (0); +} + +/* + * Creates new nat64 instance. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ipfw_nat64stl_cfg ] + * + * Returns 0 on success + */ +static int +nat64stl_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_lheader *olh; + ipfw_nat64stl_cfg *uc; + struct namedobj_instance *ni; + struct nat64stl_cfg *cfg; + int error; + + if (sd->valsize != sizeof(*olh) + sizeof(*uc)) + return (EINVAL); + + olh = (ipfw_obj_lheader *)sd->kbuf; + uc = (ipfw_nat64stl_cfg *)(olh + 1); + + if (ipfw_check_object_name_generic(uc->name) != 0) + return (EINVAL); + if (!IN6_IS_ADDR_WKPFX(&uc->prefix6)) + return (EINVAL); + if (uc->plen6 != 96 || uc->set >= IPFW_MAX_SETS) + return (EINVAL); + + /* XXX: check types of tables */ + + ni = CHAIN_TO_SRV(ch); + error = 0; + + IPFW_UH_RLOCK(ch); + if (nat64stl_find(ni, uc->name, uc->set) != NULL) { + IPFW_UH_RUNLOCK(ch); + return (EEXIST); + } + IPFW_UH_RUNLOCK(ch); + + cfg = nat64stl_alloc_config(uc->name, uc->set); + cfg->prefix6 = uc->prefix6; + cfg->plen6 = uc->plen6; + cfg->flags = uc->flags & NAT64STL_FLAGSMASK; + + IPFW_UH_WLOCK(ch); + + if (nat64stl_find(ni, uc->name, uc->set) != NULL) { + IPFW_UH_WUNLOCK(ch); + nat64stl_free_config(cfg); + return (EEXIST); + } + error = nat64stl_create_internal(ch, cfg, uc); + if (error == 0) { + /* Okay, let's link data */ + IPFW_WLOCK(ch); + SRV_OBJECT(ch, cfg->no.kidx) = cfg; + IPFW_WUNLOCK(ch); + + IPFW_UH_WUNLOCK(ch); + return (0); + } + + if (cfg->flags & NAT64STL_KIDX) + ipfw_objhash_free_idx(ni, cfg->no.kidx); + if (cfg->flags & NAT64STL_46T) + ipfw_unref_table(ch, cfg->map46); + if (cfg->flags & NAT64STL_64T) + ipfw_unref_table(ch, cfg->map64); + + IPFW_UH_WUNLOCK(ch); + nat64stl_free_config(cfg); + return (error); +} + +/* + * Change existing nat64stl instance configuration. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ipfw_nat64stl_cfg ] + * Reply: [ ipfw_obj_header ipfw_nat64stl_cfg ] + * + * Returns 0 on success + */ +static int +nat64stl_config(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + ipfw_obj_header *oh; + ipfw_nat64stl_cfg *uc; + struct nat64stl_cfg *cfg; + struct namedobj_instance *ni; + + if (sd->valsize != sizeof(*oh) + sizeof(*uc)) + return (EINVAL); + + oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, + sizeof(*oh) + sizeof(*uc)); + uc = (ipfw_nat64stl_cfg *)(oh + 1); + + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || + oh->ntlv.set >= IPFW_MAX_SETS) + return (EINVAL); + + ni = CHAIN_TO_SRV(ch); + if (sd->sopt->sopt_dir == SOPT_GET) { + IPFW_UH_RLOCK(ch); + cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_RUNLOCK(ch); + return (EEXIST); + } + nat64stl_export_config(ch, cfg, uc); + IPFW_UH_RUNLOCK(ch); + return (0); + } + + IPFW_UH_WLOCK(ch); + cfg = nat64stl_find(ni, oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (EEXIST); + } + + /* + * For now allow to change only following values: + * flags. + */ + + cfg->flags = uc->flags & NAT64STL_FLAGSMASK; + IPFW_UH_WUNLOCK(ch); + return (0); +} + +static void +nat64stl_detach_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg) +{ + + IPFW_UH_WLOCK_ASSERT(ch); + + ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no); + ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx); + ipfw_unref_table(ch, cfg->map46); + ipfw_unref_table(ch, cfg->map64); +} + +/* + * Destroys nat64 instance. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ +static int +nat64stl_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_header *oh; + struct nat64stl_cfg *cfg; + + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + + oh = (ipfw_obj_header *)sd->kbuf; + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0) + return (EINVAL); + + IPFW_UH_WLOCK(ch); + cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + if (cfg->no.refcnt > 0) { + IPFW_UH_WUNLOCK(ch); + return (EBUSY); + } + + IPFW_WLOCK(ch); + SRV_OBJECT(ch, cfg->no.kidx) = NULL; + IPFW_WUNLOCK(ch); + + nat64stl_detach_config(ch, cfg); + IPFW_UH_WUNLOCK(ch); + + nat64stl_free_config(cfg); + return (0); +} + +/* + * Lists all nat64stl instances currently available in kernel. + * Data layout (v0)(current): + * Request: [ ipfw_obj_lheader ] + * Reply: [ ipfw_obj_lheader ipfw_nat64stl_cfg x N ] + * + * Returns 0 on success + */ +static int +nat64stl_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3, + struct sockopt_data *sd) +{ + ipfw_obj_lheader *olh; + struct nat64stl_dump_arg da; + + /* Check minimum header size */ + if (sd->valsize < sizeof(ipfw_obj_lheader)) + return (EINVAL); + + olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh)); + + IPFW_UH_RLOCK(ch); + olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch), + IPFW_TLV_NAT64STL_NAME); + olh->objsize = sizeof(ipfw_nat64stl_cfg); + olh->size = sizeof(*olh) + olh->count * olh->objsize; + + if (sd->valsize < olh->size) { + IPFW_UH_RUNLOCK(ch); + return (ENOMEM); + } + memset(&da, 0, sizeof(da)); + da.ch = ch; + da.sd = sd; + ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, + &da, IPFW_TLV_NAT64STL_NAME); + IPFW_UH_RUNLOCK(ch); + + return (0); +} + +#define __COPY_STAT_FIELD(_cfg, _stats, _field) \ + (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field) +static void +export_stats(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg, + struct ipfw_nat64stl_stats *stats) +{ + + __COPY_STAT_FIELD(cfg, stats, opcnt64); + __COPY_STAT_FIELD(cfg, stats, opcnt46); + __COPY_STAT_FIELD(cfg, stats, ofrags); + __COPY_STAT_FIELD(cfg, stats, ifrags); + __COPY_STAT_FIELD(cfg, stats, oerrors); + __COPY_STAT_FIELD(cfg, stats, noroute4); + __COPY_STAT_FIELD(cfg, stats, noroute6); + __COPY_STAT_FIELD(cfg, stats, noproto); + __COPY_STAT_FIELD(cfg, stats, nomem); + __COPY_STAT_FIELD(cfg, stats, dropped); +} + +/* + * Get nat64stl statistics. 
+ * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]] + * + * Returns 0 on success + */ +static int +nat64stl_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + struct ipfw_nat64stl_stats stats; + struct nat64stl_cfg *cfg; + ipfw_obj_header *oh; + ipfw_obj_ctlv *ctlv; + size_t sz; + + sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats); + if (sd->valsize % sizeof(uint64_t)) + return (EINVAL); + if (sd->valsize < sz) + return (ENOMEM); + oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz); + if (oh == NULL) + return (EINVAL); + memset(&stats, 0, sizeof(stats)); + + IPFW_UH_RLOCK(ch); + cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_RUNLOCK(ch); + return (ESRCH); + } + export_stats(ch, cfg, &stats); + IPFW_UH_RUNLOCK(ch); + + ctlv = (ipfw_obj_ctlv *)(oh + 1); + memset(ctlv, 0, sizeof(*ctlv)); + ctlv->head.type = IPFW_TLV_COUNTERS; + ctlv->head.length = sz - sizeof(ipfw_obj_header); + ctlv->count = sizeof(stats) / sizeof(uint64_t); + ctlv->objsize = sizeof(uint64_t); + ctlv->version = IPFW_NAT64_VERSION; + memcpy(ctlv + 1, &stats, sizeof(stats)); + return (0); +} + +/* + * Reset nat64stl statistics. + * Data layout (v0)(current): + * Request: [ ipfw_obj_header ] + * + * Returns 0 on success + */ +static int +nat64stl_reset_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op, + struct sockopt_data *sd) +{ + struct nat64stl_cfg *cfg; + ipfw_obj_header *oh; + + if (sd->valsize != sizeof(*oh)) + return (EINVAL); + oh = (ipfw_obj_header *)sd->kbuf; + if (ipfw_check_object_name_generic(oh->ntlv.name) != 0 || + oh->ntlv.set >= IPFW_MAX_SETS) + return (EINVAL); + + IPFW_UH_WLOCK(ch); + cfg = nat64stl_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set); + if (cfg == NULL) { + IPFW_UH_WUNLOCK(ch); + return (ESRCH); + } + COUNTER_ARRAY_ZERO(cfg->stats.stats, NAT64STATS); + IPFW_UH_WUNLOCK(ch); + return (0); +} + +static struct ipfw_sopt_handler scodes[] = { + + { IP_FW_NAT64STL_CREATE, 0, HDIR_SET, nat64stl_create }, + { IP_FW_NAT64STL_DESTROY,0, HDIR_SET, nat64stl_destroy }, + { IP_FW_NAT64STL_CONFIG, 0, HDIR_BOTH, nat64stl_config }, + { IP_FW_NAT64STL_LIST, 0, HDIR_GET, nat64stl_list }, + { IP_FW_NAT64STL_STATS, 0, HDIR_GET, nat64stl_stats }, + { IP_FW_NAT64STL_RESET_STATS,0, HDIR_SET, nat64stl_reset_stats }, +}; + +static int +nat64stl_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype) +{ + ipfw_insn *icmd; + + icmd = cmd - 1; + if (icmd->opcode != O_EXTERNAL_ACTION || + icmd->arg1 != V_nat64stl_eid) + return (1); + + *puidx = cmd->arg1; + *ptype = 0; + return (0); +} + +static void +nat64stl_update_arg1(ipfw_insn *cmd, uint16_t idx) +{ + + cmd->arg1 = idx; +} + +static int +nat64stl_findbyname(struct ip_fw_chain *ch, struct tid_info *ti, + struct named_object **pno) +{ + int err; + + err = ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti, + IPFW_TLV_NAT64STL_NAME, pno); + return (err); +} + +static struct named_object * +nat64stl_findbykidx(struct ip_fw_chain *ch, uint16_t idx) +{ + struct namedobj_instance *ni; + struct named_object *no; + + IPFW_UH_WLOCK_ASSERT(ch); + ni = CHAIN_TO_SRV(ch); + no = ipfw_objhash_lookup_kidx(ni, idx); + KASSERT(no != NULL, ("NAT with index %d not found", idx)); + + return (no); +} + +static int +nat64stl_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set, + enum ipfw_sets_cmd cmd) +{ + + return (ipfw_obj_manage_sets(CHAIN_TO_SRV(ch), IPFW_TLV_NAT64STL_NAME, + set, new_set, cmd)); +} 
+ +static struct opcode_obj_rewrite opcodes[] = { + { + .opcode = O_EXTERNAL_INSTANCE, + .etlv = IPFW_TLV_EACTION /* just show it isn't table */, + .classifier = nat64stl_classify, + .update = nat64stl_update_arg1, + .find_byname = nat64stl_findbyname, + .find_bykidx = nat64stl_findbykidx, + .manage_sets = nat64stl_manage_sets, + }, +}; + +static int +destroy_config_cb(struct namedobj_instance *ni, struct named_object *no, + void *arg) +{ + struct nat64stl_cfg *cfg; + struct ip_fw_chain *ch; + + ch = (struct ip_fw_chain *)arg; + cfg = (struct nat64stl_cfg *)SRV_OBJECT(ch, no->kidx); + SRV_OBJECT(ch, no->kidx) = NULL; + nat64stl_detach_config(ch, cfg); + nat64stl_free_config(cfg); + return (0); +} + +int +nat64stl_init(struct ip_fw_chain *ch, int first) +{ + + V_nat64stl_eid = ipfw_add_eaction(ch, ipfw_nat64stl, "nat64stl"); + if (V_nat64stl_eid == 0) + return (ENXIO); + IPFW_ADD_SOPT_HANDLER(first, scodes); + IPFW_ADD_OBJ_REWRITER(first, opcodes); + return (0); +} + +void +nat64stl_uninit(struct ip_fw_chain *ch, int last) +{ + + IPFW_DEL_OBJ_REWRITER(last, opcodes); + IPFW_DEL_SOPT_HANDLER(last, scodes); + ipfw_del_eaction(ch, V_nat64stl_eid); + /* + * Since we already have deregistered external action, + * our named objects become unaccessible via rules, because + * all rules were truncated by ipfw_del_eaction(). + * So, we can unlink and destroy our named objects without holding + * IPFW_WLOCK(). + */ + IPFW_UH_WLOCK(ch); + ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch, + IPFW_TLV_NAT64STL_NAME); + V_nat64stl_eid = 0; + IPFW_UH_WUNLOCK(ch); +} +
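
As an illustration of the control interface added above: below is a minimal userland sketch that queries the counters of a nat64stl instance over the IP_FW3 socket option, following the layout documented for nat64stl_stats() (Request: [ ipfw_obj_header ], Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]]). The IP_FW_NAT64STL_STATS opcode, the structure names and the ntlv/counter fields appear in the code above; the installed header paths, the opheader.opcode member and the raw control socket are assumptions about the surrounding ipfw(4) v3 sockopt machinery, not something this change defines.

/*
 * Hedged sketch: read the counters of a nat64stl instance.  Structure
 * layouts come from the installed ipfw headers (paths assumed); error
 * handling is reduced to bare return codes.
 */
#include <sys/param.h>
#include <sys/socket.h>

#include <netinet/in.h>
#include <netinet/ip_fw.h>		/* IP_FW3, ipfw_obj_header, ipfw_obj_ctlv */
#include <netinet6/ip_fw_nat64.h>	/* IP_FW_NAT64STL_STATS, ipfw_nat64stl_stats (assumed path) */

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int
nat64stl_print_stats(const char *name, uint8_t set)
{
	/*
	 * Reply buffer: header + counter TLV + stats block, padded to
	 * 64 bits because nat64stl_stats() rejects odd-sized buffers.
	 */
	char buf[roundup2(sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) +
	    sizeof(struct ipfw_nat64stl_stats), sizeof(uint64_t))];
	struct ipfw_nat64stl_stats stats;
	ipfw_obj_header *oh;
	ipfw_obj_ctlv *ctlv;
	socklen_t len;
	int s;

	/* ipfw(8) uses a raw IP socket as its control channel (assumed here). */
	s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
	if (s < 0)
		return (-1);

	memset(buf, 0, sizeof(buf));
	oh = (ipfw_obj_header *)buf;
	oh->opheader.opcode = IP_FW_NAT64STL_STATS;	/* opheader member assumed */
	oh->ntlv.set = set;
	strlcpy(oh->ntlv.name, name, sizeof(oh->ntlv.name));

	/* IP_FW3 multiplexes the v3 sockopts; the opcode selects the handler. */
	len = sizeof(buf);
	if (getsockopt(s, IPPROTO_IP, IP_FW3, buf, &len) != 0) {
		close(s);
		return (-1);
	}
	close(s);

	/* Counters follow the ipfw_obj_ctlv header in the reply. */
	ctlv = (ipfw_obj_ctlv *)(oh + 1);
	memcpy(&stats, ctlv + 1, sizeof(stats));
	printf("%s: %ju pkts 6to4, %ju pkts 4to6, %ju dropped\n", name,
	    (uintmax_t)stats.opcnt64, (uintmax_t)stats.opcnt46,
	    (uintmax_t)stats.dropped);
	return (0);
}

The other HDIR_GET handlers registered above (list, states) follow the same pattern; the set/name pair carried in the ipfw_obj_ntlv selects the instance exactly as nat64stl_find() resolves it on the kernel side.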