Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F146328054
D6434.id16523.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
188 KB
Referenced Files
None
Subscribers
None
D6434.id16523.diff
View Options
Index: sbin/ipfw/Makefile
===================================================================
--- sbin/ipfw/Makefile
+++ sbin/ipfw/Makefile
@@ -5,6 +5,7 @@
PACKAGE=ipfw
PROG= ipfw
SRCS= ipfw2.c dummynet.c ipv6.c main.c nat.c tables.c
+SRCS+= nat64lsn.c nat64stl.c
WARNS?= 2
.if ${MK_PF} != "no"
Index: sbin/ipfw/ipfw2.h
===================================================================
--- sbin/ipfw/ipfw2.h
+++ sbin/ipfw/ipfw2.h
@@ -229,6 +229,30 @@
TOK_UNLOCK,
TOK_VLIST,
TOK_OLIST,
+ TOK_STATS,
+
+ /* NAT64 tokens */
+ TOK_NAT64STL,
+ TOK_NAT64LSN,
+ TOK_STATES,
+ TOK_CONFIG,
+ TOK_TABLE4,
+ TOK_TABLE6,
+ TOK_PREFIX,
+ TOK_PREFIX4,
+ TOK_PREFIX6,
+ TOK_AGG_LEN,
+ TOK_AGG_COUNT,
+ TOK_MAX_PORTS,
+ TOK_JMAXLEN,
+ TOK_PORT_RANGE,
+ TOK_NH_DEL_AGE,
+ TOK_PG_DEL_AGE,
+ TOK_TCP_SYN_AGE,
+ TOK_TCP_CLOSE_AGE,
+ TOK_TCP_EST_AGE,
+ TOK_UDP_AGE,
+ TOK_ICMP_AGE,
};
/*
@@ -315,6 +339,8 @@
void ipfw_zero(int ac, char *av[], int optname);
void ipfw_list(int ac, char *av[], int show_counters);
void ipfw_internal_handler(int ac, char *av[]);
+void ipfw_nat64lsn_handler(int ac, char *av[]);
+void ipfw_nat64stl_handler(int ac, char *av[]);
int ipfw_check_object_name(const char *name);
#ifdef PF
@@ -348,7 +374,10 @@
/* tables.c */
struct _ipfw_obj_ctlv;
+struct _ipfw_obj_ntlv;
int table_check_name(const char *tablename);
void ipfw_list_ta(int ac, char *av[]);
void ipfw_list_values(int ac, char *av[]);
+void table_fill_ntlv(struct _ipfw_obj_ntlv *ntlv, const char *name,
+ uint8_t set, uint16_t uidx);
Index: sbin/ipfw/ipfw2.c
===================================================================
--- sbin/ipfw/ipfw2.c
+++ sbin/ipfw/ipfw2.c
@@ -235,6 +235,8 @@
};
static struct _s_x rule_eactions[] = {
+ { "nat64lsn", TOK_NAT64LSN },
+ { "nat64stl", TOK_NAT64STL },
{ NULL, 0 } /* terminator */
};
Index: sbin/ipfw/main.c
===================================================================
--- sbin/ipfw/main.c
+++ sbin/ipfw/main.c
@@ -425,6 +425,10 @@
if (co.use_set || try_next) {
if (_substrcmp(*av, "delete") == 0)
ipfw_delete(av);
+ else if (!strncmp(*av, "nat64stl", strlen(*av)))
+ ipfw_nat64stl_handler(ac, av);
+ else if (!strncmp(*av, "nat64lsn", strlen(*av)))
+ ipfw_nat64lsn_handler(ac, av);
else if (_substrcmp(*av, "flush") == 0)
ipfw_flush(co.do_force);
else if (_substrcmp(*av, "zero") == 0)
Index: sbin/ipfw/nat64lsn.c
===================================================================
--- /dev/null
+++ sbin/ipfw/nat64lsn.c
@@ -0,0 +1,676 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "ipfw2.h"
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip_fw.h>
+#include <netinet6/ip_fw_nat64.h>
+#include <arpa/inet.h>
+
+/* Forward declarations for the helpers and per-instance callbacks below. */
+static void nat64lsn_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name,
+ uint8_t set);
+/* Callback type invoked by nat64lsn_foreach() for every listed instance. */
+typedef int (nat64lsn_cb_t)(ipfw_nat64lsn_cfg *cfg, const char *name,
+ uint8_t set);
+static int nat64lsn_foreach(nat64lsn_cb_t *f, const char *name, uint8_t set,
+ int sort);
+
+static void nat64lsn_create(const char *name, uint8_t set, int ac, char **av);
+static void nat64lsn_destroy(const char *name, uint8_t set);
+static void nat64lsn_stats(const char *name, uint8_t set);
+static int nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name,
+ uint8_t set);
+static int nat64lsn_destroy_cb(ipfw_nat64lsn_cfg *cfg, const char *name,
+ uint8_t set);
+static int nat64lsn_states_cb(ipfw_nat64lsn_cfg *cfg, const char *name,
+ uint8_t set);
+
+/*
+ * Commands accepted after "nat64lsn NAME".
+ * "list" and "show" map to the same token.
+ */
+static struct _s_x nat64cmds[] = {
+ { "create", TOK_CREATE },
+ { "destroy", TOK_DESTROY },
+ { "list", TOK_LIST },
+ { "show", TOK_LIST },
+ { "stats", TOK_STATS },
+ { NULL, 0 }
+};
+
+/*
+ * Print one reply chunk of the state listing.
+ *
+ * @buf is walked as an ipfw_obj_header, an ipfw_obj_data and then a
+ * sequence of { ipfw_nat64lsn_stg, ipfw_nat64lsn_state x stg->count }.
+ * od->head.length covers the ipfw_obj_data header plus that payload.
+ *
+ * Returns the next_idx of the last group examined, which the caller hands
+ * back to request the following chunk.  0xFF means the listing is complete;
+ * it is also returned when the reply carries no group at all, so that the
+ * caller never loops on an indeterminate value.
+ */
+static uint64_t
+nat64lsn_print_states(void *buf)
+{
+	char s[INET6_ADDRSTRLEN], a[INET_ADDRSTRLEN], f[INET_ADDRSTRLEN];
+	char sflags[4], *sf;
+	const char *proto;
+	ipfw_obj_header *oh;
+	ipfw_obj_data *od;
+	ipfw_nat64lsn_stg *stg;
+	ipfw_nat64lsn_state *ste;
+	uint64_t next_idx;
+	size_t sz;
+	int i;
+
+	oh = (ipfw_obj_header *)buf;
+	od = (ipfw_obj_data *)(oh + 1);
+	stg = (ipfw_nat64lsn_stg *)(od + 1);
+	next_idx = 0xFF;
+	/* A length shorter than the header itself would wrap the size_t. */
+	if (od->head.length < sizeof(*od))
+		return (next_idx);
+	sz = od->head.length - sizeof(*od);
+	/* Only look at a group header that fits in what is left. */
+	while (sz >= sizeof(*stg)) {
+		next_idx = stg->next_idx;
+		if (next_idx == 0xFF)
+			break;
+		sz -= sizeof(*stg);
+		if (stg->count == 0) {
+			stg++;
+			continue;
+		}
+		switch (stg->proto) {
+		case IPPROTO_TCP:
+			proto = "TCP";
+			break;
+		case IPPROTO_UDP:
+			proto = "UDP";
+			break;
+		case IPPROTO_ICMPV6:
+			proto = "ICMPv6";
+			break;
+		default:
+			/* Unknown protocols are printed numerically below. */
+			proto = NULL;
+			break;
+		}
+		inet_ntop(AF_INET6, &stg->host6, s, sizeof(s));
+		inet_ntop(AF_INET, &stg->alias4, a, sizeof(a));
+		ste = (ipfw_nat64lsn_state *)(stg + 1);
+		/* Stop early if the buffer ends before stg->count states. */
+		for (i = 0; i < stg->count && sz >= sizeof(*ste); i++) {
+			sf = sflags;
+			inet_ntop(AF_INET, &ste->daddr, f, sizeof(f));
+			if (stg->proto == IPPROTO_TCP) {
+				/* At most three letters plus NUL fit sflags. */
+				if (ste->flags & 0x02)
+					*sf++ = 'S';
+				if (ste->flags & 0x04)
+					*sf++ = 'E';
+				if (ste->flags & 0x01)
+					*sf++ = 'F';
+			}
+			*sf = '\0';
+			switch (stg->proto) {
+			case IPPROTO_TCP:
+			case IPPROTO_UDP:
+				printf("%s:%d\t%s:%d\t%s\t%s\t%d\t%s:%d\n",
+				    s, ste->sport, a, ste->aport, proto,
+				    sflags, ste->idle, f, ste->dport);
+				break;
+			case IPPROTO_ICMPV6:
+				printf("%s\t%s\t%s\t\t%d\t%s\n",
+				    s, a, proto, ste->idle, f);
+				break;
+			default:
+				printf("%s\t%s\t%d\t\t%d\t%s\n",
+				    s, a, stg->proto, ste->idle, f);
+			}
+			ste++;
+			sz -= sizeof(*ste);
+		}
+		/* The next group header follows the last state record. */
+		stg = (ipfw_nat64lsn_stg *)ste;
+	}
+	return (next_idx);
+}
+
+/*
+ * nat64lsn_foreach() callback: print every state of the instance @cfg.
+ *
+ * Instances are skipped (ESRCH) when @name is given and does not match, or
+ * when a set was selected on the command line and @cfg lives in another
+ * one.  The states are fetched in 4096-byte chunks; each request carries
+ * the index returned for the previous chunk until 0xFF is reported.
+ * Exits via err() on allocation or kernel request failure.
+ */
+static int
+nat64lsn_states_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
+{
+	ipfw_obj_header *oh;
+	ipfw_obj_data *od;
+	void *buf;
+	uint64_t next_idx;
+	size_t sz;
+
+	if (name != NULL && strcmp(cfg->name, name) != 0)
+		return (ESRCH);
+
+	/*
+	 * Filter on co.use_set like nat64lsn_show_cb() does: testing @set
+	 * itself would silently ignore an explicit "set 0".
+	 */
+	if (co.use_set != 0 && cfg->set != set)
+		return (ESRCH);
+
+	next_idx = 0;
+	sz = 4096;
+	if ((buf = calloc(1, sz)) == NULL)
+		err(EX_OSERR, NULL);
+	do {
+		oh = (ipfw_obj_header *)buf;
+		od = (ipfw_obj_data *)(oh + 1);
+		/*
+		 * Address the instance by the set it really belongs to;
+		 * @set is only meaningful when a set was requested.
+		 */
+		nat64lsn_fill_ntlv(&oh->ntlv, cfg->name, cfg->set);
+		od->head.type = IPFW_TLV_OBJDATA;
+		od->head.length = sizeof(*od) + sizeof(next_idx);
+		*((uint64_t *)(od + 1)) = next_idx;
+		if (do_get3(IP_FW_NAT64LSN_LIST_STATES, &oh->opheader, &sz))
+			err(EX_OSERR, "Error reading nat64lsn states");
+		/* do_get3() takes &sz, so restore the real buffer size. */
+		sz = 4096;
+		next_idx = nat64lsn_print_states(buf);
+		memset(buf, 0, sz);
+	} while (next_idx != 0xFF);
+
+	free(buf);
+	return (0);
+}
+
+/* Optional keyword following "list"/"show". */
+static struct _s_x nat64listcmds[] = {
+	{ "states",	TOK_STATES },
+	{ "config",	TOK_CONFIG },
+	{ NULL, 0 }
+};
+
+/*
+ * Handles "nat64lsn {NAME | all} {list | show} [config | states]".
+ * Without a keyword the configuration is shown, exactly as for "config".
+ * Instances are always visited in sorted order.
+ */
+static void
+ipfw_nat64lsn_list_handler(const char *name, uint8_t set, int ac, char *av[])
+{
+	nat64lsn_cb_t *handler;
+
+	handler = nat64lsn_show_cb;
+	if (ac != 0) {
+		NEED1("nat64lsn list needs command");
+		switch (get_token(nat64listcmds, *av,
+		    "nat64lsn list command")) {
+		case TOK_STATES:
+			handler = nat64lsn_states_cb;
+			break;
+		case TOK_CONFIG:
+			break;
+		default:
+			/* Any other token: nothing to list. */
+			return;
+		}
+	}
+	nat64lsn_foreach(handler, name, set, 1);
+}
+
+/*
+ * This one handles all nat64lsn-related commands
+ * ipfw [set N] nat64lsn NAME create ...
+ * ipfw [set N] nat64lsn NAME stats
+ * ipfw [set N] nat64lsn {NAME | all} destroy
+ * ipfw [set N] nat64lsn {NAME | all} {list | show} [config | states]
+ *
+ * Only the commands listed in nat64cmds are recognized.
+ * av[0] is skipped; the instance name and the command follow it.
+ */
+#define nat64lsn_check_name table_check_name
+void
+ipfw_nat64lsn_handler(int ac, char *av[])
+{
+ const char *name;
+ int tcmd;
+ uint8_t set;
+
+ /* A non-zero co.use_set is mapped to set number co.use_set - 1. */
+ if (co.use_set != 0)
+ set = co.use_set - 1;
+ else
+ set = 0;
+ ac--; av++;
+
+ NEED1("nat64lsn needs instance name");
+ name = *av;
+ /*
+ * "all" is honoured only when the name check rejects it; a NULL
+ * name then stands for every instance.
+ */
+ if (nat64lsn_check_name(name) != 0) {
+ if (strcmp(name, "all") == 0)
+ name = NULL;
+ else
+ errx(EX_USAGE, "nat64lsn instance name %s is invalid",
+ name);
+ }
+ ac--; av++;
+ NEED1("nat64lsn needs command");
+
+ tcmd = get_token(nat64cmds, *av, "nat64lsn command");
+ /* Only destroy and list/show can operate on "all". */
+ if (name == NULL && tcmd != TOK_DESTROY && tcmd != TOK_LIST)
+ errx(EX_USAGE, "nat64lsn instance name required");
+ switch (tcmd) {
+ case TOK_CREATE:
+ ac--; av++;
+ nat64lsn_create(name, set, ac, av);
+ break;
+ case TOK_LIST:
+ ac--; av++;
+ ipfw_nat64lsn_list_handler(name, set, ac, av);
+ break;
+ case TOK_DESTROY:
+ /* The destroy-all walk is done unsorted. */
+ if (name == NULL)
+ nat64lsn_foreach(nat64lsn_destroy_cb, NULL, set, 0);
+ else
+ nat64lsn_destroy(name, set);
+ break;
+ case TOK_STATS:
+ nat64lsn_stats(name, set);
+ }
+}
+
+/*
+ * Initialize @ntlv so that it names the instance @name in set @set.
+ * @name is copied with truncation to the size of ntlv->name.
+ */
+static void
+nat64lsn_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint8_t set)
+{
+
+	strlcpy(ntlv->name, name, sizeof(ntlv->name));
+	ntlv->set = set;
+	ntlv->idx = 1;
+	ntlv->head.length = sizeof(*ntlv);
+	/* it doesn't matter */
+	ntlv->head.type = IPFW_TLV_EACTION_NAME(1);
+}
+
+/*
+ * Clear the host bits of @prefix in place, keeping the leading @plen bits.
+ * @prefix points to a struct in_addr for AF_INET and to a struct in6_addr
+ * for AF_INET6; any other @af leaves it untouched.
+ */
+static void
+nat64lsn_apply_mask(int af, void *prefix, uint16_t plen)
+{
+	struct in6_addr mask6, *p6;
+	struct in_addr mask4, *p4;
+
+	if (af == AF_INET) {
+		p4 = (struct in_addr *)prefix;
+		/*
+		 * Build the mask with unsigned arithmetic and treat the
+		 * edge lengths explicitly: shifting by 32 (plen 0) is
+		 * undefined and "1 << 31" (plen 1) overflows a signed int.
+		 */
+		if (plen == 0)
+			mask4.s_addr = 0;
+		else if (plen >= 32)
+			mask4.s_addr = htonl(0xffffffffU);
+		else
+			mask4.s_addr = htonl(~((1U << (32 - plen)) - 1));
+		p4->s_addr &= mask4.s_addr;
+	} else if (af == AF_INET6) {
+		p6 = (struct in6_addr *)prefix;
+		n2mask(&mask6, plen);
+		APPLY_MASK(p6, &mask6);
+	}
+}
+
+/*
+ * Parse "ADDRESS/LENGTH" from @arg.
+ *
+ * On success the address with its host bits cleared is stored in @prefix
+ * (struct in_addr or struct in6_addr according to @af) and the length in
+ * @plen.  The length must be 1..32 for AF_INET and 1..96 for AF_INET6.
+ * Any malformed input terminates the program with EX_USAGE.
+ */
+static void
+nat64lsn_parse_prefix(const char *arg, int af, void *prefix, uint16_t *plen)
+{
+	char *p, *l, *end;
+	long len;
+
+	p = strdup(arg);
+	if (p == NULL)
+		err(EX_OSERR, NULL);
+	if ((l = strchr(p, '/')) == NULL)
+		errx(EX_USAGE, "Prefix length required");
+	*l++ = '\0';
+	if (inet_pton(af, p, prefix) != 1)
+		errx(EX_USAGE, "Bad prefix: %s", p);
+	/*
+	 * Range-check the full strtol() result before narrowing it, so
+	 * that values such as 65560 or -65512 are not accepted after
+	 * wrapping into a valid uint16_t.
+	 */
+	len = strtol(l, &end, 10);
+	if (end == l || *end != '\0' || len <= 0 ||
+	    (af == AF_INET && len > 32) ||
+	    (af == AF_INET6 && len > 96))
+		errx(EX_USAGE, "Bad prefix length: %s", arg);
+	*plen = (uint16_t)len;
+	nat64lsn_apply_mask(af, prefix, *plen);
+	free(p);
+}
+
+/*
+ * Parse @arg as a decimal number that fits a uint32_t.
+ * @desc names the option in the error message.  Empty strings, trailing
+ * garbage, negative numbers and values above 0xffffffff terminate the
+ * program with EX_USAGE.
+ */
+static uint32_t
+nat64lsn_parse_int(const char *arg, const char *desc)
+{
+	char *p;
+	long long val;
+
+	errno = 0;
+	val = strtoll(arg, &p, 10);
+	/* errx() appends the newline itself. */
+	if (p == arg || *p != '\0' || errno == ERANGE ||
+	    val < 0 || val > 0xffffffffLL)
+		errx(EX_USAGE, "Invalid %s value: %s", desc, arg);
+	return ((uint32_t)val);
+}
+
+/* Options accepted by "nat64lsn NAME create". */
+static struct _s_x nat64newcmds[] = {
+ { "prefix4", TOK_PREFIX4 },
+ { "prefix6", TOK_PREFIX6 },
+ { "agg_len", TOK_AGG_LEN },
+ { "agg_count", TOK_AGG_COUNT },
+ { "max_ports", TOK_MAX_PORTS },
+ { "jmaxlen", TOK_JMAXLEN },
+ { "port_range", TOK_PORT_RANGE },
+ { "nh_del_age", TOK_NH_DEL_AGE },
+ { "pg_del_age", TOK_PG_DEL_AGE },
+ { "tcp_syn_age", TOK_TCP_SYN_AGE },
+ { "tcp_close_age",TOK_TCP_CLOSE_AGE },
+ { "tcp_est_age", TOK_TCP_EST_AGE },
+ { "udp_age", TOK_UDP_AGE },
+ { "icmp_age", TOK_ICMP_AGE },
+ { NULL, 0 }
+};
+
+/*
+ * Creates new nat64lsn instance
+ * ipfw nat64lsn <NAME> create prefix4 <ADDR>/<LEN> [ option ... ]
+ * Recognized options are listed in nat64newcmds; only prefix4 is
+ * mandatory.  prefix6 defaults to 64:ff9b::/96 and every other field is
+ * sent as zero unless its option is given.
+ * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
+ */
+#define NAT64LSN_HAS_PREFIX4 0x01
+#define NAT64LSN_HAS_PREFIX6 0x02
+static void
+nat64lsn_create(const char *name, uint8_t set, int ac, char *av[])
+{
+ char buf[sizeof(ipfw_obj_lheader) + sizeof(ipfw_nat64lsn_cfg)];
+ ipfw_nat64lsn_cfg *cfg;
+ ipfw_obj_lheader *olh;
+ int tcmd, flags;
+ char *p, *opt;
+
+ memset(&buf, 0, sizeof(buf));
+ olh = (ipfw_obj_lheader *)buf;
+ cfg = (ipfw_nat64lsn_cfg *)(olh + 1);
+
+ /* Some reasonable defaults */
+ inet_pton(AF_INET6, "64:ff9b::", &cfg->prefix6);
+ cfg->plen6 = 96;
+ cfg->set = set;
+ flags = NAT64LSN_HAS_PREFIX6;
+ /* Each iteration consumes one option keyword and its argument. */
+ while (ac > 0) {
+ tcmd = get_token(nat64newcmds, *av, "option");
+ /* Keep the keyword: it labels parse errors for its value. */
+ opt = *av;
+ ac--; av++;
+
+ switch (tcmd) {
+ case TOK_PREFIX4:
+ NEED1("IPv4 prefix required");
+ nat64lsn_parse_prefix(*av, AF_INET, &cfg->prefix4,
+ &cfg->plen4);
+ flags |= NAT64LSN_HAS_PREFIX4;
+ ac--; av++;
+ break;
+ case TOK_PREFIX6:
+ NEED1("IPv6 prefix required");
+ nat64lsn_parse_prefix(*av, AF_INET6, &cfg->prefix6,
+ &cfg->plen6);
+ ac--; av++;
+ break;
+ case TOK_AGG_LEN:
+ NEED1("Aggregation prefix len required");
+ cfg->agg_prefix_len = nat64lsn_parse_int(*av, opt);
+ ac--; av++;
+ break;
+ case TOK_AGG_COUNT:
+ NEED1("Max per-prefix count required");
+ cfg->agg_prefix_max = nat64lsn_parse_int(*av, opt);
+ ac--; av++;
+ break;
+ case TOK_MAX_PORTS:
+ NEED1("Max per-user ports required");
+ cfg->max_ports = nat64lsn_parse_int(*av, opt);
+ ac--; av++;
+ break;
+ case TOK_JMAXLEN:
+ NEED1("job queue length required");
+ cfg->jmaxlen = nat64lsn_parse_int(*av, opt);
+ ac--; av++;
+ break;
+ case TOK_PORT_RANGE:
+ NEED1("port range x[:y] required");
+ /* Without ':' only min_port is set; max_port stays 0. */
+ if ((p = strchr(*av, ':')) == NULL)
+ cfg->min_port = (uint16_t)nat64lsn_parse_int(
+ *av, opt);
+ else {
+ /* Split the argument in place at the ':'. */
+ *p++ = '\0';
+ cfg->min_port = (uint16_t)nat64lsn_parse_int(
+ *av, opt);
+ cfg->max_port = (uint16_t)nat64lsn_parse_int(
+ p, opt);
+ }
+ ac--; av++;
+ break;
+ case TOK_NH_DEL_AGE:
+ NEED1("host delete delay required");
+ cfg->nh_delete_delay = (uint16_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
+ case TOK_PG_DEL_AGE:
+ NEED1("portgroup delete delay required");
+ cfg->pg_delete_delay = (uint16_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
+ case TOK_TCP_SYN_AGE:
+ NEED1("tcp syn age required");
+ cfg->st_syn_ttl = (uint16_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
+ case TOK_TCP_CLOSE_AGE:
+ NEED1("tcp close age required");
+ cfg->st_close_ttl = (uint16_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
+ case TOK_TCP_EST_AGE:
+ NEED1("tcp est age required");
+ cfg->st_estab_ttl = (uint16_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
+ case TOK_UDP_AGE:
+ NEED1("udp age required");
+ cfg->st_udp_ttl = (uint16_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ break;
+ case TOK_ICMP_AGE:
+ NEED1("icmp age required");
+ cfg->st_icmp_ttl = (uint16_t)nat64lsn_parse_int(
+ *av, opt);
+ ac--; av++;
+ }
+ }
+
+ /* Check validness */
+ if ((flags & NAT64LSN_HAS_PREFIX4) != NAT64LSN_HAS_PREFIX4)
+ errx(EX_USAGE, "prefix4 required");
+
+ /* The request carries exactly one configuration object. */
+ olh->count = 1;
+ olh->objsize = sizeof(*cfg);
+ olh->size = sizeof(buf);
+ strlcpy(cfg->name, name, sizeof(cfg->name));
+ if (do_set3(IP_FW_NAT64LSN_CREATE, &olh->opheader, sizeof(buf)) != 0)
+ err(EX_OSERR, "nat64lsn instance creation failed");
+}
+
+/*
+ * Destroys the nat64lsn instance @name that belongs to set @set.
+ * Request: [ ipfw_obj_header ]
+ * Exits via err() when the kernel request fails.
+ */
+static void
+nat64lsn_destroy(const char *name, uint8_t set)
+{
+	ipfw_obj_header hdr;
+
+	memset(&hdr, 0, sizeof(hdr));
+	nat64lsn_fill_ntlv(&hdr.ntlv, name, set);
+	if (do_set3(IP_FW_NAT64LSN_DESTROY, &hdr.opheader, sizeof(hdr)) == 0)
+		return;
+	err(EX_OSERR, "failed to destroy nat instance %s", name);
+}
+
+/*
+ * Get nat64lsn instance statistics.
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ] ]
+ *
+ * Copies the counters of instance @name in set @set into @stats.
+ * Returns 0 on success and -1 on failure; on failure errno is left as set
+ * by the failing allocation or kernel request so callers may use err().
+ */
+static int
+nat64lsn_get_stats(const char *name, uint8_t set,
+    struct ipfw_nat64lsn_stats *stats)
+{
+	ipfw_obj_header *oh;
+	ipfw_obj_ctlv *oc;
+	size_t sz;
+	int error, serrno;
+
+	sz = sizeof(*oh) + sizeof(*oc) + sizeof(*stats);
+	oh = calloc(1, sz);
+	/* Do not hand a NULL header to nat64lsn_fill_ntlv(). */
+	if (oh == NULL)
+		return (-1);
+	nat64lsn_fill_ntlv(&oh->ntlv, name, set);
+	error = do_get3(IP_FW_NAT64LSN_STATS, &oh->opheader, &sz);
+	serrno = errno;
+	if (error == 0) {
+		oc = (ipfw_obj_ctlv *)(oh + 1);
+		memcpy(stats, oc + 1, sizeof(*stats));
+	}
+	free(oh);
+	/* free() is not required to preserve errno everywhere. */
+	errno = serrno;
+	return (error == 0 ? 0 : -1);
+}
+
+/*
+ * Print one counter as "name:<TAB>value".  The value is widened to
+ * unsigned long long so the format matches on every platform; "%lu" is
+ * wrong for 64-bit counters where long is 32 bits wide.
+ */
+#define	_P_STAT(_s, _f)	\
+	printf("%8s:\t%llu\n", #_f, (unsigned long long)(_s)._f)
+/*
+ * Fetch and print all counters of instance @name in set @set.
+ * Exits via err() when the counters cannot be retrieved.
+ */
+static void
+nat64lsn_stats(const char *name, uint8_t set)
+{
+	struct ipfw_nat64lsn_stats stats;
+
+	if (nat64lsn_get_stats(name, set, &stats) != 0)
+		err(EX_OSERR, "Error retrieving stats");
+
+	_P_STAT(stats, opcnt64);
+	_P_STAT(stats, opcnt46);
+	_P_STAT(stats, ofrags);
+	_P_STAT(stats, ifrags);
+	_P_STAT(stats, oerrors);
+	_P_STAT(stats, noroute4);
+	_P_STAT(stats, noroute6);
+	_P_STAT(stats, noproto);
+	_P_STAT(stats, nomem);
+	_P_STAT(stats, dropped);
+
+	_P_STAT(stats, hostcount);
+	_P_STAT(stats, tcpchunks);
+	_P_STAT(stats, udpchunks);
+	_P_STAT(stats, icmpchunks);
+	_P_STAT(stats, jcalls);
+	_P_STAT(stats, jrequests);
+	_P_STAT(stats, jhostsreq);
+	_P_STAT(stats, jportreq);
+	_P_STAT(stats, jhostfails);
+	_P_STAT(stats, jportfails);
+	_P_STAT(stats, jreinjected);
+	_P_STAT(stats, jmaxlen);
+	_P_STAT(stats, jnomem);
+	_P_STAT(stats, screated);
+	_P_STAT(stats, sdeleted);
+	_P_STAT(stats, spgcreated);
+	_P_STAT(stats, spgdeleted);
+}
+
+/*
+ * nat64lsn_foreach() callback: print the configuration of @cfg on one
+ * line, using the same keywords that "create" accepts.
+ *
+ * Instances are skipped (ESRCH) when @name is given and does not match, or
+ * when a set was selected on the command line and @cfg lives in another
+ * one.  The "set N" label is printed when a set was selected or the
+ * instance is not in set 0.
+ */
+static int
+nat64lsn_show_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
+{
+	char abuf[INET6_ADDRSTRLEN];
+
+	if (name != NULL && strcmp(cfg->name, name) != 0)
+		return (ESRCH);
+
+	if (co.use_set != 0 && cfg->set != set)
+		return (ESRCH);
+
+	if (co.use_set != 0 || cfg->set != 0)
+		printf("set %u ", cfg->set);
+	inet_ntop(AF_INET, &cfg->prefix4, abuf, sizeof(abuf));
+	printf("nat64lsn %s prefix4 %s/%u ", cfg->name, abuf, cfg->plen4);
+	inet_ntop(AF_INET6, &cfg->prefix6, abuf, sizeof(abuf));
+	/* Trailing space keeps the length apart from the next keyword. */
+	printf("prefix6 %s/%u ", abuf, cfg->plen6);
+
+	printf("agg_len %u agg_count %u ", cfg->agg_prefix_len,
+	    cfg->agg_prefix_max);
+	printf("max_ports %u jmaxlen %u ", cfg->max_ports, cfg->jmaxlen);
+	printf("port_range %u:%u ", cfg->min_port, cfg->max_port);
+	printf("nh_del_age %u pg_del_age %u ", cfg->nh_delete_delay,
+	    cfg->pg_delete_delay);
+	printf("tcp_syn_age %u tcp_close_age %u ", cfg->st_syn_ttl,
+	    cfg->st_close_ttl);
+	printf("tcp_est_age %u udp_age %u icmp_age %u\n", cfg->st_estab_ttl,
+	    cfg->st_udp_ttl, cfg->st_icmp_ttl);
+	return (0);
+}
+
+/*
+ * nat64lsn_foreach() callback: destroy the instance @cfg.
+ * When a set was selected on the command line, instances of other sets
+ * are left alone and ESRCH is returned.  @name is not consulted.
+ */
+static int
+nat64lsn_destroy_cb(ipfw_nat64lsn_cfg *cfg, const char *name, uint8_t set)
+{
+
+	if (co.use_set == 0 || cfg->set == set) {
+		nat64lsn_destroy(cfg->name, cfg->set);
+		return (0);
+	}
+	return (ESRCH);
+}
+
+
+/*
+ * qsort() comparator for nat64lsn instances: order by set first and by
+ * name second, leaving the name comparison to stringnum_cmp().
+ */
+static int
+nat64name_cmp(const void *a, const void *b)
+{
+	ipfw_nat64lsn_cfg *lhs, *rhs;
+
+	lhs = (ipfw_nat64lsn_cfg *)a;
+	rhs = (ipfw_nat64lsn_cfg *)b;
+
+	if (lhs->set != rhs->set)
+		return (lhs->set > rhs->set ? 1 : -1);
+	return (stringnum_cmp(lhs->name, rhs->name));
+}
+
+/*
+ * Retrieves nat64lsn instance list from kernel,
+ * optionally sorts it and calls requested function for each instance.
+ *
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
+ *
+ * @name and @set are passed through to @f unchanged.  Returns 0 on
+ * success, ENOMEM when the reply buffer cannot be allocated, or the errno
+ * of a failed kernel request.  Values returned by @f are ignored.
+ */
+static int
+nat64lsn_foreach(nat64lsn_cb_t *f, const char *name, uint8_t set, int sort)
+{
+	ipfw_obj_lheader *olh;
+	ipfw_nat64lsn_cfg *cfg;
+	size_t sz;
+	int i, serrno;
+
+	/* Start with reasonable default */
+	sz = sizeof(*olh) + 16 * sizeof(ipfw_nat64lsn_cfg);
+
+	for (;;) {
+		if ((olh = calloc(1, sz)) == NULL)
+			return (ENOMEM);
+
+		olh->size = sz;
+		if (do_get3(IP_FW_NAT64LSN_LIST, &olh->opheader, &sz) == 0)
+			break;
+		/* Capture errno before free() gets a chance to change it. */
+		serrno = errno;
+		/* Retry with the size left in the header after ENOMEM. */
+		sz = olh->size;
+		free(olh);
+		if (serrno != ENOMEM)
+			return (serrno);
+	}
+
+	if (sort != 0)
+		qsort(olh + 1, olh->count, olh->objsize, nat64name_cmp);
+
+	/* Step by the reported object size, not by sizeof(*cfg). */
+	cfg = (ipfw_nat64lsn_cfg *)(olh + 1);
+	for (i = 0; i < olh->count; i++) {
+		(void)f(cfg, name, set);	/* Ignore errors for now */
+		cfg = (ipfw_nat64lsn_cfg *)((caddr_t)cfg + olh->objsize);
+	}
+	free(olh);
+	return (0);
+}
+
Index: sbin/ipfw/nat64stl.c
===================================================================
--- /dev/null
+++ sbin/ipfw/nat64stl.c
@@ -0,0 +1,416 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "ipfw2.h"
+
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip_fw.h>
+#include <netinet6/ip_fw_nat64.h>
+#include <arpa/inet.h>
+
+/* Forward declarations for the helpers and per-instance callbacks below. */
+static int nat64stl_check_prefix(struct in6_addr *prefix, int length);
+/* Callback type invoked by nat64stl_foreach() for every listed instance. */
+typedef int (nat64stl_cb_t)(ipfw_nat64stl_cfg *i, const char *name,
+ uint8_t set);
+static int nat64stl_foreach(nat64stl_cb_t *f, const char *name, uint8_t set,
+ int sort);
+
+static void nat64stl_create(const char *name, uint8_t set, int ac, char **av);
+static void nat64stl_destroy(const char *name, uint8_t set);
+static void nat64stl_stats(const char *name, uint8_t set);
+static int nat64stl_show_cb(ipfw_nat64stl_cfg *cfg, const char *name,
+ uint8_t set);
+static int nat64stl_destroy_cb(ipfw_nat64stl_cfg *cfg, const char *name,
+ uint8_t set);
+
+/*
+ * Commands accepted after "nat64stl NAME".
+ * "list" and "show" map to the same token.
+ */
+static struct _s_x nat64cmds[] = {
+ { "create", TOK_CREATE },
+ { "destroy", TOK_DESTROY },
+ { "list", TOK_LIST },
+ { "show", TOK_LIST },
+ { "stats", TOK_STATS },
+ { NULL, 0 }
+};
+
+/*
+ * First 32-bit word of the prefix 64:ff9b::/96 in network byte order, and
+ * a test that the first 96 bits of an address equal that prefix.
+ */
+#define IPV6_ADDR_INT32_WKPFX htonl(0x64ff9b)
+#define IN6_IS_ADDR_WKPFX(a) \
+ ((a)->__u6_addr.__u6_addr32[0] == IPV6_ADDR_INT32_WKPFX && \
+ (a)->__u6_addr.__u6_addr32[1] == 0 && \
+ (a)->__u6_addr.__u6_addr32[2] == 0)
+/*
+ * Validate the translation prefix of a nat64stl instance.
+ * Returns 0 when @prefix/@length is acceptable and 1 otherwise.
+ * As compiled, only 64:ff9b::/96 is accepted; the checks for other
+ * prefixes and lengths are disabled by the "#if 0" below.
+ */
+static int
+nat64stl_check_prefix(struct in6_addr *prefix, int length)
+{
+
+ if (IN6_IS_ADDR_WKPFX(prefix) && length == 96)
+ return (0);
+#if 0
+ switch (length) {
+ case 32:
+ case 40:
+ case 48:
+ case 56:
+ case 64:
+ /* Well-known prefix has 96 prefix length */
+ if (IN6_IS_ADDR_WKPFX(prefix))
+ return (1);
+ /* FALLTHROUGH */
+ case 96:
+ /* Bits 64 to 71 must be set to zero */
+ if (prefix->__u6_addr.__u6_addr8[8] != 0)
+ return (1);
+ /* XXX: looks incorrect */
+ if (IN6_IS_ADDR_MULTICAST(prefix) ||
+ IN6_IS_ADDR_UNSPECIFIED(prefix) ||
+ IN6_IS_ADDR_LOOPBACK(prefix))
+ return (1);
+ return (0);
+ }
+#endif
+ return (1);
+}
+
+/*
+ * This one handles all nat64stl-related commands
+ * ipfw [set N] nat64stl NAME create ...
+ * ipfw [set N] nat64stl NAME stats
+ * ipfw [set N] nat64stl {NAME | all} destroy
+ * ipfw [set N] nat64stl {NAME | all} {list | show}
+ *
+ * Only the commands listed in nat64cmds are recognized.
+ * av[0] is skipped; the instance name and the command follow it.
+ */
+#define nat64stl_check_name table_check_name
+void
+ipfw_nat64stl_handler(int ac, char *av[])
+{
+ const char *name;
+ int tcmd;
+ uint8_t set;
+
+ /* A non-zero co.use_set is mapped to set number co.use_set - 1. */
+ if (co.use_set != 0)
+ set = co.use_set - 1;
+ else
+ set = 0;
+ ac--; av++;
+
+ NEED1("nat64stl needs instance name");
+ name = *av;
+ /*
+ * "all" is honoured only when the name check rejects it; a NULL
+ * name then stands for every instance.
+ */
+ if (nat64stl_check_name(name) != 0) {
+ if (strcmp(name, "all") == 0)
+ name = NULL;
+ else
+ errx(EX_USAGE, "nat64stl instance name %s is invalid",
+ name);
+ }
+ ac--; av++;
+ NEED1("nat64stl needs command");
+
+ tcmd = get_token(nat64cmds, *av, "nat64stl command");
+ /* Only destroy and list/show can operate on "all". */
+ if (name == NULL && tcmd != TOK_DESTROY && tcmd != TOK_LIST)
+ errx(EX_USAGE, "nat64stl instance name required");
+ switch (tcmd) {
+ case TOK_CREATE:
+ ac--; av++;
+ nat64stl_create(name, set, ac, av);
+ break;
+ case TOK_LIST:
+ /* Listing is sorted; arguments after the command are not read. */
+ nat64stl_foreach(nat64stl_show_cb, name, set, 1);
+ break;
+ case TOK_DESTROY:
+ /* The destroy-all walk is done unsorted. */
+ if (name == NULL)
+ nat64stl_foreach(nat64stl_destroy_cb, NULL, set, 0);
+ else
+ nat64stl_destroy(name, set);
+ break;
+ case TOK_STATS:
+ nat64stl_stats(name, set);
+ }
+}
+
+
+/*
+ * Initialize @ntlv so that it names the instance @name in set @set.
+ * @name is copied with truncation to the size of ntlv->name.
+ */
+static void
+nat64stl_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint8_t set)
+{
+
+	strlcpy(ntlv->name, name, sizeof(ntlv->name));
+	ntlv->set = set;
+	ntlv->idx = 1;
+	ntlv->head.length = sizeof(*ntlv);
+	/* it doesn't matter */
+	ntlv->head.type = IPFW_TLV_EACTION_NAME(1);
+}
+
+/* Options accepted by "nat64stl NAME create". */
+static struct _s_x nat64newcmds[] = {
+	{ "table4",	TOK_TABLE4 },
+	{ "table6",	TOK_TABLE6 },
+	{ "prefix",	TOK_PREFIX },
+	{ NULL, 0 }
+};
+
+/*
+ * Creates new nat64stl instance
+ * ipfw nat64stl <NAME> create table4 <name> table6 <name> prefix <prefix>/<len>
+ * Request: [ ipfw_obj_lheader ipfw_nat64stl_cfg ]
+ *
+ * All three options are mandatory and may appear in any order.  Usage
+ * errors terminate the program with EX_USAGE, a failed kernel request
+ * with EX_OSERR.
+ */
+#define	NAT64STL_HAS_TABLE4	0x01
+#define	NAT64STL_HAS_TABLE6	0x02
+#define	NAT64STL_HAS_PREFIX	0x04
+static void
+nat64stl_create(const char *name, uint8_t set, int ac, char *av[])
+{
+	char buf[sizeof(ipfw_obj_lheader) + sizeof(ipfw_nat64stl_cfg)];
+	ipfw_nat64stl_cfg *cfg;
+	ipfw_obj_lheader *olh;
+	int tcmd, flags;
+	char *p, *end;
+	long len;
+
+	memset(buf, 0, sizeof(buf));
+	olh = (ipfw_obj_lheader *)buf;
+	cfg = (ipfw_nat64stl_cfg *)(olh + 1);
+	cfg->set = set;
+	flags = 0;
+	/* Each iteration consumes one option keyword and its argument. */
+	while (ac > 0) {
+		tcmd = get_token(nat64newcmds, *av, "option");
+		ac--; av++;
+
+		switch (tcmd) {
+		case TOK_TABLE4:
+			NEED1("table name required");
+			table_fill_ntlv(&cfg->ntlv4, *av, set, 4);
+			flags |= NAT64STL_HAS_TABLE4;
+			ac--; av++;
+			break;
+		case TOK_TABLE6:
+			NEED1("table name required");
+			table_fill_ntlv(&cfg->ntlv6, *av, set, 6);
+			flags |= NAT64STL_HAS_TABLE6;
+			ac--; av++;
+			break;
+		case TOK_PREFIX:
+			NEED1("IPv6 prefix required");
+			/*
+			 * The length is mandatory: without this check a
+			 * missing '/' would pass NULL to strtol().
+			 */
+			if ((p = strchr(*av, '/')) == NULL)
+				errx(EX_USAGE,
+				    "Prefix length required: %s", *av);
+			/* Split the argument in place at the '/'. */
+			*p++ = '\0';
+			if (inet_pton(AF_INET6, *av, &cfg->prefix) != 1)
+				errx(EX_USAGE,
+				    "Bad prefix: %s", *av);
+			len = strtol(p, &end, 10);
+			if (end == p || *end != '\0' ||
+			    len < 0 || len > 128 ||
+			    nat64stl_check_prefix(&cfg->prefix, (int)len) != 0)
+				errx(EX_USAGE,
+				    "Bad prefix length: %s", p);
+			cfg->plen = len;
+			flags |= NAT64STL_HAS_PREFIX;
+			ac--; av++;
+			break;
+		}
+	}
+
+	/* Check validness */
+	if ((flags & NAT64STL_HAS_TABLE4) != NAT64STL_HAS_TABLE4)
+		errx(EX_USAGE, "table4 required");
+	if ((flags & NAT64STL_HAS_TABLE6) != NAT64STL_HAS_TABLE6)
+		errx(EX_USAGE, "table6 required");
+	if ((flags & NAT64STL_HAS_PREFIX) != NAT64STL_HAS_PREFIX)
+		errx(EX_USAGE, "prefix required");
+
+	/* The request carries exactly one configuration object. */
+	olh->count = 1;
+	olh->objsize = sizeof(*cfg);
+	olh->size = sizeof(buf);
+	strlcpy(cfg->name, name, sizeof(cfg->name));
+	if (do_set3(IP_FW_NAT64STL_CREATE, &olh->opheader, sizeof(buf)) != 0)
+		err(EX_OSERR, "nat64stl instance creation failed");
+}
+
+/*
+ * Destroys the nat64stl instance @name that belongs to set @set.
+ * Request: [ ipfw_obj_header ]
+ * Exits via err() when the kernel request fails.
+ */
+static void
+nat64stl_destroy(const char *name, uint8_t set)
+{
+	ipfw_obj_header hdr;
+
+	memset(&hdr, 0, sizeof(hdr));
+	nat64stl_fill_ntlv(&hdr.ntlv, name, set);
+	if (do_set3(IP_FW_NAT64STL_DESTROY, &hdr.opheader, sizeof(hdr)) == 0)
+		return;
+	err(EX_OSERR, "failed to destroy nat instance %s", name);
+}
+
+/*
+ * Get nat64stl instance statistics.
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ] ]
+ *
+ * Copies the counters of instance @name in set @set into @stats.
+ * Returns 0 on success and -1 on failure; on failure errno is left as set
+ * by the failing allocation or kernel request so callers may use err().
+ */
+static int
+nat64stl_get_stats(const char *name, uint8_t set,
+    struct ipfw_nat64stl_stats *stats)
+{
+	ipfw_obj_header *oh;
+	ipfw_obj_ctlv *oc;
+	size_t sz;
+	int error, serrno;
+
+	sz = sizeof(*oh) + sizeof(*oc) + sizeof(*stats);
+	oh = calloc(1, sz);
+	/* Do not hand a NULL header to nat64stl_fill_ntlv(). */
+	if (oh == NULL)
+		return (-1);
+	nat64stl_fill_ntlv(&oh->ntlv, name, set);
+	error = do_get3(IP_FW_NAT64STL_STATS, &oh->opheader, &sz);
+	serrno = errno;
+	if (error == 0) {
+		oc = (ipfw_obj_ctlv *)(oh + 1);
+		memcpy(stats, oc + 1, sizeof(*stats));
+	}
+	free(oh);
+	/* free() is not required to preserve errno everywhere. */
+	errno = serrno;
+	return (error == 0 ? 0 : -1);
+}
+
+/*
+ * Print one counter as "name:<TAB>value".  The value is widened to
+ * unsigned long long so the format matches on every platform; "%lu" is
+ * wrong for 64-bit counters where long is 32 bits wide.
+ */
+#define	_P_STAT(_s, _f)	\
+	printf("%8s:\t%llu\n", #_f, (unsigned long long)(_s)._f)
+/*
+ * Fetch and print all counters of instance @name in set @set.
+ * Exits via err() when the counters cannot be retrieved.
+ */
+static void
+nat64stl_stats(const char *name, uint8_t set)
+{
+	struct ipfw_nat64stl_stats stats;
+
+	if (nat64stl_get_stats(name, set, &stats) != 0)
+		err(EX_OSERR, "Error retrieving stats");
+
+	_P_STAT(stats, opcnt64);
+	_P_STAT(stats, opcnt46);
+	_P_STAT(stats, ofrags);
+	_P_STAT(stats, ifrags);
+	_P_STAT(stats, oerrors);
+	_P_STAT(stats, noroute4);
+	_P_STAT(stats, noroute6);
+	_P_STAT(stats, noproto);
+	_P_STAT(stats, nomem);
+	_P_STAT(stats, dropped);
+}
+
+/*
+ * nat64stl_foreach() callback: print the configuration of @cfg on one
+ * line, using the same keywords that "create" accepts.
+ *
+ * Instances are skipped (ESRCH) when @name is given and does not match, or
+ * when a set was selected on the command line and @cfg lives in another
+ * one.  The "set N" label is printed when a set was selected or the
+ * instance is not in set 0.
+ */
+static int
+nat64stl_show_cb(ipfw_nat64stl_cfg *cfg, const char *name, uint8_t set)
+{
+	char prefix_str[INET6_ADDRSTRLEN];
+	int set_given;
+
+	set_given = (co.use_set != 0);
+	if ((name != NULL && strcmp(cfg->name, name) != 0) ||
+	    (set_given && cfg->set != set))
+		return (ESRCH);
+
+	if (set_given || cfg->set != 0)
+		printf("set %u ", cfg->set);
+	inet_ntop(AF_INET6, &cfg->prefix, prefix_str, sizeof(prefix_str));
+	printf("nat64stl %s table4 %s table6 %s prefix %s/%u\n",
+	    cfg->name, cfg->ntlv4.name, cfg->ntlv6.name,
+	    prefix_str, cfg->plen);
+	return (0);
+}
+
+/*
+ * nat64stl_foreach() callback: destroy the instance @cfg.
+ * When a set was selected on the command line, instances of other sets
+ * are left alone and ESRCH is returned.  @name is not consulted.
+ */
+static int
+nat64stl_destroy_cb(ipfw_nat64stl_cfg *cfg, const char *name, uint8_t set)
+{
+
+	if (co.use_set == 0 || cfg->set == set) {
+		nat64stl_destroy(cfg->name, cfg->set);
+		return (0);
+	}
+	return (ESRCH);
+}
+
+
+/*
+ * qsort() comparator for nat64stl instances: order by set first and by
+ * name second, leaving the name comparison to stringnum_cmp().
+ */
+static int
+nat64name_cmp(const void *a, const void *b)
+{
+	ipfw_nat64stl_cfg *lhs, *rhs;
+
+	lhs = (ipfw_nat64stl_cfg *)a;
+	rhs = (ipfw_nat64stl_cfg *)b;
+
+	if (lhs->set != rhs->set)
+		return (lhs->set > rhs->set ? 1 : -1);
+	return (stringnum_cmp(lhs->name, rhs->name));
+}
+
+/*
+ * Retrieves nat64stl instance list from kernel,
+ * optionally sorts it and calls requested function for each instance.
+ *
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64stl_cfg x N ]
+ *
+ * @name and @set are passed through to @f unchanged.  Returns 0 on
+ * success, ENOMEM when the reply buffer cannot be allocated, or the errno
+ * of a failed kernel request.  Values returned by @f are ignored.
+ */
+static int
+nat64stl_foreach(nat64stl_cb_t *f, const char *name, uint8_t set, int sort)
+{
+	ipfw_obj_lheader *olh;
+	ipfw_nat64stl_cfg *cfg;
+	size_t sz;
+	int i, serrno;
+
+	/* Start with reasonable default */
+	sz = sizeof(*olh) + 16 * sizeof(*cfg);
+	for (;;) {
+		if ((olh = calloc(1, sz)) == NULL)
+			return (ENOMEM);
+
+		olh->size = sz;
+		if (do_get3(IP_FW_NAT64STL_LIST, &olh->opheader, &sz) == 0)
+			break;
+		/* Capture errno before free() gets a chance to change it. */
+		serrno = errno;
+		/* Retry with the size left in the header after ENOMEM. */
+		sz = olh->size;
+		free(olh);
+		if (serrno != ENOMEM)
+			return (serrno);
+	}
+
+	if (sort != 0)
+		qsort(olh + 1, olh->count, olh->objsize, nat64name_cmp);
+
+	/* Step by the reported object size, not by sizeof(*cfg). */
+	cfg = (ipfw_nat64stl_cfg *)(olh + 1);
+	for (i = 0; i < olh->count; i++) {
+		(void)f(cfg, name, set);	/* Ignore errors for now */
+		cfg = (ipfw_nat64stl_cfg *)((caddr_t)cfg + olh->objsize);
+	}
+	free(olh);
+	return (0);
+}
+
Index: sbin/ipfw/tables.c
===================================================================
--- sbin/ipfw/tables.c
+++ sbin/ipfw/tables.c
@@ -53,8 +53,6 @@
static int table_swap(ipfw_obj_header *oh, char *second);
static int table_get_info(ipfw_obj_header *oh, ipfw_xtable_info *i);
static int table_show_info(ipfw_xtable_info *i, void *arg);
-static void table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name,
- uint32_t set, uint16_t uidx);
static int table_flush_one(ipfw_xtable_info *i, void *arg);
static int table_show_one(ipfw_xtable_info *i, void *arg);
@@ -280,8 +278,8 @@
}
}
-static void
-table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint32_t set,
+void
+table_fill_ntlv(ipfw_obj_ntlv *ntlv, const char *name, uint8_t set,
uint16_t uidx)
{
Index: sys/conf/NOTES
===================================================================
--- sys/conf/NOTES
+++ sys/conf/NOTES
@@ -965,6 +965,8 @@
# IPFIREWALL_NAT adds support for in kernel nat in ipfw, and it requires
# LIBALIAS.
#
+# IPFIREWALL_NAT64 adds support for in kernel nat64 in ipfw.
+#
# IPSTEALTH enables code to support stealth forwarding (i.e., forwarding
# packets without touching the TTL). This can be useful to hide firewalls
# from traceroute and similar tools.
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -3807,6 +3807,18 @@
netpfil/ipfw/ip_fw_table_value.c optional inet ipfirewall
netpfil/ipfw/ip_fw_iface.c optional inet ipfirewall
netpfil/ipfw/ip_fw_nat.c optional inet ipfirewall_nat
+netpfil/ipfw/nat64/ip_fw_nat64.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
+netpfil/ipfw/nat64/nat64lsn.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
+netpfil/ipfw/nat64/nat64lsn_control.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
+netpfil/ipfw/nat64/nat64stl.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
+netpfil/ipfw/nat64/nat64stl_control.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
+netpfil/ipfw/nat64/nat64_translate.c optional inet inet6 ipfirewall \
+ ipfirewall_nat64
netpfil/pf/if_pflog.c optional pflog pf inet
netpfil/pf/if_pfsync.c optional pfsync pf inet
netpfil/pf/pf.c optional pf inet
Index: sys/conf/options
===================================================================
--- sys/conf/options
+++ sys/conf/options
@@ -417,6 +417,7 @@
IPFIREWALL opt_ipfw.h
IPFIREWALL_DEFAULT_TO_ACCEPT opt_ipfw.h
IPFIREWALL_NAT opt_ipfw.h
+IPFIREWALL_NAT64 opt_ipfw.h
IPFIREWALL_VERBOSE opt_ipfw.h
IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h
IPSEC opt_ipsec.h
Index: sys/modules/Makefile
===================================================================
--- sys/modules/Makefile
+++ sys/modules/Makefile
@@ -166,6 +166,7 @@
${_ipfilter} \
${_ipfw} \
ipfw_nat \
+ ${_ipfw_nat64} \
${_ipmi} \
ip6_mroute_mod \
ip_mroute_mod \
@@ -452,6 +453,9 @@
_if_me= if_me
_ipdivert= ipdivert
_ipfw= ipfw
+.if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES)
+_ipfw_nat64= ipfw_nat64
+.endif
.endif
.if ${MK_IPFILTER} != "no" || defined(ALL_MODULES)
Index: sys/modules/ipfw_nat64/Makefile
===================================================================
--- /dev/null
+++ sys/modules/ipfw_nat64/Makefile
@@ -0,0 +1,11 @@
+# $FreeBSD$
+
+.PATH: ${.CURDIR}/../../netpfil/ipfw/nat64
+
+KMOD= ipfw_nat64
+SRCS= ip_fw_nat64.c nat64_translate.c
+SRCS+= nat64lsn.c nat64lsn_control.c
+SRCS+= nat64stl.c nat64stl_control.c
+SRCS+= opt_ipfw.h
+
+.include <bsd.kmod.mk>
Index: sys/netinet/ip_fw.h
===================================================================
--- sys/netinet/ip_fw.h
+++ sys/netinet/ip_fw.h
@@ -109,6 +109,19 @@
#define IP_FW_DUMP_SOPTCODES 116 /* Dump available sopts/versions */
#define IP_FW_DUMP_SRVOBJECTS 117 /* Dump existing named objects */
+#define IP_FW_NAT64STL_CREATE 130 /* Create stateless NAT64 instance */
+#define IP_FW_NAT64STL_DESTROY 131 /* Destroy stateless NAT64 instance */
+#define IP_FW_NAT64STL_CONFIG 132 /* Modify stateless NAT64 instance */
+#define IP_FW_NAT64STL_LIST 133 /* List stateless NAT64 instances */
+#define IP_FW_NAT64STL_STATS 134 /* Get NAT64STL instance statistics */
+
+#define IP_FW_NAT64LSN_CREATE 140 /* Create stateful NAT64 instance */
+#define IP_FW_NAT64LSN_DESTROY 141 /* Destroy stateful NAT64 instance */
+#define IP_FW_NAT64LSN_CONFIG 142 /* Modify stateful NAT64 instance */
+#define IP_FW_NAT64LSN_LIST 143 /* List stateful NAT64 instances */
+#define IP_FW_NAT64LSN_STATS 144 /* Get NAT64LSN instance statistics */
+#define IP_FW_NAT64LSN_LIST_STATES 145 /* Get stateful NAT64 states */
+
/*
* The kernel representation of ipfw rules is made of a list of
* 'instructions' (for all practical purposes equivalent to BPF
@@ -783,10 +796,17 @@
#define IPFW_TLV_TBLENT_LIST 8
#define IPFW_TLV_RANGE 9
#define IPFW_TLV_EACTION 10
+#define IPFW_TLV_COUNTERS 11
+#define IPFW_TLV_OBJDATA 12
#define IPFW_TLV_EACTION_BASE 1000
#define IPFW_TLV_EACTION_NAME(arg) (IPFW_TLV_EACTION_BASE + (arg))
+typedef struct _ipfw_obj_data {
+ ipfw_obj_tlv head;
+ void *data[0];
+} ipfw_obj_data;
+
/* Object name TLV */
typedef struct _ipfw_obj_ntlv {
ipfw_obj_tlv head; /* TLV header */
Index: sys/netinet6/ip_fw_nat64.h
===================================================================
--- /dev/null
+++ sys/netinet6/ip_fw_nat64.h
@@ -0,0 +1,135 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NETINET6_IP_FW_NAT64_H_
+#define _NETINET6_IP_FW_NAT64_H_
+
+struct ipfw_nat64stl_stats {
+ uint64_t opcnt64; /* number of 6to4 packets translated */
+ uint64_t opcnt46; /* number of 4to6 packets translated */
+ uint64_t ofrags; /* number of fragments generated */
+ uint64_t ifrags; /* number of fragments received */
+ uint64_t oerrors; /* number of output errors */
+ uint64_t noroute4; /* presumably: no IPv4 route found; TODO confirm */
+ uint64_t noroute6; /* presumably: no IPv6 route found; TODO confirm */
+ uint64_t noproto; /* Protocol not supported */
+ uint64_t nomem; /* mbuf allocation failed */
+ uint64_t dropped; /* dropped due to some errors */
+};
+
+struct ipfw_nat64lsn_stats {
+ uint64_t opcnt64; /* number of 6to4 packets translated */
+ uint64_t opcnt46; /* number of 4to6 packets translated */
+ uint64_t ofrags; /* number of fragments generated */
+ uint64_t ifrags; /* number of fragments received */
+ uint64_t oerrors; /* number of output errors */
+ uint64_t noroute4; /* presumably: no IPv4 route found; TODO confirm */
+ uint64_t noroute6; /* presumably: no IPv6 route found; TODO confirm */
+ uint64_t noproto; /* Protocol not supported */
+ uint64_t nomem; /* mbuf allocation failed */
+ uint64_t dropped; /* dropped due to some errors */
+
+ uint64_t nomatch4; /* No addr/port match */
+ uint64_t jcalls; /* Number of job handler calls */
+ uint64_t jrequests; /* Number of job requests */
+ uint64_t jhostsreq; /* Number of job host requests */
+ uint64_t jportreq; /* Number of portgroup requests */
+ uint64_t jhostfails; /* Number of failed host allocs */
+ uint64_t jportfails; /* Number of failed portgroup allocs */
+ uint64_t jreinjected; /* Number of packets reinjected to q */
+ uint64_t jmaxlen; /* Max queue length reached */
+ uint64_t jnomem; /* No memory to alloc queue item */
+
+ uint64_t screated; /* Number of states created */
+ uint64_t sdeleted; /* Number of states deleted */
+ uint64_t spgcreated; /* Number of portgroups created */
+ uint64_t spgdeleted; /* Number of portgroups deleted */
+ uint64_t hostcount; /* Number of hosts */
+ uint64_t tcpchunks; /* Number of TCP chunks */
+ uint64_t udpchunks; /* Number of UDP chunks */
+ uint64_t icmpchunks; /* Number of ICMP chunks */
+};
+
+typedef struct _ipfw_nat64stl_cfg {
+ char name[64]; /* NAT name */
+ ipfw_obj_ntlv ntlv6; /* object name tlv (presumably the IPv6 table; TODO confirm) */
+ ipfw_obj_ntlv ntlv4; /* object name tlv (presumably the IPv4 table; TODO confirm) */
+ struct in6_addr prefix; /* NAT64 prefix */
+ uint8_t plen; /* Prefix length */
+ uint8_t set; /* Named instance set [0..31] */
+ uint8_t spare[2];
+ uint32_t flags;
+} ipfw_nat64stl_cfg;
+
+typedef struct _ipfw_nat64lsn_cfg {
+ char name[64]; /* NAT name */
+ uint32_t flags;
+ uint32_t max_ports; /* Max ports per client */
+ uint32_t agg_prefix_len; /* Prefix length to count */
+ uint32_t agg_prefix_max; /* Max hosts per agg prefix */
+ struct in_addr prefix4; /* IPv4 prefix (presumably the alias pool; TODO confirm) */
+ uint16_t plen4; /* Prefix length (presumably of prefix4; TODO confirm) */
+ uint16_t plen6; /* Prefix length (presumably of prefix6; TODO confirm) */
+ struct in6_addr prefix6; /* NAT64 prefix */
+ uint32_t jmaxlen; /* Max jobqueue length */
+ uint16_t min_port; /* Min port group # to use */
+ uint16_t max_port; /* Max port group # to use */
+ uint16_t nh_delete_delay; /* Stale host delete delay */
+ uint16_t pg_delete_delay; /* Stale portgroup del delay */
+ uint16_t st_syn_ttl; /* TCP syn expire */
+ uint16_t st_close_ttl; /* TCP fin expire */
+ uint16_t st_estab_ttl; /* TCP established expire */
+ uint16_t st_udp_ttl; /* UDP expire */
+ uint16_t st_icmp_ttl; /* ICMP expire */
+ uint8_t set; /* Named instance set [0..31] */
+ uint8_t spare;
+} ipfw_nat64lsn_cfg;
+
+typedef struct _ipfw_nat64lsn_state {
+ struct in_addr daddr; /* Remote IPv4 address */
+ uint16_t dport; /* Remote destination port */
+ uint16_t aport; /* Local alias port */
+ uint16_t sport; /* Source port */
+ uint8_t flags; /* State flags */
+ uint8_t spare[3];
+ uint16_t idle; /* Last used time */
+} ipfw_nat64lsn_state;
+
+typedef struct _ipfw_nat64lsn_stg {
+ uint64_t next_idx; /* next state index */
+ struct in_addr alias4; /* IPv4 alias address */
+ uint8_t proto; /* protocol */
+ uint8_t flags;
+ uint16_t spare;
+ struct in6_addr host6; /* Bound IPv6 host */
+ uint32_t count; /* Number of states */
+ uint32_t spare2;
+} ipfw_nat64lsn_stg;
+
+#endif /* _NETINET6_IP_FW_NAT64_H_ */
+
Index: sys/netpfil/ipfw/ip_fw_private.h
===================================================================
--- sys/netpfil/ipfw/ip_fw_private.h
+++ sys/netpfil/ipfw/ip_fw_private.h
@@ -741,8 +741,12 @@
int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
uint32_t *val);
-int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl, uint16_t plen,
- void *paddr, uint32_t *val);
+int ipfw_lookup_table_extended(struct ip_fw_chain *ch, uint16_t tbl,
+ uint16_t plen, void *paddr, uint32_t *val);
+struct named_object *ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch,
+ uint16_t kidx);
+int ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx);
+void ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx);
int ipfw_init_tables(struct ip_fw_chain *ch, int first);
int ipfw_resize_tables(struct ip_fw_chain *ch, unsigned int ntables);
int ipfw_switch_tables_namespace(struct ip_fw_chain *ch, unsigned int nsets);
Index: sys/netpfil/ipfw/ip_fw_table.c
===================================================================
--- sys/netpfil/ipfw/ip_fw_table.c
+++ sys/netpfil/ipfw/ip_fw_table.c
@@ -1601,6 +1601,49 @@
return (0);
}
+struct named_object *
+ipfw_objhash_lookup_table_kidx(struct ip_fw_chain *ch, uint16_t kidx)
+{
+ /* Look @kidx up in the named-object instance that CHAIN_TO_NI(ch) yields. */
+ return (ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx));
+}
+
+int
+ipfw_ref_table(struct ip_fw_chain *ch, ipfw_obj_ntlv *ntlv, uint16_t *kidx)
+{
+	struct table_config *tc;
+	struct tid_info ti;
+	int rc;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	/* Resolve @ntlv to a table; lookup errors are returned as is. */
+	ntlv_to_ti(ntlv, &ti);
+	if ((rc = find_table_err(CHAIN_TO_NI(ch), &ti, &tc)) != 0)
+		return (rc);
+	if (tc == NULL)
+		return (ESRCH);
+
+	/* Take a reference and hand the table's kidx back via @kidx. */
+	tc_ref(tc);
+	*kidx = tc->no.kidx;
+	return (0);
+}
+
+void
+ipfw_unref_table(struct ip_fw_chain *ch, uint16_t kidx)
+{
+	struct named_object *no;
+
+	/* Drop one reference from the table object registered under @kidx. */
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	no = ipfw_objhash_lookup_kidx(CHAIN_TO_NI(ch), kidx);
+	/* The index must resolve to an existing object. */
+	KASSERT(no != NULL, ("Table with index %d not found", kidx));
+	no->refcnt--;
+}
+
/*
* Lookup an IP @addr in table @tbl.
* Stores found value in @val.
Index: sys/netpfil/ipfw/nat64/ip_fw_nat64.h
===================================================================
--- /dev/null
+++ sys/netpfil/ipfw/nat64/ip_fw_nat64.h
@@ -0,0 +1,109 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_FW_NAT64_H_
+#define _IP_FW_NAT64_H_
+
+/* Debug output, wrapped in do/while(0) so an enclosing else binds right. */
+#define	DPRINTF(fmt, ...)	do { if (nat64_debug != 0)		\
+	printf("NAT64: %s: " fmt "\n", __func__, ## __VA_ARGS__); } while (0)
+/* As DPRINTF, but prints only when nat64_debug is at least @lvl. */
+#define	DLPRINTF(lvl, fmt, ...)	do { if (nat64_debug >= (lvl))		\
+	printf("NAT64: %s: " fmt "\n", __func__, ## __VA_ARGS__); } while (0)
+#define DP_OBJ 2
+#define DP_JQ 3
+#define DP_ST 4
+extern int nat64_debug;
+
+int nat64stl_init(struct ip_fw_chain *ch, int first);
+void nat64stl_uninit(struct ip_fw_chain *ch, int last);
+int nat64lsn_init(struct ip_fw_chain *ch, int first);
+void nat64lsn_uninit(struct ip_fw_chain *ch, int last);
+
+struct ip_fw_nat64_stats {
+ counter_u64_t opcnt64; /* number of 6to4 packets translated */
+ counter_u64_t opcnt46; /* number of 4to6 packets translated */
+ counter_u64_t ofrags; /* number of fragments generated */
+ counter_u64_t ifrags; /* number of fragments received */
+ counter_u64_t oerrors; /* number of output errors */
+ counter_u64_t noroute4; /* IPv4 route lookup returned no destination */
+ counter_u64_t noroute6; /* IPv6 route lookup returned no destination */
+ counter_u64_t nomatch4; /* No addr/port match */
+ counter_u64_t noproto; /* Protocol not supported */
+ counter_u64_t nomem; /* mbufs allocation failed */
+ counter_u64_t dropped; /* number of packets silently
+ * dropped due to some errors/
+ * unsupported/etc.
+ */
+
+ counter_u64_t jrequests;
+ counter_u64_t jcalls;
+ counter_u64_t jhostsreq;
+ counter_u64_t jportreq;
+ counter_u64_t jhostfails;
+ counter_u64_t jportfails;
+ counter_u64_t jmaxlen;
+ counter_u64_t jnomem;
+ counter_u64_t jreinjected;
+
+ counter_u64_t screated;
+ counter_u64_t sdeleted;
+ counter_u64_t spgcreated;
+ counter_u64_t spgdeleted;
+};
+
+#define IPFW_NAT64_VERSION 1
+/* Fields are counter_u64_t, so slots are sized by it, not by uint64_t. */
+#define	NAT64STATS	(sizeof(struct ip_fw_nat64_stats) / sizeof(counter_u64_t))
+#define	NAT64STAT_IDX(f)	\
+	(offsetof(struct ip_fw_nat64_stats, f) / sizeof(counter_u64_t))
+typedef struct _nat64_stats_block {
+	counter_u64_t	stats[NAT64STATS];
+} nat64_stats_block;
+#define	NAT64STAT_ADD(s, f, v)	\
+	counter_u64_add((s)->stats[NAT64STAT_IDX(f)], (v))
+#define	NAT64STAT_INC(s, f)	NAT64STAT_ADD(s, f, 1)
+#define	NAT64STAT_FETCH(s, f)	counter_u64_fetch((s)->stats[NAT64STAT_IDX(f)])
+
+#define L3HDR(_ip, _t) ((_t)((u_int32_t *)(_ip) + (_ip)->ip_hl))
+#define TCP(p) ((struct tcphdr *)(p))
+#define UDP(p) ((struct udphdr *)(p))
+#define ICMP(p) ((struct icmphdr *)(p))
+#define ICMP6(p) ((struct icmp6_hdr *)(p))
+
+#define NAT64SKIP 0
+#define NAT64RETURN 1
+#define NAT64MFREE -1
+
+/* Well-known prefix 64:ff9b::/96 */
+#define IPV6_ADDR_INT32_WKPFX htonl(0x64ff9b)
+#define IN6_IS_ADDR_WKPFX(a) \
+ ((a)->s6_addr32[0] == IPV6_ADDR_INT32_WKPFX && \
+ (a)->s6_addr32[1] == 0 && (a)->s6_addr32[2] == 0)
+
+#endif
+
Index: sys/netpfil/ipfw/nat64/ip_fw_nat64.c
===================================================================
--- /dev/null
+++ sys/netpfil/ipfw/nat64/ip_fw_nat64.c
@@ -0,0 +1,124 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+
+
+int nat64_debug = 0;
+SYSCTL_DECL(_net_inet_ip_fw);
+SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, nat64_debug, CTLFLAG_RW,
+ &nat64_debug, 0, "Debug level for NAT64 module");
+
+static int
+vnet_ipfw_nat64_init(const void *arg __unused)
+{
+	struct ip_fw_chain *chain = &V_layer3_chain;
+	int is_first, rc;
+
+	/*
+	 * Initialize nat64stl first, then nat64lsn, on this vnet's chain.
+	 */
+	is_first = IS_DEFAULT_VNET(curvnet) ? 1: 0;
+	if ((rc = nat64stl_init(chain, is_first)) != 0)
+		return (rc);
+	if ((rc = nat64lsn_init(chain, is_first)) == 0)
+		return (0);
+	/* nat64lsn failed: undo the nat64stl setup before bailing out. */
+	nat64stl_uninit(chain, is_first);
+	return (rc);
+}
+
+static int
+vnet_ipfw_nat64_uninit(const void *arg __unused)
+{
+	struct ip_fw_chain *chain = &V_layer3_chain;
+	int is_last = IS_DEFAULT_VNET(curvnet) ? 1: 0;
+
+	/* Tear down nat64stl, then nat64lsn, for this vnet's chain. */
+	nat64stl_uninit(chain, is_last);
+	nat64lsn_uninit(chain, is_last);
+
+	return (0);
+}
+
+static int
+ipfw_nat64_modevent(module_t mod, int type, void *unused)
+{
+
+	/*
+	 * Load and unload need no work in this handler; any other
+	 * event type is reported as unsupported.
+	 */
+	if (type == MOD_LOAD || type == MOD_UNLOAD)
+		return (0);
+
+	return (EOPNOTSUPP);
+}
+
+static moduledata_t ipfw_nat64_mod = {
+ "ipfw_nat64",
+ ipfw_nat64_modevent,
+ 0
+};
+
+/* Define startup order. */
+#define IPFW_NAT64_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN
+#define IPFW_NAT64_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */
+#define IPFW_NAT64_MODULE_ORDER (IPFW_NAT64_MODEVENT_ORDER + 1)
+#define IPFW_NAT64_VNET_ORDER (IPFW_NAT64_MODEVENT_ORDER + 2)
+
+DECLARE_MODULE(ipfw_nat64, ipfw_nat64_mod, IPFW_NAT64_SI_SUB_FIREWALL,
+ SI_ORDER_ANY);
+MODULE_DEPEND(ipfw_nat64, ipfw, 3, 3, 3);
+MODULE_VERSION(ipfw_nat64, 1);
+
+VNET_SYSINIT(vnet_ipfw_nat64_init, IPFW_NAT64_SI_SUB_FIREWALL,
+ IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_init, NULL);
+VNET_SYSUNINIT(vnet_ipfw_nat64_uninit, IPFW_NAT64_SI_SUB_FIREWALL,
+ IPFW_NAT64_VNET_ORDER, vnet_ipfw_nat64_uninit, NULL);
Index: sys/netpfil/ipfw/nat64/nat64_translate.h
===================================================================
--- /dev/null
+++ sys/netpfil/ipfw/nat64/nat64_translate.h
@@ -0,0 +1,109 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_FW_NAT64_TRANSLATE_H_
+#define _IP_FW_NAT64_TRANSLATE_H_
+
+#ifdef RTALLOC_NOLOCK
+#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_fib_nolock((ro), 0, (fib))
+#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc_nolock((ro), (fib))
+#define FREE_ROUTE(ro)
+#else
+#define IN_LOOKUP_ROUTE(ro, fib) rtalloc_ign_fib((ro), 0, (fib))
+#define IN6_LOOKUP_ROUTE(ro, fib) in6_rtalloc((ro), (fib))
+#define FREE_ROUTE(ro) RO_RTFREE((ro))
+#endif
+
+static inline int
+nat64_check_ip6(struct in6_addr *addr)
+{
+	/*
+	 * Returns 1 for multicast, link-local and first-16-bits-zero addresses
+	 * (0000::/8 is Reserved by IETF), else 0.  XXX: should really check /8
+	 */
+	return (addr->s6_addr16[0] == 0 ||
+	    IN6_IS_ADDR_MULTICAST(addr) || IN6_IS_ADDR_LINKLOCAL(addr));
+}
+
+static inline int
+nat64_check_private_ip4(in_addr_t ia)
+{
+	/*
+	 * Returns 1 if @ia (network byte order) is a non-global IPv4 address,
+	 * which WKPFX must not represent.  XXX: not limited to NAT64_WKPFX.
+	 */
+	/* IN_PRIVATE */
+	if ((ia & htonl(0xff000000)) == htonl(0x0a000000) ||
+	    (ia & htonl(0xfff00000)) == htonl(0xac100000) ||
+	    (ia & htonl(0xffff0000)) == htonl(0xc0a80000))
+		return (1);
+	/*
+	 * RFC 5735:
+	 *  192.0.0.0/24 - reserved for IETF protocol assignments
+	 *  192.88.99.0/24 - for use as 6to4 relay anycast addresses
+	 *  198.18.0.0/15 - for use in benchmark tests
+	 *  192.0.2.0/24, 198.51.100.0/24, 203.0.113.0/24 - for use
+	 *  in documentation and example code
+	 */
+	if ((ia & htonl(0xffffff00)) == htonl(0xc0000000) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xc0586300) ||
+	    (ia & htonl(0xfffe0000)) == htonl(0xc6120000) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xc0000200) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xc6336400) ||
+	    (ia & htonl(0xffffff00)) == htonl(0xcb007100))
+		return (1);
+	return (0);
+}
+
+static inline int
+nat64_check_ip4(in_addr_t ia)
+{
+	/* Masks are built with htonl(), so @ia is in network byte order. */
+	const in_addr_t loopback = ia & htonl(0xff000000);	/* IN_LOOPBACK */
+	const in_addr_t linklocal = ia & htonl(0xffff0000);	/* IN_LINKLOCAL */
+	/* IN_MULTICAST & IN_EXPERIMENTAL */
+	const in_addr_t mcast_exp = ia & htonl(0xe0000000);
+
+	/* Returns 1 when @ia lies in any of those ranges, 0 otherwise. */
+	if (loopback == htonl(0x7f000000) || linklocal == htonl(0xa9fe0000) ||
+	    mcast_exp == htonl(0xe0000000))
+		return (1);
+	return (0);
+}
+
+#define nat64_get_ip4(_ip6) ((_ip6)->s6_addr32[3])
+#define nat64_set_ip4(_ip6, _ip4) (_ip6)->s6_addr32[3] = (_ip4)
+
+int nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
+ struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats);
+int nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats);
+int nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats);
+
+#endif
+
Index: sys/netpfil/ipfw/nat64/nat64_translate.c
===================================================================
--- /dev/null
+++ sys/netpfil/ipfw/nat64/nat64_translate.c
@@ -0,0 +1,1314 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_ipfw.h"
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/pfil.h>
+#include <net/netisr.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <machine/in_cksum.h>
+
+#ifdef IPFIREWALL_NAT64_DIRECT_OUTPUT
+static __noinline struct sockaddr* nat64_find_route4(struct route *ro,
+ in_addr_t dest, struct mbuf *m);
+static __noinline struct sockaddr* nat64_find_route6(struct route_in6 *ro,
+ struct in6_addr *dest, struct mbuf *m);
+
+static __noinline int
+nat64_output(struct ifnet *ifp, struct mbuf *m,
+    struct sockaddr *dst, struct route *ro, nat64_stats_block *stats)
+{
+	int rc;
+
+	/* Hand @m to the interface's if_output; count failures in oerrors. */
+	if ((rc = (*ifp->if_output)(ifp, m, dst, ro)) != 0)
+		NAT64STAT_INC(stats, oerrors);
+	return (rc);
+}
+
+static __noinline int
+nat64_output_one(struct mbuf *m, nat64_stats_block *stats)
+{
+	struct route_in6 ro6;
+	struct route ro4, *ro;
+	struct sockaddr *dst;
+	struct ip *ip4;
+	int error;
+
+	/*
+	 * Choose the route lookup from the IP version in the header;
+	 * anything but IPv4/IPv6 is freed and counted as dropped.
+	 */
+	ip4 = mtod(m, struct ip *);
+	if (ip4->ip_v == IPVERSION) {
+		ro = &ro4;
+		dst = nat64_find_route4(&ro4, ip4->ip_dst.s_addr, m);
+		if (dst == NULL)
+			NAT64STAT_INC(stats, noroute4);
+	} else if (ip4->ip_v == (IPV6_VERSION >> 4)) {
+		ro = (struct route *)&ro6;
+		dst = nat64_find_route6(&ro6,
+		    &((struct ip6_hdr *)ip4)->ip6_dst, m);
+		if (dst == NULL)
+			NAT64STAT_INC(stats, noroute6);
+	} else {
+		m_freem(m);
+		NAT64STAT_INC(stats, dropped);
+		return (EAFNOSUPPORT);
+	}
+
+	/* No route: release the lookup state and the packet. */
+	if (dst == NULL) {
+		FREE_ROUTE(ro);
+		m_freem(m);
+		return (EHOSTUNREACH);
+	}
+
+	/* Transmit through the route's interface, then drop the route. */
+	error = nat64_output(ro->ro_rt->rt_ifp, m, dst, ro, stats);
+	FREE_ROUTE(ro);
+	return (error);
+}
+#else /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
+static __noinline int
+nat64_output(struct ifnet *ifp, struct mbuf *m,
+    struct sockaddr *dst, struct route *ro, nat64_stats_block *stats)
+{
+	struct ip *hdr = mtod(m, struct ip *);
+	int isr, error;
+
+	/*
+	 * Requeue the packet to the netisr matching its IP version;
+	 * @ifp, @dst and @ro are not used in this variant.
+	 */
+	if (hdr->ip_v == IPVERSION)
+		isr = NETISR_IP;
+	else if (hdr->ip_v == (IPV6_VERSION >> 4))
+		isr = NETISR_IPV6;
+	else {
+		m_freem(m);
+		NAT64STAT_INC(stats, dropped);
+		return (EAFNOSUPPORT);
+	}
+	error = netisr_queue(isr, m);
+	if (error != 0)
+		NAT64STAT_INC(stats, oerrors);
+	return (error);
+}
+
+static __noinline int
+nat64_output_one(struct mbuf *m, nat64_stats_block *stats)
+{
+ /* This variant of nat64_output() never reads ifp, dst or ro. */
+ return (nat64_output(NULL, m, NULL, NULL, stats));
+}
+#endif /* !IPFIREWALL_NAT64_DIRECT_OUTPUT */
+
+
+#if 0
+void print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize);
+/* Format "src -> dst next-header" of @ip6 into @buf (at most @bufsize). */
+void
+print_ipv6_header(struct ip6_hdr *ip6, char *buf, size_t bufsize)
+{
+ char sbuf[INET6_ADDRSTRLEN], dbuf[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf));
+ inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf));
+ snprintf(buf, bufsize, "%s -> %s %d", sbuf, dbuf, ip6->ip6_nxt);
+}
+
+/* Build *@ip6 from cfg->prefix with IPv4 address @ia embedded per cfg->plen. */
+static __noinline int
+nat64_embed_ip4(struct nat64_cfg *cfg, in_addr_t ia, struct in6_addr *ip6)
+{
+ /* Returns 1, or 0 when cfg->plen is not 32, 40, 48, 56, 64 or 96. */
+ /* assume the prefix is properly filled with zeros */
+ bcopy(&cfg->prefix, ip6, sizeof(*ip6));
+ switch (cfg->plen) {
+ case 32:
+ case 96:
+ ip6->s6_addr32[cfg->plen / 32] = ia;
+ break;
+ case 40:
+ case 48:
+ case 56:
+#if BYTE_ORDER == BIG_ENDIAN
+ ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
+ (ia >> (cfg->plen % 32));
+ ip6->s6_addr32[2] = ia << (24 - cfg->plen % 32);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ip6->s6_addr32[1] = cfg->prefix.s6_addr32[1] |
+ (ia << (cfg->plen % 32));
+ ip6->s6_addr32[2] = ia >> (24 - cfg->plen % 32);
+#endif
+ break;
+ case 64:
+#if BYTE_ORDER == BIG_ENDIAN
+ ip6->s6_addr32[2] = ia >> 8;
+ ip6->s6_addr32[3] = ia << 24;
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ip6->s6_addr32[2] = ia << 8;
+ ip6->s6_addr32[3] = ia >> 24;
+#endif
+ break;
+ default:
+ return (0);
+ };
+ ip6->s6_addr8[8] = 0; /* octet 8 is cleared for every supported plen */
+ return (1);
+}
+
+static __noinline in_addr_t
+nat64_extract_ip4(struct in6_addr *ip6, int plen)
+{
+ in_addr_t ia;
+
+ /*
+ * Extract the IPv4 address embedded in @ip6 for prefix length @plen.
+ * Returns 0 for an unsupported @plen, nonzero suffix bits, or an
+ * address rejected by nat64_check_ip4()/nat64_check_private_ip4().
+ * According to RFC 6052 p2.2 the suffix bits of IPv4-embedded IPv6
+ * addresses are reserved for future extensions and SHOULD be zero.
+ */
+ switch (plen) {
+ case 32:
+ if (ip6->s6_addr32[3] != 0 || ip6->s6_addr32[2] != 0)
+ goto badip6;
+ break;
+ case 40:
+ if (ip6->s6_addr32[3] != 0 ||
+ (ip6->s6_addr32[2] & htonl(0xff00ffff)) != 0)
+ goto badip6;
+ break;
+ case 48:
+ if (ip6->s6_addr32[3] != 0 ||
+ (ip6->s6_addr32[2] & htonl(0xff0000ff)) != 0)
+ goto badip6;
+ break;
+ case 56:
+ if (ip6->s6_addr32[3] != 0 || ip6->s6_addr8[8] != 0)
+ goto badip6;
+ break;
+ case 64:
+ if (ip6->s6_addr8[8] != 0 ||
+ (ip6->s6_addr32[3] & htonl(0x00ffffff)) != 0)
+ goto badip6;
+ };
+ switch (plen) {
+ case 32:
+ case 96:
+ ia = ip6->s6_addr32[plen / 32];
+ break;
+ case 40:
+ case 48:
+ case 56:
+#if BYTE_ORDER == BIG_ENDIAN
+ ia = (ip6->s6_addr32[1] << (plen % 32)) |
+ (ip6->s6_addr32[2] >> (24 - plen % 32));
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ia = (ip6->s6_addr32[1] >> (plen % 32)) |
+ (ip6->s6_addr32[2] << (24 - plen % 32));
+#endif
+ break;
+ case 64:
+#if BYTE_ORDER == BIG_ENDIAN
+ ia = (ip6->s6_addr32[2] << 8) | (ip6->s6_addr32[3] >> 24);
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ ia = (ip6->s6_addr32[2] >> 8) | (ip6->s6_addr32[3] << 24);
+#endif
+ break;
+ default:
+ return (0);
+ };
+ if (nat64_check_ip4(ia) != 0 ||
+ nat64_check_private_ip4(ia) != 0)
+ goto badip4;
+
+ return (ia);
+badip4:
+ DPRINTF("invalid destination address: %08x", ia);
+ return (0);
+badip6:
+ DPRINTF("invalid IPv4-embedded IPv6 address");
+ return (0);
+}
+#endif
+
+/*
+ * RFC 1624 gives the following equations for an incremental checksum
+ * update:
+ *	HC' = ~(~HC + ~m + m')	-- [Eqn. 3]
+ *	HC' = HC - ~m - m'	-- [Eqn. 4]
+ * When IPv4 addresses are replaced with IPv6 ones we can treat the newly
+ * added bytes as having been zero before, and vice versa: when IPv6
+ * addresses are replaced with IPv4 ones the now unused bytes become zero.
+ * The payload length field of the pseudo header is wider, but half of it
+ * is expected to be zero.  Applying equation 4 word by word gives:
+ *	HC' = HC - (~m0 + m0')			   -- m0 is the first word
+ *	HC' = (HC - (~m0 + m0')) - (~m1 + m1')	   -- m1 is the second word
+ *	HC' = HC - ~m0 - m0' - ~m1 - m1' - ... =
+ *	    = HC - sum(~m[i] + m'[i])
+ *
+ * The value returned here is meant to be applied as follows:
+ *	IPv6 to IPv4: HC' = cksum_add(HC, result)
+ *	IPv4 to IPv6: HC' = cksum_add(HC, ~result)
+ */
+static __noinline uint16_t
+nat64_cksum_convert(struct ip6_hdr *ip6, struct ip *ip)
+{
+	const uint16_t *word, *end;
+	uint32_t acc, nsrc, ndst;
+
+	/* Complemented IPv4 addresses, added as two 16-bit halves each. */
+	nsrc = ~ip->ip_src.s_addr;
+	ndst = ~ip->ip_dst.s_addr;
+	acc = nsrc >> 16;
+	acc += nsrc & 0xffff;
+	acc += ndst >> 16;
+	acc += ndst & 0xffff;
+
+	/*
+	 * Add every 16-bit word of the two in6_addr-sized objects that
+	 * start at ip6_src (i.e. source followed by destination).
+	 */
+	word = (const uint16_t *)&ip6->ip6_src;
+	end = (const uint16_t *)(&ip6->ip6_src + 2);
+	while (word < end)
+		acc += *word++;
+
+	/* Fold the carries back into the low 16 bits. */
+	while ((acc >> 16) != 0)
+		acc = (acc >> 16) + (acc & 0xffff);
+	return (acc);
+}
+
+#if __FreeBSD_version < 1100000
+#define ip_fillid(ip) (ip)->ip_id = ip_newid()
+#endif
+/*
+ * Fill in the IPv4 header *ip for a packet that is being translated from
+ * the IPv6 header *ip6.
+ *
+ * ip6   - source IPv6 header; traffic class and hop limit are taken from it.
+ * frag  - IPv6 fragment header of the packet, or NULL if there is none.
+ * plen  - payload length that will follow the IPv4 header.
+ * proto - upper layer protocol; IPPROTO_ICMPV6 is mapped to IPPROTO_ICMP.
+ * ip    - header to fill; ip_src and ip_dst must already be set by the
+ *         caller, every other field is written here, including ip_sum.
+ */
+static __noinline void
+nat64_init_ip4hdr(const struct ip6_hdr *ip6, const struct ip6_frag *frag,
+    uint16_t plen, uint8_t proto, struct ip *ip)
+{
+
+	/* assume addresses are already initialized */
+	ip->ip_v = IPVERSION;
+	ip->ip_hl = sizeof(*ip) >> 2;
+	/* Traffic class lives in bits 20..27 of the host-order flow word. */
+	ip->ip_tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+	ip->ip_len = htons(sizeof(*ip) + plen);
+	ip->ip_ttl = ip6->ip6_hlim - IPV6_HLIMDEC;
+	ip->ip_sum = 0;
+	ip->ip_p = (proto == IPPROTO_ICMPV6) ? IPPROTO_ICMP: proto;
+	if (frag != NULL) {
+		/*
+		 * The IPv6 offset sits in the upper 13 bits of ip6f_offlg,
+		 * the IPv4 one in the lower 13 bits of ip_off; both count
+		 * 8-octet units.
+		 */
+		ip->ip_off = htons(ntohs(frag->ip6f_offlg) >> 3);
+		if (frag->ip6f_offlg & IP6F_MORE_FRAG)
+			ip->ip_off |= htons(IP_MF);
+	} else {
+		ip->ip_off = htons(IP_DF);
+	}
+	/*
+	 * Choose the IP id only after ip_off is final: ip_fillid() may look
+	 * at the DF bit (RFC 6864 atomic datagrams), and *ip is typically an
+	 * uninitialized on-stack structure before this function runs.
+	 */
+	ip_fillid(ip);
+	/* Must be last: the checksum covers all of the fields above. */
+	ip->ip_sum = in_cksum_hdr(ip);
+}
+
+#define FRAGSZ(mtu) ((mtu) - sizeof(struct ip6_hdr) - sizeof(struct ip6_frag))
+/*
+ * Prepend the IPv6 header *ip6 to the payload held in m and put the
+ * resulting packet(s) on the queue mq, fragmenting when necessary.
+ *
+ * stats  - counters to update (nomem, dropped, ifrags, ofrags).
+ * ip6    - prepared IPv6 header; ip6_nxt and ip6_plen are modified here
+ *          when a fragment header has to be inserted.
+ * mq     - output queue, filled on success.
+ * m      - payload without any IP header; it is consumed on every path
+ *          (queued, or released on failure).
+ * mtu    - MTU of the outgoing path.
+ * ip_id  - ip_id of the original IPv4 packet, network byte order.
+ * ip_off - offset and IP_MF bits of the original IPv4 packet, network byte
+ *          order; non-zero means the original packet was a fragment.
+ *
+ * Returns 0 on success, ENOMEM or ENOBUFS otherwise.  When the fragmenting
+ * path fails the queue is drained, so mq never holds a partial packet.
+ */
+static __noinline int
+nat64_fragment6(nat64_stats_block *stats, struct ip6_hdr *ip6, struct mbufq *mq,
+    struct mbuf *m, uint32_t mtu, uint16_t ip_id, uint16_t ip_off)
+{
+	struct ip6_frag ip6f;
+	struct mbuf *n;
+	uint16_t hlen, len, offset;
+	int plen;
+
+	plen = ntohs(ip6->ip6_plen);
+	hlen = sizeof(struct ip6_hdr);
+
+	/* Fragmentation isn't needed */
+	if (ip_off == 0 && plen <= mtu - hlen) {
+		M_PREPEND(m, hlen, M_NOWAIT);
+		if (m == NULL) {
+			NAT64STAT_INC(stats, nomem);
+			return (ENOMEM);
+		}
+		bcopy(ip6, mtod(m, void *), hlen);
+		if (mbufq_enqueue(mq, m) != 0) {
+			m_freem(m);
+			NAT64STAT_INC(stats, dropped);
+			return (ENOBUFS);
+		}
+		return (0);
+	}
+
+	/* Every packet emitted below carries a fragment header. */
+	hlen += sizeof(struct ip6_frag);
+	ip6f.ip6f_reserved = 0;
+	ip6f.ip6f_nxt = ip6->ip6_nxt;
+	ip6->ip6_nxt = IPPROTO_FRAGMENT;
+	if (ip_off != 0) {
+		/*
+		 * We have got an IPv4 fragment.
+		 * Use offset value and ip_id from original fragment.
+		 */
+		ip6f.ip6f_ident = htonl(ntohs(ip_id));
+		offset = (ntohs(ip_off) & IP_OFFMASK) << 3;
+		NAT64STAT_INC(stats, ifrags);
+	} else {
+		/* The packet size exceeds interface MTU */
+		ip6f.ip6f_ident = htonl(ip6_randomid());
+		offset = 0; /* First fragment*/
+	}
+	while (plen > 0 && m != NULL) {
+		n = NULL;
+		/* Fragment payload must be a multiple of 8 octets. */
+		len = FRAGSZ(mtu) & ~7;
+		if (len > plen)
+			len = plen;
+		ip6->ip6_plen = htons(len + sizeof(ip6f));
+		/*
+		 * offset is a host-order byte count that is a multiple of 8,
+		 * so its low three bits are free for the flag bits; convert
+		 * it to network order for the wire.
+		 */
+		ip6f.ip6f_offlg = htons(offset);
+		if (len < plen || (ip_off & htons(IP_MF)) != 0)
+			ip6f.ip6f_offlg |= IP6F_MORE_FRAG;
+		offset += len;
+		plen -= len;
+		if (plen > 0) {
+			/* Keep the first len bytes in m, the rest goes to n. */
+			n = m_split(m, len, M_NOWAIT);
+			if (n == NULL)
+				goto fail;
+		}
+		M_PREPEND(m, hlen, M_NOWAIT);
+		if (m == NULL)
+			goto fail;
+		bcopy(ip6, mtod(m, void *), sizeof(struct ip6_hdr));
+		bcopy(&ip6f, mtodo(m, sizeof(struct ip6_hdr)),
+		    sizeof(struct ip6_frag));
+		if (mbufq_enqueue(mq, m) != 0)
+			goto fail;
+		m = n;
+	}
+	NAT64STAT_ADD(stats, ofrags, mbufq_len(mq));
+	return (0);
+fail:
+	/* Only reached from inside the loop, so n is always initialized. */
+	if (m != NULL)
+		m_freem(m);
+	if (n != NULL)
+		m_freem(n);
+	mbufq_drain(mq);
+	NAT64STAT_INC(stats, nomem);
+	return (ENOMEM);
+}
+
+#if __FreeBSD_version < 1100000
+#define rt_expire rt_rmx.rmx_expire
+#define rt_mtu rt_rmx.rmx_mtu
+#endif
+/*
+ * Look up the IPv6 route towards dest in the FIB of mbuf m.
+ *
+ * *ro is zeroed and then filled by the lookup.  On success the address to
+ * hand to the output routine is returned: the route's gateway for
+ * RTF_GATEWAY routes, otherwise the destination stored in ro->ro_dst.
+ * NULL is returned when no usable route exists.  ro is not released here
+ * on either path.
+ */
+static __noinline struct sockaddr*
+nat64_find_route6(struct route_in6 *ro, struct in6_addr *dest, struct mbuf *m)
+{
+	struct sockaddr_in6 *sin6;
+	struct rtentry *rte;
+
+	bzero(ro, sizeof(*ro));
+	sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
+	sin6->sin6_len = sizeof(*sin6);
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_addr = *dest;
+	IN6_LOOKUP_ROUTE(ro, M_GETFIB(m));
+
+	/* The route must exist and both it and its interface must be up. */
+	rte = ro->ro_rt;
+	if (rte == NULL || (rte->rt_flags & RTF_UP) == 0)
+		return (NULL);
+	if ((rte->rt_ifp->if_flags & IFF_UP) == 0 ||
+	    (rte->rt_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return (NULL);
+
+	/* No carrier on the outgoing interface. */
+	if (rte->rt_ifp->if_link_state == LINK_STATE_DOWN)
+		return (NULL);
+
+	/* A reject route that never expires or has not expired yet. */
+	if ((rte->rt_flags & RTF_REJECT) != 0 &&
+	    (rte->rt_expire == 0 || time_uptime < rte->rt_expire))
+		return (NULL);
+
+	if ((rte->rt_flags & RTF_GATEWAY) != 0)
+		sin6 = (struct sockaddr_in6 *)rte->rt_gateway;
+	return ((struct sockaddr *)sin6);
+}
+
+/* Upper bound on the number of bytes of the offending packet that are quoted. */
+#define NAT64_ICMP6_PLEN 64
+/*
+ * Build an ICMPv6 error message of the given type/code in reply to the
+ * IPv6 packet m and pass it to nat64_output_one().
+ *
+ * Only ICMP6_DST_UNREACH, ICMP6_PACKET_TOO_BIG, ICMP6_TIME_EXCEEDED and
+ * ICMP6_PARAM_PROB are generated, and never in response to a packet whose
+ * ip6_nxt is IPPROTO_ICMPV6; in those cases m is counted as dropped.
+ * mtu is stored (in network byte order) in the 32-bit field that follows
+ * the ICMPv6 checksum.
+ *
+ * m is always consumed.
+ */
+static __noinline void
+nat64_icmp6_reflect(struct mbuf *m, uint8_t type, uint8_t code, uint32_t mtu,
+ nat64_stats_block *stats)
+{
+ struct icmp6_hdr *icmp6;
+ struct ip6_hdr *ip6, *oip6;
+ struct mbuf *n;
+ int len, plen;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (ip6->ip6_nxt == IPPROTO_ICMPV6)
+ goto freeit;
+ /*
+ if (icmp6_ratelimit(&ip6->ip6_src, type, code))
+ goto freeit;
+ */
+ switch (type) {
+ case ICMP6_DST_UNREACH:
+ case ICMP6_PACKET_TOO_BIG:
+ case ICMP6_TIME_EXCEEDED:
+ case ICMP6_PARAM_PROB:
+ break;
+ default:
+ goto freeit;
+ }
+ /* Calculate length of ICMPv6 payload */
+ len = (m->m_pkthdr.len > NAT64_ICMP6_PLEN) ? NAT64_ICMP6_PLEN:
+ m->m_pkthdr.len;
+
+ /* Create new ICMPv6 datagram */
+ plen = len + sizeof(struct icmp6_hdr);
+ n = m_get2(sizeof(struct ip6_hdr) + plen + max_hdr, M_NOWAIT,
+ MT_HEADER, M_PKTHDR);
+ if (n == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ m_freem(m);
+ return;
+ }
+ /*
+ * Move pkthdr from original mbuf. We should have initialized some
+ * fields, because we can reinject this mbuf to netisr and it will
+ * go through input path (it requires at least rcvif should be set).
+ * Also do M_ALIGN() to reduce chances of need to allocate new mbuf
+ * in the chain, when we will do M_PREPEND() or make some type of
+ * tunneling.
+ */
+ m_move_pkthdr(n, m);
+ M_ALIGN(n, sizeof(struct ip6_hdr) + plen + max_hdr);
+
+ /* m_move_pkthdr() copied the old length; set the real one. */
+ n->m_len = n->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
+ /* The reply goes back to the sender: swap source and destination. */
+ oip6 = mtod(n, struct ip6_hdr *);
+ oip6->ip6_src = ip6->ip6_dst;
+ oip6->ip6_dst = ip6->ip6_src;
+ oip6->ip6_nxt = IPPROTO_ICMPV6;
+ oip6->ip6_flow = 0;
+ oip6->ip6_vfc |= IPV6_VERSION;
+ oip6->ip6_hlim = V_ip6_defhlim;
+ oip6->ip6_plen = htons(plen);
+
+ icmp6 = mtodo(n, sizeof(struct ip6_hdr));
+ icmp6->icmp6_cksum = 0;
+ icmp6->icmp6_type = type;
+ icmp6->icmp6_code = code;
+ icmp6->icmp6_mtu = htonl(mtu);
+
+ /* Quote the first len bytes of the offending packet. */
+ m_copydata(m, 0, len, mtodo(n, sizeof(struct ip6_hdr) +
+ sizeof(struct icmp6_hdr)));
+ icmp6->icmp6_cksum = in6_cksum(n, IPPROTO_ICMPV6,
+ sizeof(struct ip6_hdr), plen);
+ m_freem(m);
+ nat64_output_one(n, stats);
+ return;
+freeit:
+ NAT64STAT_INC(stats, dropped);
+ m_freem(m);
+}
+
+/*
+ * Look up the IPv4 route towards dest (as stored in s_addr) in the FIB of
+ * mbuf m.
+ *
+ * *ro is zeroed and then filled by the lookup.  On success the address to
+ * hand to the output routine is returned: the route's gateway for
+ * RTF_GATEWAY routes, otherwise the destination stored in ro->ro_dst.
+ * NULL is returned when no usable route exists.  ro is not released here
+ * on either path.
+ */
+static __noinline struct sockaddr*
+nat64_find_route4(struct route *ro, in_addr_t dest, struct mbuf *m)
+{
+	struct sockaddr_in *sin;
+	struct rtentry *rte;
+
+	bzero(ro, sizeof(*ro));
+	sin = (struct sockaddr_in *)&ro->ro_dst;
+	sin->sin_len = sizeof(*sin);
+	sin->sin_family = AF_INET;
+	sin->sin_addr.s_addr = dest;
+	IN_LOOKUP_ROUTE(ro, M_GETFIB(m));
+
+	/* The route must exist and both it and its interface must be up. */
+	rte = ro->ro_rt;
+	if (rte == NULL || (rte->rt_flags & RTF_UP) == 0)
+		return (NULL);
+	if ((rte->rt_ifp->if_flags & IFF_UP) == 0 ||
+	    (rte->rt_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return (NULL);
+
+	/* No carrier on the outgoing interface. */
+	if (rte->rt_ifp->if_link_state == LINK_STATE_DOWN)
+		return (NULL);
+
+	/* A reject route that never expires or has not expired yet. */
+	if ((rte->rt_flags & RTF_REJECT) != 0 &&
+	    (rte->rt_expire == 0 || time_uptime < rte->rt_expire))
+		return (NULL);
+
+	if ((rte->rt_flags & RTF_GATEWAY) != 0)
+		sin = (struct sockaddr_in *)rte->rt_gateway;
+	return ((struct sockaddr *)sin);
+}
+
+/* Packets longer than this are quoted as IPv4 header + 8 bytes only. */
+#define NAT64_ICMP_PLEN 64
+/*
+ * Build an ICMPv4 error message of the given type/code in reply to the
+ * IPv4 packet m and pass it to nat64_output_one().
+ *
+ * Only ICMP_UNREACH, ICMP_TIMXCEED and ICMP_PARAMPROB are generated, and
+ * never in response to an ICMP packet or to a fragment with a non-zero
+ * offset; in those cases m is counted as dropped.  mtu is stored in the
+ * next-hop MTU field of the message.
+ *
+ * m is always consumed.
+ */
+static __noinline void
+nat64_icmp_reflect(struct mbuf *m, uint8_t type,
+    uint8_t code, uint16_t mtu, nat64_stats_block *stats)
+{
+	struct icmp *icmp;
+	struct ip *ip, *oip;
+	struct mbuf *n;
+	int len, plen;
+
+	ip = mtod(m, struct ip *);
+	/* ip_off is in network byte order, so build the mask with htons(). */
+	if (ip->ip_p == IPPROTO_ICMP ||
+	    (ip->ip_off & ~htons(IP_MF | IP_DF)) != 0)
+		goto freeit;
+	switch (type) {
+	case ICMP_UNREACH:
+	case ICMP_TIMXCEED:
+	case ICMP_PARAMPROB:
+		break;
+	default:
+		goto freeit;
+	}
+	/* Calculate length of ICMP payload */
+	len = (m->m_pkthdr.len > NAT64_ICMP_PLEN) ? (ip->ip_hl << 2) + 8:
+	    m->m_pkthdr.len;
+	/*
+	 * With a long IPv4 header (up to 60 bytes) header + 8 may exceed
+	 * the size of a packet that is only slightly longer than
+	 * NAT64_ICMP_PLEN; never ask m_copydata() for more than we have.
+	 */
+	if (len > m->m_pkthdr.len)
+		len = m->m_pkthdr.len;
+
+	/* Create new ICMPv4 datagram */
+	plen = len + sizeof(struct icmphdr) + sizeof(uint32_t);
+	n = m_get2(sizeof(struct ip) + plen + max_hdr, M_NOWAIT,
+	    MT_HEADER, M_PKTHDR);
+	if (n == NULL) {
+		NAT64STAT_INC(stats, nomem);
+		m_freem(m);
+		return;
+	}
+	m_move_pkthdr(n, m);
+	M_ALIGN(n, sizeof(struct ip) + plen + max_hdr);
+
+	/* m_move_pkthdr() copied the old length; set the real one. */
+	n->m_len = n->m_pkthdr.len = sizeof(struct ip) + plen;
+	oip = mtod(n, struct ip *);
+	oip->ip_v = IPVERSION;
+	oip->ip_hl = sizeof(struct ip) >> 2;
+	oip->ip_tos = 0;
+	oip->ip_len = htons(n->m_pkthdr.len);
+	oip->ip_ttl = V_ip_defttl;
+	oip->ip_p = IPPROTO_ICMP;
+	oip->ip_off = htons(IP_DF);
+	/*
+	 * Choose the IP id only after ip_off is set: ip_fillid() may look
+	 * at the DF bit (RFC 6864) and the new mbuf is not zeroed.
+	 */
+	ip_fillid(oip);
+	/* The reply goes back to the sender: swap source and destination. */
+	oip->ip_src = ip->ip_dst;
+	oip->ip_dst = ip->ip_src;
+	oip->ip_sum = 0;
+	oip->ip_sum = in_cksum_hdr(oip);
+
+	icmp = mtodo(n, sizeof(struct ip));
+	icmp->icmp_type = type;
+	icmp->icmp_code = code;
+	icmp->icmp_cksum = 0;
+	icmp->icmp_pmvoid = 0;
+	icmp->icmp_nextmtu = htons(mtu);
+	/* Quote the beginning of the offending packet. */
+	m_copydata(m, 0, len, mtodo(n, sizeof(struct ip) +
+	    sizeof(struct icmphdr) + sizeof(uint32_t)));
+	icmp->icmp_cksum = in_cksum_skip(n, sizeof(struct ip) + plen,
+	    sizeof(struct ip));
+	m_freem(m);
+	nat64_output_one(n, stats);
+	return;
+freeit:
+	NAT64STAT_INC(stats, dropped);
+	m_freem(m);
+}
+
+/*
+ * Translate ICMP echo request/reply into ICMPv6.
+ *
+ * The message is rewritten in place through icmp6: its type becomes the
+ * given ICMPv6 type, its identifier is replaced by id when id is non-zero,
+ * and the checksum is updated incrementally for both changes before the
+ * IPv6 pseudo header built from ip6 is folded in.  ip6->ip6_plen must
+ * already hold the final payload length.
+ */
+static void
+nat64_icmp_handle_echo(struct ip6_hdr *ip6, struct icmp6_hdr *icmp6,
+ uint16_t id, uint8_t type)
+{
+ uint16_t old;
+
+ old = *(uint16_t *)icmp6; /* save type+code in one word */
+ icmp6->icmp6_type = type;
+ /* Reflect ICMPv4 -> ICMPv6 type translation in the cksum */
+ icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
+ old, *(uint16_t *)icmp6);
+ if (id != 0) {
+ old = icmp6->icmp6_id;
+ icmp6->icmp6_id = id;
+ /* Reflect ICMP id translation in the cksum */
+ icmp6->icmp6_cksum = cksum_adjust(icmp6->icmp6_cksum,
+ old, id);
+ }
+ /* Reflect IPv6 pseudo header in the cksum */
+ icmp6->icmp6_cksum = ~in6_cksum_pseudo(ip6, ntohs(ip6->ip6_plen),
+ IPPROTO_ICMPV6, ~icmp6->icmp6_cksum);
+}
+
+/*
+ * Translate the ICMPv4 message that starts at byte `offset' of m into
+ * ICMPv6.
+ *
+ * m      - packet holding the IPv4 header followed by the ICMP message.
+ * ip6    - outer IPv6 header prepared by the caller; ip6_plen is rewritten
+ *          here when an error message is rebuilt.
+ * icmpid - when non-zero, replaces the identifier of an echo message.
+ * offset - offset of the ICMP header inside m.
+ * stats  - counters to update (nomem, dropped).
+ *
+ * Echo request/reply messages are rewritten in place and m (possibly
+ * changed by m_pullup()) is returned.  For the supported error messages a
+ * new mbuf is built that holds `offset' bytes of headroom, the ICMPv6
+ * header and a translated copy of the quoted packet; m is freed and the new
+ * mbuf is returned.  On any failure m is released, a counter is bumped and
+ * NULL is returned.
+ */
+static __noinline struct mbuf *
+nat64_icmp_translate(struct mbuf *m, struct ip6_hdr *ip6, uint16_t icmpid,
+ int offset, nat64_stats_block *stats)
+{
+ struct ip ip;
+ struct icmp *icmp;
+ struct ip6_hdr *eip6;
+ struct mbuf *n;
+ uint32_t mtu;
+ int len, hlen, plen;
+ uint8_t type, code;
+
+ if (m->m_len < offset + ICMP_MINLEN)
+ m = m_pullup(m, offset + ICMP_MINLEN);
+ if (m == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ return (m);
+ }
+ mtu = 0;
+ icmp = mtodo(m, offset);
+ /* Map ICMPv4 type/code to ICMPv6; unsupported ones are dropped. */
+ switch (icmp->icmp_type) {
+ case ICMP_ECHOREPLY:
+ type = ICMP6_ECHO_REPLY;
+ code = 0;
+ break;
+ case ICMP_UNREACH:
+ type = ICMP6_DST_UNREACH;
+ switch (icmp->icmp_code) {
+ case ICMP_UNREACH_HOST:
+ code = ICMP6_DST_UNREACH_NOROUTE;
+ break;
+ case ICMP_UNREACH_PROTOCOL:
+ type = ICMP6_PARAM_PROB;
+ code = ICMP6_PARAMPROB_NEXTHEADER;
+ break;
+ case ICMP_UNREACH_PORT:
+ code = ICMP6_DST_UNREACH_NOPORT;
+ break;
+ case ICMP_UNREACH_NEEDFRAG:
+ code = 0;
+ type = ICMP6_PACKET_TOO_BIG;
+ mtu = ntohs(icmp->icmp_nextmtu);
+ break;
+ case ICMP_UNREACH_FILTER_PROHIB:
+ code = ICMP6_DST_UNREACH_ADMIN;
+ break;
+ default:
+ goto freeit;
+ }
+ break;
+ case ICMP_TIMXCEED:
+ type = ICMP6_TIME_EXCEEDED;
+ code = icmp->icmp_code;
+ break;
+ case ICMP_ECHO:
+ type = ICMP6_ECHO_REQUEST;
+ code = 0;
+ break;
+ default:
+ DPRINTF("Unsupported ICMP type %d", icmp->icmp_type);
+ goto freeit;
+ }
+ /*
+ * For echo request/reply we can use original payload,
+ * but we need adjust icmp_cksum, because ICMPv6 cksum covers
+ * IPv6 pseudo header and ICMPv6 types differs from ICMPv4.
+ */
+ if (type == ICMP6_ECHO_REQUEST || type == ICMP6_ECHO_REPLY) {
+ nat64_icmp_handle_echo(ip6, ICMP6(icmp), icmpid, type);
+ return (m);
+ }
+ /*
+ * For other types of ICMP messages we need to translate inner
+ * IPv4 header to IPv6 header.
+ * Assume ICMP src is the same as payload dst
+ * E.g. we have ( GWsrc1 , NATIP1 ) in outer header
+ * and ( NATIP1, Hostdst1 ) in ICMP copy header.
+ * In that case, we already have map for NATIP1 and GWsrc1.
+ * The only thing we need is to copy IPv6 map prefix to
+ * Hostdst1.
+ */
+ /* hlen tracks the offset of the data being parsed inside m. */
+ hlen = offset + ICMP_MINLEN;
+ if (m->m_pkthdr.len < hlen + sizeof(struct ip) + ICMP_MINLEN) {
+ DPRINTF("Message is too short %d", m->m_pkthdr.len);
+ goto freeit;
+ }
+ /* Work on a local copy of the quoted (inner) IPv4 header. */
+ m_copydata(m, hlen, sizeof(struct ip), (char *)&ip);
+ if (ip.ip_v != IPVERSION) {
+ DPRINTF("Wrong IP version %d", ip.ip_v);
+ goto freeit;
+ }
+ hlen += ip.ip_hl << 2; /* Skip inner IP header */
+ if (nat64_check_ip4(ip.ip_src.s_addr) != 0 ||
+ nat64_check_ip4(ip.ip_dst.s_addr) != 0 ||
+ nat64_check_private_ip4(ip.ip_src.s_addr) != 0 ||
+ nat64_check_private_ip4(ip.ip_dst.s_addr) != 0) {
+ DPRINTF("IP addresses checks failed %04x -> %04x",
+ ip.ip_src.s_addr, ip.ip_dst.s_addr);
+ goto freeit;
+ }
+ if (m->m_pkthdr.len < hlen + ICMP_MINLEN) {
+ DPRINTF("Message is too short %d", m->m_pkthdr.len);
+ goto freeit;
+ }
+#if 0
+ /*
+ * Check that inner source matches the outer destination.
+ * XXX: We need some method to convert IPv4 into IPv6 address here,
+ * and compare IPv6 addresses.
+ */
+ if (ip.ip_src.s_addr != nat64_get_ip4(&ip6->ip6_dst)) {
+ DPRINTF("Inner source doesn't match destination ",
+ "%04x vs %04x", ip.ip_src.s_addr,
+ nat64_get_ip4(&ip6->ip6_dst));
+ goto freeit;
+ }
+#endif
+ /*
+ * Create new mbuf for ICMPv6 datagram.
+ * NOTE: len is data length just after inner IP header.
+ */
+ len = m->m_pkthdr.len - hlen;
+ /* Cap the rebuilt ICMPv6 message at NAT64_ICMP6_PLEN bytes. */
+ if (sizeof(struct ip6_hdr) +
+ sizeof(struct icmp6_hdr) + len > NAT64_ICMP6_PLEN)
+ len = NAT64_ICMP6_PLEN - sizeof(struct icmp6_hdr) -
+ sizeof(struct ip6_hdr);
+ plen = sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr) + len;
+ n = m_get2(offset + plen + max_hdr, M_NOWAIT, MT_HEADER, M_PKTHDR);
+ if (n == NULL) {
+ NAT64STAT_INC(stats, nomem);
+ m_freem(m);
+ return (NULL);
+ }
+ m_move_pkthdr(n, m);
+ M_ALIGN(n, offset + plen + max_hdr);
+ /*
+ * The first `offset' bytes of n are left unwritten by this function;
+ * they only keep the layout the same as in m.
+ */
+ n->m_len = n->m_pkthdr.len = offset + plen;
+ /* Adjust ip6_plen in outer header */
+ ip6->ip6_plen = htons(plen);
+ /* Construct new inner IPv6 header */
+ eip6 = mtodo(n, offset + sizeof(struct icmp6_hdr));
+ eip6->ip6_src = ip6->ip6_dst;
+ /* Use the fact that we have single /96 prefix for IPv4 map */
+ eip6->ip6_dst = ip6->ip6_src;
+ nat64_set_ip4(&eip6->ip6_dst, ip.ip_dst.s_addr);
+
+ eip6->ip6_flow = htonl(ip.ip_tos << 20);
+ eip6->ip6_vfc |= IPV6_VERSION;
+ eip6->ip6_hlim = ip.ip_ttl;
+ eip6->ip6_plen = htons(ntohs(ip.ip_len) - (ip.ip_hl << 2));
+ eip6->ip6_nxt = (ip.ip_p == IPPROTO_ICMP) ? IPPROTO_ICMPV6: ip.ip_p;
+ /* Copy the quoted upper layer data right after the new header. */
+ m_copydata(m, hlen, len, (char *)(eip6 + 1));
+ /*
+ * Check if this is an ICMP error message for echo request
+ * that we sent. I.e. ULP in the data containing invoking
+ * packet is IPPROTO_ICMP and its type is ICMP_ECHO.
+ */
+ if (ip.ip_p == IPPROTO_ICMP) {
+ icmp = (struct icmp *)(eip6 + 1);
+ if (icmp->icmp_type != ICMP_ECHO) {
+ m_freem(n);
+ goto freeit;
+ }
+ /*
+ * For our client this original datagram should looks
+ * like it was ICMPv6 datagram with type ICMP6_ECHO_REQUEST.
+ * Thus we need adjust icmp_cksum and convert type from
+ * ICMP_ECHO to ICMP6_ECHO_REQUEST.
+ */
+ nat64_icmp_handle_echo(eip6, ICMP6(icmp), icmpid,
+ ICMP6_ECHO_REQUEST);
+ }
+ m_freem(m);
+ /* Convert ICMPv4 into ICMPv6 header */
+ icmp = mtodo(n, offset);
+ ICMP6(icmp)->icmp6_type = type;
+ ICMP6(icmp)->icmp6_code = code;
+ ICMP6(icmp)->icmp6_mtu = htonl(mtu);
+ ICMP6(icmp)->icmp6_cksum = 0;
+ /* Pseudo header sum plus the sum over the ICMPv6 message itself. */
+ ICMP6(icmp)->icmp6_cksum = cksum_add(
+ ~in6_cksum_pseudo(ip6, plen, IPPROTO_ICMPV6, 0),
+ in_cksum_skip(n, n->m_pkthdr.len, offset));
+ return (n);
+freeit:
+ m_freem(m);
+ NAT64STAT_INC(stats, dropped);
+ return (NULL);
+}
+
+/*
+ * Translate the IPv4 packet m into IPv6 and send it out.
+ *
+ * m     - IPv4 packet; the IPv4 header is read through mtod().
+ * saddr - IPv6 source address for the translated packet.
+ * daddr - IPv6 destination address for the translated packet.
+ * lport - when non-zero, replaces the TCP/UDP destination port; for ICMP
+ *         it is handed to nat64_icmp_translate() as the new identifier.
+ * stats - counters to update.
+ *
+ * Returns NAT64RETURN on every path where the mbuf was passed on to another
+ * routine (sent, reflected as an ICMP error or released), and NAT64MFREE
+ * for fragmented ICMP, where m is left untouched here (presumably for the
+ * caller to free -- confirm against the caller).
+ */
+int
+nat64_do_handle_ip4(struct mbuf *m, struct in6_addr *saddr,
+ struct in6_addr *daddr, uint16_t lport, nat64_stats_block *stats)
+{
+ struct route_in6 ro;
+ struct ip6_hdr ip6;
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct mbufq mq;
+ struct sockaddr *dst;
+ uint32_t mtu;
+ uint16_t ip_id, ip_off;
+ uint16_t *csum;
+ int plen, hlen;
+ uint8_t proto;
+
+ ip = mtod(m, struct ip*);
+
+ /* The TTL would reach zero after this hop. */
+ if (ip->ip_ttl <= IPTTLDEC) {
+ nat64_icmp_reflect(m, ICMP_TIMXCEED,
+ ICMP_TIMXCEED_INTRANS, 0, stats);
+ return (NAT64RETURN);
+ }
+
+ ip6.ip6_dst = *daddr;
+ ip6.ip6_src = *saddr;
+
+ hlen = ip->ip_hl << 2;
+ plen = ntohs(ip->ip_len) - hlen;
+ proto = ip->ip_p;
+
+ /* Save ip_id and ip_off, both are in network byte order */
+ ip_id = ip->ip_id;
+ ip_off = ip->ip_off & htons(IP_OFFMASK | IP_MF);
+
+ /* Fragment length must be multiple of 8 octets */
+ if ((ip->ip_off & htons(IP_MF)) != 0 && (plen & 0x7) != 0) {
+ nat64_icmp_reflect(m, ICMP_PARAMPROB,
+ ICMP_PARAMPROB_LENGTH, 0, stats);
+ return (NAT64RETURN);
+ }
+ /* Fragmented ICMP is unsupported */
+ if (proto == IPPROTO_ICMP && ip_off != 0) {
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+
+ dst = nat64_find_route6(&ro, &ip6.ip6_dst, m);
+ if (dst == NULL) {
+ FREE_ROUTE(&ro);
+ NAT64STAT_INC(stats, noroute6);
+ nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0,
+ stats);
+ return (NAT64RETURN);
+ }
+ /* Use the smaller of the route MTU (if set) and the interface MTU. */
+ ifp = ro.ro_rt->rt_ifp;
+ if (ro.ro_rt->rt_mtu != 0)
+ mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
+ else
+ mtu = ifp->if_mtu;
+ /* Too big and the sender forbids fragmentation. */
+ if (mtu < plen + sizeof(ip6) && (ip->ip_off & htons(IP_DF)) != 0) {
+ FREE_ROUTE(&ro);
+ nat64_icmp_reflect(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
+ FRAGSZ(mtu) + sizeof(struct ip), stats);
+ return (NAT64RETURN);
+ }
+
+ /* Assigning ip6_flow also initializes the bytes shared with ip6_vfc. */
+ ip6.ip6_flow = htonl(ip->ip_tos << 20);
+ ip6.ip6_vfc |= IPV6_VERSION;
+ ip6.ip6_hlim = ip->ip_ttl - IPTTLDEC;
+ ip6.ip6_plen = htons(plen);
+ ip6.ip6_nxt = (proto == IPPROTO_ICMP) ? IPPROTO_ICMPV6: proto;
+ /* Convert checksums. */
+ switch (proto) {
+ case IPPROTO_TCP:
+ csum = &TCP(mtodo(m, hlen))->th_sum;
+ if (lport != 0) {
+ struct tcphdr *tcp = TCP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, tcp->th_dport, lport);
+ tcp->th_dport = lport;
+ }
+ *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
+ break;
+ case IPPROTO_UDP:
+ csum = &UDP(mtodo(m, hlen))->uh_sum;
+ if (lport != 0) {
+ struct udphdr *udp = UDP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, udp->uh_dport, lport);
+ udp->uh_dport = lport;
+ }
+ *csum = cksum_add(*csum, ~nat64_cksum_convert(&ip6, ip));
+ break;
+ case IPPROTO_ICMP:
+ /* May return a different mbuf; ip must not be used afterwards. */
+ m = nat64_icmp_translate(m, &ip6, lport, hlen, stats);
+ if (m == NULL) {
+ FREE_ROUTE(&ro);
+ /* stats already accounted */
+ return (NAT64RETURN);
+ }
+ }
+
+ /* Strip the IPv4 header; nat64_fragment6() prepends the IPv6 one. */
+ m_adj(m, hlen);
+ mbufq_init(&mq, 255);
+ /*
+ * The return value is not checked: on failure nat64_fragment6()
+ * leaves the queue empty, so the loop below simply sends nothing.
+ */
+ nat64_fragment6(stats, &ip6, &mq, m, mtu, ip_id, ip_off);
+ while ((m = mbufq_dequeue(&mq)) != NULL) {
+ if (nat64_output(ifp, m, dst, (struct route *)&ro, stats) != 0)
+ break;
+ NAT64STAT_INC(stats, opcnt46);
+ }
+ /* Release whatever is left after an output error. */
+ mbufq_drain(&mq);
+ FREE_ROUTE(&ro);
+ return (NAT64RETURN);
+}
+
+/*
+ * Translate an ICMPv6 error message into an ICMPv4 error message and send
+ * it towards the IPv4 host that originated the offending packet.
+ *
+ * m     - packet starting with the outer IPv6 header.
+ * hlen  - offset of the ICMPv6 header inside m, or 0 to have this function
+ *         validate the addresses and skip the extension headers itself.
+ * aaddr - IPv4 alias address, used as destination of the fake inner IPv4
+ *         header.
+ * aport - alias port (currently unused here).
+ * stats - counters to update.
+ *
+ * Returns NAT64SKIP when the outer addresses do not pass
+ * nat64_check_ip6(), NAT64MFREE when the packet is dropped before the mbuf
+ * could have been changed (m is not freed here), and NAT64RETURN once the
+ * mbuf has been consumed.
+ */
+int
+nat64_handle_icmp6(struct mbuf *m, int hlen, uint32_t aaddr, uint16_t aport,
+    nat64_stats_block *stats)
+{
+	struct ip ip;
+	struct icmp6_hdr *icmp6;
+	struct ip6_frag *ip6f;
+	struct ip6_hbh *hbh;
+	struct ip6_hdr *ip6, *ip6i;
+	uint32_t mtu;
+	int plen;
+	uint8_t proto, type, code;
+
+	ip6 = mtod(m, struct ip6_hdr *);
+	if (hlen != 0)
+		goto translate;
+
+	if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
+	    nat64_check_ip6(&ip6->ip6_dst) != 0)
+		return (NAT64SKIP);
+
+	hlen = sizeof(struct ip6_hdr);
+	plen = ntohs(ip6->ip6_plen);
+	proto = ip6->ip6_nxt;
+
+	/* Skip extension headers */
+	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
+	    proto == IPPROTO_DSTOPTS) {
+		hbh = mtodo(m, hlen);
+		if (m->m_len < hlen || /* XXX: m_pullup problem */
+		    (plen == 0 && proto == IPPROTO_HOPOPTS)) {
+			/* XXX: jumbo payload option */
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+		proto = hbh->ip6h_nxt;
+		hlen += hbh->ip6h_len << 3;
+	}
+	if (proto != IPPROTO_ICMPV6) {
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+
+translate:
+	/* Translate ICMPv6 type and code to ICMPv4 */
+	icmp6 = mtodo(m, hlen);
+	mtu = 0;
+	switch (icmp6->icmp6_type) {
+	case ICMP6_DST_UNREACH:
+		type = ICMP_UNREACH;
+		switch (icmp6->icmp6_code) {
+		case ICMP6_DST_UNREACH_NOROUTE:
+		case ICMP6_DST_UNREACH_BEYONDSCOPE:
+		case ICMP6_DST_UNREACH_ADDR:
+			code = ICMP_UNREACH_HOST;
+			break;
+		case ICMP6_DST_UNREACH_ADMIN:
+			code = ICMP_UNREACH_FILTER_PROHIB;
+			break;
+		case ICMP6_DST_UNREACH_NOPORT:
+			code = ICMP_UNREACH_PORT;
+			break;
+		default:
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+		break;
+	case ICMP6_PACKET_TOO_BIG:
+		type = ICMP_UNREACH;
+		code = ICMP_UNREACH_NEEDFRAG;
+		mtu = ntohl(icmp6->icmp6_mtu);
+		if (mtu < 576) {
+			/* Account for this drop like for all the others. */
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+		mtu -= sizeof(struct ip6_hdr) + sizeof(struct ip);
+		break;
+	case ICMP6_TIME_EXCEEDED:
+		/*
+		 * Match on the message type (ICMP6_TIME_EXCEEDED), not on
+		 * the ICMP6_TIME_EXCEED_TRANSIT code value.  The code is
+		 * carried over unchanged, the same way nat64_icmp_translate()
+		 * does it in the opposite direction.
+		 */
+		type = ICMP_TIMXCEED;
+		code = icmp6->icmp6_code;
+		break;
+	case ICMP6_PARAM_PROB:
+		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER) {
+			type = ICMP_UNREACH;
+			code = ICMP_UNREACH_PROTOCOL;
+			break;
+		}
+		/* FALLTHROUGH */
+	default:
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+
+	hlen += sizeof(struct icmp6_hdr);
+	if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+	/*
+	 * We need at least ICMP_MINLEN bytes of original datagram payload
+	 * to generate ICMP message. It is nice that ICMP_MINLEN is equal
+	 * to sizeof(struct ip6_frag). So, if embedded datagram had a fragment
+	 * header we will not have to do m_pullup() again.
+	 *
+	 * What we have here:
+	 * Outer header: (IPv6iGW, v4mapPRefix+v4exthost)
+	 * Inner header: (v4mapPRefix+v4host, IPv6iHost) [sport, dport]
+	 * We need to translate it to:
+	 *
+	 * Outer header: (alias_host, v4exthost)
+	 * Inner header: (v4exthost, alias_host) [sport, alias_port]
+	 *
+	 * Assume caller function has checked if v4mapPRefix+v4host
+	 * matches configured prefix.
+	 * The only two things we should be provided with are mapping between
+	 * IPv6iHost <> alias_host and between dport and alias_port.
+	 */
+	if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
+		m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
+	if (m == NULL) {
+		NAT64STAT_INC(stats, nomem);
+		return (NAT64RETURN);
+	}
+	/* m_pullup() may have moved the data: reload the header pointers. */
+	ip6 = mtod(m, struct ip6_hdr *);
+	ip6i = mtodo(m, hlen);
+	ip6f = NULL;
+	proto = ip6i->ip6_nxt;
+	plen = ntohs(ip6i->ip6_plen);
+	hlen += sizeof(struct ip6_hdr);
+	if (proto == IPPROTO_FRAGMENT) {
+		if (m->m_pkthdr.len < hlen + sizeof(struct ip6_frag) +
+		    ICMP_MINLEN)
+			goto fail;
+		ip6f = mtodo(m, hlen);
+		proto = ip6f->ip6f_nxt;
+		plen -= sizeof(struct ip6_frag);
+		hlen += sizeof(struct ip6_frag);
+		if (mtu > 0)
+			mtu -= sizeof(struct ip6_frag);
+	}
+	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
+		goto fail;
+	if (nat64_check_ip6(&ip6i->ip6_src) != 0 ||
+	    nat64_check_ip6(&ip6i->ip6_dst) != 0)
+		goto fail;
+	/* Check if outer dst is the same as inner src */
+	if (!IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6i->ip6_src))
+		goto fail;
+
+	/* Now we need to make a fake IPv4 packet to generate ICMP message */
+	ip.ip_dst.s_addr = aaddr;
+	ip.ip_src.s_addr = nat64_get_ip4(&ip6i->ip6_src);
+	/* XXX: Make fake ulp header */
+	ip6i->ip6_hlim += IPV6_HLIMDEC; /* init_ip4hdr will decrement it */
+	nat64_init_ip4hdr(ip6i, ip6f, plen, proto, &ip);
+	/*
+	 * Trim everything in front of the quoted upper layer data except
+	 * for sizeof(struct ip) bytes, which are overwritten with the fake
+	 * IPv4 header.
+	 */
+	m_adj(m, hlen - sizeof(struct ip));
+	bcopy(&ip, mtod(m, void *), sizeof(ip));
+	nat64_icmp_reflect(m, type, code, (uint16_t)mtu, stats);
+	return (NAT64RETURN);
+fail:
+	/*
+	 * We must call m_freem() because mbuf pointer could be
+	 * changed with m_pullup().
+	 */
+	m_freem(m);
+	NAT64STAT_INC(stats, dropped);
+	return (NAT64RETURN);
+}
+
+/*
+ * Translate the IPv6 packet m into IPv4 and send it out.
+ *
+ * m     - IPv6 packet; headers are accessed through mtod()/mtodo().
+ * aaddr - IPv4 source (alias) address for the translated packet.
+ * aport - when non-zero, replaces the TCP/UDP source port or the ICMP echo
+ *         identifier.
+ * stats - counters to update.
+ *
+ * The IPv4 destination is derived from the IPv6 destination with
+ * nat64_get_ip4().  ICMPv6 messages other than echo request/reply are
+ * handed over to nat64_handle_icmp6().
+ *
+ * Returns NAT64SKIP when the addresses do not pass nat64_check_ip6(),
+ * NAT64MFREE when the packet is rejected and m was left untouched here,
+ * and NAT64RETURN when m was sent or turned into an ICMPv6 error.
+ */
+int
+nat64_do_handle_ip6(struct mbuf *m, uint32_t aaddr, uint16_t aport,
+ nat64_stats_block *stats)
+{
+ struct route ro;
+ struct ip ip;
+ struct ifnet *ifp;
+ struct ip6_frag *frag;
+ struct ip6_hbh *hbh;
+ struct ip6_hdr *ip6;
+ struct icmp6_hdr *icmp6;
+ struct sockaddr *dst;
+ uint16_t *csum;
+ uint32_t mtu;
+ int plen, hlen;
+ uint8_t proto;
+
+ /*
+ * XXX: we expect ipfw_chk() did m_pullup() up to upper level
+ * protocol's headers. Also we skip some checks, that ip6_input(),
+ * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
+ */
+ ip6 = mtod(m, struct ip6_hdr *);
+ if (nat64_check_ip6(&ip6->ip6_src) != 0 ||
+ nat64_check_ip6(&ip6->ip6_dst) != 0) {
+ return (NAT64SKIP);
+ }
+
+ /* Starting from this point we must not return zero */
+ ip.ip_src.s_addr = aaddr;
+ if (nat64_check_ip4(ip.ip_src.s_addr) != 0) {
+ DPRINTF("invalid source address: %08x",
+ ip.ip_src.s_addr);
+ /* XXX: stats? */
+ return (NAT64MFREE);
+ }
+
+ /* A zero result is treated as failure to extract the address. */
+ ip.ip_dst.s_addr = nat64_get_ip4(&ip6->ip6_dst);
+ if (ip.ip_dst.s_addr == 0) {
+ /* XXX: stats? */
+ return (NAT64MFREE);
+ }
+
+ /* The hop limit would reach zero after this hop. */
+ if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
+ nat64_icmp6_reflect(m, ICMP6_TIME_EXCEEDED,
+ ICMP6_TIME_EXCEED_TRANSIT, 0, stats);
+ return (NAT64RETURN);
+ }
+
+ hlen = sizeof(struct ip6_hdr);
+ plen = ntohs(ip6->ip6_plen);
+ proto = ip6->ip6_nxt;
+ /* Skip extension headers */
+ while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
+ proto == IPPROTO_DSTOPTS) {
+ hbh = mtodo(m, hlen);
+ if (m->m_len < hlen || /* XXX: m_pullup problem */
+ (plen == 0 && proto == IPPROTO_HOPOPTS)) {
+ /* XXX: jumbo payload option */
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ proto = hbh->ip6h_nxt;
+ hlen += hbh->ip6h_len << 3;
+ }
+ frag = NULL;
+ if (proto == IPPROTO_FRAGMENT) {
+ /* XXX: ipfw_chk should m_pullup up to frag header */
+ frag = mtodo(m, hlen);
+ proto = frag->ip6f_nxt;
+ hlen += sizeof(*frag);
+ /* Fragmented ICMPv6 is unsupported */
+ if (proto == IPPROTO_ICMPV6) {
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+ /* Fragment length must be multiple of 8 octets */
+ if ((frag->ip6f_offlg & IP6F_MORE_FRAG) != 0 &&
+ ((plen + sizeof(struct ip6_hdr) - hlen) & 0x7) != 0) {
+ nat64_icmp6_reflect(m, ICMP6_PARAM_PROB,
+ ICMP6_PARAMPROB_HEADER,
+ offsetof(struct ip6_hdr, ip6_plen), stats);
+ return (NAT64RETURN);
+ }
+ }
+ /* From here on plen is the upper layer length without ext. headers. */
+ plen -= hlen - sizeof(struct ip6_hdr);
+ if (plen < 0 || m->m_pkthdr.len < plen + hlen) {
+ DPRINTF("plen %d, pkthdr.len %d, hlen %d",
+ plen, m->m_pkthdr.len, hlen);
+ NAT64STAT_INC(stats, dropped);
+ return (NAT64MFREE);
+ }
+
+ icmp6 = NULL; /* Make gcc happy */
+ if (proto == IPPROTO_ICMPV6) {
+ icmp6 = mtodo(m, hlen);
+ /* Everything but echo request/reply is handled separately. */
+ if (icmp6->icmp6_type != ICMP6_ECHO_REQUEST &&
+ icmp6->icmp6_type != ICMP6_ECHO_REPLY)
+ return (nat64_handle_icmp6(m, hlen, aaddr, aport,
+ stats));
+ }
+ dst = nat64_find_route4(&ro, ip.ip_dst.s_addr, m);
+ if (dst == NULL) {
+ FREE_ROUTE(&ro);
+ NAT64STAT_INC(stats, noroute4);
+ nat64_icmp6_reflect(m, ICMP6_DST_UNREACH,
+ ICMP6_DST_UNREACH_NOROUTE, 0, stats);
+ return (NAT64RETURN);
+ }
+
+ /* Use the smaller of the route MTU (if set) and the interface MTU. */
+ ifp = ro.ro_rt->rt_ifp;
+ if (ro.ro_rt->rt_mtu != 0)
+ mtu = min(ro.ro_rt->rt_mtu, ifp->if_mtu);
+ else
+ mtu = ifp->if_mtu;
+ if (mtu < plen + sizeof(ip)) {
+ FREE_ROUTE(&ro);
+ nat64_icmp6_reflect(m, ICMP6_PACKET_TOO_BIG, 0, mtu, stats);
+ return (NAT64RETURN);
+ }
+ nat64_init_ip4hdr(ip6, frag, plen, proto, &ip);
+ /* Convert checksums. */
+ switch (proto) {
+ case IPPROTO_TCP:
+ csum = &TCP(mtodo(m, hlen))->th_sum;
+ if (aport != 0) {
+ struct tcphdr *tcp = TCP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, tcp->th_sport, aport);
+ tcp->th_sport = aport;
+ }
+ *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
+ break;
+ case IPPROTO_UDP:
+ csum = &UDP(mtodo(m, hlen))->uh_sum;
+ if (aport != 0) {
+ struct udphdr *udp = UDP(mtodo(m, hlen));
+ *csum = cksum_adjust(*csum, udp->uh_sport, aport);
+ udp->uh_sport = aport;
+ }
+ *csum = cksum_add(*csum, nat64_cksum_convert(ip6, &ip));
+ break;
+ case IPPROTO_ICMPV6:
+ /* Checksum in ICMPv6 covers pseudo header */
+ csum = &icmp6->icmp6_cksum;
+ *csum = cksum_add(*csum, in6_cksum_pseudo(ip6, plen,
+ IPPROTO_ICMPV6, 0));
+ /* Convert ICMPv6 types to ICMP */
+ /* NB: mtu is no longer needed and is reused as scratch space. */
+ mtu = *(uint16_t *)icmp6; /* save old word for cksum_adjust */
+ if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST)
+ icmp6->icmp6_type = ICMP_ECHO;
+ else /* ICMP6_ECHO_REPLY */
+ icmp6->icmp6_type = ICMP_ECHOREPLY;
+ *csum = cksum_adjust(*csum, (uint16_t)mtu, *(uint16_t *)icmp6);
+ if (aport != 0) {
+ uint16_t old_id = icmp6->icmp6_id;
+ icmp6->icmp6_id = aport;
+ *csum = cksum_adjust(*csum, old_id, aport);
+ }
+ break;
+ };
+
+ /*
+ * Trim the IPv6 headers down to sizeof(ip) bytes and overwrite those
+ * with the prepared IPv4 header.
+ */
+ m_adj(m, hlen - sizeof(ip));
+ bcopy(&ip, mtod(m, void *), sizeof(ip));
+ if (nat64_output(ifp, m, dst, &ro, stats) == 0)
+ NAT64STAT_INC(stats, opcnt64);
+ FREE_ROUTE(&ro);
+ return (NAT64RETURN);
+}
+
Index: sys/netpfil/ipfw/nat64/nat64lsn.h
===================================================================
--- /dev/null
+++ sys/netpfil/ipfw/nat64/nat64lsn.h
@@ -0,0 +1,341 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_FW_NAT64LSN_H_
+#define _IP_FW_NAT64LSN_H_
+
+/* Ports are grouped into chunks ("portgroups") of 2^6 = 64 ports. */
+#define NAT64_CHUNK_SIZE_BITS 6 /* 64 ports */
+#define NAT64_CHUNK_SIZE (1 << NAT64_CHUNK_SIZE_BITS)
+
+/* Lowest port considered and the index of the chunk that contains it. */
+#define NAT64_MIN_PORT 1024
+#define NAT64_MIN_CHUNK (NAT64_MIN_PORT >> NAT64_CHUNK_SIZE_BITS)
+#define DEFAULT_MAX_PORTS (4 * NAT64_CHUNK_SIZE)
+
+/*
+ * Default ageing limits; presumably the initial values of the matching
+ * nat64lsn_cfg fields, which are compared against GET_AGE() (derived from
+ * time_uptime, so presumably seconds) -- TODO confirm.
+ */
+#define DEFAULT_NH_DEL_DELAY 120
+#define DEFAULT_PG_DEL_DELAY 300
+#define DEFAULT_ST_SYN_TTL 30
+#define DEFAULT_ST_CLOSE_TTL 30
+#define DEFAULT_ST_ESTAB_TTL (4 * 3600)
+#define DEFAULT_ST_UDP_TTL 15
+#define DEFAULT_ST_ICMP_TTL 15
+
+#define DEFAULT_JMAXLEN 1024 /* Max outstanding requests in jq */
+
+/*
+ * Compact reference to a state inside a host.
+ * idx is the 1-based slot in the host's pg_ptr[] array (0 terminates a
+ * chain), off is the position of the state in that portgroup's states[].
+ */
+struct st_ptr {
+ uint8_t idx;
+ uint8_t off;
+};
+
+struct nat64lsn_portgroup;
+/*
+ * Per-IPv6-host record, laid out to occupy 256 bytes on amd64:
+ * bookkeeping, 8 portgroup pointers (8 x 64 = 512 ports) and an
+ * embedded 64-bucket state hash.
+ */
+struct nat64lsn_host {
+ struct in6_addr addr; /* Host address, key of the host hash */
+ struct nat64lsn_host *next; /* Next host in the same hash bucket */
+ struct rwlock h_lock; /* Host states lock */
+ struct st_ptr *phash; /* State hash buckets (st_hash by default) */
+ uint16_t hsize; /* ports hash size */
+ uint16_t pg_count; /* Number of portgroups used */
+ uint16_t timestamp; /* Last altered */
+ uint16_t spare;
+ /* 64 bytes on amd64 */
+ struct nat64lsn_portgroup *pg_ptr[8];/* portgroup pointers, 512 ports */
+ //uintptr_t pg_idx[8];/* portgroup indices, 512 ports*/
+ /* 128 bytes on amd64 */
+ struct st_ptr st_hash[64]; /* Heads of the per-bucket state chains */
+};
+
+/*
+ * Wrappers around the per-host rwlock (h_lock).  NAT64_LOCK/NAT64_UNLOCK
+ * are aliases for the write-lock variants.
+ */
+#define NAT64_RLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_RLOCKED)
+#define NAT64_WLOCK_ASSERT(h) rw_assert(&(h)->h_lock, RA_WLOCKED)
+
+#define NAT64_RLOCK(h) rw_rlock(&(h)->h_lock)
+#define NAT64_RUNLOCK(h) rw_runlock(&(h)->h_lock)
+#define NAT64_WLOCK(h) rw_wlock(&(h)->h_lock)
+#define NAT64_WUNLOCK(h) rw_wunlock(&(h)->h_lock)
+#define NAT64_LOCK(h) NAT64_WLOCK(h)
+#define NAT64_UNLOCK(h) NAT64_WUNLOCK(h)
+#define NAT64_LOCK_INIT(h) do { \
+ rw_init(&(h)->h_lock, "NAT64 host lock"); \
+ } while (0)
+
+#define NAT64_LOCK_DESTROY(h) do { \
+ rw_destroy(&(h)->h_lock); \
+ } while (0)
+
+/*
+ * Internal proto index.  Value 0 is not assigned: a zero result from the
+ * protocol map lookup is treated as "protocol not supported".
+ */
+#define NAT_PROTO_TCP 1
+#define NAT_PROTO_UDP 2
+#define NAT_PROTO_ICMP 3
+
+#define NAT_MAX_PROTO 4
+/* Presumably maps an internal index back to an IPPROTO_* value -- TODO confirm */
+extern uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
+
+/* Per-VNET identifier used to build the TLV name of this external action. */
+VNET_DECLARE(uint16_t, nat64lsn_eid);
+#define V_nat64lsn_eid VNET(nat64lsn_eid)
+#define IPFW_TLV_NAT64LSN_NAME IPFW_TLV_EACTION_NAME(V_nat64lsn_eid)
+
+/*
+ * Timestamp macros.  Timestamps are time_uptime reduced modulo 65536 so
+ * that they fit into 16 bits; GET_AGE() compensates for a single
+ * wrap-around when the stored value is ahead of the current one.
+ * Note that SET_AGE() expands to a bare assignment and both macros
+ * evaluate their argument more than once or unparenthesized, so pass
+ * only simple lvalues.
+ */
+#define _CT ((int)time_uptime % 65536)
+#define SET_AGE(x) (x) = _CT
+#define GET_AGE(x) ((_CT >= (x)) ? _CT - (x) : \
+ (int)65536 + _CT - (x))
+
+#ifdef __LP64__
+/* ffsl() is capable of checking 64-bit ints */
+#define _FFS64
+#endif
+
+/*
+ * Single NAT state, 16 bytes.  The union lets the address/port triple be
+ * accessed as one 64-bit key (hkey).
+ */
+struct nat64lsn_state {
+ union {
+ struct {
+ in_addr_t faddr; /* Remote IPv4 address */
+ uint16_t fport; /* Remote IPv4 port */
+ uint16_t lport; /* Local IPv6 port */
+ }s;
+ uint64_t hkey;
+ } u;
+ uint8_t nat_proto;
+ uint8_t flags; /* NAT64_FLAG_* bits */
+ uint16_t timestamp; /* Last use, see SET_AGE()/GET_AGE() */
+ struct st_ptr cur; /* Own location: portgroup slot and state offset */
+ struct st_ptr next; /* Next entry index */
+};
+
+/*
+ * 1024+32 bytes per 64 states, used to store state
+ * AND for outside-in state lookup.
+ * A bit set in freemask means the corresponding states[] entry is free.
+ */
+struct nat64lsn_portgroup {
+ struct nat64lsn_host *host; /* IPv6 source host info */
+ in_addr_t aaddr; /* Alias addr, network format */
+ uint16_t aport; /* Base port */
+ uint16_t timestamp; /* Last change, see SET_AGE()/GET_AGE() */
+ uint8_t nat_proto; /* NAT_PROTO_* index served by this group */
+ uint8_t spare[7];
+#ifdef _FFS64
+ uint64_t freemask; /* Mask of free entries */
+#else
+ uint32_t freemask[2]; /* Mask of free entries */
+#endif
+ struct nat64lsn_state states[NAT64_CHUNK_SIZE]; /* State storage */
+};
+/*
+ * Free-entry bitmap helpers for a portgroup.  _idx is a 0-based state
+ * offset.  PG_GET_FREE_IDX() forwards the ffsl() result, i.e. it yields a
+ * 1-based bit position or 0 when no free entry is left.  PG_IS_EMPTY()
+ * is true when every bit is set (no state in use).
+ */
+#ifdef _FFS64
+#define PG_MARK_BUSY_IDX(_pg, _idx) (_pg)->freemask &= ~((u_long)1<<(_idx))
+#define PG_MARK_FREE_IDX(_pg, _idx) (_pg)->freemask |= ((u_long)1<<(_idx))
+#define PG_IS_FREE_IDX(_pg, _idx) ((_pg)->freemask & ((u_long)1<<(_idx)))
+#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
+#define PG_GET_FREE_IDX(_pg) (ffsl((_pg)->freemask))
+#define PG_IS_EMPTY(_pg) (((_pg)->freemask + 1) == 0)
+#else
+/* Variant for platforms where the mask is split into two 32-bit words. */
+#define PG_MARK_BUSY_IDX(_pg, _idx) \
+ (_pg)->freemask[(_idx) / 32] &= ~((u_long)1<<((_idx) % 32))
+#define PG_MARK_FREE_IDX(_pg, _idx) \
+ (_pg)->freemask[(_idx) / 32] |= ((u_long)1<<((_idx) % 32))
+#define PG_IS_FREE_IDX(_pg, _idx) \
+ ((_pg)->freemask[(_idx) / 32] & ((u_long)1<<((_idx) % 32)))
+#define PG_IS_BUSY_IDX(_pg, _idx) (PG_IS_FREE_IDX(_pg, _idx) == 0)
+#define PG_GET_FREE_IDX(_pg) _pg_get_free_idx(_pg)
+#define PG_IS_EMPTY(_pg) \
+ ((((_pg)->freemask[0] + 1) == 0 && ((_pg)->freemask[1] + 1) == 0))
+
+/*
+ * Return the 1-based position of the first free entry in the split
+ * (2 x 32 bit) free mask of @pg, or 0 if every entry is busy.
+ */
+static inline int
+_pg_get_free_idx(const struct nat64lsn_portgroup *pg)
+{
+	int bit;
+
+	bit = ffsl(pg->freemask[0]);
+	if (bit == 0) {
+		/* Nothing in the low word, try the high one. */
+		bit = ffsl(pg->freemask[1]);
+		if (bit != 0)
+			bit += 32;
+	}
+	return (bit);
+}
+
+#endif
+
+/* List head type for the delayed job queue. */
+TAILQ_HEAD(nat64lsn_job_head, nat64lsn_job_item);
+
+#define NAT64LSN_FLAGSMASK 0x0
+/*
+ * Configuration and runtime state of one NAT64 LSN instance.
+ */
+struct nat64lsn_cfg {
+ struct named_object no;
+ //struct nat64_exthost *ex; /* Pointer to external addr array */
+ struct nat64lsn_portgroup **pg; /* XXX: array of pointers */
+ struct nat64lsn_host **ih; /* Host hash */
+ uint32_t prefix4; /* IPv4 prefix */
+ uint32_t pmask4; /* IPv4 prefix mask */
+ uint32_t ihsize; /* IPv6 host hash size */
+ uint8_t plen4;
+ uint8_t plen6;
+ uint8_t nomatch_verdict;/* What to return to ipfw on no-match */
+ uint8_t nomatch_final; /* Exit outer loop? */
+ struct in6_addr prefix6; /* IPv6 prefix to embed IPv4 hosts */
+
+ uint32_t ihcount; /* Number of items in host hash */
+ int max_chunks; /* Max chunks per client */
+ int agg_prefix_len; /* Prefix length to count */
+ int agg_prefix_max; /* Max hosts per agg prefix */
+ uint32_t jmaxlen; /* Max jobqueue length */
+ uint32_t flags;
+ uint16_t min_chunk; /* Min port group # to use */
+ uint16_t max_chunk; /* Max port group # to use */
+ uint16_t nh_delete_delay; /* Stale host delete delay */
+ uint16_t pg_delete_delay; /* Stale portgroup del delay */
+ uint16_t st_syn_ttl; /* TCP syn expire */
+ uint16_t st_close_ttl; /* TCP fin expire */
+ uint16_t st_estab_ttl; /* TCP established expire */
+ uint16_t st_udp_ttl; /* UDP expire */
+ uint16_t st_icmp_ttl; /* ICMP expire */
+ uint32_t protochunks[NAT_MAX_PROTO];/* Number of chunks used */
+
+ struct callout periodic; /* Drives nat64lsn_periodic() */
+ struct callout jcallout;
+ struct ip_fw_chain *ch; /* Owning ipfw chain */
+ struct vnet *vp; /* VNET the instance lives in */
+ struct nat64lsn_job_head jhead; /* Pending jobs */
+ int jlen; /* Presumably the length of jhead -- TODO confirm */
+ char name[64]; /* Nat instance name */
+ nat64_stats_block stats;
+};
+
+/* Instance life cycle, module init/uninit and the ipfw action entry point. */
+struct nat64lsn_cfg *nat64lsn_init_instance(struct ip_fw_chain *ch,
+ size_t numaddr);
+void nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg);
+void nat64lsn_start_instance(struct nat64lsn_cfg *cfg);
+void nat64lsn_init_internal(void);
+void nat64lsn_uninit_internal(void);
+int ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done);
+
+/* Debug helper: logs one state prefixed with @px; @off is the state offset. */
+void
+nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
+ const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
+ const char *px, int off);
+/*
+ * Portgroup layout
+ * addr x nat_proto x port_off
+ *
+ * The instance-wide pg[] array is indexed by alias address offset, then
+ * by protocol index (NAT_PROTO_*, 1-based) and finally by port chunk.
+ * Every macro below is fully parenthesized so that it can be used inside
+ * larger expressions and with expression arguments.
+ */
+
+/* Number of port chunks per protocol and per alias address. */
+#define	_ADDR_PG_PROTO_COUNT	(65536 >> NAT64_CHUNK_SIZE_BITS)
+#define	_ADDR_PG_COUNT		(_ADDR_PG_PROTO_COUNT * NAT_MAX_PROTO)
+
+/* Offset of alias address _addr from the start of the IPv4 prefix. */
+#define	GET_ADDR_IDX(_cfg, _addr)	((_addr) - ((_cfg)->prefix4))
+/* Index of the chunk holding _port within one address. */
+#define	__GET_PORTGROUP_IDX(_proto, _port)			\
+	((((_proto) - 1) * _ADDR_PG_PROTO_COUNT) +		\
+	((_port) >> NAT64_CHUNK_SIZE_BITS))
+
+/* Index into (_cfg)->pg[] for the given address/protocol/port. */
+#define	_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)		\
+	((GET_ADDR_IDX(_cfg, _addr) * _ADDR_PG_COUNT) +		\
+	__GET_PORTGROUP_IDX(_proto, _port))
+#define	GET_PORTGROUP(_cfg, _addr, _proto, _port)		\
+	((_cfg)->pg[_GET_PORTGROUP_IDX(_cfg, _addr, _proto, _port)])
+
+/* Portgroup stored in 1-based slot _idx of host _nh. */
+#define	GET_PORTGROUP_BYSIDX(_cfg, _nh, _idx)			\
+	((struct nat64lsn_portgroup *)(_nh)->pg_ptr[(_idx) - 1])
+
+
+
+/*
+ * Chained hash table.
+ *
+ * Generic macros parameterised by a name prefix _PX; the user supplies
+ * _PX##hash, _PX##first, _PX##next, _PX##val, _PX##cmp, _PX##lock and
+ * _PX##unlock.  The bucket is selected with "hash & (_hsize - 1)", so
+ * _hsize has to be a power of two.
+ */
+
+/*
+ * Look up _key and store the matching item (or NULL) in _x.
+ * The bucket lock is released only on a miss; after a hit the caller is
+ * left holding it (see CHT_UNLOCK_BUCK/CHT_UNLOCK_KEY).
+ */
+#define CHT_FIND(_ph, _hsize, _PX, _x, _key) do { \
+ unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
+ _PX##lock(_ph, _buck); \
+ _x = _PX##first(_ph, _buck); \
+ for ( ; _x != NULL; _x = _PX##next(_x)) { \
+ if (_PX##cmp(_key, _PX##val(_x))) \
+ break; \
+ } \
+ if (_x == NULL) \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+/* Release a bucket lock; note the expansion already ends with ';'. */
+#define CHT_UNLOCK_BUCK(_ph, _PX, _buck) \
+ _PX##unlock(_ph, _buck);
+
+/* Release the lock of the bucket _key hashes to. */
+#define CHT_UNLOCK_KEY(_ph, _hsize, _PX, _key) do { \
+ unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+/* Link item _i at the head of its bucket; no duplicate check is made. */
+#define CHT_INSERT_HEAD(_ph, _hsize, _PX, _i) do { \
+ unsigned int _buck = _PX##hash(_PX##val(_i)) & (_hsize - 1); \
+ _PX##lock(_ph, _buck); \
+ _PX##next(_i) = _PX##first(_ph, _buck); \
+ _PX##first(_ph, _buck) = _i; \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+/*
+ * Unlink the item matching _key.  On return _x is the removed item or
+ * NULL if there was no match; _tmp is scratch (predecessor of _x).
+ */
+#define CHT_REMOVE(_ph, _hsize, _PX, _x, _tmp, _key) do { \
+ unsigned int _buck = _PX##hash(_key) & (_hsize - 1); \
+ _PX##lock(_ph, _buck); \
+ _x = _PX##first(_ph, _buck); \
+ _tmp = NULL; \
+ for ( ; _x != NULL; _tmp = _x, _x = _PX##next(_x)) { \
+ if (_PX##cmp(_key, _PX##val(_x))) \
+ break; \
+ } \
+ if (_x != NULL) { \
+ if (_tmp == NULL) \
+ _PX##first(_ph, _buck) = _PX##next(_x); \
+ else \
+ _PX##next(_tmp) = _PX##next(_x); \
+ } \
+ _PX##unlock(_ph, _buck); \
+} while(0)
+
+/*
+ * Walk every bucket and call _cb(_x, _arg) for each item.  Items for
+ * which the callback returns non-zero are unlinked from their chain.
+ * _tmp tracks the last item that was kept, so that the predecessor's
+ * next pointer (not the scratch variable) is rewritten and several
+ * removals in a row are spliced out correctly.
+ * The callback must not free or relink _x: its next pointer is still
+ * read after the callback returns.
+ */
+#define	CHT_FOREACH_SAFE(_ph, _hsize, _PX, _x, _tmp, _cb, _arg) do {	\
+	for (unsigned int _i = 0; _i < _hsize; _i++) {			\
+		_PX##lock(_ph, _i);					\
+		_x = _PX##first(_ph, _i);				\
+		_tmp = NULL;						\
+		for (; _x != NULL; _x = _PX##next(_x)) {		\
+			if (_cb(_x, _arg) == 0) {			\
+				_tmp = _x;				\
+				continue;				\
+			}						\
+			if (_tmp == NULL)				\
+				_PX##first(_ph, _i) = _PX##next(_x);	\
+			else						\
+				_PX##next(_tmp) = _PX##next(_x);	\
+		}							\
+		_PX##unlock(_ph, _i);					\
+	}								\
+} while(0)
+
+/*
+ * Move every item from table _ph (_hsize buckets) into table _nph
+ * (_nhsize buckets, power of two).  _x and _y are scratch item pointers:
+ * _y remembers the successor in the old chain before _x is relinked,
+ * and _x is then advanced to it so that the whole chain is migrated.
+ * No bucket locks are taken here; the old bucket heads are left as is.
+ */
+#define	CHT_RESIZE(_ph, _hsize, _nph, _nhsize, _PX, _x, _y) do {	\
+	unsigned int _buck;						\
+	for (unsigned int _i = 0; _i < _hsize; _i++) {			\
+		_x = _PX##first(_ph, _i);				\
+		while (_x != NULL) {					\
+			_buck = _PX##hash(_PX##val(_x)) & (_nhsize - 1);\
+			_y = _PX##next(_x);				\
+			_PX##next(_x) = _PX##first(_nph, _buck);	\
+			_PX##first(_nph, _buck) = _x;			\
+			_x = _y;					\
+		}							\
+	}								\
+} while(0)
+
+#endif /* _IP_FW_NAT64LSN_H_ */
+
Index: sys/netpfil/ipfw/nat64/nat64lsn.c
===================================================================
--- /dev/null
+++ sys/netpfil/ipfw/nat64/nat64lsn.c
@@ -0,0 +1,1599 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64lsn.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+
+static void nat64lsn_periodic(void *data);
+/* Multiplied by hz when the periodic callout is rescheduled. */
+#define PERIODIC_DELAY 4
+/* IP protocol number -> NAT_PROTO_* index; 0 means "not supported". */
+static uint8_t nat64lsn_proto_map[256];
+uint8_t nat64lsn_rproto_map[NAT_MAX_PROTO];
+
+/* Bits kept in nat64lsn_state.flags. */
+#define NAT64_FLAG_FIN 0x01 /* FIN was seen */
+#define NAT64_FLAG_SYN 0x02 /* First syn in->out */
+#define NAT64_FLAG_ESTAB 0x04 /* Packet with Ack */
+#define NAT64_FLAGS_TCP (NAT64_FLAG_SYN|NAT64_FLAG_ESTAB|NAT64_FLAG_FIN)
+
+#define NAT64_FLAG_RDR 0x80 /* Port redirect */
+/* Resolve the instance referenced by an ipfw instruction (arg1). */
+#define NAT64_LOOKUP(chain, cmd) \
+ (struct nat64lsn_cfg *)SRV_OBJECT((chain), (cmd)->arg1)
+/*
+ * Delayed job queue, used to create new hosts
+ * and new portgroups
+ */
+enum nat64lsn_jtype {
+ JTYPE_NEWHOST = 1,
+ JTYPE_NEWPORTGROUP,
+ JTYPE_DELPORTGROUP,
+};
+
+/*
+ * One queued request.  Which fields are meaningful depends on jtype;
+ * delmask/delcount are filled for JTYPE_DELPORTGROUP jobs.
+ */
+struct nat64lsn_job_item {
+ TAILQ_ENTRY(nat64lsn_job_item) next;
+ enum nat64lsn_jtype jtype;
+ struct nat64lsn_host *nh; /* Preallocated host, if any */
+ struct in6_addr haddr; /* IPv6 address of the host concerned */
+ uint8_t nat_proto;
+ uint8_t done; /* Non-zero once the request succeeded */
+ int delcount; /* Number of bits set in delmask */
+ unsigned int fhash; /* Flow hash */
+ uint32_t aaddr; /* Last used address (net) */
+ struct nat64lsn_portgroup *pg; /* Preallocated portgroup, if any */
+ struct mbuf *m; /* Packet to reinject after completion */
+ struct ipfw_flow_id f_id;
+ uint64_t delmask; /* Bit i set: portgroup slot i is stale */
+};
+
+/* Single file-wide mutex and its wrappers; presumably guards the job queues. */
+static struct mtx jmtx;
+#define JQUEUE_LOCK_INIT() mtx_init(&jmtx, "qlock", NULL, MTX_DEF)
+#define JQUEUE_LOCK_DESTROY() mtx_destroy(&jmtx)
+#define JQUEUE_LOCK() mtx_lock(&jmtx)
+#define JQUEUE_UNLOCK() mtx_unlock(&jmtx)
+
+/* Forward declarations: job queue handling. */
+static void nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static void nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_head *jhead, int jlen);
+
+/* Forward declarations: request creation and packet translation. */
+static struct nat64lsn_job_item *nat64_create_job(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, int jtype);
+static int nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr);
+static int nat64lsn_request_shost(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static int nat64lsn_translate4(struct nat64lsn_cfg *cfg,
+ const struct ipfw_flow_id *f_id, struct mbuf **pm);
+static int nat64lsn_translate6(struct nat64lsn_cfg *cfg,
+ struct ipfw_flow_id *f_id, struct mbuf **pm);
+
+/* Forward declarations: object allocation and release. */
+static int alloc_portgroup(struct nat64lsn_job_item *ji);
+static void destroy_portgroup(struct nat64lsn_portgroup *pg);
+static void destroy_host6(struct nat64lsn_host *nh);
+static int alloc_host6(struct nat64lsn_job_item *ji);
+
+/* Forward declarations: linking allocated objects into an instance. */
+static int attach_portgroup(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_job_item *ji);
+static int attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji);
+
+
+/* XXX tmp */
+/* UMA zones backing host records and portgroups respectively. */
+static uma_zone_t nat64lsn_host_zone;
+static uma_zone_t nat64lsn_buck_zone;
+
+static unsigned int nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg,
+ struct nat64lsn_host *nh);
+
+/*
+ * "I6_" adapter set for the CHT_* macros: hosts are keyed by their
+ * 16-byte IPv6 address and chained through nat64lsn_host.next.
+ * The lock/unlock hooks expand to nothing, so the host hash relies on
+ * whatever locking the caller already holds.
+ */
+#define I6_hash(x) (djb_hash((const unsigned char *)(x), 16))
+#define I6_first(_ph, h) (_ph)[h]
+#define I6_next(x) (x)->next
+#define I6_val(x) (&(x)->addr)
+#define I6_cmp(a, b) IN6_ARE_ADDR_EQUAL(a, b)
+#define I6_lock(a, b)
+#define I6_unlock(a, b)
+
+/* Host hash operations on an instance (_cfg->ih with _cfg->ihsize buckets). */
+#define I6HASH_FIND(_cfg, _res, _a) \
+ CHT_FIND(_cfg->ih, _cfg->ihsize, I6_, _res, _a)
+#define I6HASH_INSERT(_cfg, _i) \
+ CHT_INSERT_HEAD(_cfg->ih, _cfg->ihsize, I6_, _i)
+#define I6HASH_REMOVE(_cfg, _res, _tmp, _a) \
+ CHT_REMOVE(_cfg->ih, _cfg->ihsize, I6_, _res, _tmp, _a)
+
+#define I6HASH_FOREACH_SAFE(_cfg, _x, _tmp, _cb, _arg) \
+ CHT_FOREACH_SAFE(_cfg->ih, _cfg->ihsize, I6_, _x, _tmp, _cb, _arg)
+
+/* Hashes the first 8 bytes found at x. */
+#define HASH_IN4(x) djb_hash((const unsigned char *)(x), 8)
+
+/*
+ * Hash @len bytes starting at @h: for every byte the accumulator is
+ * multiplied by 33 and then XORed with the byte.  Returns 0 when @len
+ * is not positive.
+ */
+static unsigned
+djb_hash(const unsigned char *h, const int len)
+{
+	unsigned int acc;
+	int left;
+
+	acc = 0;
+	left = len;
+	while (left-- > 0)
+		acc = (acc * 33) ^ *h++;
+
+	return (acc);
+}
+
+/*
+static size_t
+bitmask_size(size_t num, int *level)
+{
+ size_t x;
+ int c;
+
+ for (c = 0, x = num; num > 1; num /= 64, c++)
+ ;
+
+ return (x);
+}
+
+static void
+bitmask_prepare(uint64_t *pmask, size_t bufsize, int level)
+{
+ size_t x, z;
+
+ memset(pmask, 0xFF, bufsize);
+ for (x = 0, z = 1; level > 1; x += z, z *= 64, level--)
+ ;
+ pmask[x] ~= 0x01;
+}
+*/
+
+/*
+ * Inspects icmp packets to see if the message contains different
+ * packet header so we need to alter @addr and @port.
+ *
+ * For echo request/reply only @port is replaced (with the ICMP id).
+ * For ICMP_UNREACH/ICMP_TIMXCEED the quoted inner packet identifies the
+ * state: @addr becomes the inner source address and @port (plus
+ * @nat_proto for TCP/UDP) is taken from the inner transport header.
+ * Values are stored in host byte order.
+ *
+ * Returns 0 on success, EOPNOTSUPP for messages that are not handled,
+ * EINVAL for truncated or malformed packets (the mbuf is left intact)
+ * and ENOMEM if m_pullup() fails, in which case the chain has been
+ * freed and *m is NULL.
+ */
+static __noinline int
+inspect_icmp_mbuf(struct mbuf **m, uint8_t *nat_proto, uint32_t *addr,
+    uint16_t *port)
+{
+	struct ip *ip;
+	struct tcphdr *tcp;
+	struct udphdr *udp;
+	struct icmphdr *icmp;
+	int off;
+	uint8_t proto;
+
+	ip = mtod(*m, struct ip *); /* Outer IP header */
+	off = (ip->ip_hl << 2) + ICMP_MINLEN;
+	/*
+	 * Check the total length first: m_pullup() frees the chain when
+	 * the packet is shorter than requested, which must not be
+	 * mistaken for a memory shortage.
+	 */
+	if ((*m)->m_pkthdr.len < off)
+		return (EINVAL);
+	if ((*m)->m_len < off)
+		*m = m_pullup(*m, off);
+	if (*m == NULL)
+		return (ENOMEM);
+
+	ip = mtod(*m, struct ip *); /* Outer IP header */
+	icmp = L3HDR(ip, struct icmphdr *);
+	switch (icmp->icmp_type) {
+	case ICMP_ECHO:
+	case ICMP_ECHOREPLY:
+		/* Use icmp ID as distinguisher */
+		*port = ntohs(*((uint16_t *)(icmp + 1)));
+		return (0);
+	case ICMP_UNREACH:
+	case ICMP_TIMXCEED:
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+	/*
+	 * ICMP_UNREACH and ICMP_TIMXCEED contains IP header + 64 bits
+	 * of ULP header.
+	 */
+	if ((*m)->m_pkthdr.len < off + sizeof(struct ip) + ICMP_MINLEN)
+		return (EINVAL);
+	if ((*m)->m_len < off + sizeof(struct ip) + ICMP_MINLEN)
+		*m = m_pullup(*m, off + sizeof(struct ip) + ICMP_MINLEN);
+	if (*m == NULL)
+		return (ENOMEM);
+	ip = mtodo(*m, off); /* Inner IP header */
+	/* A header length below the minimum would make us misparse. */
+	if (ip->ip_hl < (sizeof(struct ip) >> 2))
+		return (EINVAL);
+	proto = ip->ip_p;
+	off += ip->ip_hl << 2; /* Skip inner IP header */
+	*addr = ntohl(ip->ip_src.s_addr);
+	/* Inner IP options may push the ULP header past the packet end. */
+	if ((*m)->m_pkthdr.len < off + ICMP_MINLEN)
+		return (EINVAL);
+	if ((*m)->m_len < off + ICMP_MINLEN)
+		*m = m_pullup(*m, off + ICMP_MINLEN);
+	if (*m == NULL)
+		return (ENOMEM);
+	switch (proto) {
+	case IPPROTO_TCP:
+		tcp = mtodo(*m, off);
+		*nat_proto = NAT_PROTO_TCP;
+		*port = ntohs(tcp->th_sport);
+		return (0);
+	case IPPROTO_UDP:
+		udp = mtodo(*m, off);
+		*nat_proto = NAT_PROTO_UDP;
+		*port = ntohs(udp->uh_sport);
+		return (0);
+	case IPPROTO_ICMP:
+		/*
+		 * We will translate only ICMP errors for our ICMP
+		 * echo requests.
+		 */
+		icmp = mtodo(*m, off);
+		if (icmp->icmp_type != ICMP_ECHO)
+			return (EOPNOTSUPP);
+		*port = ntohs(*((uint16_t *)(icmp + 1)));
+		return (0);
+	};
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Reduce TCP header flags to the bits stored in a NAT state: FIN and
+ * SYN are copied unchanged, while RST and ACK are moved two bit
+ * positions down.
+ */
+static inline uint8_t
+convert_tcp_flags(uint8_t flags)
+{
+	uint8_t st_flags;
+
+	st_flags = flags & TH_FIN;
+	st_flags |= flags & TH_SYN;
+	if ((flags & TH_RST) != 0)
+		st_flags |= TH_RST >> 2; /* Treat RST as FIN */
+	if ((flags & TH_ACK) != 0)
+		st_flags |= TH_ACK >> 2; /* Treat ACK as estab */
+
+	return (st_flags);
+}
+
+/*
+ * Translate an IPv4 packet addressed to one of the alias addresses.
+ *
+ * The portgroup is located from destination address, protocol and port
+ * (or, for ICMP, from the values extracted by inspect_icmp_mbuf()); the
+ * state's timestamp and flags are refreshed under the host lock and the
+ * packet is handed to nat64_do_handle_ip4().
+ *
+ * Returns cfg->nomatch_verdict when the packet does not belong to this
+ * instance, IP_FW_PASS when the translator asks to skip the packet, and
+ * IP_FW_DENY otherwise; in the last case *pm is set to NULL (the mbuf is
+ * freed here only for NAT64MFREE).
+ */
+static int
+nat64lsn_translate4(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
+ struct mbuf **pm)
+{
+ struct in6_addr src6;
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_host *nh;
+ struct nat64lsn_state *st;
+ struct ip *ip;
+ uint32_t addr;
+ uint16_t state_flags, state_ts;
+ uint16_t port, lport;
+ uint8_t nat_proto;
+ int ret;
+
+ addr = f_id->dst_ip;
+ port = f_id->dst_port;
+ /* Destination must fall inside the alias range [prefix4, pmask4]. */
+ if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+ return (cfg->nomatch_verdict);
+ }
+
+ /* Check if protocol is supported and get its short id */
+ nat_proto = nat64lsn_proto_map[f_id->proto];
+ if (nat_proto == 0) {
+ NAT64STAT_INC(&cfg->stats, noproto);
+ return (cfg->nomatch_verdict);
+ }
+
+ /* We might need to handle icmp differently */
+ if (nat_proto == NAT_PROTO_ICMP) {
+ /* May replace nat_proto, addr and port with inner-packet values. */
+ ret = inspect_icmp_mbuf(pm, &nat_proto, &addr, &port);
+ if (ret != 0) {
+ /* NOTE(review): on ENOMEM *pm appears to be NULL already -- confirm callers cope. */
+ if (ret == ENOMEM)
+ NAT64STAT_INC(&cfg->stats, nomem);
+ else
+ NAT64STAT_INC(&cfg->stats, noproto);
+ return (cfg->nomatch_verdict);
+ }
+ /* XXX: Check addr for validity */
+ if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+ return (cfg->nomatch_verdict);
+ }
+ }
+
+ /* Calc portgroup offset w.r.t protocol */
+ pg = GET_PORTGROUP(cfg, addr, nat_proto, port);
+
+ /* Check if this port is occupied by any portgroup */
+ if (pg == NULL) {
+ NAT64STAT_INC(&cfg->stats, nomatch4);
+#if 0
+ DLPRINTF(DP_ST, "NOMATCH %u %d %d (%d)", addr, nat_proto, port,
+ _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port));
+#endif
+ return (cfg->nomatch_verdict);
+ }
+
+ /* TODO: Check flags to see if we need to do some static mapping */
+ nh = pg->host;
+
+ /* Prepare some fields we might need to update */
+ SET_AGE(state_ts);
+ ip = mtod(*pm, struct ip *);
+ if (ip->ip_p == IPPROTO_TCP)
+ state_flags = convert_tcp_flags(
+ L3HDR(ip, struct tcphdr *)->th_flags);
+ else
+ state_flags = 0;
+
+ /* Lock host and get port mapping */
+ NAT64_LOCK(nh);
+
+ /* The low bits of the port select the state inside the portgroup. */
+ st = &pg->states[port & (NAT64_CHUNK_SIZE - 1)];
+ /* Write only when the value changes. */
+ if (st->timestamp != state_ts)
+ st->timestamp = state_ts;
+ if ((st->flags & state_flags) != state_flags)
+ st->flags |= state_flags;
+ lport = htons(st->u.s.lport);
+
+ NAT64_UNLOCK(nh);
+
+ /* IPv6 source: first 96 bits of prefix6 followed by the IPv4 source. */
+ src6.s6_addr32[0] = cfg->prefix6.s6_addr32[0];
+ src6.s6_addr32[1] = cfg->prefix6.s6_addr32[1];
+ src6.s6_addr32[2] = cfg->prefix6.s6_addr32[2];
+ src6.s6_addr32[3] = htonl(f_id->src_ip);
+
+ ret = nat64_do_handle_ip4(*pm, &src6, &nh->addr, lport,
+ &cfg->stats);
+
+ if (ret == NAT64SKIP)
+ return (IP_FW_PASS);
+ if (ret == NAT64MFREE)
+ m_freem(*pm);
+ /* The mbuf is no longer ours in either remaining case. */
+ *pm = NULL;
+
+ return (IP_FW_DENY);
+}
+
+/*
+ * Debug helper: log state @st of portgroup @pg, prefixed with @px.
+ * @off is the state's offset inside the portgroup and is added to the
+ * base port when printing.  Does nothing unless nat64_debug is at least
+ * DP_ST.
+ */
+void
+nat64lsn_dump_state(const struct nat64lsn_cfg *cfg,
+    const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st,
+    const char *px, int off)
+{
+	char host6[INET6_ADDRSTRLEN];
+	char alias4[INET_ADDRSTRLEN], remote4[INET_ADDRSTRLEN];
+
+	/* Skip the address formatting when nobody will see the result. */
+	if (nat64_debug >= DP_ST) {
+		inet_ntop(AF_INET6, &pg->host->addr, host6, sizeof(host6));
+		inet_ntop(AF_INET, &pg->aaddr, alias4, sizeof(alias4));
+		inet_ntop(AF_INET, &st->u.s.faddr, remote4, sizeof(remote4));
+
+		DLPRINTF(DP_ST,
+		    "%s: ST [%p|%d]: %s:%d %d <%s:%d> %s:%d AGE %d",
+		    px, st, off,
+		    host6, st->u.s.lport, pg->nat_proto,
+		    alias4, pg->aport + off,
+		    remote4, st->u.s.fport, GET_AGE(st->timestamp));
+	}
+}
+
+/*
+ * Decide whether a TCP state has outlived its time-to-live.
+ * The limit depends on the most advanced phase recorded in the state
+ * flags: closing (FIN), established, or otherwise the SYN limit.
+ * Return 1 if @age exceeds the limit, 0 otherwise.
+ */
+static int
+nat64lsn_periodic_check_tcp(const struct nat64lsn_cfg *cfg,
+    const struct nat64lsn_state *st, int age)
+{
+	int limit;
+
+	if ((st->flags & NAT64_FLAG_FIN) != 0)
+		limit = cfg->st_close_ttl;
+	else if ((st->flags & NAT64_FLAG_ESTAB) != 0)
+		limit = cfg->st_estab_ttl;
+	else
+		limit = cfg->st_syn_ttl; /* SYN seen or no flag at all */
+
+	return (age > limit ? 1 : 0);
+}
+
+/*
+ * Decide whether nat state @st of portgroup @pg has expired.
+ * States flagged NAT64_FLAG_RDR never expire; for the others the age is
+ * compared with the per-protocol limit from @cfg.
+ * Return 1 if the state should be deleted, 0 otherwise.
+ */
+static int
+nat64lsn_periodic_chkstate(const struct nat64lsn_cfg *cfg,
+    const struct nat64lsn_portgroup *pg, const struct nat64lsn_state *st)
+{
+	int age;
+
+	/* Skip immutable records */
+	if ((st->flags & NAT64_FLAG_RDR) != 0)
+		return (0);
+
+	age = GET_AGE(st->timestamp);
+	switch (pg->nat_proto) {
+	case NAT_PROTO_TCP:
+		return (nat64lsn_periodic_check_tcp(cfg, st, age));
+	case NAT_PROTO_UDP:
+		return (age > cfg->st_udp_ttl ? 1 : 0);
+	case NAT_PROTO_ICMP:
+		return (age > cfg->st_icmp_ttl ? 1 : 0);
+	default:
+		/* Unknown protocol index: keep the state. */
+		return (0);
+	}
+}
+
+
+/*
+ * The following structures and functions
+ * are used to perform SLIST_FOREACH_SAFE()
+ * analog for states identified by struct st_ptr.
+ */
+
+/*
+ * Iteration cursor: the resolved portgroup and state of the current
+ * element plus a saved copy of its successor link, so the walk can
+ * continue after the current state has been wiped.  st == NULL marks
+ * the end of the chain.
+ */
+struct st_idx {
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_state *st;
+ struct st_ptr sidx_next;
+};
+
+/*
+ * Position cursor @si on the state referenced by chain head @sidx of
+ * host @nh.  An empty chain (idx == 0) yields an all-zero cursor.
+ * Returns @si.
+ */
+static struct st_idx *
+st_first(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
+    struct st_ptr *sidx, struct st_idx *si)
+{
+
+	if (sidx->idx != 0) {
+		si->pg = GET_PORTGROUP_BYSIDX(cfg, nh, sidx->idx);
+		si->st = &si->pg->states[sidx->off];
+		/* Remember the successor before the caller touches the state. */
+		si->sidx_next = si->st->next;
+	} else
+		memset(si, 0, sizeof(*si));
+
+	return (si);
+}
+
+/*
+ * Advance cursor @si to the successor saved in si->sidx_next.
+ * At the end of the chain the cursor is zeroed, which leaves st and pg
+ * NULL (the same all-zero convention st_first() uses).  Returns @si.
+ */
+static struct st_idx *
+st_next(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh,
+    struct st_idx *si)
+{
+	const struct st_ptr link = si->sidx_next;
+
+	if (link.idx != 0) {
+		si->pg = GET_PORTGROUP_BYSIDX(cfg, nh, link.idx);
+		si->st = &si->pg->states[link.off];
+		si->sidx_next = si->st->next;
+	} else
+		memset(si, 0, sizeof(*si));
+
+	return (si);
+}
+
+/*
+ * Copy cursor @si into @si_dst, but only while @si still refers to a
+ * state; a cursor whose state pointer was cleared is not remembered.
+ * Returns @si_dst.
+ */
+static struct st_idx *
+st_save_cond(struct st_idx *si_dst, struct st_idx *si)
+{
+
+	if (si->st == NULL)
+		return (si_dst);
+
+	*si_dst = *si;
+	return (si_dst);
+}
+
+/*
+ * Expire stale states of host @nh.
+ * Every bucket of the host's state hash is walked; expired states are
+ * unlinked, marked free in their portgroup and zeroed.  The number of
+ * deleted states is added to the sdeleted counter and returned.
+ * The visible caller invokes this with the host lock held.
+ */
+unsigned int
+nat64lsn_periodic_chkstates(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh)
+{
+ struct st_idx si, si_prev;
+ int i;
+ unsigned int delcount;
+
+ delcount = 0;
+ for (i = 0; i < nh->hsize; i++) {
+ /* si_prev tracks the last state kept in this bucket. */
+ memset(&si_prev, 0, sizeof(si_prev));
+ for (st_first(cfg, nh, &nh->phash[i], &si);
+ si.st != NULL;
+ st_save_cond(&si_prev, &si), st_next(cfg, nh, &si)) {
+ if (nat64lsn_periodic_chkstate(cfg, si.pg, si.st) == 0)
+ continue;
+ nat64lsn_dump_state(cfg, si.pg, si.st, "DELETE STATE",
+ si.st->cur.off);
+ /* Unlink from hash */
+ if (si_prev.st != NULL)
+ si_prev.st->next = si.st->next;
+ else
+ nh->phash[i] = si.st->next;
+ /* Delete state and free its data */
+ PG_MARK_FREE_IDX(si.pg, si.st->cur.off);
+ memset(si.st, 0, sizeof(struct nat64lsn_state));
+ /* Cleared so that st_save_cond() does not record it as predecessor. */
+ si.st = NULL;
+ delcount++;
+
+ /* Update portgroup timestamp */
+ SET_AGE(si.pg->timestamp);
+ }
+ }
+ NAT64STAT_ADD(&cfg->stats, sdeleted, delcount);
+ return (delcount);
+}
+
+/*
+ * Tell whether portgroup @pg may be deleted: it must hold no states and
+ * must have been unchanged for at least cfg->pg_delete_delay.
+ * Returns 1 if stale, 0 otherwise
+ */
+static int
+stale_pg(const struct nat64lsn_cfg *cfg, const struct nat64lsn_portgroup *pg)
+{
+
+	if (PG_IS_EMPTY(pg) && GET_AGE(pg->timestamp) >= cfg->pg_delete_delay)
+		return (1);
+	return (0);
+}
+
+/*
+ * Tell whether host record @nh may be deleted: it must own no
+ * portgroups and must have been unchanged for at least
+ * cfg->nh_delete_delay.
+ * Returns 1 if stale, 0 otherwise
+ */
+static int
+stale_nh(const struct nat64lsn_cfg *cfg, const struct nat64lsn_host *nh)
+{
+
+	if (nh->pg_count == 0 && GET_AGE(nh->timestamp) >= cfg->nh_delete_delay)
+		return (1);
+	return (0);
+}
+
+/*
+ * Context passed to the per-host callback of the periodic scan: the
+ * instance being scanned and a local list collecting the jobs to enqueue.
+ */
+struct nat64lsn_periodic_data {
+ struct nat64lsn_cfg *cfg;
+ struct nat64lsn_job_head jhead; /* Jobs created during this scan */
+ int jlen; /* Number of items in jhead */
+};
+
+/*
+ * Per-host step of the periodic scan (callback for I6HASH_FOREACH_SAFE).
+ *
+ * For a non-stale host, expired states are removed and stale portgroups
+ * are collected into a bit mask.  If anything is to be deleted, or the
+ * host itself is stale (mask and count stay zero in that case), a
+ * JTYPE_DELPORTGROUP job is appended to the list in @d.
+ * Always returns 0, so the iterating macro never unlinks the host here.
+ */
+static int
+nat64lsn_periodic_chkhost(struct nat64lsn_host *nh, struct nat64lsn_periodic_data *d)
+{
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_job_item *ji;
+ u_long delmask;
+ int delcount, i;
+
+ delmask = 0;
+ delcount = 0;
+
+ char a[INET6_ADDRSTRLEN];
+ inet_ntop(AF_INET6, &nh->addr, a, sizeof(a));
+ DLPRINTF(DP_JQ, "Checking %s host %s on cpu %d",
+ stale_nh(d->cfg, nh) ? "stale" : "non-stale", a, curcpu);
+ if (!stale_nh(d->cfg, nh)) {
+ /* Non-stale host. Inspect internals */
+ NAT64_LOCK(nh);
+
+ /* Stage 1: Check&expire states */
+ if (nat64lsn_periodic_chkstates(d->cfg, nh) != 0)
+ SET_AGE(nh->timestamp);
+
+ /* Stage 2: Check if we need to expire */
+ /*
+ * NOTE(review): the scan is bounded by pg_count, while slots are
+ * handed out by first-free search over pg_ptr[]; if the array can
+ * contain holes, groups in higher slots are never examined here --
+ * confirm against the portgroup detach code.
+ */
+ for (i = 0; i < nh->pg_count; i++) {
+ pg = GET_PORTGROUP_BYSIDX(d->cfg, nh, i + 1);
+ if (pg == NULL)
+ continue;
+
+ /* Check if we can delete portgroup */
+ if (stale_pg(d->cfg, pg) == 0)
+ continue;
+
+ DLPRINTF(DP_JQ, "Check PG %d", i);
+ /* Bit i corresponds to slot pg_ptr[i]. */
+ delmask |= ((u_long)1 << i);
+ delcount++;
+ }
+
+ NAT64_UNLOCK(nh);
+ if (delmask == 0)
+ return (0);
+ }
+
+ DLPRINTF(DP_JQ, "Queueing portgroup delmask %lX", delmask);
+ /* We have something to delete - add it to queue */
+ ji = nat64_create_job(d->cfg, NULL, JTYPE_DELPORTGROUP);
+ if (ji == NULL)
+ return (0);
+
+ ji->haddr = nh->addr;
+ ji->delmask = delmask;
+ ji->delcount = delcount;
+
+ /* Collected locally; the caller enqueues the whole list at once. */
+ TAILQ_INSERT_TAIL(&d->jhead, ji, next);
+ d->jlen++;
+
+ return (0);
+}
+
+/*
+ * This procedure is used to perform various maintenance
+ * on the dynamic host hash.  It is a callout handler that reschedules
+ * itself to run again after hz * PERIODIC_DELAY ticks.
+ * @data is the struct nat64lsn_cfg of the instance to scan.
+ */
+static void
+nat64lsn_periodic(void *data)
+{
+ struct ip_fw_chain *ch;
+ IPFW_RLOCK_TRACKER;
+ struct nat64lsn_cfg *cfg;
+ struct nat64lsn_periodic_data d;
+ struct nat64lsn_host *nh, *tmp;
+
+ cfg = (struct nat64lsn_cfg *) data;
+ ch = cfg->ch;
+ /* Run in the VNET the instance belongs to. */
+ CURVNET_SET(cfg->vp);
+
+ memset(&d, 0, sizeof(d));
+ d.cfg = cfg;
+ TAILQ_INIT(&d.jhead);
+
+ IPFW_RLOCK(ch);
+
+ /* Stage 1: foreach host, check all its portgroups */
+ I6HASH_FOREACH_SAFE(cfg, nh, tmp, nat64lsn_periodic_chkhost, &d);
+
+ /* Enqueue everything we have requested */
+ nat64lsn_enqueue_jobs(cfg, &d.jhead, d.jlen);
+
+ callout_schedule(&cfg->periodic, hz * PERIODIC_DELAY);
+
+ IPFW_RUNLOCK(ch);
+
+ CURVNET_RESTORE();
+}
+
+/*
+ * Dispose of the packet attached to a finished job.
+ * If the job succeeded and carries an IPv6 flow, the packet is passed
+ * to nat64lsn_translate6() again; otherwise it is freed and ji->m is
+ * cleared.  A job without a packet is left alone.
+ */
+static void
+reinject_mbuf(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+
+	if (ji->m == NULL)
+		return;
+
+	if (ji->done != 0 && ji->f_id.addr_type == 6) {
+		/*
+		 * XXX: Limit recursion level
+		 */
+		NAT64STAT_INC(&cfg->stats, jreinjected);
+		DLPRINTF(DP_JQ, "Reinject mbuf");
+		nat64lsn_translate6(cfg, &ji->f_id, &ji->m);
+		return;
+	}
+
+	/* Request has failed or packet type is wrong */
+	m_freem(ji->m);
+	ji->m = NULL;
+}
+
+/*
+ * Return portgroup @pg to the zone it was allocated from.
+ */
+static void
+destroy_portgroup(struct nat64lsn_portgroup *pg)
+{
+
+ uma_zfree(nat64lsn_buck_zone, pg);
+}
+
+/*
+ * Allocate a zeroed portgroup for job @ji without sleeping, mark all of
+ * its entries free and store it in ji->pg.
+ * Returns 0 on success, 1 if the allocation failed (ji->pg untouched).
+ */
+static int
+alloc_portgroup(struct nat64lsn_job_item *ji)
+{
+	struct nat64lsn_portgroup *newpg;
+
+	newpg = uma_zalloc(nat64lsn_buck_zone, M_NOWAIT | M_ZERO);
+	if (newpg != NULL) {
+		/* All bits set: every state slot is free. */
+		memset(&newpg->freemask, 0xFF, sizeof(newpg->freemask));
+		newpg->nat_proto = ji->nat_proto;
+		ji->pg = newpg;
+		return (0);
+	}
+
+	return (1);
+}
+
+/*
+ * Log, tear down the lock of, and free host record @nh.
+ */
+static void
+destroy_host6(struct nat64lsn_host *nh)
+{
+	char addrbuf[INET6_ADDRSTRLEN];
+
+	inet_ntop(AF_INET6, &nh->addr, addrbuf, sizeof(addrbuf));
+	DLPRINTF(DP_OBJ, "DESTROY HOST %s %p", addrbuf, nh);
+
+	NAT64_LOCK_DESTROY(nh);
+	uma_zfree(nat64lsn_host_zone, nh);
+}
+
+/*
+ * Allocate a host record for the address in ji->haddr together with its
+ * first portgroup.  On success both are stored in @ji (ji->nh, ji->pg)
+ * and 0 is returned.  On failure 1 is returned and ji->nh is NULL.
+ */
+static int
+alloc_host6(struct nat64lsn_job_item *ji)
+{
+	struct nat64lsn_host *host;
+	char addrbuf[INET6_ADDRSTRLEN];
+
+	host = uma_zalloc(nat64lsn_host_zone, M_NOWAIT | M_ZERO);
+	if (host == NULL)
+		return (1);
+
+	host->addr = ji->haddr;
+	NAT64_LOCK_INIT(host);
+	/* Start with the state hash embedded in the record itself. */
+	host->phash = host->st_hash;
+	host->hsize = sizeof(host->st_hash) / sizeof(host->st_hash[0]);
+	ji->nh = host;
+
+	/* Then, alloc portgroup */
+	if (alloc_portgroup(ji) == 0) {
+		inet_ntop(AF_INET6, &host->addr, addrbuf, sizeof(addrbuf));
+		DLPRINTF(DP_OBJ, "ALLOC HOST %s %p", addrbuf, ji->nh);
+		return (0);
+	}
+
+	destroy_host6(ji->nh);
+	ji->nh = NULL;
+	return (1);
+}
+
+/*
+ * Finds free @pg index inside @nh.
+ *
+ * Scans the host's pg_ptr[] array for the first unused slot; the bound
+ * is derived from the array itself so it cannot get out of sync with
+ * the structure definition.
+ * Returns 0 and stores the 0-based slot in @idx on success, 1 if every
+ * slot is occupied (@idx is left untouched).
+ */
+static int
+find_nh_pg_idx(const struct nat64lsn_host *nh, int *idx)
+{
+	const int nslots = sizeof(nh->pg_ptr) / sizeof(nh->pg_ptr[0]);
+	int i;
+
+	for (i = 0; i < nslots; i++) {
+		if (nh->pg_ptr[i] == NULL) {
+			*idx = i;
+			return (0);
+		}
+	}
+	return (1);
+}
+
+/*
+ * Link the host preallocated in job @ji into the host hash of @cfg and
+ * then attach its first portgroup.
+ * Returns 1 if a host with the same address already exists (the job is
+ * left untouched); otherwise ownership of the host moves to the hash,
+ * ji->nh is cleared and the result of attach_portgroup() is returned.
+ */
+static int
+attach_host6(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+	struct nat64lsn_host *found, *newhost;
+
+	I6HASH_FIND(cfg, found, &ji->haddr);
+	if (found != NULL)
+		return (1);
+
+	/* Add new host to list */
+	newhost = ji->nh;
+	ji->nh = NULL;
+	newhost->pg_count = 0;
+	I6HASH_INSERT(cfg, newhost);
+	cfg->ihcount++;
+
+	/*
+	 * Try to add portgroup.
+	 * Note it will automatically set
+	 * 'done' on ji if successful.
+	 */
+	return (attach_portgroup(cfg, ji));
+}
+
+/*
+ * Scans the portgroup slots of cfg->pg that belong to alias address
+ * offset @addr_off and protocol @nat_proto, starting at chunk
+ * NAT64_MIN_CHUNK, for the first unused one.
+ *
+ * On success stores the first port of the chunk in @aport and the
+ * index into cfg->pg in @ppg_idx and returns 1. Returns 0 if every
+ * slot is occupied.
+ */
+static int
+find_pg_place_addr(const struct nat64lsn_cfg *cfg, int addr_off,
+    int nat_proto, uint16_t *aport, int *ppg_idx)
+{
+	int base, chunk;
+
+	base = addr_off * _ADDR_PG_COUNT +
+	    (nat_proto - 1) * _ADDR_PG_PROTO_COUNT;
+
+	for (chunk = NAT64_MIN_CHUNK; chunk < _ADDR_PG_PROTO_COUNT; chunk++) {
+		if (cfg->pg[base + chunk] == NULL) {
+			*aport = chunk * NAT64_CHUNK_SIZE;
+			*ppg_idx = base + chunk;
+			return (1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Picks an alias address and a free port chunk for the job @ji.
+ * Candidates are tried in this order:
+ *  1) the alias address hinted in ji->aaddr, if non-zero;
+ *  2) the address selected by ji->fhash;
+ *  3) every address of the IPv4 prefix in turn.
+ *
+ * On success stores the alias address (network byte order) in @aaddr,
+ * the first port of the chunk in @aport and the cfg->pg index in
+ * @ppg_idx, and returns 0. Returns 1 if the source host is unknown or
+ * no free chunk exists.
+ *
+ * XXX: This function needs to be rewritten to
+ * use free bitmask for faster pg finding,
+ * additionally, it should take into consideration
+ * a) randomization and
+ * b) previous addresses allocated to given nat instance
+ */
+static int
+find_portgroup_place(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji,
+    uint32_t *aaddr, uint16_t *aport, int *ppg_idx)
+{
+	struct nat64lsn_host *nh;
+	int off, proto;
+
+	/* The source host has to exist; we can't rely on pg->host */
+	I6HASH_FIND(cfg, nh, &ji->haddr);
+	if (nh == NULL)
+		return (1);
+
+	/*
+	 * XXX: Use bitmask index to be able to find/check if IP address
+	 * has some spare pg's
+	 */
+	proto = ji->nat_proto;
+
+	/* 1) Same alias address as before */
+	if (ji->aaddr != 0) {
+		off = ntohl(ji->aaddr) - cfg->prefix4;
+		if (find_pg_place_addr(cfg, off, proto, aport, ppg_idx) != 0)
+			goto found;
+	}
+
+	/* 2) Address derived from the flow hash */
+	off = ji->fhash % (1 << (32 - cfg->plen4));
+	if (find_pg_place_addr(cfg, off, proto, aport, ppg_idx) != 0)
+		goto found;
+
+	/* 3) Any address that still has room */
+	for (off = 0; off < (1 << (32 - cfg->plen4)); off++) {
+		if (find_pg_place_addr(cfg, off, proto, aport, ppg_idx) != 0)
+			goto found;
+	}
+
+	return (1);
+
+found:
+	*aaddr = htonl(cfg->prefix4 + off);
+	return (0);
+}
+
+/*
+ * Binds the pre-allocated portgroup ji->pg to the host ji->haddr.
+ *
+ * Needs an existing host, a free slot in cfg->pg and a free slot in
+ * the host's pg_ptr array. If any of them is missing, returns 1
+ * without modifying @cfg, the host or the job. On success the
+ * portgroup is published in cfg->pg and nh->pg_ptr, ji->pg is
+ * cleared, ji->done is set and 0 is returned.
+ */
+static int
+attach_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_host *nh;
+ uint32_t aaddr;
+ uint16_t aport;
+ int nh_pg_idx, pg_idx;
+
+ pg = ji->pg;
+
+ /*
+ * Find source host and bind: we can't rely on
+ * pg->host
+ */
+ I6HASH_FIND(cfg, nh, &ji->haddr);
+ if (nh == NULL)
+ return (1);
+
+ /* Find spare port chunk */
+ if (find_portgroup_place(cfg, ji, &aaddr, &aport, &pg_idx) != 0)
+ return (1);
+
+ /* Find spare slot in the per-host portgroup array */
+ if (find_nh_pg_idx(nh, &nh_pg_idx) != 0)
+ return (1);
+
+ /* Publish the portgroup in the instance-wide index */
+ cfg->pg[pg_idx] = pg;
+ cfg->protochunks[pg->nat_proto]++;
+ NAT64STAT_INC(&cfg->stats, spgcreated);
+
+ pg->aaddr = aaddr;
+ pg->aport = aport;
+ pg->host = nh;
+ SET_AGE(pg->timestamp);
+
+ nh->pg_ptr[nh_pg_idx] = pg;
+ /* pg_count only grows when the slot right past the end was used */
+ if (nh->pg_count == nh_pg_idx)
+ nh->pg_count++;
+ SET_AGE(nh->timestamp);
+
+ /* The job no longer owns the portgroup */
+ ji->pg = NULL;
+ ji->done = 1;
+
+ return (0);
+}
+
+/*
+ * Handles a JTYPE_DELPORTGROUP job.
+ *
+ * Re-checks every portgroup of host ji->haddr selected by ji->delmask
+ * and unlinks those that stale_pg() still reports as stale. If
+ * stale_nh() then reports the host itself as stale, the host is
+ * removed from the hash and handed back to the caller in ji->nh.
+ * The unlinked portgroups are freed before returning.
+ *
+ * nat64lsn_do_request() calls this with IPFW_UH_WLOCK and IPFW_WLOCK
+ * held.
+ */
+static void
+consider_del_portgroup(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+	struct nat64lsn_host *nh, *nh_tmp;
+	struct nat64lsn_portgroup *pg, *pg_list[8];
+	int i, pg_lidx, idx;
+
+	/* Find source host */
+	I6HASH_FIND(cfg, nh, &ji->haddr);
+	if (nh == NULL || nh->pg_count == 0)
+		return;
+
+	memset(pg_list, 0, sizeof(pg_list));
+	pg_lidx = 0;
+
+	NAT64_LOCK(nh);
+
+	for (i = nh->pg_count - 1; i >= 0; i--) {
+		if ((ji->delmask & (1 << i)) == 0)
+			continue;
+		pg = GET_PORTGROUP_BYSIDX(cfg, nh, i + 1);
+
+		/*
+		 * The slot may be empty by now: delmask was computed
+		 * before this job ran, and the pg_count adjustment
+		 * below shows that slots can be NULL.
+		 */
+		if (pg == NULL)
+			continue;
+
+		/* Check conditions once again */
+		if (stale_pg(cfg, pg) == 0)
+			continue;
+
+		/* DO delete */
+		pg_list[pg_lidx++] = pg;
+		nh->pg_ptr[i] = NULL;
+
+		idx = _GET_PORTGROUP_IDX(cfg, ntohl(pg->aaddr), pg->nat_proto,
+		    pg->aport);
+		KASSERT(cfg->pg[idx] == pg, ("Non matched pg"));
+		DLPRINTF(DP_OBJ, "DELETE PORTGROUP %d %p %p", idx,
+		    cfg->pg[idx], pg);
+		cfg->pg[idx] = NULL;
+		cfg->protochunks[pg->nat_proto]--;
+		/* The only place where a deleted portgroup is counted */
+		NAT64STAT_INC(&cfg->stats, spgdeleted);
+
+		/* Decrease pg_count */
+		while (nh->pg_count > 0 &&
+		    GET_PORTGROUP_BYSIDX(cfg, nh, nh->pg_count) == NULL)
+			nh->pg_count--;
+
+		/* Check if on-stack buffer has ended */
+		if (pg_lidx == sizeof(pg_list)/sizeof(pg_list[0]))
+			break;
+	}
+
+	NAT64_UNLOCK(nh);
+
+	if (stale_nh(cfg, nh)) {
+		I6HASH_REMOVE(cfg, nh, nh_tmp, &ji->haddr);
+		KASSERT(nh != NULL, ("Unable to find address"));
+		cfg->ihcount--;
+		/* The caller destroys the host once the locks are dropped */
+		ji->nh = nh;
+		I6HASH_FIND(cfg, nh, &ji->haddr);
+		KASSERT(nh == NULL, ("Failed to delete address"));
+	}
+
+	/*
+	 * TODO: Delay freeing portgroups.
+	 * spgdeleted was already bumped when each group was unlinked,
+	 * so it must not be incremented again here.
+	 */
+	while (pg_lidx > 0) {
+		pg_lidx--;
+		destroy_portgroup(pg_list[pg_lidx]);
+	}
+}
+
+/*
+ * Main request handler.
+ * Responsible for handling jqueue, e.g.
+ * creating new hosts, adding/deleting portgroups.
+ *
+ * Armed as the handler of cfg->jcallout; @data is the
+ * struct nat64lsn_cfg the callout was scheduled for.
+ *
+ * Works in three phases: (1) take the whole pending queue under the
+ * job queue lock, (2) pre-allocate hosts/portgroups without the chain
+ * write locks, (3) apply all jobs under IPFW_UH_WLOCK + IPFW_WLOCK and
+ * finally free leftovers and reinject queued mbufs.
+ */
+static void
+nat64lsn_do_request(void *data)
+{
+ IPFW_RLOCK_TRACKER;
+ struct nat64lsn_job_head jhead;
+ struct nat64lsn_job_item *ji;
+ int jcount, nhsize;
+ struct nat64lsn_cfg *cfg = (struct nat64lsn_cfg *) data;
+ struct ip_fw_chain *ch;
+ int delcount;
+
+ CURVNET_SET(cfg->vp);
+
+ TAILQ_INIT(&jhead);
+
+ /* XXX: We're running unlocked here */
+
+ ch = cfg->ch;
+ delcount = 0;
+ IPFW_RLOCK(ch);
+
+ /* Grab queue */
+ JQUEUE_LOCK();
+ TAILQ_SWAP(&jhead, &cfg->jhead, nat64lsn_job_item, next);
+ jcount = cfg->jlen;
+ cfg->jlen = 0;
+ JQUEUE_UNLOCK();
+
+ /*
+ * check if we need to resize hash.
+ * NOTE: the value computed here is discarded below (nhsize is
+ * reset to 0 before it is used), so no resize takes place yet.
+ */
+ nhsize = 0;
+ if (cfg->ihcount > cfg->ihsize && cfg->ihsize < 65536) {
+ nhsize = cfg->ihsize;
+ for ( ; cfg->ihcount > nhsize && nhsize < 65536; nhsize *= 2)
+ ;
+ } else if (cfg->ihcount < cfg->ihsize * 4) {
+ nhsize = cfg->ihsize;
+ for ( ; cfg->ihcount < nhsize * 4 && nhsize > 32; nhsize /= 2)
+ ;
+ }
+
+ IPFW_RUNLOCK(ch);
+
+ /* Nothing was queued: restore vnet context and leave */
+ if (TAILQ_EMPTY(&jhead)) {
+ CURVNET_RESTORE();
+ return;
+ }
+
+ NAT64STAT_INC(&cfg->stats, jcalls);
+ DLPRINTF(DP_JQ, "count=%d", jcount);
+
+ /*
+ * TODO:
+ * What we should do here is to build a hash
+ * to ensure we don't have lots of duplicate requests.
+ * Skip this for now.
+ *
+ * TODO: Limit per-call number of items
+ */
+
+ /*
+ * Pre-allocate everything for entire chain.
+ * Allocation failures are only counted; the apply phase below
+ * skips jobs whose nh/pg pointer stayed NULL.
+ */
+ TAILQ_FOREACH(ji, &jhead, next) {
+ switch (ji->jtype) {
+ case JTYPE_NEWHOST:
+ if (alloc_host6(ji) != 0)
+ NAT64STAT_INC(&cfg->stats, jhostfails);
+ break;
+ case JTYPE_NEWPORTGROUP:
+ if (alloc_portgroup(ji) != 0)
+ NAT64STAT_INC(&cfg->stats, jportfails);
+ break;
+ case JTYPE_DELPORTGROUP:
+ /* delcount is accumulated but not used further here */
+ delcount += ji->delcount;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /*
+ * TODO: Alloc new hash
+ */
+ nhsize = 0;
+ if (nhsize > 0) {
+ /* XXX: */
+ }
+
+ /* Apply all changes in batch */
+ IPFW_UH_WLOCK(ch);
+ IPFW_WLOCK(ch);
+
+ TAILQ_FOREACH(ji, &jhead, next) {
+ switch (ji->jtype) {
+ case JTYPE_NEWHOST:
+ if (ji->nh != NULL)
+ attach_host6(cfg, ji);
+ break;
+ case JTYPE_NEWPORTGROUP:
+ if (ji->pg != NULL)
+ attach_portgroup(cfg, ji);
+ break;
+ case JTYPE_DELPORTGROUP:
+ consider_del_portgroup(cfg, ji);
+ break;
+ }
+ }
+
+ if (nhsize > 0) {
+ /* XXX: Move everything to new hash */
+ }
+
+ IPFW_WUNLOCK(ch);
+ IPFW_UH_WUNLOCK(ch);
+
+ /*
+ * Flush unused entries.
+ * Whatever is still referenced by a job at this point was either
+ * not attached (nh/pg) or was handed back for destruction by
+ * consider_del_portgroup() (nh). Queued mbufs are reinjected.
+ */
+ while (!TAILQ_EMPTY(&jhead)) {
+ ji = TAILQ_FIRST(&jhead);
+ TAILQ_REMOVE(&jhead, ji, next);
+ if (ji->nh != NULL)
+ destroy_host6(ji->nh);
+ if (ji->pg != NULL)
+ destroy_portgroup(ji->pg);
+ if (ji->m != NULL)
+ reinject_mbuf(cfg, ji);
+ free(ji, M_IPFW);
+ }
+
+ CURVNET_RESTORE();
+}
+
+/*
+ * Allocates (without sleeping) and fills a job item of type @jtype.
+ * When @f_id is given, the job records a copy of the flow id, the
+ * flow's IPv6 source address and the NAT protocol id mapped from
+ * f_id->proto.
+ *
+ * Returns NULL if the job queue already holds cfg->jmaxlen items, if
+ * the protocol of @f_id has no mapping, or if memory is exhausted.
+ */
+static struct nat64lsn_job_item *
+nat64_create_job(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
+    int jtype)
+{
+	struct nat64lsn_job_item *job;
+	struct in6_addr src6;
+	uint8_t proto_id;
+
+	/*
+	 * Do not try to lock possibly contested mutex if we're near the limit.
+	 * Drop packet instead.
+	 */
+	if (cfg->jlen >= cfg->jmaxlen) {
+		NAT64STAT_INC(&cfg->stats, jmaxlen);
+		return (NULL);
+	}
+
+	proto_id = 0;
+	memset(&src6, 0, sizeof(src6));
+	if (f_id != NULL) {
+		src6 = f_id->src_ip6;
+		proto_id = nat64lsn_proto_map[f_id->proto];
+
+		DLPRINTF(DP_JQ, "REQUEST pg nat_proto %d on proto %d",
+		    proto_id, f_id->proto);
+
+		/* Unsupported protocol: nothing to request */
+		if (proto_id == 0)
+			return (NULL);
+	}
+
+	job = malloc(sizeof(*job), M_IPFW, M_NOWAIT | M_ZERO);
+	if (job == NULL) {
+		NAT64STAT_INC(&cfg->stats, jnomem);
+		return (NULL);
+	}
+
+	job->jtype = jtype;
+	if (f_id == NULL)
+		return (job);
+
+	job->f_id = *f_id;
+	job->haddr = src6;
+	job->nat_proto = proto_id;
+
+	return (job);
+}
+
+/*
+ * Appends job @ji to the instance job queue and arms the job callout
+ * (one tick from now) unless it is already pending.
+ * A NULL @ji is silently ignored.
+ */
+static void
+nat64lsn_enqueue_job(struct nat64lsn_cfg *cfg, struct nat64lsn_job_item *ji)
+{
+
+	if (ji != NULL) {
+		JQUEUE_LOCK();
+		TAILQ_INSERT_TAIL(&cfg->jhead, ji, next);
+		cfg->jlen++;
+		NAT64STAT_INC(&cfg->stats, jrequests);
+
+		/* Schedule the handler only if nobody did so already */
+		if (!callout_pending(&cfg->jcallout))
+			callout_reset(&cfg->jcallout, 1, nat64lsn_do_request,
+			    cfg);
+		JQUEUE_UNLOCK();
+	}
+}
+
+/*
+ * Moves all @jlen jobs from the list @jhead to the tail of the
+ * instance job queue and arms the job callout (one tick from now)
+ * unless it is already pending. An empty @jhead is a no-op.
+ */
+static void
+nat64lsn_enqueue_jobs(struct nat64lsn_cfg *cfg, struct nat64lsn_job_head *jhead,
+    int jlen)
+{
+
+	if (!TAILQ_EMPTY(jhead)) {
+		/* Attach current queue to execution one */
+		JQUEUE_LOCK();
+		TAILQ_CONCAT(&cfg->jhead, jhead, next);
+		cfg->jlen += jlen;
+		NAT64STAT_ADD(&cfg->stats, jrequests, jlen);
+
+		/* Schedule the handler only if nobody did so already */
+		if (!callout_pending(&cfg->jcallout))
+			callout_reset(&cfg->jcallout, 1, nat64lsn_do_request,
+			    cfg);
+		JQUEUE_UNLOCK();
+	}
+}
+
+/*
+ * Computes a pseudo-random value for the IPv6 flow @f_id by running
+ * djb_hash() over the destination address, the source address, the
+ * destination port and the source port of the flow.
+ */
+static unsigned int
+flow6_hash(const struct ipfw_flow_id *f_id)
+{
+	unsigned char hbuf[36];
+
+	memcpy(hbuf, &f_id->dst_ip6, 16);
+	memcpy(&hbuf[16], &f_id->src_ip6, 16);
+	memcpy(&hbuf[32], &f_id->dst_port, 2);
+	/*
+	 * The source port goes to its own slot. Storing it at offset 32
+	 * would overwrite the destination port and leave the last two
+	 * bytes of hbuf uninitialized when they are hashed.
+	 */
+	memcpy(&hbuf[34], &f_id->src_port, 2);
+
+	return (djb_hash(hbuf, sizeof(hbuf)));
+}
+
+/*
+ * Queues a JTYPE_NEWHOST job for the flow @f_id.
+ * The mbuf is taken away from the caller (*pm is set to NULL): it is
+ * either attached to the job or, if no job could be created, freed.
+ *
+ * Always returns IP_FW_PASS.
+ */
+static int
+nat64lsn_request_shost(struct nat64lsn_cfg *cfg, const struct ipfw_flow_id *f_id,
+    struct mbuf **pm)
+{
+	struct nat64lsn_job_item *ji;
+	struct mbuf *m;
+
+	/* Take ownership of the packet */
+	m = *pm;
+	*pm = NULL;
+
+	ji = nat64_create_job(cfg, f_id, JTYPE_NEWHOST);
+	if (ji == NULL) {
+		m_freem(m);
+		return (IP_FW_PASS);
+	}
+
+	ji->m = m;
+	/* Provide pseudo-random value based on flow */
+	ji->fhash = flow6_hash(f_id);
+	nat64lsn_enqueue_job(cfg, ji);
+	NAT64STAT_INC(&cfg->stats, jhostsreq);
+
+	return (IP_FW_PASS);
+}
+
+/*
+ * Queues a JTYPE_NEWPORTGROUP job for the flow @f_id, passing @aaddr
+ * along as the preferred alias address.
+ * The mbuf is taken away from the caller (*pm is set to NULL): it is
+ * either attached to the job or, if no job could be created, freed.
+ *
+ * Always returns IP_FW_PASS.
+ */
+static int
+nat64lsn_request_portgroup(struct nat64lsn_cfg *cfg,
+    const struct ipfw_flow_id *f_id, struct mbuf **pm, uint32_t aaddr)
+{
+	struct nat64lsn_job_item *ji;
+	struct mbuf *m;
+
+	/* Take ownership of the packet */
+	m = *pm;
+	*pm = NULL;
+
+	ji = nat64_create_job(cfg, f_id, JTYPE_NEWPORTGROUP);
+	if (ji == NULL) {
+		m_freem(m);
+		return (IP_FW_PASS);
+	}
+
+	ji->m = m;
+	/* Provide pseudo-random value based on flow */
+	ji->fhash = flow6_hash(f_id);
+	ji->aaddr = aaddr;
+	nat64lsn_enqueue_job(cfg, ji);
+	NAT64STAT_INC(&cfg->stats, jportreq);
+
+	return (IP_FW_PASS);
+}
+
+/*
+ * Allocates a state slot for host @nh from one of its portgroups
+ * whose nat_proto equals @nat_proto, copies the key from @kst into it
+ * and links it at the head of the matching host hash chain.
+ *
+ * @aaddr is updated with the alias address of portgroups that were
+ * looked at, so that the caller can pass it on when it has to request
+ * an additional portgroup.
+ *
+ * Returns the new state, or NULL if no matching portgroup has a free
+ * slot. nat64lsn_translate6() calls this with the host lock held.
+ */
+static struct nat64lsn_state *
+nat64lsn_create_state(struct nat64lsn_cfg *cfg, struct nat64lsn_host *nh,
+ int nat_proto, struct nat64lsn_state *kst, uint32_t *aaddr)
+{
+ struct nat64lsn_portgroup *pg;
+ struct nat64lsn_state *st;
+ int i, hval, off;
+
+ /* XXX: create additional bitmask for selecting proper portgroup */
+ for (i = 0; i < nh->pg_count; i++) {
+ pg = GET_PORTGROUP_BYSIDX(cfg, nh, i + 1);
+ if (pg == NULL)
+ continue;
+ /* Remember the first alias address seen, whatever its protocol */
+ if (*aaddr == 0)
+ *aaddr = pg->aaddr;
+ if (pg->nat_proto != nat_proto)
+ continue;
+
+ /* A result of 0 means "no free slot"; otherwise it is offset + 1 */
+ off = PG_GET_FREE_IDX(pg);
+ if (off != 0) {
+ /* We have found spare state. Use it */
+ off--;
+ PG_MARK_BUSY_IDX(pg, off);
+ st = &pg->states[off];
+
+ /*
+ * Fill in new info. Assume state was zeroed.
+ * Timestamp and flags will be filled by caller.
+ */
+ st->u.s = kst->u.s;
+ st->cur.idx = i + 1;
+ st->cur.off = off;
+
+ /* Insert into host hash table */
+ hval = HASH_IN4(&st->u.hkey) & (nh->hsize - 1);
+ st->next = nh->phash[hval];
+ nh->phash[hval] = st->cur;
+
+ nat64lsn_dump_state(cfg, pg, st, "ALLOC STATE", off);
+
+ NAT64STAT_INC(&cfg->stats, screated);
+
+ return (st);
+ }
+ /* Save last used alias address */
+ *aaddr = pg->aaddr;
+ }
+
+ return (NULL);
+}
+
+/*
+ * Translates the IPv6 packet *pm of flow @f_id.
+ *
+ * Looks up the host entry for the IPv6 source address and then the
+ * state matching (destination IPv4 address, destination port, source
+ * port or ICMPv6 echo id) in the host hash, creating the state if
+ * needed. If the host is unknown, or no state slot is available, the
+ * packet is handed over to the job queue and the corresponding
+ * request function's result is returned.
+ *
+ * Returns IP_FW_DENY if the protocol is not supported, IP_FW_PASS if
+ * nat64_do_handle_ip6() reports NAT64SKIP, and IP_FW_DENY with *pm
+ * set to NULL otherwise (the mbuf is freed here only when
+ * nat64_do_handle_ip6() reports NAT64MFREE).
+ */
+static int
+nat64lsn_translate6(struct nat64lsn_cfg *cfg, struct ipfw_flow_id *f_id,
+    struct mbuf **pm)
+{
+	struct nat64lsn_host *nh;
+	uint32_t aaddr;
+	int hval;
+	int nat_proto;
+	int action;
+	uint16_t state_ts, state_flags;
+	struct st_ptr sidx;
+	struct nat64lsn_state *st, kst;
+	struct nat64lsn_portgroup *pg;
+	uint16_t aport;
+	char addrbuf[INET6_ADDRSTRLEN];
+
+	/* Check if af/protocol is supported and get it short id */
+	nat_proto = nat64lsn_proto_map[f_id->proto];
+	if (nat_proto == 0) {
+		NAT64STAT_INC(&cfg->stats, noproto);
+		return (IP_FW_DENY);
+	}
+
+	/* Try to find host first */
+	I6HASH_FIND(cfg, nh, &f_id->src_ip6);
+
+	if (nh == NULL)
+		return (nat64lsn_request_shost(cfg, f_id, pm));
+
+	/* Fill-in on-stack state structure */
+	kst.u.s.faddr = f_id->dst_ip6.s6_addr32[3];
+	kst.u.s.fport = f_id->dst_port;
+	kst.u.s.lport = f_id->src_port;
+
+	/* Prepare some fields we might need to update */
+	SET_AGE(state_ts);
+	state_flags = 0;
+	struct ip6_hdr *ip6 = mtod(*pm, struct ip6_hdr *);
+	if (ip6->ip6_nxt == IPPROTO_TCP) {
+		/* Check TCP flags */
+		/* XXX: We should get ulp offset from f* */
+		uint8_t tflags;
+		tflags = TCP(mtodo(*pm, sizeof(struct ip6_hdr)))->th_flags;
+		state_flags = convert_tcp_flags(tflags);
+	}
+	if (ip6->ip6_nxt == IPPROTO_ICMPV6) {
+		/* Alter local port data */
+		/* XXX: We should get ulp offset from f* */
+		struct icmp6_hdr *icmp6;
+		icmp6 = mtodo(*pm, sizeof(struct ip6_hdr));
+		if (icmp6->icmp6_type == ICMP6_ECHO_REQUEST ||
+		    icmp6->icmp6_type == ICMP6_ECHO_REPLY)
+			kst.u.s.lport = ntohs(icmp6->icmp6_id);
+	}
+
+	hval = HASH_IN4(&kst.u.hkey) & (nh->hsize - 1);
+
+	pg = NULL;
+	st = NULL;
+
+	/* OK, let's find state in host hash */
+	NAT64_LOCK(nh);
+	sidx = nh->phash[hval];
+	int k = 0;
+	while (sidx.idx != 0) {
+		pg = GET_PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
+		st = &pg->states[sidx.off];
+		if (st->u.hkey == kst.u.hkey && pg->nat_proto == nat_proto)
+			break;
+		/* Debug aid: report suspiciously long hash chains */
+		if (k++ > 1000) {
+			DPRINTF("FFFUCK %d/%d %d/%d\n", sidx.idx, sidx.off,
+			    st->next.idx, st->next.off);
+			inet_ntop(AF_INET6, &nh->addr, addrbuf,
+			    sizeof(addrbuf));
+			DPRINTF("TR host %s %p on cpu %d", addrbuf, nh,
+			    curcpu);
+		}
+		sidx = st->next;
+	}
+
+	if (sidx.idx == 0) {
+		aaddr = 0;
+		st = nat64lsn_create_state(cfg, nh, nat_proto, &kst, &aaddr);
+		if (st == NULL) {
+			/* No free states. Request more if we can */
+			/* XXX: Limit on pg_count! */
+			NAT64_UNLOCK(nh);
+			return (nat64lsn_request_portgroup(cfg, f_id,
+			    pm, aaddr));
+		}
+
+		/* We've got new state. */
+		sidx = st->cur;
+		pg = GET_PORTGROUP_BYSIDX(cfg, nh, sidx.idx);
+	}
+
+	/* Okay, state found */
+
+	/* Update necessary fields */
+	if (st->timestamp != state_ts)
+		st->timestamp = state_ts;
+	/*
+	 * Write only when at least one of the new flags is not recorded
+	 * yet. Testing for "!= 0" instead would never store any flag in
+	 * a state whose flags are still zero.
+	 */
+	if ((st->flags & state_flags) != state_flags)
+		st->flags |= state_flags;
+
+	/* Copy needed state data */
+	aaddr = pg->aaddr;
+	aport = htons(pg->aport + sidx.off);
+
+	NAT64_UNLOCK(nh);
+
+	action = nat64_do_handle_ip6(*pm, aaddr, aport, &cfg->stats);
+
+	if (action == NAT64SKIP)
+		return (IP_FW_PASS);
+	if (action == NAT64MFREE)
+		m_freem(*pm);
+	*pm = NULL;
+
+	return (IP_FW_DENY);
+}
+
+/*
+ * Main dataplane entry point.
+ *
+ * Validates that @cmd is an O_EXTERNAL_ACTION opcode for nat64lsn
+ * followed by an O_EXTERNAL_INSTANCE opcode that resolves to an
+ * instance, then dispatches the packet by address family.
+ * *done is always set to 1 to terminate the rule search.
+ *
+ * Returns 0 if the opcodes do not match, no instance is found or the
+ * address type is neither 4 nor 6; otherwise the verdict of the
+ * translation function.
+ */
+int
+ipfw_nat64lsn(struct ip_fw_chain *ch, struct ip_fw_args *args,
+    ipfw_insn *cmd, int *done)
+{
+	struct nat64lsn_cfg *cfg;
+	ipfw_insn *icmd;
+
+	IPFW_RLOCK_ASSERT(ch);
+
+	*done = 1; /* terminate the search */
+
+	if (cmd->opcode != O_EXTERNAL_ACTION ||
+	    cmd->arg1 != V_nat64lsn_eid)
+		return (0);
+
+	/* The instance opcode immediately follows the action opcode */
+	icmd = cmd + 1;
+	if (icmd->opcode != O_EXTERNAL_INSTANCE)
+		return (0);
+
+	cfg = NAT64_LOOKUP(ch, icmd);
+	if (cfg == NULL)
+		return (0);
+
+	switch (args->f_id.addr_type) {
+	case 4:
+		return (nat64lsn_translate4(cfg, &args->f_id, &args->m));
+	case 6:
+		return (nat64lsn_translate6(cfg, &args->f_id, &args->m));
+	default:
+		return (0);
+	}
+}
+
+/*
+ * One-time initialization: fills the protocol mapping tables,
+ * initializes the job queue lock and creates the UMA zones used for
+ * hosts and portgroups.
+ */
+void
+nat64lsn_init_internal(void)
+{
+
+	memset(nat64lsn_proto_map, 0, sizeof(nat64lsn_proto_map));
+	memset(nat64lsn_rproto_map, 0, sizeof(nat64lsn_rproto_map));
+
+	/* IP protocol number -> NAT protocol id (0 means unsupported) */
+	nat64lsn_proto_map[IPPROTO_TCP] = NAT_PROTO_TCP;
+	nat64lsn_proto_map[IPPROTO_UDP] = NAT_PROTO_UDP;
+	nat64lsn_proto_map[IPPROTO_ICMP] = NAT_PROTO_ICMP;
+	nat64lsn_proto_map[IPPROTO_ICMPV6] = NAT_PROTO_ICMP;
+
+	/* NAT protocol id -> IP protocol number */
+	nat64lsn_rproto_map[NAT_PROTO_TCP] = IPPROTO_TCP;
+	nat64lsn_rproto_map[NAT_PROTO_UDP] = IPPROTO_UDP;
+	nat64lsn_rproto_map[NAT_PROTO_ICMP] = IPPROTO_ICMPV6;
+
+	JQUEUE_LOCK_INIT();
+
+	nat64lsn_host_zone = uma_zcreate("NAT64 hosts zone",
+	    sizeof(struct nat64lsn_host), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, 0);
+	nat64lsn_buck_zone = uma_zcreate("NAT64 buck zone",
+	    sizeof(struct nat64lsn_portgroup), NULL, NULL, NULL, NULL,
+	    UMA_ALIGN_PTR, 0);
+}
+
+/*
+ * Releases what nat64lsn_init_internal() set up: destroys the job
+ * queue lock and both UMA zones.
+ */
+void
+nat64lsn_uninit_internal(void)
+{
+
+ JQUEUE_LOCK_DESTROY();
+ uma_zdestroy(nat64lsn_host_zone);
+ uma_zdestroy(nat64lsn_buck_zone);
+}
+
+/*
+ * Arms the periodic callout of instance @cfg so that
+ * nat64lsn_periodic() runs hz * PERIODIC_DELAY ticks from now.
+ */
+void
+nat64lsn_start_instance(struct nat64lsn_cfg *cfg)
+{
+
+ callout_reset(&cfg->periodic, hz * PERIODIC_DELAY,
+ nat64lsn_periodic, cfg);
+}
+
+/*
+ * Allocates and initializes a new nat64lsn instance bound to chain
+ * @ch and the current vnet. The portgroup index is sized for @numaddr
+ * alias addresses. All allocations use M_WAITOK.
+ *
+ * Returns the new instance; its callouts are initialized but not
+ * started.
+ */
+struct nat64lsn_cfg *
+nat64lsn_init_instance(struct ip_fw_chain *ch, size_t numaddr)
+{
+	struct nat64lsn_cfg *cfg;
+
+	cfg = malloc(sizeof(*cfg), M_IPFW, M_WAITOK | M_ZERO);
+	cfg->vp = curvnet;
+	cfg->ch = ch;
+	TAILQ_INIT(&cfg->jhead);
+	callout_init(&cfg->periodic, CALLOUT_MPSAFE);
+	callout_init(&cfg->jcallout, CALLOUT_MPSAFE);
+
+	COUNTER_ARRAY_ALLOC(cfg->stats.stats, NAT64STATS, M_WAITOK);
+
+	/* Host hash starts with 64 buckets */
+	cfg->ihsize = 64;
+	cfg->ih = malloc(sizeof(void *) * cfg->ihsize, M_IPFW,
+	    M_WAITOK | M_ZERO);
+
+	/* One portgroup pointer per (address, protocol, chunk) */
+	cfg->pg = malloc(sizeof(void *) * numaddr * _ADDR_PG_COUNT, M_IPFW,
+	    M_WAITOK | M_ZERO);
+
+	return (cfg);
+}
+
+/*
+ * Stops both callouts of instance @cfg and releases the memory owned
+ * by the instance itself: pending jobs (with their mbufs), counters,
+ * the host hash array, the portgroup index and @cfg.
+ *
+ * NOTE(review): hosts and portgroups still linked into cfg->ih and
+ * cfg->pg are not released here - confirm whether they are freed
+ * elsewhere.
+ */
+void
+nat64lsn_destroy_instance(struct nat64lsn_cfg *cfg)
+{
+	struct nat64lsn_job_item *ji;
+
+	/*
+	 * Drain without holding the job queue lock: the job handler
+	 * nat64lsn_do_request() takes that lock itself, and
+	 * callout_drain() must not be called with a lock held that the
+	 * callout may block on.
+	 *
+	 * The periodic callout goes first, presumably it can queue jobs
+	 * and thereby re-arm jcallout (TODO confirm against
+	 * nat64lsn_periodic()).
+	 */
+	callout_drain(&cfg->periodic);
+	callout_drain(&cfg->jcallout);
+
+	/*
+	 * Nobody is going to run the queue anymore. Release the jobs
+	 * that are still pending together with the packets they hold.
+	 */
+	while ((ji = TAILQ_FIRST(&cfg->jhead)) != NULL) {
+		TAILQ_REMOVE(&cfg->jhead, ji, next);
+		if (ji->m != NULL)
+			m_freem(ji->m);
+		free(ji, M_IPFW);
+	}
+	cfg->jlen = 0;
+
+	COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS);
+
+	free(cfg->ih, M_IPFW);
+	free(cfg->pg, M_IPFW);
+	free(cfg, M_IPFW);
+}
+
Index: sys/netpfil/ipfw/nat64/nat64lsn_control.c
===================================================================
--- /dev/null
+++ sys/netpfil/ipfw/nat64/nat64lsn_control.c
@@ -0,0 +1,818 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64lsn.h>
+#include <netinet6/ip_fw_nat64.h>
+
+VNET_DEFINE(uint16_t, nat64lsn_eid) = 0;
+
+/*
+ * Looks up the nat64lsn instance called @name in set @set among the
+ * named objects of @ni. Returns NULL if there is no such instance.
+ */
+static struct nat64lsn_cfg *
+nat64lsn_find(struct namedobj_instance *ni, const char *name, uint8_t set)
+{
+
+	return ((struct nat64lsn_cfg *)ipfw_objhash_lookup_name_type(ni, set,
+	    IPFW_TLV_NAT64LSN_NAME, name));
+}
+
+/*
+ * Creates new nat64lsn instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ipfw_nat64lsn_cfg ]
+ *
+ * Zero-valued tunables in the request are replaced with defaults,
+ * port limits are rounded up to a multiple of NAT64_CHUNK_SIZE.
+ *
+ * Returns 0 on success, EINVAL on a malformed request, EEXIST if an
+ * instance with the same name and set already exists, ENOSPC if no
+ * object index could be allocated.
+ */
+static int
+nat64lsn_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_lheader *olh;
+	ipfw_nat64lsn_cfg *uc;
+	struct nat64lsn_cfg *cfg;
+	struct namedobj_instance *ni;
+	uint32_t addr4, mask4;
+	int error, v;
+
+	if (sd->valsize != sizeof(*olh) + sizeof(*uc))
+		return (EINVAL);
+
+	olh = (ipfw_obj_lheader *)sd->kbuf;
+	uc = (ipfw_nat64lsn_cfg *)(olh + 1);
+
+	if (ipfw_check_object_name_generic(uc->name) != 0)
+		return (EINVAL);
+
+	if (uc->max_ports == 0)
+		uc->max_ports = DEFAULT_MAX_PORTS;
+	else if (uc->max_ports > NAT64_CHUNK_SIZE * 8)
+		uc->max_ports = NAT64_CHUNK_SIZE * 8;
+	else {
+		v = (int)uc->max_ports + NAT64_CHUNK_SIZE - 1;
+		uc->max_ports = (v / NAT64_CHUNK_SIZE) * NAT64_CHUNK_SIZE;
+	}
+
+	if (uc->agg_prefix_len > 127 || uc->set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	/*
+	 * plen4 == 0 would require a shift by 32 bits below, which is
+	 * undefined, and would make later address offset computations
+	 * based on cfg->plen4 exceed the portgroup index.
+	 *
+	 * NOTE(review): short prefixes still lead to a very large
+	 * M_WAITOK allocation in nat64lsn_init_instance(); consider
+	 * enforcing a minimum prefix length.
+	 */
+	if (uc->plen4 == 0 || uc->plen4 > 32)
+		return (EINVAL);
+	if (uc->plen6 > 128 || ((uc->plen6 % 8) != 0))
+		return (EINVAL);
+
+	/* XXX: Check prefix4 to be global */
+	addr4 = ntohl(uc->prefix4.s_addr);
+	/* Unsigned arithmetic: 1 << 31 does not fit into a signed int */
+	mask4 = ~(((uint32_t)1 << (32 - uc->plen4)) - 1);
+	if ((addr4 & mask4) != addr4)
+		return (EINVAL);
+
+	/* XXX: Check prefix6 */
+	if (uc->jmaxlen == 0)
+		uc->jmaxlen = DEFAULT_JMAXLEN;
+	if (uc->jmaxlen > 65536)
+		uc->jmaxlen = 65536;
+	if (uc->min_port == 0)
+		uc->min_port = NAT64_MIN_PORT;
+	if (uc->max_port == 0)
+		uc->max_port = 65535;
+	if (uc->min_port > uc->max_port)
+		return (EINVAL);
+	/*
+	 * NOTE(review): both limits are rounded up; if max_port is a
+	 * 16-bit field, the default of 65535 may wrap here - confirm.
+	 */
+	v = (int)uc->min_port + NAT64_CHUNK_SIZE - 1;
+	uc->min_port = (v / NAT64_CHUNK_SIZE) * NAT64_CHUNK_SIZE;
+	v = (int)uc->max_port + NAT64_CHUNK_SIZE - 1;
+	uc->max_port = (v / NAT64_CHUNK_SIZE) * NAT64_CHUNK_SIZE;
+
+	if (uc->nh_delete_delay == 0)
+		uc->nh_delete_delay = DEFAULT_NH_DEL_DELAY;
+	if (uc->pg_delete_delay == 0)
+		uc->pg_delete_delay = DEFAULT_PG_DEL_DELAY;
+
+	if (uc->st_syn_ttl == 0)
+		uc->st_syn_ttl = DEFAULT_ST_SYN_TTL;
+	if (uc->st_close_ttl == 0)
+		uc->st_close_ttl = DEFAULT_ST_CLOSE_TTL;
+	if (uc->st_estab_ttl == 0)
+		uc->st_estab_ttl = DEFAULT_ST_ESTAB_TTL;
+	if (uc->st_udp_ttl == 0)
+		uc->st_udp_ttl = DEFAULT_ST_UDP_TTL;
+	if (uc->st_icmp_ttl == 0)
+		uc->st_icmp_ttl = DEFAULT_ST_ICMP_TTL;
+
+	ni = CHAIN_TO_SRV(ch);
+	error = 0;
+
+	/* Cheap early check; repeated below under the write lock */
+	IPFW_UH_RLOCK(ch);
+	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (EEXIST);
+	}
+	IPFW_UH_RUNLOCK(ch);
+
+	cfg = nat64lsn_init_instance(ch, (size_t)1 << (32 - uc->plen4));
+	strlcpy(cfg->name, uc->name, sizeof(cfg->name));
+	cfg->no.name = cfg->name;
+	cfg->no.etlv = IPFW_TLV_NAT64LSN_NAME;
+	cfg->no.set = uc->set;
+
+	cfg->prefix4 = addr4;
+	cfg->pmask4 = addr4 | ~mask4;
+	/* XXX: Copy 96 bits */
+	cfg->plen6 = 96;
+	memcpy(&cfg->prefix6, &uc->prefix6, cfg->plen6 / 8);
+	cfg->plen4 = uc->plen4;
+	/*
+	 * Keep the flags that nat64lsn_export_config() reports back;
+	 * masking with 0 dropped every flag requested by the user.
+	 */
+	cfg->flags = uc->flags & NAT64LSN_FLAGSMASK;
+	cfg->max_chunks = uc->max_ports / NAT64_CHUNK_SIZE;
+	cfg->agg_prefix_len = uc->agg_prefix_len;
+	cfg->agg_prefix_max = uc->agg_prefix_max;
+
+	cfg->min_chunk = uc->min_port / NAT64_CHUNK_SIZE;
+	cfg->max_chunk = uc->max_port / NAT64_CHUNK_SIZE;
+
+	cfg->jmaxlen = uc->jmaxlen;
+	cfg->nh_delete_delay = uc->nh_delete_delay;
+	cfg->pg_delete_delay = uc->pg_delete_delay;
+	cfg->st_syn_ttl = uc->st_syn_ttl;
+	cfg->st_close_ttl = uc->st_close_ttl;
+	cfg->st_estab_ttl = uc->st_estab_ttl;
+	cfg->st_udp_ttl = uc->st_udp_ttl;
+	cfg->st_icmp_ttl = uc->st_icmp_ttl;
+
+	cfg->nomatch_verdict = IP_FW_DENY;
+	cfg->nomatch_final = 1;	/* Exit outer loop by default */
+
+	IPFW_UH_WLOCK(ch);
+
+	/* Somebody may have created the same instance meanwhile */
+	if (nat64lsn_find(ni, uc->name, uc->set) != NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		nat64lsn_destroy_instance(cfg);
+		return (EEXIST);
+	}
+
+	if (ipfw_objhash_alloc_idx(CHAIN_TO_SRV(ch), &cfg->no.kidx) != 0) {
+		IPFW_UH_WUNLOCK(ch);
+		nat64lsn_destroy_instance(cfg);
+		return (ENOSPC);
+	}
+	ipfw_objhash_add(CHAIN_TO_SRV(ch), &cfg->no);
+
+	/* Okay, let's link data */
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, cfg->no.kidx) = cfg;
+	IPFW_WUNLOCK(ch);
+
+	nat64lsn_start_instance(cfg);
+
+	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Removes instance @cfg from the named object hash of chain @ch and
+ * releases its object index. Asserts that IPFW_UH_WLOCK is held.
+ */
+static void
+nat64lsn_detach_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg)
+{
+
+ IPFW_UH_WLOCK_ASSERT(ch);
+
+ ipfw_objhash_del(CHAIN_TO_SRV(ch), &cfg->no);
+ ipfw_objhash_free_idx(CHAIN_TO_SRV(ch), cfg->no.kidx);
+}
+
+/*
+ * Destroys nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success, EINVAL if the request size does not match,
+ * ESRCH if the named instance does not exist, EBUSY if the instance
+ * is still referenced.
+ */
+static int
+nat64lsn_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ struct nat64lsn_cfg *cfg;
+ ipfw_obj_header *oh;
+
+ if (sd->valsize != sizeof(*oh))
+ return (EINVAL);
+
+ oh = (ipfw_obj_header *)op3;
+
+ IPFW_UH_WLOCK(ch);
+ cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_WUNLOCK(ch);
+ return (ESRCH);
+ }
+
+ /* Refuse to destroy an instance that still has references */
+ if (cfg->no.refcnt > 0) {
+ IPFW_UH_WUNLOCK(ch);
+ return (EBUSY);
+ }
+
+ /* Unlink the instance from the object array first */
+ IPFW_WLOCK(ch);
+ SRV_OBJECT(ch, cfg->no.kidx) = NULL;
+ IPFW_WUNLOCK(ch);
+
+ nat64lsn_detach_config(ch, cfg);
+ IPFW_UH_WUNLOCK(ch);
+
+ /* Tear the instance down after the chain lock has been released */
+ nat64lsn_destroy_instance(cfg);
+ return (0);
+}
+
+/*
+ * Copies the value of counter _field of instance _cfg, as returned by
+ * NAT64STAT_FETCH(), into the field of the same name in _stats.
+ * Only defined for the duration of export_stats().
+ */
+#define __COPY_STAT_FIELD(_cfg, _stats, _field) \
+ (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field)
+/*
+ * Fills @stats with the current counters of instance @cfg, plus the
+ * number of hosts and the per-protocol portgroup counts.
+ * @ch is not used.
+ */
+static void
+export_stats(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
+ struct ipfw_nat64lsn_stats *stats)
+{
+
+ __COPY_STAT_FIELD(cfg, stats, opcnt64);
+ __COPY_STAT_FIELD(cfg, stats, opcnt46);
+ __COPY_STAT_FIELD(cfg, stats, ofrags);
+ __COPY_STAT_FIELD(cfg, stats, ifrags);
+ __COPY_STAT_FIELD(cfg, stats, oerrors);
+ __COPY_STAT_FIELD(cfg, stats, noroute4);
+ __COPY_STAT_FIELD(cfg, stats, noroute6);
+ __COPY_STAT_FIELD(cfg, stats, nomatch4);
+ __COPY_STAT_FIELD(cfg, stats, noproto);
+ __COPY_STAT_FIELD(cfg, stats, nomem);
+ __COPY_STAT_FIELD(cfg, stats, dropped);
+
+ /* Job queue and state counters */
+ __COPY_STAT_FIELD(cfg, stats, jcalls);
+ __COPY_STAT_FIELD(cfg, stats, jrequests);
+ __COPY_STAT_FIELD(cfg, stats, jhostsreq);
+ __COPY_STAT_FIELD(cfg, stats, jportreq);
+ __COPY_STAT_FIELD(cfg, stats, jhostfails);
+ __COPY_STAT_FIELD(cfg, stats, jportfails);
+ __COPY_STAT_FIELD(cfg, stats, jmaxlen);
+ __COPY_STAT_FIELD(cfg, stats, jnomem);
+ __COPY_STAT_FIELD(cfg, stats, jreinjected);
+ __COPY_STAT_FIELD(cfg, stats, screated);
+ __COPY_STAT_FIELD(cfg, stats, sdeleted);
+ __COPY_STAT_FIELD(cfg, stats, spgcreated);
+ __COPY_STAT_FIELD(cfg, stats, spgdeleted);
+
+ /* Values kept directly in the instance rather than in counters */
+ stats->hostcount = cfg->ihcount;
+ stats->tcpchunks = cfg->protochunks[NAT_PROTO_TCP];
+ stats->udpchunks = cfg->protochunks[NAT_PROTO_UDP];
+ stats->icmpchunks = cfg->protochunks[NAT_PROTO_ICMP];
+}
+#undef __COPY_STAT_FIELD
+
+/*
+ * Fills the userland structure @uc from instance @cfg: flags, port
+ * limit, aggregation settings, both prefixes, set number and name.
+ * Other fields of @uc are left untouched. @ch is not used.
+ */
+static void
+nat64lsn_export_config(struct ip_fw_chain *ch, struct nat64lsn_cfg *cfg,
+ ipfw_nat64lsn_cfg *uc)
+{
+
+ uc->flags = cfg->flags & NAT64LSN_FLAGSMASK;
+ /* The kernel keeps the limit in chunks, userland expects ports */
+ uc->max_ports = cfg->max_chunks * NAT64_CHUNK_SIZE;
+ uc->agg_prefix_len = cfg->agg_prefix_len;
+ uc->agg_prefix_max = cfg->agg_prefix_max;
+
+ /* prefix4 is kept in host byte order inside the kernel */
+ uc->prefix4.s_addr = htonl(cfg->prefix4);
+ uc->prefix6 = cfg->prefix6;
+ uc->plen4 = cfg->plen4;
+ uc->plen6 = cfg->plen6;
+ uc->set = cfg->no.set;
+ strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
+}
+
+/*
+ * Argument passed to export_config_cb() while listing instances.
+ */
+struct nat64_dump_arg {
+ struct ip_fw_chain *ch; /* chain being listed */
+ struct sockopt_data *sd; /* reply buffer to append to */
+};
+
+/*
+ * Per-object callback used by nat64lsn_list(): reserves room for one
+ * ipfw_nat64lsn_cfg in the reply buffer and exports the instance @no
+ * into it. @arg points to a struct nat64_dump_arg. Always returns 0.
+ *
+ * NOTE(review): the result of ipfw_get_sopt_space() is not checked;
+ * nat64lsn_list() verifies the total size before iterating.
+ */
+static int
+export_config_cb(struct namedobj_instance *ni, struct named_object *no,
+ void *arg)
+{
+ struct nat64_dump_arg *da = (struct nat64_dump_arg *)arg;
+ ipfw_nat64lsn_cfg *uc;
+
+ uc = (struct _ipfw_nat64lsn_cfg *)ipfw_get_sopt_space(da->sd,
+ sizeof(*uc));
+ nat64lsn_export_config(da->ch, (struct nat64lsn_cfg *)no, uc);
+ return (0);
+}
+
+/*
+ * Lists all nat64 lsn instances currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64lsn_cfg x N ]
+ *
+ * Returns 0 on success, EINVAL if the request is shorter than the
+ * list header, ENOMEM if the supplied buffer cannot hold all
+ * instances (the header then carries the required size).
+ */
+static int
+nat64lsn_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+ struct sockopt_data *sd)
+{
+ ipfw_obj_lheader *olh;
+ struct nat64_dump_arg da;
+
+ /* Check minimum header size */
+ if (sd->valsize < sizeof(ipfw_obj_lheader))
+ return (EINVAL);
+
+ olh = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*olh));
+
+ IPFW_UH_RLOCK(ch);
+ /* Report how much space the complete reply takes */
+ olh->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
+ IPFW_TLV_NAT64LSN_NAME);
+ olh->objsize = sizeof(ipfw_nat64lsn_cfg);
+ olh->size = sizeof(*olh) + olh->count * olh->objsize;
+
+ if (sd->valsize < olh->size) {
+ IPFW_UH_RUNLOCK(ch);
+ return (ENOMEM);
+ }
+ memset(&da, 0, sizeof(da));
+ da.ch = ch;
+ da.sd = sd;
+ /* Append one ipfw_nat64lsn_cfg per instance */
+ ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb, &da,
+ IPFW_TLV_NAT64LSN_NAME);
+ IPFW_UH_RUNLOCK(ch);
+
+ return (0);
+}
+
+/*
+ * Placeholder for the nat64 instance configuration request.
+ * Not implemented: the arguments are ignored.
+ *
+ * Always returns EOPNOTSUPP.
+ */
+static int
+nat64_config(struct ip_fw_chain *chain, ip_fw3_opheader *op,
+ struct sockopt_data *sd)
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * Get nat64 statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_counter_tlv ]
+ *
+ * Returns 0 on success, EINVAL if the buffer size is not a multiple
+ * of 8 bytes or the header cannot be obtained, ENOMEM if the buffer
+ * is too small for the reply, ESRCH if the instance does not exist.
+ */
+static int
+nat64lsn_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+ struct sockopt_data *sd)
+{
+ struct ipfw_nat64lsn_stats stats;
+ struct nat64lsn_cfg *cfg;
+ ipfw_obj_header *oh;
+ ipfw_obj_ctlv *ctlv;
+ size_t sz;
+
+ /* Size of the complete reply */
+ sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) + sizeof(stats);
+ if (sd->valsize % sizeof(uint64_t))
+ return (EINVAL);
+ if (sd->valsize < sz)
+ return (ENOMEM);
+ oh = (ipfw_obj_header *)ipfw_get_sopt_header(sd, sz);
+ if (oh == NULL)
+ return (EINVAL);
+ memset(&stats, 0, sizeof(stats));
+
+ /* Snapshot the counters into the on-stack copy under the lock */
+ IPFW_UH_RLOCK(ch);
+ cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+ if (cfg == NULL) {
+ IPFW_UH_RUNLOCK(ch);
+ return (ESRCH);
+ }
+
+ export_stats(ch, cfg, &stats);
+ IPFW_UH_RUNLOCK(ch);
+
+ /* The counters follow the object header as an array of uint64_t */
+ ctlv = (ipfw_obj_ctlv *)(oh + 1);
+ memset(ctlv, 0, sizeof(*ctlv));
+ ctlv->head.type = IPFW_TLV_COUNTERS;
+ ctlv->head.length = sz - sizeof(ipfw_obj_header);
+ ctlv->count = sizeof(stats) / sizeof(uint64_t);
+ ctlv->objsize = sizeof(uint64_t);
+ ctlv->version = IPFW_NAT64_VERSION;
+ memcpy(ctlv + 1, &stats, sizeof(stats));
+ return (0);
+}
+
+/*
+ * Reply: [ ipfw_obj_header ipfw_obj_data [ ipfw_nat64lsn_stg
+ * ipfw_nat64lsn_state x count, ... ] ]
+ *
+ * Exports the busy states of portgroup @pg: fills the group header
+ * @stg and appends one ipfw_nat64lsn_state per busy slot to @sd.
+ * The host lock of the portgroup is held while the states are read.
+ *
+ * Returns 0 on success (with stg->count == 0 and nothing appended
+ * when the group has no busy state), 1 if @sd has no room left.
+ */
+static int
+export_pg_states(struct nat64lsn_cfg *cfg, struct nat64lsn_portgroup *pg,
+ ipfw_nat64lsn_stg *stg, struct sockopt_data *sd)
+{
+ ipfw_nat64lsn_state *ste;
+ struct nat64lsn_state *st;
+ int i, count;
+
+ NAT64_LOCK(pg->host);
+ /* First pass: count busy slots to know how much space is needed */
+ count = 0;
+ for (i = 0; i < 64; i++) {
+ if (PG_IS_BUSY_IDX(pg, i))
+ count++;
+ }
+ if (count == 0) {
+ stg->count = 0;
+ NAT64_UNLOCK(pg->host);
+ return (0);
+ }
+ ste = (ipfw_nat64lsn_state *)ipfw_get_sopt_space(sd,
+ count * sizeof(ipfw_nat64lsn_state));
+ if (ste == NULL) {
+ NAT64_UNLOCK(pg->host);
+ return (1);
+ }
+
+ stg->alias4.s_addr = pg->aaddr;
+ stg->proto = nat64lsn_rproto_map[pg->nat_proto];
+ stg->flags = 0;
+ stg->host6 = pg->host->addr;
+ stg->count = count;
+ /* Second pass: copy out every slot that is not free */
+ for (i = 0; i < 64; i++) {
+ if (PG_IS_FREE_IDX(pg, i))
+ continue;
+ st = &pg->states[i];
+ ste->daddr.s_addr = st->u.s.faddr;
+ ste->dport = st->u.s.fport;
+ /* The alias port is the group's base port plus the slot number */
+ ste->aport = pg->aport + i;
+ ste->sport = st->u.s.lport;
+ ste->flags = st->flags; /* XXX filter flags */
+ ste->idle = GET_AGE(st->timestamp);
+ ste++;
+ }
+ NAT64_UNLOCK(pg->host);
+
+ return (0);
+}
+
+/*
+ * Advances the (address, protocol, port) triple to the next
+ * portgroup position: the port moves on by NAT64_CHUNK_SIZE; when the
+ * port range is exhausted it restarts at 0 and the protocol moves on;
+ * when the protocols are exhausted the protocol restarts at 1 and the
+ * address moves on, up to cfg->pmask4.
+ *
+ * Returns 0 if the triple was advanced, 1 at the end of the space.
+ */
+static int
+get_next_idx(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+    uint16_t *port)
+{
+
+	if (*port >= 65536 - NAT64_CHUNK_SIZE) {
+		/* Port range exhausted, move to the next protocol */
+		*port = 0;
+		if (*nat_proto >= NAT_MAX_PROTO - 1) {
+			/* Protocols exhausted, move to the next address */
+			*nat_proto = 1;
+			if (*addr >= cfg->pmask4) {
+				/* End of space. */
+				return (1);
+			}
+			*addr += 1;
+		} else
+			*nat_proto += 1;
+	} else
+		*port += NAT64_CHUNK_SIZE;
+
+	return (0);
+}
+
+#define PACK_IDX(addr, proto, port) \
+ ((uint64_t)addr << 32) | ((uint32_t)port << 16) | (proto << 8)
+#define UNPACK_IDX(idx, addr, proto, port) \
+ (addr) = (uint32_t)((idx) >> 32); \
+ (port) = (uint16_t)(((idx) >> 16) & 0xFFFF); \
+ (proto) = (uint8_t)(((idx) >> 8) & 0xFF)
+
+/*
+ * Advances the (addr, nat_proto, port) cursor to the next index that has a
+ * portgroup.
+ *
+ * The cursor is updated in place through get_next_idx(). Returns the
+ * portgroup found at the new cursor position, or NULL once get_next_idx()
+ * reports the end of the index space.
+ */
+static struct nat64lsn_portgroup *
+get_next_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+    uint16_t *port)
+{
+	struct nat64lsn_portgroup *found;
+	uint64_t idx_before, idx_after;
+
+	idx_before = PACK_IDX(*addr, *nat_proto, *port);
+	found = NULL;
+	/* Step until a slot is populated or the index space is exhausted. */
+	while (found == NULL &&
+	    get_next_idx(cfg, addr, nat_proto, port) == 0)
+		found = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
+
+	/* Debug aid: log when the cursor ends where it started. */
+	idx_after = PACK_IDX(*addr, *nat_proto, *port);
+	if (idx_after == idx_before)
+		DPRINTF("WHOOPS: %u %d %d", *addr, *nat_proto, *port);
+	return (found);
+}
+
+/*
+ * Returns the portgroup at the current cursor position or, if that index
+ * is empty, the next portgroup found by get_next_pg() (which also moves
+ * the cursor). Returns NULL when there is none.
+ */
+static __noinline struct nat64lsn_portgroup *
+get_first_pg(struct nat64lsn_cfg *cfg, uint32_t *addr, uint8_t *nat_proto,
+    uint16_t *port)
+{
+	struct nat64lsn_portgroup *here;
+
+	here = GET_PORTGROUP(cfg, *addr, *nat_proto, *port);
+	if (here != NULL)
+		return (here);
+	return (get_next_pg(cfg, addr, nat_proto, port));
+}
+
+/*
+ * Lists nat64lsn states.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ipfw_obj_data [ uint64_t ]]
+ * Reply: [ ipfw_obj_header ipfw_obj_data [
+ *	ipfw_nat64lsn_stg ipfw_nat64lsn_state x N] ]
+ *
+ * The uint64_t in the request is a packed (address, protocol, port) cursor
+ * (see UNPACK_IDX); a zero address means "start from cfg->prefix4". Every
+ * exported group header carries in next_idx the cursor of the following
+ * portgroup, or 0xFF when there is none.
+ *
+ * Returns 0 on success, EINVAL on a malformed request or out-of-range
+ * cursor, ESRCH if the named instance does not exist and ENOMEM if the
+ * reply buffer cannot hold the first group.
+ */
+static int
+nat64lsn_states(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	ipfw_obj_header *oh;
+	ipfw_obj_data *od;
+	ipfw_nat64lsn_stg *stg;
+	struct nat64lsn_cfg *cfg;
+	struct nat64lsn_portgroup *pg, *pg_next;
+	uint64_t next_idx;
+	size_t sz;
+	uint32_t addr, states;
+	uint16_t port;
+	uint8_t nat_proto;
+
+	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
+	    sizeof(uint64_t);
+	/* Check minimum header size */
+	if (sd->valsize < sz)
+		return (EINVAL);
+
+	oh = (ipfw_obj_header *)sd->kbuf;
+	od = (ipfw_obj_data *)(oh + 1);
+	if (od->head.type != IPFW_TLV_OBJDATA ||
+	    od->head.length != sz - sizeof(ipfw_obj_header))
+		return (EINVAL);
+
+	next_idx = *(uint64_t *)(od + 1);
+	/* Translate index to the request position to start from */
+	UNPACK_IDX(next_idx, addr, nat_proto, port);
+	if (nat_proto >= NAT_MAX_PROTO)
+		return (EINVAL);
+	if (nat_proto == 0 && addr != 0)
+		return (EINVAL);
+
+	IPFW_UH_RLOCK(ch);
+	cfg = nat64lsn_find(CHAIN_TO_SRV(ch), oh->ntlv.name, oh->ntlv.set);
+	if (cfg == NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ESRCH);
+	}
+	/* Fill in starting point */
+	if (addr == 0) {
+		addr = cfg->prefix4;
+		nat_proto = 1;
+		port = 0;
+	}
+	if (addr < cfg->prefix4 || addr > cfg->pmask4) {
+		IPFW_UH_RUNLOCK(ch);
+		/* next_idx is 64-bit on every platform: print via %ju. */
+		DPRINTF("%ju %u %u", (uintmax_t)next_idx, addr, cfg->pmask4);
+		return (EINVAL);
+	}
+
+	sz = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_data) +
+	    sizeof(ipfw_nat64lsn_stg);
+	if (sd->valsize < sz) {
+		/* The UH read lock is held here; drop it before bailing. */
+		IPFW_UH_RUNLOCK(ch);
+		return (ENOMEM);
+	}
+	oh = (ipfw_obj_header *)ipfw_get_sopt_space(sd, sz);
+	if (oh == NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (ENOMEM);
+	}
+	od = (ipfw_obj_data *)(oh + 1);
+	od->head.type = IPFW_TLV_OBJDATA;
+	od->head.length = sz - sizeof(ipfw_obj_header);
+	stg = (ipfw_nat64lsn_stg *)(od + 1);
+
+	pg = get_first_pg(cfg, &addr, &nat_proto, &port);
+	if (pg == NULL) {
+		/* No states */
+		stg->next_idx = 0xFF;
+		stg->count = 0;
+		IPFW_UH_RUNLOCK(ch);
+		return (0);
+	}
+	states = 0;
+	pg_next = NULL;
+	while (pg != NULL) {
+		/*
+		 * Look ahead first: the group header of the current
+		 * portgroup must carry the cursor of the following one.
+		 */
+		pg_next = get_next_pg(cfg, &addr, &nat_proto, &port);
+		if (pg_next == NULL)
+			stg->next_idx = 0xFF;
+		else
+			stg->next_idx = PACK_IDX(addr, nat_proto, port);
+
+		DPRINTF("EXPORT %p %u %d %d (%d)", pg, addr, nat_proto, port,
+		    _GET_PORTGROUP_IDX(cfg, addr, nat_proto, port));
+
+		/*
+		 * NOTE(review): on failure after some states were exported
+		 * the last header already points past this portgroup, so its
+		 * states look skipped to a paginating caller -- confirm.
+		 */
+		if (export_pg_states(cfg, pg, stg, sd) != 0) {
+			IPFW_UH_RUNLOCK(ch);
+			return (states == 0 ? ENOMEM: 0);
+		}
+		states += stg->count;
+		od->head.length += stg->count * sizeof(ipfw_nat64lsn_state);
+		sz += stg->count * sizeof(ipfw_nat64lsn_state);
+		if (pg_next != NULL) {
+			sz += sizeof(ipfw_nat64lsn_stg);
+			if (sd->valsize < sz)
+				break;
+			stg = (ipfw_nat64lsn_stg *)ipfw_get_sopt_space(sd,
+			    sizeof(ipfw_nat64lsn_stg));
+			/* No room for another header: stop, do not deref. */
+			if (stg == NULL)
+				break;
+		}
+		pg = pg_next;
+	}
+	IPFW_UH_RUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Sockopt handler table for nat64lsn; registered in nat64lsn_init() and
+ * removed in nat64lsn_uninit(). Each entry pairs an IP_FW_NAT64LSN_*
+ * opcode with its handler function. The HDIR_* column presumably limits
+ * the allowed request direction -- confirm against struct
+ * ipfw_sopt_handler.
+ */
+static struct ipfw_sopt_handler scodes[] = {
+ { IP_FW_NAT64LSN_CREATE, 0, HDIR_BOTH, nat64lsn_create },
+ { IP_FW_NAT64LSN_DESTROY,0, HDIR_SET, nat64lsn_destroy },
+ { IP_FW_NAT64LSN_CONFIG, 0, HDIR_BOTH, nat64_config },
+ { IP_FW_NAT64LSN_LIST, 0, HDIR_GET, nat64lsn_list },
+ { IP_FW_NAT64LSN_STATS, 0, HDIR_GET, nat64lsn_stats },
+ { IP_FW_NAT64LSN_LIST_STATES,0, HDIR_GET, nat64lsn_states },
+};
+
+/*
+ * Object-rewrite classifier for O_EXTERNAL_INSTANCE opcodes.
+ *
+ * The instruction is ours only when the preceding instruction is an
+ * O_EXTERNAL_ACTION whose arg1 equals V_nat64lsn_eid. In that case the
+ * instance index (cmd->arg1) is stored in *puidx, *ptype is set to 0 and
+ * 0 is returned; otherwise 1 is returned and the outputs are untouched.
+ */
+static int
+nat64lsn_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+	ipfw_insn *action;
+
+	action = cmd - 1;
+	if (action->opcode == O_EXTERNAL_ACTION &&
+	    action->arg1 == V_nat64lsn_eid) {
+		*puidx = cmd->arg1;
+		*ptype = 0;
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Object-rewrite callback: stores the new instance index @idx in the
+ * arg1 field of @cmd.
+ */
+static void
+nat64lsn_update_arg1(ipfw_insn *cmd, uint16_t idx)
+{
+
+	cmd->arg1 = idx;
+}
+
+/*
+ * Object-rewrite callback: looks up a nat64lsn named object described by
+ * @ti in the chain's service object hash. The result of
+ * ipfw_objhash_find_type() is returned unchanged; the object pointer is
+ * passed back through @pno.
+ */
+static int
+nat64lsn_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+    struct named_object **pno)
+{
+
+	return (ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
+	    IPFW_TLV_NAT64LSN_NAME, pno));
+}
+
+/*
+ * Object-rewrite callback: returns the named object stored under kernel
+ * index @idx in the chain's service object hash. Asserts IPFW_UH_WLOCK,
+ * and asserts (KASSERT) that the object exists.
+ */
+static struct named_object *
+nat64lsn_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+	struct named_object *obj;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	obj = ipfw_objhash_lookup_kidx(CHAIN_TO_SRV(ch), idx);
+	KASSERT(obj != NULL, ("NAT64LSN with index %d not found", idx));
+
+	return (obj);
+}
+
+/*
+ * Object-rewrite callback: forwards a set-management command to
+ * ipfw_obj_manage_sets() for objects of type IPFW_TLV_NAT64LSN_NAME and
+ * returns its result.
+ */
+static int
+nat64lsn_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
+    enum ipfw_sets_cmd cmd)
+{
+	struct namedobj_instance *ni;
+
+	ni = CHAIN_TO_SRV(ch);
+	return (ipfw_obj_manage_sets(ni, IPFW_TLV_NAT64LSN_NAME, set,
+	    new_set, cmd));
+}
+
+/*
+ * Object-rewrite descriptor for nat64lsn instances referenced from rules
+ * through O_EXTERNAL_INSTANCE; registered in nat64lsn_init() and removed
+ * in nat64lsn_uninit(). The callbacks are the nat64lsn_* helpers of this
+ * file.
+ */
+static struct opcode_obj_rewrite opcodes[] = {
+ {
+ .opcode = O_EXTERNAL_INSTANCE,
+ .etlv = IPFW_TLV_EACTION /* just show it isn't table */,
+ .classifier = nat64lsn_classify,
+ .update = nat64lsn_update_arg1,
+ .find_byname = nat64lsn_findbyname,
+ .find_bykidx = nat64lsn_findbykidx,
+ .manage_sets = nat64lsn_manage_sets,
+ },
+};
+
+/*
+ * ipfw_objhash_foreach_type() callback used on module unload: unlinks one
+ * nat64lsn instance from the chain and destroys it.
+ *
+ * @arg is the struct ip_fw_chain the instance belongs to. The SRV_OBJECT
+ * slot is cleared before the instance is detached and destroyed.
+ * Always returns 0.
+ */
+static int
+destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct ip_fw_chain *chain;
+	struct nat64lsn_cfg *inst;
+
+	chain = (struct ip_fw_chain *)arg;
+	inst = (struct nat64lsn_cfg *)SRV_OBJECT(chain, no->kidx);
+	SRV_OBJECT(chain, no->kidx) = NULL;
+
+	nat64lsn_detach_config(chain, inst);
+	nat64lsn_destroy_instance(inst);
+	return (0);
+}
+
+/*
+ * Registers the nat64lsn external action for chain @ch together with its
+ * sockopt handlers and object rewriter.
+ *
+ * When @first is non-zero nat64lsn_init_internal() is run beforehand.
+ * Returns 0 on success and ENXIO when ipfw_add_eaction() returns 0.
+ */
+int
+nat64lsn_init(struct ip_fw_chain *ch, int first)
+{
+
+	if (first != 0)
+		nat64lsn_init_internal();
+
+	V_nat64lsn_eid = ipfw_add_eaction(ch, ipfw_nat64lsn, "nat64lsn");
+	if (V_nat64lsn_eid != 0) {
+		IPFW_ADD_SOPT_HANDLER(first, scodes);
+		IPFW_ADD_OBJ_REWRITER(first, opcodes);
+		return (0);
+	}
+	return (ENXIO);
+}
+
+/*
+ * Unregisters nat64lsn from chain @ch: removes the object rewriter, the
+ * sockopt handlers and the external action, then destroys every remaining
+ * nat64lsn instance. When @last is non-zero nat64lsn_uninit_internal() is
+ * run at the end.
+ */
+void
+nat64lsn_uninit(struct ip_fw_chain *ch, int last)
+{
+
+ IPFW_DEL_OBJ_REWRITER(last, opcodes);
+ IPFW_DEL_SOPT_HANDLER(last, scodes);
+ ipfw_del_eaction(ch, V_nat64lsn_eid);
+ /*
+ * Since we already have deregistered external action,
+ * our named objects become inaccessible via rules, because
+ * all rules were truncated by ipfw_del_eaction().
+ * So, we can unlink and destroy our named objects without holding
+ * IPFW_WLOCK().
+ *
+ * NOTE(review): no IPFW_UH lock is taken around this walk either;
+ * confirm that nat64lsn_detach_config() does not assert one.
+ */
+ ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
+ IPFW_TLV_NAT64LSN_NAME);
+ V_nat64lsn_eid = 0;
+ if (last != 0)
+ nat64lsn_uninit_internal();
+}
+
Index: sys/netpfil/ipfw/nat64/nat64stl.h
===================================================================
--- /dev/null
+++ sys/netpfil/ipfw/nat64/nat64stl.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_FW_NAT64STL_H_
+#define _IP_FW_NAT64STL_H_
+
+/*
+ * Kernel-side configuration of one stateless NAT64 (nat64stl) instance.
+ * The embedded named_object must stay first: the control code casts
+ * between struct named_object * and struct nat64stl_cfg *.
+ */
+struct nat64stl_cfg {
+ struct named_object no;
+
+ uint16_t map64; /* table with 6to4 mapping */
+ uint16_t map46; /* table with 4to6 mapping */
+
+ struct in6_addr prefix; /* IPv6 prefix */
+ uint8_t plen; /* prefix length */
+ uint8_t flags; /* flags for internal use */
+/* Set during creation as each resource is acquired; drives error unwind. */
+#define NAT64_KIDX 0x0001
+#define NAT64_46T 0x0002
+#define NAT64_64T 0x0004
+#define NAT64_FLAGSMASK 0 /* flags to pass to userland */
+ char name[64];
+ nat64_stats_block stats;
+};
+
+VNET_DECLARE(uint16_t, nat64stl_eid);
+#define V_nat64stl_eid VNET(nat64stl_eid)
+#define IPFW_TLV_NAT64STL_NAME IPFW_TLV_EACTION_NAME(V_nat64stl_eid)
+
+int ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args,
+ ipfw_insn *cmd, int *done);
+
+#endif
+
Index: sys/netpfil/ipfw/nat64/nat64stl.c
===================================================================
--- /dev/null
+++ sys/netpfil/ipfw/nat64/nat64stl.c
@@ -0,0 +1,235 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/pfil.h>
+
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ip6.h>
+#include <netinet/icmp6.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64_translate.h>
+#include <netpfil/ipfw/nat64/nat64stl.h>
+
+/*
+ * Returns the nat64stl instance stored in the chain's service object slot
+ * named by the arg1 field of @cmd. The whole expansion is parenthesized so
+ * that the cast applies to the slot value even when the macro is followed
+ * by a postfix operator such as ->.
+ */
+#define	NAT64_LOOKUP(chain, cmd)	\
+	((struct nat64stl_cfg *)SRV_OBJECT((chain), (cmd)->arg1))
+
+/*
+ * Translates an IPv4 packet to IPv6 for a stateless instance.
+ *
+ * @tablearg is the value index obtained by the caller from the map46
+ * lookup; its nh6 value becomes the IPv6 destination, while the IPv6
+ * source is built from the IPv4 source by nat64_set_ip4().
+ *
+ * Returns NAT64SKIP when either IPv4 address fails nat64_check_ip4() or
+ * nat64_check_private_ip4(), NAT64MFREE when the mapped IPv6 address fails
+ * nat64_check_ip6(), otherwise the result of nat64_do_handle_ip4().
+ */
+static int
+nat64stl_handle_ip4(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+    struct mbuf *m, uint32_t tablearg)
+{
+	struct in6_addr src6, dst6;
+	struct ip *ip4;
+
+	ip4 = mtod(m, struct ip*);
+	/* Both addresses must pass the generic checks first... */
+	if (nat64_check_ip4(ip4->ip_src.s_addr) != 0)
+		return (NAT64SKIP);
+	if (nat64_check_ip4(ip4->ip_dst.s_addr) != 0)
+		return (NAT64SKIP);
+	/* ...and then the private-address checks. */
+	if (nat64_check_private_ip4(ip4->ip_src.s_addr) != 0)
+		return (NAT64SKIP);
+	if (nat64_check_private_ip4(ip4->ip_dst.s_addr) != 0)
+		return (NAT64SKIP);
+
+	dst6 = TARG_VAL(chain, tablearg, nh6);
+	if (nat64_check_ip6(&dst6) != 0)
+		return (NAT64MFREE);
+	nat64_set_ip4(&src6, ip4->ip_src.s_addr);
+
+	return (nat64_do_handle_ip4(m, &src6, &dst6, 0, &cfg->stats));
+}
+
+/*
+ * Translates an IPv6 packet to IPv4 for a stateless instance.
+ *
+ * @tablearg is the value index obtained by the caller from the map64
+ * lookup; its nh4 value (converted with htonl()) is the IPv4 address
+ * handed to nat64_do_handle_ip6().
+ *
+ * Returns NAT64SKIP when the leading plen / 8 bytes of the destination do
+ * not match the configured prefix, otherwise the result of
+ * nat64_do_handle_ip6().
+ */
+static int
+nat64stl_handle_ip6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+    struct mbuf *m, uint32_t tablearg)
+{
+	const struct ip6_hdr *hdr6;
+	uint32_t alias4;
+
+	alias4 = htonl(TARG_VAL(chain, tablearg, nh4));
+
+	/*
+	 * NOTE: we expect ipfw_chk() did m_pullup() up to upper level
+	 * protocol's headers. Also we skip some checks, that ip6_input(),
+	 * ip6_forward(), ip6_fastfwd() and ipfw_chk() already did.
+	 */
+	hdr6 = mtod(m, struct ip6_hdr *);
+	/* Only destinations inside the configured prefix are translated. */
+	if (bcmp(&hdr6->ip6_dst, &cfg->prefix, cfg->plen / 8) == 0)
+		return (nat64_do_handle_ip6(m, alias4, 0, &cfg->stats));
+
+	return (NAT64SKIP);
+}
+
+/*
+ * Handles an ICMPv6 error message whose outer source address has no map64
+ * entry (e.g. it was sent by an intermediate router).
+ *
+ * Skips hop-by-hop, routing and destination-options extension headers,
+ * accepts only DST_UNREACH, PACKET_TOO_BIG, TIME_EXCEED_TRANSIT and
+ * PARAM_PROB, and uses the destination of the embedded IPv6 header to look
+ * up the IPv4 address in map64.
+ *
+ * Returns NAT64MFREE when the packet is dropped and still has to be freed
+ * by the caller, NAT64RETURN when the mbuf has already been consumed here,
+ * otherwise the result of nat64_handle_icmp6().
+ *
+ * NOTE(review): m_pullup() may return a different mbuf head while the
+ * caller keeps using its own pointer; confirm that nat64_handle_icmp6()
+ * never returns NAT64MFREE for a packet that went through the pullup.
+ */
+static int
+nat64stl_handle_icmp6(struct ip_fw_chain *chain, struct nat64stl_cfg *cfg,
+    struct mbuf *m)
+{
+	nat64_stats_block *stats;
+	struct ip6_hdr *ip6, *ip6i;
+	struct icmp6_hdr *icmp6;
+	struct ip6_hbh *hbh;
+	uint32_t tablearg;
+	int hlen;
+	uint8_t proto;
+
+	stats = &cfg->stats;
+	ip6 = mtod(m, struct ip6_hdr *);
+	hlen = sizeof(struct ip6_hdr);
+	proto = ip6->ip6_nxt;
+	/* Skip extension headers */
+	while (proto == IPPROTO_HOPOPTS || proto == IPPROTO_ROUTING ||
+	    proto == IPPROTO_DSTOPTS) {
+		/*
+		 * The next-header and length bytes read below must lie
+		 * inside the first mbuf.
+		 */
+		if (m->m_len < hlen + (int)sizeof(*hbh) || /* XXX: m_pullup problem */
+		    (ip6->ip6_plen == 0 && proto == IPPROTO_HOPOPTS)) {
+			/* XXX: jumbo payload option */
+			NAT64STAT_INC(stats, dropped);
+			return (NAT64MFREE);
+		}
+		hbh = mtodo(m, hlen);
+		proto = hbh->ip6h_nxt;
+		/*
+		 * Hdr Ext Len counts 8-octet units NOT including the first
+		 * 8 octets. Without the +1 a header with length 0 would
+		 * never advance hlen and could loop forever.
+		 */
+		hlen += (hbh->ip6h_len + 1) << 3;
+	}
+	if (proto != IPPROTO_ICMPV6) {
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+	/* The ICMPv6 header itself must be readable before its type is. */
+	if (m->m_len < hlen + (int)sizeof(struct icmp6_hdr)) {
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+	icmp6 = mtodo(m, hlen);
+	switch (icmp6->icmp6_type) {
+	case ICMP6_DST_UNREACH:
+	case ICMP6_PACKET_TOO_BIG:
+	case ICMP6_TIME_EXCEED_TRANSIT:
+	case ICMP6_PARAM_PROB:
+		break;
+	default:
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+	hlen += sizeof(struct icmp6_hdr);
+	if (m->m_pkthdr.len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN) {
+		NAT64STAT_INC(stats, dropped);
+		return (NAT64MFREE);
+	}
+	if (m->m_len < hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN)
+		m = m_pullup(m, hlen + sizeof(struct ip6_hdr) + ICMP_MINLEN);
+	if (m == NULL) {
+		NAT64STAT_INC(stats, nomem);
+		return (NAT64RETURN);
+	}
+	/*
+	 * Use destination address from inner IPv6 header to determine
+	 * IPv4 mapped address.
+	 */
+	ip6i = mtodo(m, hlen);
+	if (ipfw_lookup_table_extended(chain, cfg->map64,
+	    sizeof(struct in6_addr), &ip6i->ip6_dst, &tablearg) == 0) {
+		m_freem(m);
+		return (NAT64RETURN);
+	}
+	return (nat64_handle_icmp6(m, 0,
+	    htonl(TARG_VAL(chain, tablearg, nh4)), 0, stats));
+}
+
+/*
+ * External action handler for "nat64stl" rules.
+ *
+ * @cmd must be the O_EXTERNAL_ACTION instruction carrying V_nat64stl_eid,
+ * immediately followed by the O_EXTERNAL_INSTANCE instruction that names
+ * the instance. IPv4 packets are looked up in map46 by destination, IPv6
+ * packets in map64 by source. On a lookup miss only IPv6 ICMPv6 packets
+ * are processed further (see nat64stl_handle_icmp6()).
+ *
+ * Returns 0 with *done == 0 when the rule does not apply and the search
+ * should continue. Otherwise sets *done = 1, clears args->m (freeing the
+ * mbuf first when the handler returned NAT64MFREE) and returns
+ * IP_FW_DENY.
+ */
+int
+ipfw_nat64stl(struct ip_fw_chain *chain, struct ip_fw_args *args,
+    ipfw_insn *cmd, int *done)
+{
+	ipfw_insn *icmd;
+	struct nat64stl_cfg *cfg;
+	uint32_t tablearg;
+	int ret;
+
+	IPFW_RLOCK_ASSERT(chain);
+
+	*done = 0; /* try next rule if not matched */
+	icmd = cmd + 1;
+	if (cmd->opcode != O_EXTERNAL_ACTION ||
+	    cmd->arg1 != V_nat64stl_eid ||
+	    icmd->opcode != O_EXTERNAL_INSTANCE ||
+	    (cfg = NAT64_LOOKUP(chain, icmd)) == NULL)
+		return (0);
+
+	switch (args->f_id.addr_type) {
+	case 4:
+		ret = ipfw_lookup_table(chain, cfg->map46,
+		    htonl(args->f_id.dst_ip), &tablearg);
+		break;
+	case 6:
+		ret = ipfw_lookup_table_extended(chain, cfg->map64,
+		    sizeof(struct in6_addr), &args->f_id.src_ip6, &tablearg);
+		break;
+	default:
+		return (0);
+	}
+	if (ret == 0) {
+		/*
+		 * In case when packet is ICMPv6 message from an intermediate
+		 * router, the source address of message will not match the
+		 * addresses from our map64 table.
+		 *
+		 * This only makes sense for IPv6 packets: an IPv4 packet
+		 * that merely carries protocol number 58 must not be parsed
+		 * as an IPv6 header by nat64stl_handle_icmp6().
+		 */
+		if (args->f_id.addr_type != 6 ||
+		    args->f_id.proto != IPPROTO_ICMPV6)
+			return (0);
+
+		ret = nat64stl_handle_icmp6(chain, cfg, args->m);
+	} else {
+		if (args->f_id.addr_type == 4)
+			ret = nat64stl_handle_ip4(chain, cfg, args->m,
+			    tablearg);
+		else
+			ret = nat64stl_handle_ip6(chain, cfg, args->m,
+			    tablearg);
+	}
+	if (ret == NAT64SKIP)
+		return (0);
+
+	*done = 1; /* terminate the search */
+	if (ret == NAT64MFREE)
+		m_freem(args->m);
+	args->m = NULL;
+	return (IP_FW_DENY);
+}
+
+
Index: sys/netpfil/ipfw/nat64/nat64stl_control.c
===================================================================
--- /dev/null
+++ sys/netpfil/ipfw/nat64/nat64stl_control.c
@@ -0,0 +1,535 @@
+/*-
+ * Copyright (c) 2015-2016 Yandex LLC
+ * Copyright (c) 2015-2016 Alexander V. Chernikov <melifaro@FreeBSD.org>
+ * Copyright (c) 2015-2016 Andrey V. Elsukov <ae@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/syslog.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/pfil.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_fw.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6_var.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/nat64/ip_fw_nat64.h>
+#include <netpfil/ipfw/nat64/nat64stl.h>
+#include <netinet6/ip_fw_nat64.h>
+
+VNET_DEFINE(uint16_t, nat64stl_eid) = 0;
+
+static struct nat64stl_cfg *nat64stl_alloc_config(const char *name, uint8_t set);
+static void nat64stl_free_config(struct nat64stl_cfg *cfg);
+static struct nat64stl_cfg *nat64stl_find(struct namedobj_instance *ni,
+ const char *name, uint8_t set);
+
+/*
+ * Allocates a zeroed nat64stl instance together with its statistics
+ * counters and initializes the embedded named object with @name, @set and
+ * the IPFW_TLV_NAT64STL_NAME type. Both allocations use M_WAITOK.
+ * The result is released with nat64stl_free_config().
+ */
+static struct nat64stl_cfg *
+nat64stl_alloc_config(const char *name, uint8_t set)
+{
+	struct nat64stl_cfg *inst;
+
+	inst = malloc(sizeof(*inst), M_IPFW, M_WAITOK | M_ZERO);
+	COUNTER_ARRAY_ALLOC(inst->stats.stats, NAT64STATS, M_WAITOK);
+
+	/* The named object points at the name buffer kept in the instance. */
+	strlcpy(inst->name, name, sizeof(inst->name));
+	inst->no.name = inst->name;
+	inst->no.set = set;
+	inst->no.etlv = IPFW_TLV_NAT64STL_NAME;
+	return (inst);
+}
+
+/*
+ * Releases an instance created by nat64stl_alloc_config(): first the
+ * statistics counters, then the structure itself.
+ */
+static void
+nat64stl_free_config(struct nat64stl_cfg *cfg)
+{
+
+	COUNTER_ARRAY_FREE(cfg->stats.stats, NAT64STATS);
+	free(cfg, M_IPFW);
+}
+
+/*
+ * Sockopt handler for IP_FW_NAT64STL_CONFIG. Reconfiguration is not
+ * implemented: the request is ignored and EOPNOTSUPP is returned.
+ */
+static int
+nat64stl_config(struct ip_fw_chain *chain, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Copies the userland-visible part of instance @cfg into @uc: name, set,
+ * prefix, prefix length, the flags allowed by NAT64_FLAGSMASK and the
+ * name TLVs of both mapping tables.
+ */
+static void
+nat64stl_export_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+    ipfw_nat64stl_cfg *uc)
+{
+	struct named_object *tbl;
+
+	strlcpy(uc->name, cfg->no.name, sizeof(uc->name));
+	uc->set = cfg->no.set;
+	uc->flags = cfg->flags & NAT64_FLAGSMASK;
+	uc->plen = cfg->plen;
+	uc->prefix = cfg->prefix;
+
+	/* 6to4 mapping table first, then the 4to6 one. */
+	tbl = ipfw_objhash_lookup_table_kidx(ch, cfg->map64);
+	ipfw_export_obj_ntlv(tbl, &uc->ntlv6);
+	tbl = ipfw_objhash_lookup_table_kidx(ch, cfg->map46);
+	ipfw_export_obj_ntlv(tbl, &uc->ntlv4);
+}
+
+/* Argument bundle passed to export_config_cb() while listing instances. */
+struct nat64stl_dump_arg {
+ struct ip_fw_chain *ch;
+ struct sockopt_data *sd;
+};
+
+/*
+ * ipfw_objhash_foreach_type() callback used by nat64stl_list(): reserves
+ * one ipfw_nat64stl_cfg record in the reply buffer and fills it from the
+ * instance behind @no. @arg is a struct nat64stl_dump_arg. The caller has
+ * already verified that the buffer is large enough. Always returns 0.
+ */
+static int
+export_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct nat64stl_dump_arg *dump;
+	ipfw_nat64stl_cfg *rec;
+
+	dump = (struct nat64stl_dump_arg *)arg;
+	rec = (ipfw_nat64stl_cfg *)ipfw_get_sopt_space(dump->sd,
+	    sizeof(*rec));
+	nat64stl_export_config(dump->ch, (struct nat64stl_cfg *)no, rec);
+	return (0);
+}
+
+/*
+ * Looks up the nat64stl instance called @name in set @set. Returns the
+ * result of ipfw_objhash_lookup_name_type() cast to the instance type.
+ */
+static struct nat64stl_cfg *
+nat64stl_find(struct namedobj_instance *ni, const char *name, uint8_t set)
+{
+
+	return ((struct nat64stl_cfg *)ipfw_objhash_lookup_name_type(ni, set,
+	    IPFW_TLV_NAT64STL_NAME, name));
+}
+
+
+/*
+ * Acquires the kernel resources of a new instance and links it into the
+ * object hash. Must be called with IPFW_UH_WLOCK held.
+ *
+ * Each acquired resource is recorded in cfg->flags (NAT64_KIDX, NAT64_46T,
+ * NAT64_64T) so that the caller can undo exactly what succeeded.
+ *
+ * Returns 0 on success, ENOSPC when no object index is available and
+ * EINVAL when one of the mapping tables cannot be referenced.
+ */
+static int
+nat64stl_create_internal(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+    ipfw_nat64stl_cfg *i)
+{
+	struct namedobj_instance *srv;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	srv = CHAIN_TO_SRV(ch);
+
+	/* 1. kernel index of the named object */
+	if (ipfw_objhash_alloc_idx(srv, &cfg->no.kidx) != 0)
+		return (ENOSPC);
+	cfg->flags |= NAT64_KIDX;
+
+	/* 2. reference to the 4to6 mapping table */
+	if (ipfw_ref_table(ch, &i->ntlv4, &cfg->map46) != 0)
+		return (EINVAL);
+	cfg->flags |= NAT64_46T;
+
+	/* 3. reference to the 6to4 mapping table */
+	if (ipfw_ref_table(ch, &i->ntlv6, &cfg->map64) != 0)
+		return (EINVAL);
+	cfg->flags |= NAT64_64T;
+
+	ipfw_objhash_add(srv, &cfg->no);
+	return (0);
+}
+
+/*
+ * Creates new nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ipfw_nat64stl_cfg ]
+ *
+ * The request is rejected with EINVAL unless it has exactly the expected
+ * size, a valid object name, a prefix accepted by IN6_IS_ADDR_WKPFX(), a
+ * prefix length of 96 and a set number below IPFW_MAX_SETS. EEXIST is
+ * returned when an instance with the same name and set already exists.
+ *
+ * Returns 0 on success
+ */
+static int
+nat64stl_create(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	struct namedobj_instance *srv;
+	struct nat64stl_cfg *inst;
+	ipfw_obj_lheader *hdr;
+	ipfw_nat64stl_cfg *req;
+	int rc;
+
+	if (sd->valsize != sizeof(*hdr) + sizeof(*req))
+		return (EINVAL);
+
+	hdr = (ipfw_obj_lheader *)sd->kbuf;
+	req = (ipfw_nat64stl_cfg *)(hdr + 1);
+
+	if (ipfw_check_object_name_generic(req->name) != 0)
+		return (EINVAL);
+	if (!IN6_IS_ADDR_WKPFX(&req->prefix))
+		return (EINVAL);
+	if (req->plen != 96 || req->set >= IPFW_MAX_SETS)
+		return (EINVAL);
+
+	/* XXX: check types of tables */
+
+	srv = CHAIN_TO_SRV(ch);
+
+	/* Cheap early check under the read lock, before allocating. */
+	IPFW_UH_RLOCK(ch);
+	if (nat64stl_find(srv, req->name, req->set) != NULL) {
+		IPFW_UH_RUNLOCK(ch);
+		return (EEXIST);
+	}
+	IPFW_UH_RUNLOCK(ch);
+
+	inst = nat64stl_alloc_config(req->name, req->set);
+	inst->prefix = req->prefix;
+	inst->plen = req->plen;
+	inst->flags = 0;
+
+	IPFW_UH_WLOCK(ch);
+
+	/* The lock was dropped meanwhile: re-check for a duplicate. */
+	if (nat64stl_find(srv, req->name, req->set) != NULL) {
+		IPFW_UH_WUNLOCK(ch);
+		nat64stl_free_config(inst);
+		return (EEXIST);
+	}
+
+	rc = nat64stl_create_internal(ch, inst, req);
+	if (rc != 0) {
+		/* Undo whatever nat64stl_create_internal() acquired. */
+		if (inst->flags & NAT64_KIDX)
+			ipfw_objhash_free_idx(srv, inst->no.kidx);
+		if (inst->flags & NAT64_46T)
+			ipfw_unref_table(ch, inst->map46);
+		if (inst->flags & NAT64_64T)
+			ipfw_unref_table(ch, inst->map64);
+
+		IPFW_UH_WUNLOCK(ch);
+		nat64stl_free_config(inst);
+		return (rc);
+	}
+
+	/* Okay, let's link data */
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, inst->no.kidx) = inst;
+	IPFW_WUNLOCK(ch);
+
+	IPFW_UH_WUNLOCK(ch);
+	return (0);
+}
+
+/*
+ * Unlinks instance @cfg from the chain: removes it from the object hash,
+ * frees its kernel index and drops the references on both mapping tables.
+ * Must be called with IPFW_UH_WLOCK held. The instance memory itself is
+ * not freed here.
+ */
+static void
+nat64stl_detach_config(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg)
+{
+	struct namedobj_instance *srv;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+
+	srv = CHAIN_TO_SRV(ch);
+	ipfw_objhash_del(srv, &cfg->no);
+	ipfw_objhash_free_idx(srv, cfg->no.kidx);
+	ipfw_unref_table(ch, cfg->map46);
+	ipfw_unref_table(ch, cfg->map64);
+}
+
+/*
+ * Destroys nat64 instance.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ *
+ * Returns 0 on success, EINVAL on a wrong request size or invalid name,
+ * ESRCH when the instance does not exist and EBUSY while it is still
+ * referenced (no.refcnt > 0).
+ */
+static int
+nat64stl_destroy(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	struct nat64stl_cfg *inst;
+	ipfw_obj_header *req;
+	int rc;
+
+	if (sd->valsize != sizeof(*req))
+		return (EINVAL);
+
+	req = (ipfw_obj_header *)sd->kbuf;
+	if (ipfw_check_object_name_generic(req->ntlv.name) != 0)
+		return (EINVAL);
+
+	IPFW_UH_WLOCK(ch);
+	inst = nat64stl_find(CHAIN_TO_SRV(ch), req->ntlv.name,
+	    req->ntlv.set);
+	if (inst == NULL)
+		rc = ESRCH;
+	else if (inst->no.refcnt > 0)
+		rc = EBUSY;
+	else
+		rc = 0;
+	if (rc != 0) {
+		IPFW_UH_WUNLOCK(ch);
+		return (rc);
+	}
+
+	/* Clear the service object slot under the chain write lock. */
+	IPFW_WLOCK(ch);
+	SRV_OBJECT(ch, inst->no.kidx) = NULL;
+	IPFW_WUNLOCK(ch);
+
+	nat64stl_detach_config(ch, inst);
+	IPFW_UH_WUNLOCK(ch);
+
+	nat64stl_free_config(inst);
+	return (0);
+}
+
+/*
+ * Lists all nat64 instances currently available in kernel.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_lheader ]
+ * Reply: [ ipfw_obj_lheader ipfw_nat64stl_cfg x N ]
+ *
+ * The header is always filled with the instance count, record size and
+ * total size required, so on ENOMEM the caller can see how large the
+ * buffer has to be.
+ *
+ * Returns 0 on success
+ */
+static int
+nat64stl_list(struct ip_fw_chain *ch, ip_fw3_opheader *op3,
+    struct sockopt_data *sd)
+{
+	struct nat64stl_dump_arg dump;
+	ipfw_obj_lheader *hdr;
+
+	/* Check minimum header size */
+	if (sd->valsize < sizeof(ipfw_obj_lheader))
+		return (EINVAL);
+
+	hdr = (ipfw_obj_lheader *)ipfw_get_sopt_header(sd, sizeof(*hdr));
+
+	memset(&dump, 0, sizeof(dump));
+	dump.ch = ch;
+	dump.sd = sd;
+
+	IPFW_UH_RLOCK(ch);
+	hdr->count = ipfw_objhash_count_type(CHAIN_TO_SRV(ch),
+	    IPFW_TLV_NAT64STL_NAME);
+	hdr->objsize = sizeof(ipfw_nat64stl_cfg);
+	hdr->size = sizeof(*hdr) + hdr->count * hdr->objsize;
+
+	if (sd->valsize >= hdr->size) {
+		ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), export_config_cb,
+		    &dump, IPFW_TLV_NAT64STL_NAME);
+		IPFW_UH_RUNLOCK(ch);
+		return (0);
+	}
+	IPFW_UH_RUNLOCK(ch);
+	return (ENOMEM);
+}
+
+/*
+ * Copies one counter named _field from the instance statistics block into
+ * the same-named member of the userland statistics structure.
+ */
+#define __COPY_STAT_FIELD(_cfg, _stats, _field) \
+ (_stats)->_field = NAT64STAT_FETCH(&(_cfg)->stats, _field)
+/*
+ * Fills @stats with the current values of the counters of instance @cfg.
+ * Members of @stats that are not listed below are left untouched.
+ */
+static void
+export_stats(struct ip_fw_chain *ch, struct nat64stl_cfg *cfg,
+ struct ipfw_nat64stl_stats *stats)
+{
+
+ __COPY_STAT_FIELD(cfg, stats, opcnt64);
+ __COPY_STAT_FIELD(cfg, stats, opcnt46);
+ __COPY_STAT_FIELD(cfg, stats, ofrags);
+ __COPY_STAT_FIELD(cfg, stats, ifrags);
+ __COPY_STAT_FIELD(cfg, stats, oerrors);
+ __COPY_STAT_FIELD(cfg, stats, noroute4);
+ __COPY_STAT_FIELD(cfg, stats, noroute6);
+ __COPY_STAT_FIELD(cfg, stats, noproto);
+ __COPY_STAT_FIELD(cfg, stats, nomem);
+ __COPY_STAT_FIELD(cfg, stats, dropped);
+}
+
+/*
+ * Get nat64 statistics.
+ * Data layout (v0)(current):
+ * Request: [ ipfw_obj_header ]
+ * Reply: [ ipfw_obj_header ipfw_obj_ctlv [ uint64_t x N ]]
+ *
+ * Returns 0 on success, EINVAL when the buffer size is not a multiple of
+ * sizeof(uint64_t) or the header cannot be obtained, ENOMEM when the
+ * buffer is too small for the reply and ESRCH when the instance does not
+ * exist.
+ */
+static int
+nat64stl_stats(struct ip_fw_chain *ch, ip_fw3_opheader *op,
+    struct sockopt_data *sd)
+{
+	struct ipfw_nat64stl_stats snapshot;
+	struct nat64stl_cfg *inst;
+	ipfw_obj_header *hdr;
+	ipfw_obj_ctlv *tlv;
+	size_t need;
+
+	need = sizeof(ipfw_obj_header) + sizeof(ipfw_obj_ctlv) +
+	    sizeof(snapshot);
+	if (sd->valsize % sizeof(uint64_t))
+		return (EINVAL);
+	if (sd->valsize < need)
+		return (ENOMEM);
+	hdr = (ipfw_obj_header *)ipfw_get_sopt_header(sd, need);
+	if (hdr == NULL)
+		return (EINVAL);
+	memset(&snapshot, 0, sizeof(snapshot));
+
+	/* Take the counters snapshot under the UH read lock. */
+	IPFW_UH_RLOCK(ch);
+	inst = nat64stl_find(CHAIN_TO_SRV(ch), hdr->ntlv.name,
+	    hdr->ntlv.set);
+	if (inst != NULL)
+		export_stats(ch, inst, &snapshot);
+	IPFW_UH_RUNLOCK(ch);
+	if (inst == NULL)
+		return (ESRCH);
+
+	/* Counters TLV directly follows the object header. */
+	tlv = (ipfw_obj_ctlv *)(hdr + 1);
+	memset(tlv, 0, sizeof(*tlv));
+	tlv->head.type = IPFW_TLV_COUNTERS;
+	tlv->head.length = need - sizeof(ipfw_obj_header);
+	tlv->version = IPFW_NAT64_VERSION;
+	tlv->objsize = sizeof(uint64_t);
+	tlv->count = sizeof(snapshot) / sizeof(uint64_t);
+	memcpy(tlv + 1, &snapshot, sizeof(snapshot));
+	return (0);
+}
+
+/*
+ * Sockopt handler table for nat64stl; registered in nat64stl_init() and
+ * removed in nat64stl_uninit(). Each entry pairs an IP_FW_NAT64STL_*
+ * opcode with its handler function. The HDIR_* column presumably limits
+ * the allowed request direction -- confirm against struct
+ * ipfw_sopt_handler.
+ */
+static struct ipfw_sopt_handler scodes[] = {
+
+ { IP_FW_NAT64STL_CREATE, 0, HDIR_SET, nat64stl_create },
+ { IP_FW_NAT64STL_DESTROY,0, HDIR_SET, nat64stl_destroy },
+ { IP_FW_NAT64STL_CONFIG, 0, HDIR_BOTH, nat64stl_config },
+ { IP_FW_NAT64STL_LIST, 0, HDIR_GET, nat64stl_list },
+ { IP_FW_NAT64STL_STATS, 0, HDIR_GET, nat64stl_stats },
+};
+
+/*
+ * Object-rewrite classifier for O_EXTERNAL_INSTANCE opcodes.
+ *
+ * The instruction is ours only when the preceding instruction is an
+ * O_EXTERNAL_ACTION whose arg1 equals V_nat64stl_eid. In that case the
+ * instance index (cmd->arg1) is stored in *puidx, *ptype is set to 0 and
+ * 0 is returned; otherwise 1 is returned and the outputs are untouched.
+ */
+static int
+nat64stl_classify(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
+{
+	ipfw_insn *action;
+
+	action = cmd - 1;
+	if (action->opcode == O_EXTERNAL_ACTION &&
+	    action->arg1 == V_nat64stl_eid) {
+		*puidx = cmd->arg1;
+		*ptype = 0;
+		return (0);
+	}
+	return (1);
+}
+
+/*
+ * Object-rewrite callback: stores the new instance index @idx in the
+ * arg1 field of @cmd.
+ */
+static void
+nat64stl_update_arg1(ipfw_insn *cmd, uint16_t idx)
+{
+
+	cmd->arg1 = idx;
+}
+
+/*
+ * Object-rewrite callback: looks up a nat64stl named object described by
+ * @ti in the chain's service object hash. The result of
+ * ipfw_objhash_find_type() is returned unchanged; the object pointer is
+ * passed back through @pno.
+ */
+static int
+nat64stl_findbyname(struct ip_fw_chain *ch, struct tid_info *ti,
+    struct named_object **pno)
+{
+
+	return (ipfw_objhash_find_type(CHAIN_TO_SRV(ch), ti,
+	    IPFW_TLV_NAT64STL_NAME, pno));
+}
+
+/*
+ * Object-rewrite callback: returns the named object stored under kernel
+ * index @idx in the chain's service object hash. Asserts IPFW_UH_WLOCK,
+ * and asserts (KASSERT) that the object exists.
+ */
+static struct named_object *
+nat64stl_findbykidx(struct ip_fw_chain *ch, uint16_t idx)
+{
+	struct namedobj_instance *ni;
+	struct named_object *no;
+
+	IPFW_UH_WLOCK_ASSERT(ch);
+	ni = CHAIN_TO_SRV(ch);
+	no = ipfw_objhash_lookup_kidx(ni, idx);
+	/* Name the subsystem precisely so a panic points at nat64stl. */
+	KASSERT(no != NULL, ("NAT64STL with index %d not found", idx));
+
+	return (no);
+}
+
+/*
+ * Object-rewrite callback: forwards a set-management command to
+ * ipfw_obj_manage_sets() for objects of type IPFW_TLV_NAT64STL_NAME and
+ * returns its result.
+ */
+static int
+nat64stl_manage_sets(struct ip_fw_chain *ch, uint16_t set, uint8_t new_set,
+    enum ipfw_sets_cmd cmd)
+{
+	struct namedobj_instance *ni;
+
+	ni = CHAIN_TO_SRV(ch);
+	return (ipfw_obj_manage_sets(ni, IPFW_TLV_NAT64STL_NAME, set,
+	    new_set, cmd));
+}
+
+/*
+ * Object-rewrite descriptor for nat64stl instances referenced from rules
+ * through O_EXTERNAL_INSTANCE; registered in nat64stl_init() and removed
+ * in nat64stl_uninit(). The callbacks are the nat64stl_* helpers of this
+ * file.
+ */
+static struct opcode_obj_rewrite opcodes[] = {
+ {
+ .opcode = O_EXTERNAL_INSTANCE,
+ .etlv = IPFW_TLV_EACTION /* just show it isn't table */,
+ .classifier = nat64stl_classify,
+ .update = nat64stl_update_arg1,
+ .find_byname = nat64stl_findbyname,
+ .find_bykidx = nat64stl_findbykidx,
+ .manage_sets = nat64stl_manage_sets,
+ },
+};
+
+/*
+ * ipfw_objhash_foreach_type() callback used on module unload: unlinks one
+ * nat64stl instance from the chain and frees it.
+ *
+ * @arg is the struct ip_fw_chain the instance belongs to. The SRV_OBJECT
+ * slot is cleared before the instance is detached and freed.
+ * Always returns 0.
+ */
+static int
+destroy_config_cb(struct namedobj_instance *ni, struct named_object *no,
+    void *arg)
+{
+	struct ip_fw_chain *chain;
+	struct nat64stl_cfg *inst;
+
+	chain = (struct ip_fw_chain *)arg;
+	inst = (struct nat64stl_cfg *)SRV_OBJECT(chain, no->kidx);
+	SRV_OBJECT(chain, no->kidx) = NULL;
+
+	nat64stl_detach_config(chain, inst);
+	nat64stl_free_config(inst);
+	return (0);
+}
+
+/*
+ * Registers the nat64stl external action for chain @ch together with its
+ * sockopt handlers and object rewriter.
+ *
+ * Returns 0 on success and ENXIO when ipfw_add_eaction() returns 0.
+ */
+int
+nat64stl_init(struct ip_fw_chain *ch, int first)
+{
+
+	V_nat64stl_eid = ipfw_add_eaction(ch, ipfw_nat64stl, "nat64stl");
+	if (V_nat64stl_eid != 0) {
+		IPFW_ADD_SOPT_HANDLER(first, scodes);
+		IPFW_ADD_OBJ_REWRITER(first, opcodes);
+		return (0);
+	}
+	return (ENXIO);
+}
+
+/*
+ * Unregisters nat64stl from chain @ch: removes the object rewriter, the
+ * sockopt handlers and the external action, then unlinks and frees every
+ * remaining nat64stl instance.
+ */
+void
+nat64stl_uninit(struct ip_fw_chain *ch, int last)
+{
+
+	IPFW_DEL_OBJ_REWRITER(last, opcodes);
+	IPFW_DEL_SOPT_HANDLER(last, scodes);
+	ipfw_del_eaction(ch, V_nat64stl_eid);
+	/*
+	 * Since we already have deregistered external action,
+	 * our named objects become inaccessible via rules, because
+	 * all rules were truncated by ipfw_del_eaction().
+	 * So, we can unlink and destroy our named objects without holding
+	 * IPFW_WLOCK().
+	 *
+	 * IPFW_UH_WLOCK() is still required: destroy_config_cb() calls
+	 * nat64stl_detach_config(), which asserts that it is held.
+	 */
+	IPFW_UH_WLOCK(ch);
+	ipfw_objhash_foreach_type(CHAIN_TO_SRV(ch), destroy_config_cb, ch,
+	    IPFW_TLV_NAT64STL_NAME);
+	V_nat64stl_eid = 0;
+	IPFW_UH_WUNLOCK(ch);
+}
+
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Mar 2, 7:11 PM (14 h, 7 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
29167552
Default Alt Text
D6434.id16523.diff (188 KB)
Attached To
Mode
D6434: [RFC/RFT] NAT64 implementation for ipfw.
Attached
Detach File
Event Timeline
Log In to Comment