Page MenuHomeFreeBSD

D54172.diff
No OneTemporary

D54172.diff

diff --git a/lib/libifconfig/libifconfig.c b/lib/libifconfig/libifconfig.c
--- a/lib/libifconfig/libifconfig.c
+++ b/lib/libifconfig/libifconfig.c
@@ -562,7 +562,9 @@
(strncmp(name, "vlan",
strlen("vlan")) == 0) ||
(strncmp(name, "vxlan",
- strlen("vxlan")) == 0)) {
+ strlen("vxlan")) == 0) ||
+ (strncmp(name, "geneve",
+ strlen("geneve")) == 0)) {
h->error.errtype = OTHER;
h->error.errcode = ENOSYS;
return (-1);
diff --git a/sbin/ifconfig/Makefile b/sbin/ifconfig/Makefile
--- a/sbin/ifconfig/Makefile
+++ b/sbin/ifconfig/Makefile
@@ -30,6 +30,7 @@
SRCS+= iffib.c # non-default FIB support
SRCS+= ifvlan.c # SIOC[GS]ETVLAN support
SRCS+= ifvxlan.c # VXLAN support
+SRCS+= ifgeneve.c # GENEVE support
SRCS+= ifgre.c # GRE keys etc
SRCS+= ifgif.c # GIF reversed header workaround
SRCS+= ifipsec.c # IPsec VTI
diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8
--- a/sbin/ifconfig/ifconfig.8
+++ b/sbin/ifconfig/ifconfig.8
@@ -28,7 +28,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd October 12, 2025
+.Dd December 11, 2025
.Dt IFCONFIG 8
.Os
.Sh NAME
@@ -3253,6 +3253,116 @@
.It Cm vxlanflushall
Delete all addresses, including static addresses, from the forwarding table.
.El
+.Ss Generic Network Virtualization Encapsulation Parameters
+The following parameters are used to configure
+.Xr geneve 4
+interfaces.
+.Bl -tag -width indent
+.It Cm geneveid Ar identifier
+This value is a 24-bit Virtual Network Identifier (VNI) that identifies the
+virtual network segment the interface belongs to.
+.It Cm genevemode Ar mode
+Set the
+.Nm
+protocol operating
+.Ar mode
+value.
+Supported modes are currently:
+.Bl -tag -width indent
+.It Cm l2
+Default.
+.It Cm l3
+.El
+.It Cm genevelocal Ar address
+The source address used in the encapsulating IPv4/IPv6 header.
+The address should already be assigned to an existing interface.
+When the interface is configured in unicast mode, the listening socket
+is bound to this address.
+.It Cm geneveremote Ar address
+The interface can be configured in a unicast, or point-to-point, mode
+to create a tunnel between two hosts.
+This is the IP address of the remote end of the tunnel.
+.It Cm genevegroup Ar address
+The interface can be configured in a multicast mode
+to create a virtual network of hosts.
+This is the IP multicast group address the interface will join.
+.It Cm genevelocalport Ar port
+The port number the interface will listen on.
+The default port number is 6081.
+.It Cm geneveremoteport Ar port
+The destination port number used in the encapsulating IPv4/IPv6 header.
+The remote host should be listening on this port.
+The default port number is 6081.
+.It Cm geneveportrange Ar low high
+The range of source ports used in the encapsulating IPv4/IPv6 header.
+The port selected within the range is based on a hash of the inner frame.
+A range is useful to provide entropy within the outer IP header
+for more effective load balancing.
+The default range is between the
+.Xr sysctl 8
+variables
+.Va net.inet.ip.portrange.first
+and
+.Va net.inet.ip.portrange.last .
+.It Cm genevetimeout Ar timeout
+The maximum time, in seconds, before an entry in the forwarding table
+is pruned.
+The default is 1200 seconds (20 minutes).
+.It Cm genevemaxaddr Ar max
+The maximum number of entries in the forwarding table.
+The default is 2000.
+.It Cm genevedev Ar dev
+When the interface is configured in multicast mode, the
+.Cm dev
+interface is used to transmit IP multicast packets.
+.It Cm genevedf Ar df
+Set the Do not fragment (DF) bit in the encapsulating header.
+Supported
+.Ar df
+values are currently:
+.Bl -tag -width indent
+.It Cm set
+Do not allow fragmentation on the output IPv4/IPv6 packets and
+set the Do not fragment (DF) bit in the encapsulating IPv4 header.
+.It Cm unset
+Default.
+.It Cm inherit
+The Do not fragment (DF) bit is copied from the inner IPv4 header to the
+outer IPv4 header.
+.El
+.It Cm genevettl Ar ttl
+The TTL used in the encapsulating IPv4/IPv6 header.
+.Bl -tag -width indent
+.It Cm 0-255
+The default is 64.
+.It Cm inherit
+The TTL is copied from the inner encapsulated header to the outer header.
+.El
+.It Cm genevedscpinherit
+Inherit DSCP or Traffic Class value from the inner IPv4/IPv6 header.
+.It Fl genevedscpinherit
+Unconfigure DSCP or Traffic Class inheritance from the inner IPv4/IPv6 header.
+This is the default.
+.It Cm genevelearn
+When in L2 unicast mode, the source IP address and inner source Ethernet
+MAC address of received packets are used to dynamically populate the
+forwarding table.
+When in L2 multicast mode, an entry in the forwarding table allows the
+interface to send the frame directly to the remote host instead of
+broadcasting the frame to the multicast group.
+This is the default.
+.It Fl genevelearn
+In L2 mode, the geneve forwarding table is not populated by received packets.
+.It Cm geneveexternal
+Make this tunnel externally controlled.
+.It Fl geneveexternal
+Enable manual configuration for this tunnel.
+This is the default.
+.It Cm geneveflush
+Delete all dynamically-learned addresses from the forwarding table when in L2
+mode.
+.It Cm geneveflushall
+Delete all addresses, including static addresses, from the forwarding table.
+.El
.Ss CARP Parameters
The following parameters are used to configure
.Xr carp 4
diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c
--- a/sbin/ifconfig/ifconfig.c
+++ b/sbin/ifconfig/ifconfig.c
@@ -321,7 +321,7 @@
static void setformat(char *input)
{
- char *formatstr, *category, *modifier;
+ char *formatstr, *category, *modifier;
formatstr = strdup(input);
while ((category = strsep(&formatstr, ",")) != NULL) {
@@ -369,7 +369,7 @@
struct ifa_queue *q)
{
struct ifaddrs *right, *temp, *last, *result, *next, *tail;
-
+
right = list;
temp = list;
last = list;
@@ -783,10 +783,10 @@
err(EXIT_FAILURE, "getifaddrs");
char *cp = NULL;
-
+
if (calcorders(ifap, &q) != 0)
err(EXIT_FAILURE, "calcorders");
-
+
sifap = sortifaddrs(ifap, cmpifaddrs, &q);
TAILQ_FOREACH_SAFE(cur, &q, link, tmp)
@@ -1373,7 +1373,7 @@
{
struct ifreq my_ifr;
int s;
-
+
memset(&my_ifr, 0, sizeof(my_ifr));
(void) strlcpy(my_ifr.ifr_name, ifname, sizeof(my_ifr.ifr_name));
if (us < 0) {
@@ -1989,7 +1989,7 @@
ifd = ifa->ifa_data;
if (ifd->ifi_vhid == 0)
return;
-
+
printf(" vhid %d", ifd->ifi_vhid);
}
diff --git a/sbin/ifconfig/ifconfig_netlink.h b/sbin/ifconfig/ifconfig_netlink.h
--- a/sbin/ifconfig/ifconfig_netlink.h
+++ b/sbin/ifconfig/ifconfig_netlink.h
@@ -34,4 +34,7 @@
#include <netlink/netlink_snl_route.h>
#include <netlink/netlink_snl_route_compat.h>
#include <netlink/netlink_snl_route_parsers.h>
+
+
+/*
+ * Send a prepared netlink request and wait for the kernel's reply.
+ * Defined in ifconfig_netlink.c.
+ */
+void ifcreate_nl(if_ctx *, struct nlmsghdr *);
#endif
diff --git a/sbin/ifconfig/ifconfig_netlink.c b/sbin/ifconfig/ifconfig_netlink.c
--- a/sbin/ifconfig/ifconfig_netlink.c
+++ b/sbin/ifconfig/ifconfig_netlink.c
@@ -457,7 +457,7 @@
nl_init_socket(&ss);
- struct ifmap *ifmap = prepare_ifmap(&ss, args->ifname);
+ struct ifmap *ifmap = prepare_ifmap(&ss, args->ifname);
struct iface **sorted_ifaces = snl_allocz(&ss, ifmap->count * sizeof(void *));
for (uint32_t i = 0, num = 0; i < ifmap->size; i++) {
if (ifmap->ifaces[i] != NULL) {
@@ -493,3 +493,20 @@
snl_free(&ss);
}
+/*
+ * Send the finalized netlink request `hdr' on the context's netlink socket
+ * and wait for the reply carrying the same sequence number.
+ *
+ * A reply of type NL_RTM_NEWLINK is accepted as-is.  Any other reply is
+ * decoded as a netlink error message.  The program exits when the request
+ * cannot be sent, when no reply can be read, when the reply cannot be
+ * decoded, or when it carries a non-zero error code.
+ */
+void
+ifcreate_nl(if_ctx *ctx, struct nlmsghdr *hdr)
+{
+	struct snl_state *ss = ctx->io_ss;
+	struct snl_errmsg_data errmsg = {};
+
+	if (!snl_send_message(ss, hdr))
+		err(1, "unable to send netlink message");
+
+	/* Guard against a missing reply before looking at its type. */
+	hdr = snl_read_reply(ss, hdr->nlmsg_seq);
+	if (hdr == NULL)
+		errx(1, "(NETLINK) no reply received");
+	if (hdr->nlmsg_type == NL_RTM_NEWLINK)
+		return;
+
+	if (!snl_parse_errmsg(ss, hdr, &errmsg))
+		errx(EINVAL, "(NETLINK)");
+
+	/* An error code of zero is a plain acknowledgement. */
+	if (errmsg.error == 0)
+		return;
+
+	if (errmsg.error_str != NULL)
+		errx(errmsg.error, "(NETLINK) %s", errmsg.error_str);
+	/* No text from the kernel: fall back to the errno description. */
+	errc(errmsg.error, errmsg.error, "(NETLINK)");
+}
diff --git a/sbin/ifconfig/ifgeneve.c b/sbin/ifconfig/ifgeneve.c
new file mode 100644
--- /dev/null
+++ b/sbin/ifconfig/ifgeneve.c
@@ -0,0 +1,1561 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Seyed Pouria Mousavizadeh Tehrani <info@spmzt.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/nv.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <netdb.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_strings.h>
+#include <net/if_geneve.h>
+#include <netinet/in.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <err.h>
+#include <errno.h>
+
+#include "ifconfig.h"
+
+#ifndef WITHOUT_NETLINK
+#include "ifconfig_netlink.h"
+#else
+#include <net/route.h>
+
+/*
+ * "ifconfig_netlink.h" is not included in the WITHOUT_NETLINK build, so the
+ * DF policy values used by get_df() are declared locally here.
+ * NOTE(review): presumably these must stay in step with the netlink
+ * definition of the same enum -- confirm.
+ */
+enum ifla_geneve_df {
+ IFLA_GENEVE_DF_UNSET,
+ IFLA_GENEVE_DF_SET,
+ IFLA_GENEVE_DF_INHERIT,
+ __IFLA_GENEVE_DF_MAX,
+};
+#endif
+
+/*
+ * Creation-time parameters.  ifla_proto starts out as GENEVE_PROTO_ETHER
+ * (the value get_proto() maps "l2" to) and is overwritten by
+ * setgeneve_mode_clone() when a mode keyword is given.
+ */
+static struct geneve_params gnvp = {
+ .ifla_proto = GENEVE_PROTO_ETHER,
+};
+
+/*
+ * Translate a mode keyword into the matching inner-protocol constant:
+ * "l2" selects GENEVE_PROTO_ETHER and "l3" selects GENEVE_PROTO_INHERIT.
+ *
+ * Returns 0 and stores the constant in *valp, or -1 (leaving *valp
+ * untouched) for any other keyword.
+ */
+static int
+get_proto(const char *cp, uint16_t *valp)
+{
+	if (strcmp(cp, "l2") == 0) {
+		*valp = GENEVE_PROTO_ETHER;
+		return (0);
+	}
+	if (strcmp(cp, "l3") == 0) {
+		*valp = GENEVE_PROTO_INHERIT;
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Parse an unsigned number with strtoul(3), base auto-detected.
+ *
+ * Returns 0 and stores the number in *valp.  Returns -1, leaving *valp
+ * untouched, when the string is empty, has trailing characters, or is out
+ * of range for strtoul().
+ */
+static int
+get_val(const char *cp, u_long *valp)
+{
+	char *end;
+	u_long parsed;
+
+	errno = 0;
+	parsed = strtoul(cp, &end, 0);
+
+	if (*cp == '\0')
+		return (-1);
+	if (*end != '\0')
+		return (-1);
+	if (errno == ERANGE)
+		return (-1);
+
+	*valp = parsed;
+	return (0);
+}
+
+/*
+ * Translate a DF policy keyword ("set", "inherit" or "unset") into the
+ * corresponding enum ifla_geneve_df value.
+ *
+ * Returns 0 and stores the value in *valp, or -1 (leaving *valp untouched)
+ * for an unknown keyword.
+ */
+static int
+get_df(const char *cp, enum ifla_geneve_df *valp)
+{
+	static const struct {
+		const char *name;
+		enum ifla_geneve_df value;
+	} policies[] = {
+		{ "set", IFLA_GENEVE_DF_SET },
+		{ "inherit", IFLA_GENEVE_DF_INHERIT },
+		{ "unset", IFLA_GENEVE_DF_UNSET },
+	};
+
+	for (size_t i = 0; i < nitems(policies); i++) {
+		if (strcmp(cp, policies[i].name) == 0) {
+			*valp = policies[i].value;
+			return (0);
+		}
+	}
+
+	return (-1);
+}
+
+/*
+ * Report whether the address held in `ai' is a multicast address.
+ * The decision is made on ai->ai_family; only the families compiled in
+ * (INET and/or INET6) are handled, and any other family terminates the
+ * program.
+ */
+static bool
+is_multicast(struct addrinfo *ai)
+{
+	switch (ai->ai_family) {
+#ifdef INET
+	case AF_INET: {
+		struct sockaddr_in *sin4 = satosin(ai->ai_addr);
+
+		return (IN_MULTICAST(ntohl(sin4->sin_addr.s_addr)));
+	}
+#endif
+#ifdef INET6
+	case AF_INET6: {
+		struct sockaddr_in6 *sin6 = satosin6(ai->ai_addr);
+
+		return (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr));
+	}
+#endif
+	default:
+		errx(1, "address family not supported");
+	}
+}
+
+/*
+ * Handler for the "genevemode" creation keyword.
+ *
+ * The geneve mode is read-only after creation, so the keyword is only
+ * recorded in gnvp for use when the interface is created; no separate
+ * netlink implementation is needed.  Exits on an unknown mode.
+ */
+static void
+setgeneve_mode_clone(if_ctx *ctx __unused, const char *arg, int dummy __unused)
+{
+	uint16_t proto;
+
+	if (get_proto(arg, &proto) != 0)
+		errx(1, "invalid inner protocol: %s", arg);
+
+	gnvp.ifla_proto = proto;
+}
+
+#ifndef WITHOUT_NETLINK
+
+/*
+ * Decoded IFLA_INFO_DATA attributes of a geneve link, filled in by the
+ * nla_geneve_linkinfo_data parser table.
+ *
+ * Each field's type must match the width written by the snl callback that
+ * targets it: a field wider than the callback's store would be filled only
+ * partially, which yields the wrong value on big-endian machines.
+ */
+struct nl_parsed_geneve {
+	/* essential */
+	uint32_t		ifla_vni;
+	uint16_t		ifla_proto;
+	uint16_t		ifla_local_port;
+	uint16_t		ifla_remote_port;
+	struct sockaddr		*ifla_local;
+	struct sockaddr		*ifla_remote;
+
+	/* optional */
+	bool			ifla_dscp_inherit;
+	bool			ifla_ttl_inherit;
+	bool			ifla_external;
+	uint8_t			ifla_ttl;
+	/* enum ifla_geneve_df value; stored through snl_attr_get_uint8 */
+	uint8_t			ifla_df;
+	struct ifla_geneve_port_range *ifla_port_range;
+
+	/* multicast specific */
+	/* stored through snl_attr_get_uint32 */
+	uint32_t		ifla_mc_ifindex;	/* read-only */
+	char			*ifla_mc_ifname;
+
+	/* l2 specific */
+	bool			ifla_ftable_learn;
+	bool			ifla_ftable_flush;
+	uint32_t		ifla_ftable_max;
+	uint32_t		ifla_ftable_timeout;
+	uint32_t		ifla_ftable_count;	/* read-only */
+	uint32_t		ifla_ftable_nospace;	/* read-only */
+	uint32_t		ifla_ftable_lock_upgrade_failed; /* read-only */
+	uint64_t		ifla_stats_txcsum;	/* read-only */
+	uint64_t		ifla_stats_tso;		/* read-only */
+	uint64_t		ifla_stats_rxcsum;	/* read-only */
+};
+
+/*
+ * Decoded IFLA_LINKINFO contents: the IFLA_INFO_KIND string and the nested
+ * IFLA_INFO_DATA attributes (see ap_geneve_linkinfo).
+ */
+struct nla_geneve_info {
+ const char *kind;
+ struct nl_parsed_geneve data;
+};
+
+/*
+ * Top-level result of parsing a link message with geneve_parser: the
+ * interface index from the ifinfomsg header plus the decoded IFLA_LINKINFO.
+ */
+struct nla_geneve_link {
+ uint32_t ifi_index;
+ struct nla_geneve_info linkinfo;
+};
+
+/*
+ * Start an NL_RTM_NEWLINK request in `nw' addressed to the interface named
+ * by ctx->ifname.  `flags' is OR-ed into the request's nlmsg_flags.  The
+ * caller appends its attributes and completes the exchange with
+ * geneve_nl_fini().
+ */
+static inline void
+geneve_nl_init(if_ctx *ctx, struct snl_writer *nw, uint32_t flags)
+{
+	struct nlmsghdr *req;
+
+	snl_init_writer(ctx->io_ss, nw);
+
+	req = snl_create_msg_request(nw, NL_RTM_NEWLINK);
+	req->nlmsg_flags |= flags;
+
+	snl_reserve_msg_object(nw, struct ifinfomsg);
+	snl_add_msg_attr_string(nw, IFLA_IFNAME, ctx->ifname);
+}
+
+/*
+ * Finalize the request built in `nw' and hand it to ifcreate_nl(), which
+ * sends it and checks the reply.  Exits if the message cannot be finalized.
+ */
+static inline void
+geneve_nl_fini(if_ctx *ctx, struct snl_writer *nw)
+{
+	struct nlmsghdr *msg = snl_finalize_msg(nw);
+
+	if (msg == NULL)
+		err(1, "unable to send netlink message");
+
+	ifcreate_nl(ctx, msg);
+}
+
+/*
+ * Attribute table for the nested IFLA_INFO_DATA of a geneve link.  Each
+ * entry names the attribute type, the offset of the destination field in
+ * struct nl_parsed_geneve and the snl callback that stores the value there.
+ * NOTE(review): the snl parser declaration macros presumably expect the
+ * table sorted by attribute type -- confirm the IFLA_GENEVE_* ordering.
+ */
+#define _OUT(_field) offsetof(struct nl_parsed_geneve, _field)
+static const struct snl_attr_parser nla_geneve_linkinfo_data[] = {
+ { .type = IFLA_GENEVE_ID, .off = _OUT(ifla_vni), .cb = snl_attr_get_uint32 },
+ { .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = snl_attr_get_uint16 },
+ { .type = IFLA_GENEVE_LOCAL, .off = _OUT(ifla_local), .cb = snl_attr_get_ip },
+ { .type = IFLA_GENEVE_REMOTE, .off = _OUT(ifla_remote), .cb = snl_attr_get_ip },
+ { .type = IFLA_GENEVE_LOCAL_PORT, .off = _OUT(ifla_local_port), .cb = snl_attr_get_uint16 },
+ { .type = IFLA_GENEVE_PORT, .off = _OUT(ifla_remote_port), .cb = snl_attr_get_uint16 },
+ { .type = IFLA_GENEVE_PORT_RANGE, .off = _OUT(ifla_port_range), .cb = snl_attr_dup_struct },
+ { .type = IFLA_GENEVE_DF, .off = _OUT(ifla_df), .cb = snl_attr_get_uint8 },
+ { .type = IFLA_GENEVE_TTL, .off = _OUT(ifla_ttl), .cb = snl_attr_get_uint8 },
+ { .type = IFLA_GENEVE_TTL_INHERIT, .off = _OUT(ifla_ttl_inherit), .cb = snl_attr_get_bool },
+ { .type = IFLA_GENEVE_DSCP_INHERIT, .off = _OUT(ifla_dscp_inherit), .cb = snl_attr_get_bool },
+ { .type = IFLA_GENEVE_COLLECT_METADATA, .off = _OUT(ifla_external), .cb = snl_attr_get_bool },
+ { .type = IFLA_GENEVE_FTABLE_LEARN, .off = _OUT(ifla_ftable_learn), .cb = snl_attr_get_bool },
+ { .type = IFLA_GENEVE_FTABLE_FLUSH, .off = _OUT(ifla_ftable_flush), .cb = snl_attr_get_bool },
+ { .type = IFLA_GENEVE_FTABLE_MAX, .off = _OUT(ifla_ftable_max), .cb = snl_attr_get_uint32 },
+ { .type = IFLA_GENEVE_FTABLE_TIMEOUT, .off = _OUT(ifla_ftable_timeout), .cb = snl_attr_get_uint32 },
+ { .type = IFLA_GENEVE_FTABLE_COUNT, .off = _OUT(ifla_ftable_count), .cb = snl_attr_get_uint32 },
+ { .type = IFLA_GENEVE_FTABLE_NOSPACE_CNT, .off = _OUT(ifla_ftable_nospace), .cb = snl_attr_get_uint32 },
+ { .type = IFLA_GENEVE_FTABLE_LOCK_UP_FAIL_CNT, .off = _OUT(ifla_ftable_lock_upgrade_failed), .cb = snl_attr_get_uint32 },
+ { .type = IFLA_GENEVE_MC_IFNAME, .off = _OUT(ifla_mc_ifname), .cb = snl_attr_get_string },
+ { .type = IFLA_GENEVE_MC_IFINDEX, .off = _OUT(ifla_mc_ifindex), .cb = snl_attr_get_uint32 },
+ { .type = IFLA_GENEVE_TXCSUM_CNT, .off = _OUT(ifla_stats_txcsum), .cb = snl_attr_get_uint64 },
+ { .type = IFLA_GENEVE_TSO_CNT, .off = _OUT(ifla_stats_tso), .cb = snl_attr_get_uint64 },
+ { .type = IFLA_GENEVE_RXCSUM_CNT, .off = _OUT(ifla_stats_rxcsum), .cb = snl_attr_get_uint64 },
+};
+#undef _OUT
+SNL_DECLARE_ATTR_PARSER(geneve_linkinfo_data_parser, nla_geneve_linkinfo_data);
+
+/*
+ * Attribute table for IFLA_LINKINFO: the kind string is stored directly and
+ * IFLA_INFO_DATA is decoded as a nested attribute set by
+ * geneve_linkinfo_data_parser into the `data' member.
+ */
+#define _OUT(_field) offsetof(struct nla_geneve_info, _field)
+static const struct snl_attr_parser ap_geneve_linkinfo[] = {
+ { .type = IFLA_INFO_KIND, .off = _OUT(kind), .cb = snl_attr_get_string },
+ { .type = IFLA_INFO_DATA, .off = _OUT(data),
+ .arg = &geneve_linkinfo_data_parser, .cb = snl_attr_get_nested },
+};
+#undef _OUT
+SNL_DECLARE_ATTR_PARSER(geneve_linkinfo_parser, ap_geneve_linkinfo);
+
+/*
+ * Top-level parser for a link message: ifi_index is copied out of the
+ * struct ifinfomsg header and IFLA_LINKINFO is decoded as a nested
+ * attribute set by geneve_linkinfo_parser.  Results land in
+ * struct nla_geneve_link.
+ */
+#define _IN(_field) offsetof(struct ifinfomsg, _field)
+#define _OUT(_field) offsetof(struct nla_geneve_link, _field)
+static const struct snl_attr_parser ap_geneve_link[] = {
+ { .type = IFLA_LINKINFO, .off = _OUT(linkinfo),
+ .arg = &geneve_linkinfo_parser, .cb = snl_attr_get_nested },
+};
+
+static const struct snl_field_parser fp_geneve_link[] = {
+ { .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = snl_field_get_uint32 },
+};
+#undef _IN
+#undef _OUT
+SNL_DECLARE_PARSER(geneve_parser, struct ifinfomsg, fp_geneve_link, ap_geneve_link);
+
+/*
+ * Every parser declared above, collected in one array.
+ * NOTE(review): presumably consumed by a parser-verification macro later in
+ * the file -- not visible from here, confirm.
+ */
+static const struct snl_hdr_parser *all_parsers[] = {
+ &geneve_linkinfo_data_parser,
+ &geneve_linkinfo_parser,
+ &geneve_parser,
+};
+
+/*
+ * Print the geneve-specific status lines for the interface named in `ctx'.
+ *
+ * Interfaces whose name does not start with "geneve" are skipped.  The link
+ * is queried with an NL_RTM_GETLINK request and the reply is decoded with
+ * geneve_parser.  The function returns silently when the request cannot be
+ * finalized or sent, when no reply is read, or when the reply cannot be
+ * decoded as a link; it exits when the kernel answers with an error message
+ * that carries an error string.
+ *
+ * The extended fields (port range, TTL, DF, forwarding table, statistics)
+ * are printed only when ctx->args->verbose is set.
+ */
+static void
+geneve_status_nl(if_ctx *ctx)
+{
+	struct snl_writer nw;
+	struct nlmsghdr *hdr;
+	/*
+	 * Both results start zeroed: the parsers only store the attributes
+	 * that are present in the reply, so absent pointers stay NULL.
+	 */
+	struct snl_errmsg_data errmsg = {};
+	struct nla_geneve_link geneve_link = {};
+	const struct nl_parsed_geneve *gd;
+	char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
+	struct sockaddr *lsa, *rsa;
+	bool mc = false;
+	bool lipv6 = false, ripv6 = false;
+
+	if (strncmp(ctx->ifname, "geneve", sizeof("geneve") - 1) != 0)
+		return;
+
+	snl_init_writer(ctx->io_ss, &nw);
+	hdr = snl_create_msg_request(&nw, NL_RTM_GETLINK);
+	hdr->nlmsg_flags |= NLM_F_DUMP;
+	snl_reserve_msg_object(&nw, struct ifinfomsg);
+	snl_add_msg_attr_string(&nw, IFLA_IFNAME, ctx->ifname);
+
+	if (!(hdr = snl_finalize_msg(&nw)) || (!snl_send_message(ctx->io_ss, hdr)))
+		return;
+
+	hdr = snl_read_reply(ctx->io_ss, hdr->nlmsg_seq);
+	if (hdr == NULL)
+		return;
+	if (hdr->nlmsg_type != NL_RTM_NEWLINK) {
+		if (!snl_parse_errmsg(ctx->io_ss, hdr, &errmsg))
+			errx(EINVAL, "(NETLINK)");
+		if (errmsg.error_str != NULL)
+			errx(errmsg.error, "(NETLINK) %s", errmsg.error_str);
+		/* Not a link message, so there is nothing to decode. */
+		return;
+	}
+
+	if (!snl_parse_nlmsg(ctx->io_ss, hdr, &geneve_parser, &geneve_link))
+		return;
+	gd = &geneve_link.linkinfo.data;
+
+	printf("\tgeneve mode: ");
+	switch (gd->ifla_proto) {
+	case GENEVE_PROTO_INHERIT:
+		printf("l3");
+		break;
+	case GENEVE_PROTO_ETHER:
+	default:
+		printf("l2");
+		break;
+	}
+
+	printf("\n\tgeneve config:\n");
+	/* Just report nothing if the network identity isn't set yet. */
+	if (gd->ifla_vni >= GENEVE_VNI_MAX) {
+		printf("\t\tvirtual network identifier (vni): not configured\n");
+		return;
+	}
+
+	lsa = gd->ifla_local;
+	rsa = gd->ifla_remote;
+
+	/* An empty string marks an address that is unset or unprintable. */
+	if ((lsa == NULL) ||
+	    (getnameinfo(lsa, lsa->sa_len, src, sizeof(src),
+	    NULL, 0, NI_NUMERICHOST) != 0))
+		src[0] = '\0';
+	else
+		lipv6 = lsa->sa_family == AF_INET6;
+
+	if ((rsa == NULL) ||
+	    (getnameinfo(rsa, rsa->sa_len, dst, sizeof(dst),
+	    NULL, 0, NI_NUMERICHOST) != 0))
+		dst[0] = '\0';
+	else {
+		ripv6 = rsa->sa_family == AF_INET6;
+		if (!ripv6) {
+			struct sockaddr_in *sin = satosin(rsa);
+			mc = IN_MULTICAST(ntohl(sin->sin_addr.s_addr));
+		} else {
+			struct sockaddr_in6 *sin6 = satosin6(rsa);
+			mc = IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr);
+		}
+	}
+
+	printf("\t\tvirtual network identifier (vni): %u", gd->ifla_vni);
+	/* Brackets follow each address's own family. */
+	if (src[0] != '\0')
+		printf("\n\t\tlocal: %s%s%s:%u", lipv6 ? "[" : "", src,
+		    lipv6 ? "]" : "", gd->ifla_local_port);
+	if (dst[0] != '\0') {
+		/* The peer is shown with the remote port, not the local one. */
+		printf("\n\t\t%s: %s%s%s:%u", mc ? "group" : "remote",
+		    ripv6 ? "[" : "", dst, ripv6 ? "]" : "",
+		    gd->ifla_remote_port);
+		if (mc && gd->ifla_mc_ifname != NULL)
+			printf(", dev: %s", gd->ifla_mc_ifname);
+	}
+
+	if (ctx->args->verbose) {
+		printf("\n\t\t");
+		/* The port range is only present if the kernel reported it. */
+		if (gd->ifla_port_range != NULL)
+			printf("portrange: %u-%u, ",
+			    gd->ifla_port_range->low,
+			    gd->ifla_port_range->high);
+
+		if (gd->ifla_ttl_inherit)
+			printf("ttl: inherit");
+		else
+			printf("ttl: %d", gd->ifla_ttl);
+
+		if (gd->ifla_dscp_inherit)
+			printf(", dscp: inherit");
+
+		if (gd->ifla_df == IFLA_GENEVE_DF_INHERIT)
+			printf(", df: inherit");
+		else if (gd->ifla_df == IFLA_GENEVE_DF_SET)
+			printf(", df: set");
+		else if (gd->ifla_df == IFLA_GENEVE_DF_UNSET)
+			printf(", df: unset");
+
+		if (gd->ifla_external)
+			printf(", externally controlled");
+
+		if (gd->ifla_proto == GENEVE_PROTO_ETHER) {
+			printf("\n\t\tftable mode: %slearning",
+			    gd->ifla_ftable_learn ? "" : "no");
+			printf(", count: %u, max: %u, timeout: %u",
+			    gd->ifla_ftable_count,
+			    gd->ifla_ftable_max,
+			    gd->ifla_ftable_timeout);
+			printf(", nospace: %u",
+			    gd->ifla_ftable_nospace);
+		}
+
+		/* uint64_t is not unsigned long everywhere; print via %ju. */
+		printf("\n\t\tstats: tso %ju, txcsum %ju, rxcsum %ju",
+		    (uintmax_t)gd->ifla_stats_tso,
+		    (uintmax_t)gd->ifla_stats_txcsum,
+		    (uintmax_t)gd->ifla_stats_rxcsum);
+	}
+
+	putchar('\n');
+}
+
+
+/*
+ * Create the geneve interface named in ifr->ifr_name through netlink.
+ * The request is an RTM_NEWLINK with NLM_F_CREATE | NLM_F_EXCL whose
+ * link-info carries the kind "geneve" and the inner protocol currently
+ * recorded in gnvp.
+ */
+static void
+geneve_create_nl(if_ctx *ctx, struct ifreq *ifr)
+{
+	struct snl_writer nw = {};
+	struct nlmsghdr *req;
+	int linkinfo, infodata;
+
+	snl_init_writer(ctx->io_ss, &nw);
+
+	req = snl_create_msg_request(&nw, RTM_NEWLINK);
+	req->nlmsg_flags |= (NLM_F_CREATE | NLM_F_EXCL);
+
+	snl_reserve_msg_object(&nw, struct ifinfomsg);
+	snl_add_msg_attr_string(&nw, IFLA_IFNAME, ifr->ifr_name);
+
+	linkinfo = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+	infodata = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+	snl_add_msg_attr_u16(&nw, IFLA_GENEVE_PROTOCOL, gnvp.ifla_proto);
+	snl_end_attr_nested(&nw, infodata);
+	snl_end_attr_nested(&nw, linkinfo);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "geneveid": send the VNI to the kernel as IFLA_GENEVE_ID.
+ * Exits when the argument is not a number or is not below GENEVE_VNI_MAX.
+ */
+static void
+setgeneve_vni_nl(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	struct snl_writer nw = {};
+	u_long vni;
+	int linkinfo, infodata;
+
+	if (get_val(arg, &vni) < 0)
+		errx(1, "invalid network identifier: %s", arg);
+	if (vni >= GENEVE_VNI_MAX)
+		errx(1, "invalid network identifier: %s", arg);
+
+	geneve_nl_init(ctx, &nw, 0);
+
+	linkinfo = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+	infodata = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+	snl_add_msg_attr_u32(&nw, IFLA_GENEVE_ID, vni);
+	snl_end_attr_nested(&nw, infodata);
+	snl_end_attr_nested(&nw, linkinfo);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "genevelocal": resolve `addr' with getaddrinfo(3) and send
+ * the first result to the kernel as IFLA_GENEVE_LOCAL.
+ * Exits when the string cannot be resolved or names a multicast address.
+ */
+static void
+setgeneve_local_nl(if_ctx *ctx, const char *addr, int dummy __unused)
+{
+	struct snl_writer nw = {};
+	int off, off2;
+	struct addrinfo *ai;
+	const struct sockaddr *sa;
+	int error;
+
+	if ((error = getaddrinfo(addr, NULL, NULL, &ai)) != 0)
+		errx(1, "error in parsing local address string: %s",
+		    gai_strerror(error));
+
+	if (is_multicast(ai))
+		errx(1, "local address cannot be multicast");
+
+	geneve_nl_init(ctx, &nw, 0);
+	off = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+
+	off2 = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+
+	sa = ai->ai_addr;
+	snl_add_msg_attr_ip(&nw, IFLA_GENEVE_LOCAL, sa);
+
+	snl_end_attr_nested(&nw, off2);
+	snl_end_attr_nested(&nw, off);
+
+	geneve_nl_fini(ctx, &nw);
+
+	/* The request has been sent; release the resolver result. */
+	freeaddrinfo(ai);
+}
+
+/*
+ * Handler for "geneveremote": resolve `addr' with getaddrinfo(3) and send
+ * the first result to the kernel as IFLA_GENEVE_REMOTE.
+ * Exits when the string cannot be resolved or names a multicast address
+ * (multicast peers are configured with "genevegroup").
+ */
+static void
+setgeneve_remote_nl(if_ctx *ctx, const char *addr, int dummy __unused)
+{
+	struct snl_writer nw = {};
+	int off, off2;
+	struct addrinfo *ai;
+	const struct sockaddr *sa;
+	int error;
+
+	if ((error = getaddrinfo(addr, NULL, NULL, &ai)) != 0)
+		errx(1, "error in parsing remote address string: %s",
+		    gai_strerror(error));
+
+	if (is_multicast(ai))
+		errx(1, "remote address cannot be multicast");
+
+	geneve_nl_init(ctx, &nw, 0);
+	off = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+
+	off2 = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+
+	sa = ai->ai_addr;
+	snl_add_msg_attr_ip(&nw, IFLA_GENEVE_REMOTE, sa);
+
+	snl_end_attr_nested(&nw, off2);
+	snl_end_attr_nested(&nw, off);
+
+	geneve_nl_fini(ctx, &nw);
+
+	/* The request has been sent; release the resolver result. */
+	freeaddrinfo(ai);
+}
+
+/*
+ * Handler for "genevegroup": resolve `addr' with getaddrinfo(3) and send
+ * the first result to the kernel as IFLA_GENEVE_REMOTE, the same attribute
+ * used for a unicast peer.
+ * Exits when the string cannot be resolved or is not a multicast address.
+ */
+static void
+setgeneve_group_nl(if_ctx *ctx, const char *addr, int dummy __unused)
+{
+	struct snl_writer nw = {};
+	int off, off2;
+	struct addrinfo *ai;
+	struct sockaddr *sa;
+	int error;
+
+	if ((error = getaddrinfo(addr, NULL, NULL, &ai)) != 0)
+		errx(1, "error in parsing group address string: %s",
+		    gai_strerror(error));
+
+	if (!is_multicast(ai))
+		errx(1, "group address must be multicast");
+
+	geneve_nl_init(ctx, &nw, 0);
+	off = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+
+	off2 = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+
+	sa = ai->ai_addr;
+	snl_add_msg_attr_ip(&nw, IFLA_GENEVE_REMOTE, sa);
+
+	snl_end_attr_nested(&nw, off2);
+	snl_end_attr_nested(&nw, off);
+
+	geneve_nl_fini(ctx, &nw);
+
+	/* The request has been sent; release the resolver result. */
+	freeaddrinfo(ai);
+}
+
+
+/*
+ * Handler for "genevelocalport": send the port to the kernel as
+ * IFLA_GENEVE_LOCAL_PORT.
+ * Exits when the argument is not a number or does not fit in 16 bits.
+ */
+static void
+setgeneve_local_port_nl(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	struct snl_writer nw = {};
+	int off, off2;
+	u_long val;
+
+	/* UINT16_MAX (65535) itself is a valid port number. */
+	if (get_val(arg, &val) < 0 || val > UINT16_MAX)
+		errx(1, "invalid local port: %s", arg);
+
+	geneve_nl_init(ctx, &nw, 0);
+	off = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+
+	off2 = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+
+	snl_add_msg_attr_u16(&nw, IFLA_GENEVE_LOCAL_PORT, val);
+
+	snl_end_attr_nested(&nw, off2);
+	snl_end_attr_nested(&nw, off);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "geneveremoteport": send the port to the kernel as
+ * IFLA_GENEVE_PORT.
+ * Exits when the argument is not a number or does not fit in 16 bits.
+ */
+static void
+setgeneve_remote_port_nl(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	struct snl_writer nw = {};
+	int off, off2;
+	u_long val;
+
+	/* UINT16_MAX (65535) itself is a valid port number. */
+	if (get_val(arg, &val) < 0 || val > UINT16_MAX)
+		errx(1, "invalid remote port: %s", arg);
+
+	geneve_nl_init(ctx, &nw, 0);
+	off = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+
+	off2 = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+
+	snl_add_msg_attr_u16(&nw, IFLA_GENEVE_PORT, val);
+
+	snl_end_attr_nested(&nw, off2);
+	snl_end_attr_nested(&nw, off);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "geneveportrange low high": send the source-port range to
+ * the kernel as IFLA_GENEVE_PORT_RANGE.
+ * Exits when either bound is not a number, does not fit in 16 bits, or
+ * when the upper bound is below the lower bound.
+ */
+static void
+setgeneve_port_range_nl(if_ctx *ctx, const char *arg1, const char *arg2)
+{
+	struct snl_writer nw = {};
+	int off, off2;
+	u_long min, max;
+
+	/* UINT16_MAX (65535) itself is a valid port number. */
+	if (get_val(arg1, &min) < 0 || min > UINT16_MAX)
+		errx(1, "invalid port range minimum: %s", arg1);
+	if (get_val(arg2, &max) < 0 || max > UINT16_MAX)
+		errx(1, "invalid port range maximum: %s", arg2);
+	if (max < min)
+		errx(1, "invalid port range");
+
+	const struct ifla_geneve_port_range port_range = {
+		.low = min,
+		.high = max
+	};
+
+	geneve_nl_init(ctx, &nw, 0);
+	off = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+
+	off2 = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+
+	snl_add_msg_attr(&nw, IFLA_GENEVE_PORT_RANGE,
+	    sizeof(port_range), (const void *)&port_range);
+
+	snl_end_attr_nested(&nw, off2);
+	snl_end_attr_nested(&nw, off);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "genevetimeout": send the forwarding-table timeout to the
+ * kernel as IFLA_GENEVE_FTABLE_TIMEOUT.
+ * Exits when the argument is not a number or does not fit in 32 bits.
+ */
+static void
+setgeneve_timeout_nl(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	struct snl_writer nw = {};
+	int off, off2;
+	u_long val;
+
+	/*
+	 * Compare against UINT32_MAX directly: a mask written as
+	 * ~0xFFFFFFFF is computed in unsigned int, evaluates to zero and
+	 * would let every value through.
+	 */
+	if (get_val(arg, &val) < 0 || val > UINT32_MAX)
+		errx(1, "invalid timeout value: %s", arg);
+
+	geneve_nl_init(ctx, &nw, 0);
+	off = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+
+	off2 = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+
+	snl_add_msg_attr_u32(&nw, IFLA_GENEVE_FTABLE_TIMEOUT, val);
+
+	snl_end_attr_nested(&nw, off2);
+	snl_end_attr_nested(&nw, off);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "genevemaxaddr": send the forwarding-table size limit to the
+ * kernel as IFLA_GENEVE_FTABLE_MAX.
+ * Exits when the argument is not a number or does not fit in 32 bits.
+ */
+static void
+setgeneve_maxaddr_nl(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	struct snl_writer nw = {};
+	int off, off2;
+	u_long val;
+
+	/*
+	 * Compare against UINT32_MAX directly: a mask written as
+	 * ~0xFFFFFFFF is computed in unsigned int, evaluates to zero and
+	 * would let every value through.
+	 */
+	if (get_val(arg, &val) < 0 || val > UINT32_MAX)
+		errx(1, "invalid maxaddr value: %s", arg);
+
+	geneve_nl_init(ctx, &nw, 0);
+	off = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+
+	off2 = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+
+	snl_add_msg_attr_u32(&nw, IFLA_GENEVE_FTABLE_MAX, val);
+
+	snl_end_attr_nested(&nw, off2);
+	snl_end_attr_nested(&nw, off);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "genevedev": send the interface name given in `arg' to the
+ * kernel as IFLA_GENEVE_MC_IFNAME.  The name is passed through unchecked.
+ */
+static void
+setgeneve_dev_nl(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	struct snl_writer nw = {};
+	int linkinfo, infodata;
+
+	geneve_nl_init(ctx, &nw, 0);
+
+	linkinfo = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+	infodata = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+	snl_add_msg_attr_string(&nw, IFLA_GENEVE_MC_IFNAME, arg);
+	snl_end_attr_nested(&nw, infodata);
+	snl_end_attr_nested(&nw, linkinfo);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "genevettl": `arg' is either a number in 0-255, sent as
+ * IFLA_GENEVE_TTL with IFLA_GENEVE_TTL_INHERIT cleared, or the keyword
+ * "inherit", sent as IFLA_GENEVE_TTL_INHERIT set.
+ * Exits on any other argument.
+ */
+static void
+setgeneve_ttl_nl(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	struct snl_writer nw = {};
+	int off, off2;
+	u_long val;
+
+	geneve_nl_init(ctx, &nw, 0);
+	off = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+
+	off2 = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+	/* The attribute is 8 bits wide, so 256 would wrap around to 0. */
+	if (get_val(arg, &val) == 0 && val <= UINT8_MAX) {
+		snl_add_msg_attr_u8(&nw, IFLA_GENEVE_TTL, val);
+		snl_add_msg_attr_bool(&nw, IFLA_GENEVE_TTL_INHERIT, false);
+	} else if (strcmp(arg, "inherit") == 0) {
+		snl_add_msg_attr_bool(&nw, IFLA_GENEVE_TTL_INHERIT, true);
+	} else
+		errx(1, "invalid TTL value: %s", arg);
+
+	snl_end_attr_nested(&nw, off2);
+	snl_end_attr_nested(&nw, off);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "genevedf": translate the keyword with get_df() and send the
+ * result to the kernel as IFLA_GENEVE_DF.  Exits on an unknown keyword.
+ */
+static void
+setgeneve_df_nl(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	struct snl_writer nw = {};
+	enum ifla_geneve_df policy;
+	int linkinfo, infodata;
+
+	if (get_df(arg, &policy) < 0)
+		errx(1, "invalid df value: %s", arg);
+
+	geneve_nl_init(ctx, &nw, 0);
+
+	linkinfo = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+	infodata = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+	snl_add_msg_attr_u8(&nw, IFLA_GENEVE_DF, policy);
+	snl_end_attr_nested(&nw, infodata);
+	snl_end_attr_nested(&nw, linkinfo);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "genevedscpinherit" / "-genevedscpinherit": send
+ * IFLA_GENEVE_DSCP_INHERIT as true when `d' is non-zero, false otherwise.
+ */
+static void
+setgeneve_inherit_dscp_nl(if_ctx *ctx, const char *arg __unused, int d)
+{
+	struct snl_writer nw = {};
+	int linkinfo, infodata;
+	bool enable = (d != 0);
+
+	geneve_nl_init(ctx, &nw, 0);
+
+	linkinfo = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+	infodata = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+	snl_add_msg_attr_bool(&nw, IFLA_GENEVE_DSCP_INHERIT, enable);
+	snl_end_attr_nested(&nw, infodata);
+	snl_end_attr_nested(&nw, linkinfo);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "genevelearn" / "-genevelearn": send
+ * IFLA_GENEVE_FTABLE_LEARN as true when `d' is non-zero, false otherwise.
+ */
+static void
+setgeneve_learn_nl(if_ctx *ctx, const char *arg __unused, int d)
+{
+	struct snl_writer nw = {};
+	int linkinfo, infodata;
+	bool enable = (d != 0);
+
+	geneve_nl_init(ctx, &nw, 0);
+
+	linkinfo = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+	infodata = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+	snl_add_msg_attr_bool(&nw, IFLA_GENEVE_FTABLE_LEARN, enable);
+	snl_end_attr_nested(&nw, infodata);
+	snl_end_attr_nested(&nw, linkinfo);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "geneveflush" / "geneveflushall": send
+ * IFLA_GENEVE_FTABLE_FLUSH as true when `d' is non-zero, false otherwise.
+ * NOTE(review): how the kernel tells "flush dynamic entries" from "flush
+ * everything" by this boolean is not visible here -- confirm against the
+ * driver.
+ */
+static void
+setgeneve_flush_nl(if_ctx *ctx, const char *val __unused, int d)
+{
+	struct snl_writer nw = {};
+	int linkinfo, infodata;
+	bool flag = (d != 0);
+
+	geneve_nl_init(ctx, &nw, 0);
+
+	linkinfo = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+	infodata = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+	snl_add_msg_attr_bool(&nw, IFLA_GENEVE_FTABLE_FLUSH, flag);
+	snl_end_attr_nested(&nw, infodata);
+	snl_end_attr_nested(&nw, linkinfo);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Handler for "geneveexternal" / "-geneveexternal": send
+ * IFLA_GENEVE_COLLECT_METADATA as true when `d' is non-zero, false
+ * otherwise.
+ */
+static void
+setgeneve_external_nl(if_ctx *ctx, const char *val __unused, int d)
+{
+	struct snl_writer nw = {};
+	int linkinfo, infodata;
+	bool enable = (d != 0);
+
+	geneve_nl_init(ctx, &nw, 0);
+
+	linkinfo = snl_add_msg_attr_nested(&nw, IFLA_LINKINFO);
+	snl_add_msg_attr_string(&nw, IFLA_INFO_KIND, "geneve");
+	infodata = snl_add_msg_attr_nested(&nw, IFLA_INFO_DATA);
+	snl_add_msg_attr_bool(&nw, IFLA_GENEVE_COLLECT_METADATA, enable);
+	snl_end_attr_nested(&nw, infodata);
+	snl_end_attr_nested(&nw, linkinfo);
+
+	geneve_nl_fini(ctx, &nw);
+}
+
+/*
+ * Command table for the netlink build: maps each geneve keyword to its
+ * handler.  "genevemode" is registered as a clone (creation-time) command;
+ * the hardware checksum/TSO keywords are routed to setifcapnv with the
+ * capability name as string argument.
+ * NOTE(review): the integer in each DEF_CMD entry is presumably what the
+ * handler receives as its `d' parameter.  "geneveflush" passes 1 and
+ * "geneveflushall" passes 0 -- confirm this matches the kernel's reading of
+ * IFLA_GENEVE_FTABLE_FLUSH.
+ */
+static struct cmd geneve_cmds[] = {
+
+ DEF_CLONE_CMD_ARG("genevemode", setgeneve_mode_clone),
+
+ DEF_CMD_ARG("geneveid", setgeneve_vni_nl),
+ DEF_CMD_ARG("genevelocal", setgeneve_local_nl),
+ DEF_CMD_ARG("geneveremote", setgeneve_remote_nl),
+ DEF_CMD_ARG("genevegroup", setgeneve_group_nl),
+ DEF_CMD_ARG("genevelocalport", setgeneve_local_port_nl),
+ DEF_CMD_ARG("geneveremoteport", setgeneve_remote_port_nl),
+ DEF_CMD_ARG2("geneveportrange", setgeneve_port_range_nl),
+ DEF_CMD_ARG("genevetimeout", setgeneve_timeout_nl),
+ DEF_CMD_ARG("genevemaxaddr", setgeneve_maxaddr_nl),
+ DEF_CMD_ARG("genevedev", setgeneve_dev_nl),
+ DEF_CMD_ARG("genevettl", setgeneve_ttl_nl),
+ DEF_CMD_ARG("genevedf", setgeneve_df_nl),
+ DEF_CMD("genevedscpinherit", 1, setgeneve_inherit_dscp_nl),
+ DEF_CMD("-genevedscpinherit", 0, setgeneve_inherit_dscp_nl),
+ DEF_CMD("genevelearn", 1, setgeneve_learn_nl),
+ DEF_CMD("-genevelearn", 0, setgeneve_learn_nl),
+ DEF_CMD("geneveflush", 1, setgeneve_flush_nl),
+ DEF_CMD("geneveflushall", 0, setgeneve_flush_nl),
+ DEF_CMD("geneveexternal", 1, setgeneve_external_nl),
+ DEF_CMD("-geneveexternal", 0, setgeneve_external_nl),
+
+ DEF_CMD_SARG("genevehwcsum", IFCAP2_GENEVE_HWCSUM_NAME,
+ setifcapnv),
+ DEF_CMD_SARG("-genevehwcsum", "-"IFCAP2_GENEVE_HWCSUM_NAME,
+ setifcapnv),
+ DEF_CMD_SARG("genevehwtso", IFCAP2_GENEVE_HWTSO_NAME,
+ setifcapnv),
+ DEF_CMD_SARG("-genevehwtso", "-"IFCAP2_GENEVE_HWTSO_NAME,
+ setifcapnv),
+};
+
+#else
+
+/*
+ * Pack *nvl and pass it to the driver with SIOCSDRVSPEC.
+ *
+ * The nvlist is always consumed: it is destroyed and *nvl is reset to NULL
+ * before returning, so callers must neither use nor destroy it afterwards.
+ *
+ * Returns 0 on success and -1 on failure, with errno describing the
+ * failure so that callers can report it with err(3).
+ */
+static int
+geneve_set_ioctl(if_ctx *ctx, nvlist_t **nvl)
+{
+	struct ifreq ifr = {};
+	void *data;
+	size_t nvlen;
+	int error, serrno;
+
+	data = nvlist_pack(*nvl, &nvlen);
+	serrno = errno;
+	nvlist_destroy(*nvl);
+	/* Do not leave the caller with a dangling pointer. */
+	*nvl = NULL;
+	if (data == NULL) {
+		errno = serrno;
+		return (-1);
+	}
+
+	/* The packed request must fit in the fixed-size ioctl buffer. */
+	if (nvlen > IFR_CAP_NV_MAXBUFSIZE) {
+		free(data);
+		errno = E2BIG;
+		return (-1);
+	}
+
+	ifr.ifr_cap_nv.buffer = malloc(IFR_CAP_NV_MAXBUFSIZE);
+	if (ifr.ifr_cap_nv.buffer == NULL) {
+		free(data);
+		errno = ENOMEM;
+		return (-1);
+	}
+	ifr.ifr_cap_nv.buf_length = IFR_CAP_NV_MAXBUFSIZE;
+	memcpy(ifr.ifr_cap_nv.buffer, data, nvlen);
+	ifr.ifr_cap_nv.length = nvlen;
+	free(data);
+
+	error = ioctl_ctx_ifr(ctx, SIOCSDRVSPEC, &ifr);
+
+	/* Release the buffer on success too, preserving the ioctl's errno. */
+	serrno = errno;
+	free(ifr.ifr_cap_nv.buffer);
+	errno = serrno;
+
+	return (error != 0 ? -1 : 0);
+}
+
+/*
+ * Fetch the driver state with SIOCGDRVSPEC and unpack it into a newly
+ * allocated nvlist stored in *nvl.  Any previous value of *nvl is
+ * overwritten without being released.
+ *
+ * Returns 0 on success, -1 if the buffer cannot be allocated or the ioctl
+ * fails, and EIO if the returned buffer cannot be unpacked (in which case
+ * *nvl is NULL).  On success the caller owns *nvl.
+ */
+static int
+geneve_get_ioctl(if_ctx *ctx, nvlist_t **nvl)
+{
+	struct ifreq ifr = {};
+	int error = 0;
+
+	ifr.ifr_cap_nv.buffer = malloc(IFR_CAP_NV_MAXBUFSIZE);
+	if (ifr.ifr_cap_nv.buffer == NULL)
+		return (-1);
+	ifr.ifr_cap_nv.buf_length = IFR_CAP_NV_MAXBUFSIZE;
+
+	if (ioctl_ctx_ifr(ctx, SIOCGDRVSPEC, &ifr) != 0) {
+		error = -1;
+	} else {
+		*nvl = nvlist_unpack(ifr.ifr_cap_nv.buffer,
+		    ifr.ifr_cap_nv.length, 0);
+		if (*nvl == NULL)
+			error = EIO;
+	}
+
+	/* The unpacked nvlist holds its own copy; the raw buffer can go. */
+	free(ifr.ifr_cap_nv.buffer);
+	return (error);
+}
+
+/*
+ * Sanity-check an nvlist received from the driver.
+ *
+ * Returns -1 when any of the "vni", "local_sa", "remote_sa" or "proto"
+ * entries is missing and 0 otherwise.  Exits via errx(3) when the local
+ * and remote socket addresses belong to different address families.
+ */
+static int
+geneve_check_nvl(nvlist_t *nvl)
+{
+	const struct sockaddr *local, *remote;
+	size_t local_len, remote_len;
+
+	if (!nvlist_exists_number(nvl, "vni") ||
+	    !nvlist_exists_binary(nvl, "local_sa") ||
+	    !nvlist_exists_binary(nvl, "remote_sa") ||
+	    !nvlist_exists_number(nvl, "proto"))
+		return (-1);
+
+	local = nvlist_get_binary(nvl, "local_sa", &local_len);
+	remote = nvlist_get_binary(nvl, "remote_sa", &remote_len);
+	if (local->sa_family != remote->sa_family)
+		errx(1, "cannot mix IPv4 and IPv6 addresses");
+
+	return (0);
+}
+
+/*
+ * Print the geneve-specific part of the interface status (ioctl build).
+ *
+ * Fetches the driver state as an nvlist and prints the operating mode,
+ * the VNI and the local/remote (or group) endpoints.  In verbose mode it
+ * also prints the port range, TTL, DSCP, DF and external settings and,
+ * for L2 interfaces, the forwarding-table parameters.  Prints nothing if
+ * the state cannot be fetched or lacks the mandatory entries.
+ */
+static void
+geneve_status(if_ctx *ctx)
+{
+	nvlist_t *nvl = NULL;
+	char src[NI_MAXHOST], dst[NI_MAXHOST];
+	char srcport[NI_MAXSERV], dstport[NI_MAXSERV];
+	struct sockaddr *lsa = NULL, *rsa = NULL;
+	size_t llen, rlen;
+	int vni, mc = 0, proto;
+	bool ipv6 = false;
+	enum ifla_geneve_df df;
+
+	/*
+	 * geneve_get_ioctl() stores a freshly unpacked nvlist in nvl, so
+	 * pre-creating one here would only leak it.
+	 */
+	if (geneve_get_ioctl(ctx, &nvl) != 0)
+		return;
+
+	if (geneve_check_nvl(nvl) != 0)
+		goto out;
+
+	proto = nvlist_get_number(nvl, "proto");
+	printf("\tgeneve mode: ");
+	switch (proto) {
+	case GENEVE_PROTO_INHERIT:
+		printf("l3");
+		break;
+	case GENEVE_PROTO_ETHER:
+	default:
+		printf("l2");
+		break;
+	}
+
+	vni = nvlist_get_number(nvl, "vni");
+	printf("\n\tgeneve config:\n");
+	/* Just report nothing if the network identity isn't set yet. */
+	if (vni >= GENEVE_VNI_MAX) {
+		printf("\t\tvirtual network identifier (vni): not configured\n");
+		goto out;
+	}
+
+	/* Ownership of both buffers moves to us; they are freed at `out'. */
+	lsa = nvlist_take_binary(nvl, "local_sa", &llen);
+	rsa = nvlist_take_binary(nvl, "remote_sa", &rlen);
+
+	if (getnameinfo(lsa, lsa->sa_len, src, sizeof(src),
+	    srcport, sizeof(srcport), NI_NUMERICHOST | NI_NUMERICSERV) != 0)
+		src[0] = srcport[0] = '\0';
+	if (getnameinfo(rsa, rsa->sa_len, dst, sizeof(dst),
+	    dstport, sizeof(dstport), NI_NUMERICHOST | NI_NUMERICSERV) != 0)
+		dst[0] = dstport[0] = '\0';
+	else {
+		ipv6 = rsa->sa_family == AF_INET6;
+		if (!ipv6) {
+			struct sockaddr_in *sin = satosin(rsa);
+			mc = IN_MULTICAST(ntohl(sin->sin_addr.s_addr));
+		} else {
+			struct sockaddr_in6 *sin6 = satosin6(rsa);
+			mc = IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr);
+		}
+	}
+
+	printf("\t\tvirtual network identifier (vni): %d", vni);
+	if (src[0] != '\0')
+		printf("\n\t\tlocal: %s%s%s:%s", ipv6 ? "[" : "", src, ipv6 ? "]" : "",
+		    srcport);
+	if (dst[0] != '\0') {
+		printf("\n\t\t%s %s%s%s:%s", mc ? "group" : "remote", ipv6 ? "[" : "",
+		    dst, ipv6 ? "]" : "", dstport);
+		if (mc)
+			printf(", dev: %s", nvlist_get_string(nvl, "mc_ifname"));
+	}
+
+	if (ctx->args->verbose) {
+		printf("\n\t\tportrange: %u-%u",
+		    (uint16_t)nvlist_get_number(nvl, "min_port"),
+		    (uint16_t)nvlist_get_number(nvl, "max_port"));
+
+		if (nvlist_get_bool(nvl, "inherit_ttl"))
+			printf(", ttl: inherit");
+		else
+			printf(", ttl: %d", (uint8_t)nvlist_get_number(nvl, "ttl"));
+
+		if (nvlist_get_bool(nvl, "inherit_dscp"))
+			printf(", dscp: inherit");
+
+		df = nvlist_get_number(nvl, "df");
+		if (df == IFLA_GENEVE_DF_INHERIT)
+			printf(", df: inherit");
+		else if (df == IFLA_GENEVE_DF_SET)
+			printf(", df: set");
+		else if (df == IFLA_GENEVE_DF_UNSET)
+			printf(", df: unset");
+
+		if (nvlist_get_bool(nvl, "external"))
+			printf(", externally controlled");
+
+		/* The forwarding table only exists in L2 (Ethernet) mode. */
+		if (proto == GENEVE_PROTO_ETHER) {
+			printf("\n\t\tftable mode: %slearning",
+			    nvlist_get_bool(nvl, "learn") ? "" : "no");
+			printf(", count: %u, max: %u, timeout: %u",
+			    (uint32_t)nvlist_get_number(nvl, "ftable_cnt"),
+			    (uint32_t)nvlist_get_number(nvl, "ftable_max"),
+			    (uint32_t)nvlist_get_number(nvl, "ftable_timeout"));
+		}
+	}
+
+	putchar('\n');
+
+out:
+	free(lsa);
+	free(rsa);
+	nvlist_destroy(nvl);
+}
+
+/*
+ * Clone-create callback for the ioctl build: point the request's data at
+ * the file-level `gnvp' object and let ifcreate_ioctl() do the rest.
+ */
+static void
+geneve_create(if_ctx *ctx, struct ifreq *ifr)
+{
+	caddr_t params = (caddr_t)&gnvp;
+
+	ifr->ifr_data = params;
+	ifcreate_ioctl(ctx, ifr);
+}
+
+/*
+ * Handler for "geneveid": parse the VNI from arg, reject anything that is
+ * not a number below GENEVE_VNI_MAX, and send it to the driver as the
+ * "vni" entry.  Exits on any failure.
+ */
+static void
+setgeneve_vni(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	nvlist_t *req;
+	u_long vni;
+
+	if (!(get_val(arg, &vni) >= 0 && vni < GENEVE_VNI_MAX))
+		errx(1, "invalid network identifier: %s", arg);
+
+	if ((req = nvlist_create(0)) == NULL)
+		err(1, "no memory to set vni");
+	nvlist_add_number(req, "vni", vni);
+
+	if (geneve_set_ioctl(ctx, &req) != 0)
+		err(1, "GENEVE_CMD_SET_VNI");
+}
+
+/*
+ * Handler for "genevelocal": resolve addr, refuse multicast addresses and
+ * send the resulting IPv4 or IPv6 socket address to the driver as the
+ * "local_sa" entry.  Exits on any failure.
+ */
+static void
+setgeneve_local(if_ctx *ctx, const char *addr, int dummy __unused)
+{
+	struct addrinfo *res;
+	nvlist_t *req;
+	int gai_error;
+
+	gai_error = getaddrinfo(addr, NULL, NULL, &res);
+	if (gai_error != 0)
+		errx(1, "error in parsing local address string: %s",
+		    gai_strerror(gai_error));
+
+	if (is_multicast(res))
+		errx(1, "local address cannot be multicast");
+
+	if ((req = nvlist_create(0)) == NULL)
+		err(1, "no memory to set local address");
+
+	/* Only address families compiled into ifconfig are accepted. */
+	switch (res->ai_family) {
+#ifdef INET
+	case AF_INET: {
+		struct sockaddr_in *in4 = satosin(res->ai_addr);
+
+		if (IN_MULTICAST(ntohl(in4->sin_addr.s_addr)))
+			errx(1, "local address cannot be multicast");
+
+		nvlist_add_binary(req, "local_sa", in4,
+		    sizeof(struct sockaddr_in));
+		break;
+	}
+#endif
+#ifdef INET6
+	case AF_INET6: {
+		struct sockaddr_in6 *in6 = satosin6(res->ai_addr);
+
+		if (IN6_IS_ADDR_MULTICAST(&in6->sin6_addr))
+			errx(1, "local address cannot be multicast");
+
+		nvlist_add_binary(req, "local_sa", in6,
+		    sizeof(struct sockaddr_in6));
+		break;
+	}
+#endif
+	default:
+		errx(1, "local address %s not supported", addr);
+	}
+
+	freeaddrinfo(res);
+
+	if (geneve_set_ioctl(ctx, &req) != 0)
+		err(1, "GENEVE_CMD_SET_LOCAL_ADDR");
+}
+
+/*
+ * Handler for "geneveremote": resolve addr, refuse multicast addresses
+ * and send the resulting IPv4 or IPv6 socket address to the driver as the
+ * "remote_sa" entry.  Exits on any failure.
+ */
+static void
+setgeneve_remote(if_ctx *ctx, const char *addr, int dummy __unused)
+{
+	struct addrinfo *res;
+	nvlist_t *req;
+	int gai_error;
+
+	gai_error = getaddrinfo(addr, NULL, NULL, &res);
+	if (gai_error != 0)
+		errx(1, "error in parsing remote address string: %s",
+		    gai_strerror(gai_error));
+
+	if (is_multicast(res))
+		errx(1, "remote address cannot be multicast");
+
+	if ((req = nvlist_create(0)) == NULL)
+		err(1, "no memory to set remote address");
+
+	/* Only address families compiled into ifconfig are accepted. */
+	switch (res->ai_family) {
+#ifdef INET
+	case AF_INET: {
+		struct sockaddr_in *in4 = satosin(res->ai_addr);
+
+		if (IN_MULTICAST(ntohl(in4->sin_addr.s_addr)))
+			errx(1, "remote address cannot be multicast");
+
+		nvlist_add_binary(req, "remote_sa", in4,
+		    sizeof(struct sockaddr_in));
+		break;
+	}
+#endif
+#ifdef INET6
+	case AF_INET6: {
+		struct sockaddr_in6 *in6 = satosin6(res->ai_addr);
+
+		if (IN6_IS_ADDR_MULTICAST(&in6->sin6_addr))
+			errx(1, "remote address cannot be multicast");
+
+		nvlist_add_binary(req, "remote_sa", in6,
+		    sizeof(struct sockaddr_in6));
+		break;
+	}
+#endif
+	default:
+		errx(1, "remote address %s not supported", addr);
+	}
+
+	freeaddrinfo(res);
+
+	if (geneve_set_ioctl(ctx, &req) != 0)
+		err(1, "GENEVE_CMD_SET_REMOTE_ADDR");
+}
+
+/*
+ * Handler for "genevegroup": resolve addr, require it to be a multicast
+ * address and send it to the driver as the "remote_sa" entry.  Exits on
+ * any failure.
+ */
+static void
+setgeneve_group(if_ctx *ctx, const char *addr, int dummy __unused)
+{
+	nvlist_t *nvl;
+	struct addrinfo *ai;
+#if (defined INET || defined INET6)
+	struct sockaddr *sa;
+#endif
+	int error;
+
+	if ((error = getaddrinfo(addr, NULL, NULL, &ai)) != 0)
+		errx(1, "error in parsing group address string: %s",
+		    gai_strerror(error));
+
+	if (!is_multicast(ai))
+		errx(1, "group address must be multicast");
+
+	nvl = nvlist_create(0);
+	if (nvl == NULL)
+		err(1, "no memory to set group");
+
+#if (defined INET || defined INET6)
+	sa = ai->ai_addr;
+#endif
+
+	switch (ai->ai_family) {
+#ifdef INET
+	case AF_INET: {
+		struct sockaddr_in *sin = satosin(sa);
+
+		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+			errx(1, "group address must be multicast");
+
+		nvlist_add_binary(nvl, "remote_sa", sin,
+		    sizeof(struct sockaddr_in));
+		break;
+	}
+#endif
+#ifdef INET6
+	case AF_INET6: {
+		struct sockaddr_in6 *sin6 = satosin6(sa);
+
+		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
+			errx(1, "group address must be multicast");
+
+		nvlist_add_binary(nvl, "remote_sa", sin6,
+		    sizeof(struct sockaddr_in6));
+		break;
+	}
+#endif
+	default:
+		errx(1, "group address %s not supported", addr);
+	}
+
+	freeaddrinfo(ai);
+
+	/*
+	 * geneve_set_ioctl() destroys the nvlist itself; destroying it
+	 * again here would be a double free.
+	 */
+	if (geneve_set_ioctl(ctx, &nvl) != 0)
+		err(1, "GENEVE_CMD_SET_REMOTE_ADDR");
+}
+
+/*
+ * Handler for "genevelocalport": parse the port from arg, reject values
+ * that are not numbers below UINT16_MAX, and send it to the driver as the
+ * "local_port" entry.  Exits on any failure.
+ */
+static void
+setgeneve_local_port(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	nvlist_t *req;
+	u_long port;
+
+	if (!(get_val(arg, &port) >= 0 && port < UINT16_MAX))
+		errx(1, "invalid local port: %s", arg);
+
+	if ((req = nvlist_create(0)) == NULL)
+		err(1, "no memory to set local port");
+	nvlist_add_number(req, "local_port", port);
+
+	if (geneve_set_ioctl(ctx, &req) != 0)
+		err(1, "GENEVE_CMD_SET_LOCAL_PORT");
+}
+
+/*
+ * Handler for "geneveremoteport": parse the port from arg, reject values
+ * that are not numbers below UINT16_MAX, and send it to the driver as the
+ * "remote_port" entry.  Exits on any failure.
+ */
+static void
+setgeneve_remote_port(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	nvlist_t *req;
+	u_long port;
+
+	if (!(get_val(arg, &port) >= 0 && port < UINT16_MAX))
+		errx(1, "invalid remote port: %s", arg);
+
+	if ((req = nvlist_create(0)) == NULL)
+		err(1, "no memory to set remote port");
+	nvlist_add_number(req, "remote_port", port);
+
+	if (geneve_set_ioctl(ctx, &req) != 0)
+		err(1, "GENEVE_CMD_SET_REMOTE_PORT");
+}
+
+/*
+ * Handler for "geneveportrange": parse the two bounds, require each to be
+ * a number below UINT16_MAX and the maximum not to be smaller than the
+ * minimum, then send them to the driver as "min_port" and "max_port".
+ * Exits on any failure.
+ */
+static void
+setgeneve_port_range(if_ctx *ctx, const char *arg1, const char *arg2)
+{
+	nvlist_t *req;
+	u_long lo, hi;
+
+	if (!(get_val(arg1, &lo) >= 0 && lo < UINT16_MAX))
+		errx(1, "invalid port range minimum: %s", arg1);
+	if (!(get_val(arg2, &hi) >= 0 && hi < UINT16_MAX))
+		errx(1, "invalid port range maximum: %s", arg2);
+	if (lo > hi)
+		errx(1, "invalid port range");
+
+	if ((req = nvlist_create(0)) == NULL)
+		err(1, "no memory to set port range");
+	nvlist_add_number(req, "min_port", lo);
+	nvlist_add_number(req, "max_port", hi);
+
+	if (geneve_set_ioctl(ctx, &req) != 0)
+		err(1, "GENEVE_CMD_SET_PORT_RANGE");
+}
+
+/*
+ * Handler for "genevetimeout": parse the forwarding-table timeout from
+ * arg, require it to fit in 32 bits and send it to the driver as the
+ * "ftable_timeout" entry.  Exits on any failure.
+ */
+static void
+setgeneve_timeout(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	nvlist_t *nvl;
+	u_long val;
+
+	/*
+	 * The mask must be an unsigned long constant: a plain 0xFFFFFFFF is
+	 * an unsigned int, so its complement is 0 and the range check would
+	 * never reject anything.
+	 */
+	if (get_val(arg, &val) < 0 || (val & ~0xFFFFFFFFUL) != 0)
+		errx(1, "invalid timeout value: %s", arg);
+
+	nvl = nvlist_create(0);
+	if (nvl == NULL)
+		err(1, "no memory to set timeout");
+
+	nvlist_add_number(nvl, "ftable_timeout", val);
+
+	if (geneve_set_ioctl(ctx, &nvl) != 0)
+		err(1, "GENEVE_CMD_SET_FTABLE_TIMEOUT");
+}
+
+/*
+ * Handler for "genevemaxaddr": parse the forwarding-table size limit from
+ * arg, require it to fit in 32 bits and send it to the driver as the
+ * "ftable_max" entry.  Exits on any failure.
+ */
+static void
+setgeneve_maxaddr(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	nvlist_t *nvl;
+	u_long val;
+
+	/*
+	 * The mask must be an unsigned long constant: a plain 0xFFFFFFFF is
+	 * an unsigned int, so its complement is 0 and the range check would
+	 * never reject anything.
+	 */
+	if (get_val(arg, &val) < 0 || (val & ~0xFFFFFFFFUL) != 0)
+		errx(1, "invalid maxaddr value: %s", arg);
+
+	nvl = nvlist_create(0);
+	if (nvl == NULL)
+		err(1, "no memory to set maxaddr");
+
+	nvlist_add_number(nvl, "ftable_max", val);
+
+	if (geneve_set_ioctl(ctx, &nvl) != 0)
+		err(1, "GENEVE_CMD_SET_FTABLE_MAX");
+}
+
+/*
+ * Handler for "genevedev": send the interface name in arg to the driver
+ * as the "mc_ifname" entry.  Exits on any failure.
+ */
+static void
+setgeneve_dev(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	nvlist_t *req = nvlist_create(0);
+
+	if (req == NULL)
+		err(1, "no memory to set multicast interface");
+	nvlist_add_string(req, "mc_ifname", arg);
+
+	if (geneve_set_ioctl(ctx, &req) != 0)
+		err(1, "GENEVE_CMD_SET_MULTICAST_IF");
+}
+
+/*
+ * Handler for "genevettl": arg is either a number in the range 0-255 or
+ * the literal "inherit".
+ *
+ * A number is sent as "ttl" together with "inherit_ttl" = false;
+ * "inherit" is sent as "inherit_ttl" = true.  Anything else, including
+ * numbers that do not fit the 8-bit TTL field, is rejected.  Exits on any
+ * failure.
+ */
+static void
+setgeneve_ttl(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	nvlist_t *nvl;
+	u_long val;
+
+	if (get_val(arg, &val) >= 0 && val <= UINT8_MAX) {
+		nvl = nvlist_create(0);
+		if (nvl == NULL)
+			err(1, "no memory to set ttl");
+
+		nvlist_add_number(nvl, "ttl", val);
+		nvlist_add_bool(nvl, "inherit_ttl", false);
+	} else if (strcmp(arg, "inherit") == 0) {
+		nvl = nvlist_create(0);
+		if (nvl == NULL)
+			err(1, "no memory to set ttl");
+
+		nvlist_add_bool(nvl, "inherit_ttl", true);
+	} else
+		errx(1, "invalid TTL value: %s", arg);
+
+	if (geneve_set_ioctl(ctx, &nvl) != 0)
+		err(1, "GENEVE_CMD_SET_TTL");
+}
+
+/*
+ * Handler for "genevedf": translate arg with get_df() and send the
+ * resulting value to the driver as the "df" entry.  Exits on any failure.
+ */
+static void
+setgeneve_df(if_ctx *ctx, const char *arg, int dummy __unused)
+{
+	enum ifla_geneve_df mode;
+	nvlist_t *req;
+
+	if (get_df(arg, &mode) < 0)
+		errx(1, "invalid df value: %s", arg);
+
+	if ((req = nvlist_create(0)) == NULL)
+		err(1, "no memory to set df");
+	nvlist_add_number(req, "df", mode);
+
+	if (geneve_set_ioctl(ctx, &req) != 0)
+		err(1, "GENEVE_CMD_SET_DF");
+}
+
+/*
+ * Handler for "genevedscpinherit" / "-genevedscpinherit": send
+ * "inherit_dscp" to the driver as a boolean that is true when d is
+ * non-zero.  Exits on any failure.
+ */
+static void
+setgeneve_inherit_dscp(if_ctx *ctx, const char *arg __unused, int d)
+{
+	nvlist_t *req = nvlist_create(0);
+
+	if (req == NULL)
+		err(1, "no memory to set dscp inherit");
+	nvlist_add_bool(req, "inherit_dscp", d != 0);
+
+	if (geneve_set_ioctl(ctx, &req) != 0)
+		err(1, "GENEVE_CMD_SET_DSCP_INHERIT");
+}
+
+/*
+ * Handler for "genevelearn" / "-genevelearn": send "learn" to the driver
+ * as a boolean that is true when d is non-zero.  Exits on any failure.
+ */
+static void
+setgeneve_learn(if_ctx *ctx, const char *arg __unused, int d)
+{
+	nvlist_t *req = nvlist_create(0);
+
+	if (req == NULL)
+		err(1, "no memory to set learn");
+	nvlist_add_bool(req, "learn", d != 0);
+
+	if (geneve_set_ioctl(ctx, &req) != 0)
+		err(1, "GENEVE_CMD_SET_LEARN");
+}
+
+/*
+ * Handler for "geneveflush" (d == 1) and "geneveflushall" (d == 0): send
+ * "flush" to the driver as a boolean that is true when d is non-zero.
+ * Exits on any failure.
+ */
+static void
+setgeneve_flush(if_ctx *ctx, const char *val __unused, int d)
+{
+	nvlist_t *req = nvlist_create(0);
+
+	if (req == NULL)
+		err(1, "no memory to flush");
+	nvlist_add_bool(req, "flush", d != 0);
+
+	if (geneve_set_ioctl(ctx, &req) != 0)
+		err(1, "GENEVE_CMD_FLUSH");
+}
+
+/*
+ * Handler for "geneveexternal" / "-geneveexternal": send "external" to
+ * the driver as a boolean that is true when d is non-zero.  Exits on any
+ * failure.
+ */
+static void
+setgeneve_external(if_ctx *ctx, const char *val __unused, int d)
+{
+	nvlist_t *nvl;
+
+	nvl = nvlist_create(0);
+	if (nvl == NULL)
+		err(1, "no memory to set external");
+
+	nvlist_add_bool(nvl, "external", d != 0);
+
+	/* Report this command, not the flush command, when it fails. */
+	if (geneve_set_ioctl(ctx, &nvl) != 0)
+		err(1, "GENEVE_CMD_SET_EXTERNAL");
+}
+
+
+
+/*
+ * ifconfig(8) keyword table for geneve interfaces (ioctl build).
+ *
+ * Each entry maps a command-line keyword to its handler. Keywords that
+ * come in an on/off pair share one handler and are told apart by the
+ * integer passed as the handler's last argument (1 or 0). The
+ * hardware-offload keywords are routed to the generic setifcapnv handler
+ * with the capability name as their string argument.
+ */
+static struct cmd geneve_cmds[] = {
+
+ /* Handled while the interface is being cloned. */
+ DEF_CLONE_CMD_ARG("genevemode", setgeneve_mode_clone),
+
+ /* Keywords taking one argument (two for the port range). */
+ DEF_CMD_ARG("geneveid", setgeneve_vni),
+ DEF_CMD_ARG("genevelocal", setgeneve_local),
+ DEF_CMD_ARG("geneveremote", setgeneve_remote),
+ DEF_CMD_ARG("genevegroup", setgeneve_group),
+ DEF_CMD_ARG("genevelocalport", setgeneve_local_port),
+ DEF_CMD_ARG("geneveremoteport", setgeneve_remote_port),
+ DEF_CMD_ARG2("geneveportrange", setgeneve_port_range),
+ DEF_CMD_ARG("genevetimeout", setgeneve_timeout),
+ DEF_CMD_ARG("genevemaxaddr", setgeneve_maxaddr),
+ DEF_CMD_ARG("genevedev", setgeneve_dev),
+ DEF_CMD_ARG("genevettl", setgeneve_ttl),
+ DEF_CMD_ARG("genevedf", setgeneve_df),
+ /* Flag keywords; the integer is forwarded to the handler as `d'. */
+ DEF_CMD("genevedscpinherit", 1, setgeneve_inherit_dscp),
+ DEF_CMD("-genevedscpinherit", 0, setgeneve_inherit_dscp),
+ DEF_CMD("genevelearn", 1, setgeneve_learn),
+ DEF_CMD("-genevelearn", 0, setgeneve_learn),
+ DEF_CMD("geneveflush", 1, setgeneve_flush),
+ DEF_CMD("geneveflushall", 0, setgeneve_flush),
+ DEF_CMD("geneveexternal", 1, setgeneve_external),
+ DEF_CMD("-geneveexternal", 0, setgeneve_external),
+
+ /* Hardware offload capabilities, toggled through setifcapnv. */
+ DEF_CMD_SARG("genevehwcsum", IFCAP2_GENEVE_HWCSUM_NAME,
+ setifcapnv),
+ DEF_CMD_SARG("-genevehwcsum", "-"IFCAP2_GENEVE_HWCSUM_NAME,
+ setifcapnv),
+ DEF_CMD_SARG("genevehwtso", IFCAP2_GENEVE_HWTSO_NAME,
+ setifcapnv),
+ DEF_CMD_SARG("-genevehwtso", "-"IFCAP2_GENEVE_HWTSO_NAME,
+ setifcapnv),
+};
+
+#endif
+
+/*
+ * Address-family switch entry for geneve.  It is registered with
+ * AF_UNSPEC and only supplies an af_other_status callback: the netlink
+ * variant unless WITHOUT_NETLINK is defined, the ioctl variant otherwise.
+ */
+static struct afswtch af_geneve = {
+ .af_name = "af_geneve",
+ .af_af = AF_UNSPEC,
+#ifndef WITHOUT_NETLINK
+ .af_other_status = geneve_status_nl,
+#else
+ .af_other_status = geneve_status,
+#endif
+};
+
+/*
+ * Constructor run at program start-up: registers every geneve keyword and
+ * the af_geneve status entry, then installs the clone-create callback for
+ * interface names starting with "geneve" (netlink or ioctl flavour,
+ * depending on WITHOUT_NETLINK).
+ */
+static __constructor void
+geneve_ctor(void)
+{
+	struct cmd *cmd;
+	struct cmd *const end = geneve_cmds + nitems(geneve_cmds);
+
+	for (cmd = geneve_cmds; cmd < end; cmd++)
+		cmd_register(cmd);
+	af_register(&af_geneve);
+#ifndef WITHOUT_NETLINK
+	clone_setdefcallback_prefix("geneve", geneve_create_nl);
+	SNL_VERIFY_PARSERS(all_parsers);
+#else
+	clone_setdefcallback_prefix("geneve", geneve_create);
+#endif
+}
diff --git a/sbin/route/route_netlink.c b/sbin/route/route_netlink.c
--- a/sbin/route/route_netlink.c
+++ b/sbin/route/route_netlink.c
@@ -879,7 +879,7 @@
uint32_t nlm_seq = hdr->nlmsg_seq;
nl_helper_init(&h);
-
+
while ((hdr = snl_read_reply_multi(&ss, nlm_seq, &e)) != NULL) {
struct snl_parsed_route r = { .rtax_weight = RT_DEFAULT_WEIGHT };
int error;
diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile
--- a/share/man/man4/Makefile
+++ b/share/man/man4/Makefile
@@ -186,6 +186,7 @@
gem.4 \
genet.4 \
genetlink.4 \
+ geneve.4 \
geom.4 \
geom_linux_lvm.4 \
geom_uzip.4 \
@@ -722,6 +723,7 @@
MLINKS+=fxp.4 if_fxp.4
MLINKS+=gem.4 if_gem.4
MLINKS+=genet.4 if_genet.4
+MLINKS+=geneve.4 if_geneve.4
MLINKS+=geom.4 GEOM.4
MLINKS+=gif.4 if_gif.4
MLINKS+=gpio.4 gpiobus.4
diff --git a/share/man/man4/geneve.4 b/share/man/man4/geneve.4
new file mode 100644
--- /dev/null
+++ b/share/man/man4/geneve.4
@@ -0,0 +1,387 @@
+.\" Copyright (c) 2025 Seyed Pouria Mousavizadeh Tehrani
+.\"
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\"
+.Dd December 11, 2025
+.Dt GENEVE 4
+.Os
+.Sh NAME
+.Nm geneve
+.Nd "Generic Network Virtualization Encapsulation interface"
+.Sh SYNOPSIS
+To compile this driver into the kernel,
+place the following line in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device geneve"
+.Ed
+.Pp
+Alternatively, to load the driver as a
+module at boot time, place the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+if_geneve_load="YES"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver creates generic network virtualization tunnel interfaces
+for Tenant Systems over an L3 (IP/UDP) underlay network that provides
+a Layer 2 (Ethernet) or Layer 3 service using the
+.Nm
+protocol.
+.Pp
+This driver corresponds to RFC 8926 for format specification and by default
+uses the multicast-learning-based approach for its control plane.
+To provide control plane independence all of the driver-specific operations
+are implemented using
+.Xr rtnetlink 4
+and all the
+.Xr ioctl 2
+calls are implemented using the
+.Xr nv 9
+library.
+Each
+.Nm
+interface is created at runtime using interface cloning.
+This is most easily done with the
+.Xr ifconfig 8
+.Cm create
+command or using the
+.Va cloned_interfaces
+variable in
+.Xr rc.conf 5 .
+The interface may be removed with the
+.Xr ifconfig 8
+.Cm destroy
+command.
+.Pp
+The
+.Nm
+interface must be configured in either L2 or L3 mode.
+An L2
+.Nm
+tunnel could be used as a backplane between the virtual switches
+residing in hypervisors, switches, or other appliances.
+.Pp
+The L3
+.Nm
+tunnel provides virtualized IP forwarding service similar to IP/VRF.
+.Pp
+By default the
+.Nm
+driver creates an L2 interface that supports the usual network
+.Xr ioctl 2 Ns s
+and thus can be used with
+.Xr ifconfig 8
+like any other Ethernet interface.
+An L2
+.Nm
+interface encapsulates the Ethernet frame by prepending IP/UDP and
+.Nm
+headers.
+Thus, the encapsulated (inner) frame is able to be transmitted
+over a routed, Layer 3 network to the remote host.
+.Pp
+The
+.Nm
+interface may be configured in either unicast or multicast mode.
+When in unicast mode,
+the interface creates a tunnel to a single remote host,
+and all traffic is transmitted to that host.
+When in multicast mode,
+the interface joins an IP multicast group,
+and receives packets sent to the group address,
+and transmits packets to either the multicast group address,
+or directly to the remote host if there is an appropriate
+forwarding table entry.
+.Pp
+When the
+.Nm
+interface is brought up, a
+.Xr udp 4
+.Xr socket 9
+is created based on the configuration,
+such as the local address for unicast mode or
+the group address for multicast mode,
+and the listening (local) port number.
+Since multiple
+.Nm
+interfaces may be created that either
+use the same local address
+or join the same group address,
+and use the same port,
+the driver may share a socket among multiple interfaces.
+However, each interface within a socket must belong to
+a unique
+.Nm
+segment per
+.Xr vnet 9 .
+The analogous
+.Xr vlan 4
+configuration would be a physical interface configured as
+the parent device for multiple VLAN interfaces, each with
+a unique VLAN tag.
+Each
+.Nm
+segment is identified by a 24-bit value in the
+.Nm
+header called the
+.Dq Virtual Network Identifier ,
+or VNI.
+This value can be set with
+.Xr ifconfig 8
+.Cm geneveid
+parameter.
+.Pp
+When configured with the
+.Xr ifconfig 8
+.Cm genevelearn
+parameter, the interface dynamically creates forwarding table entries
+from received packets.
+An entry in the forwarding table maps the inner source MAC address
+to the outer remote IP address.
+During transmit, the interface attempts to lookup an entry for
+the encapsulated destination MAC address.
+If an entry is found, the IP address in the entry is used to directly
+transmit the encapsulated frame to the destination.
+Otherwise, when configured in multicast mode,
+the interface must flood the frame to all hosts in the group.
+The maximum number of entries in the table is configurable with the
+.Xr ifconfig 8
+.Cm genevemaxaddr
+command.
+Stale entries in the table are periodically pruned.
+The timeout is configurable with the
+.Xr ifconfig 8
+.Cm genevetimeout
+command.
+.Sh MTU
+Since the
+.Nm
+interface encapsulates the Ethernet frame with an IP, UDP, and
+.Nm
+header, the resulting frame may be larger than the MTU of the
+physical network.
+The
+.Nm
+specification recommends the physical network MTU be configured
+to use jumbo frames to accommodate the encapsulated frame size.
+.Pp
+By default, the
+.Nm
+driver sets its MTU to the usual Ethernet MTU of 1500 bytes, reduced by
+the size of the prepended geneve headers, which depends on
+.Cm genevemode .
+.Pp
+Alternatively, the
+.Xr ifconfig 8
+.Cm mtu
+command may be used to set the fixed MTU size on the
+.Nm
+interface to allow the encapsulated frame to fit in the
+current MTU of the physical network.
+If the
+.Cm mtu
+command was used, the system no longer adjusts the
+.Nm
+interface MTU on routing or address changes.
+.Sh Hop Limit (TTL)
+The TTL value of a
+.Nm
+interface can be changed using the
+.Xr ifconfig 8
+.Cm genevettl
+command, and it can also be inherited from the packet being carried.
+You can set the
+.Cm genevettl
+to a numeric value or to the
+.Cm inherit
+option to be inherited at the encapsulation and decapsulation point.
+.Sh Traffic Class (ToS)
+Just like the TTL value, the ToS value can be inherited at the encapsulation point
+using
+.Xr ifconfig 8
+.Cm genevedscpinherit .
+As defined in RFC 8926, the ECN value follows RFC 6040 for both ingress and
+egress traffic.
+.Sh Don't Fragment
+To make sure fragmentation does not happen during transmission, you can
+set the
+.Xr ifconfig 8
+.Cm genevedf
+value to
+.Cm set
+value which sets the DF bit on IPv4 header and IP_DONTFRAG option on both IPv4
+and IPv6 sockets.
+Similar to other options, it can be set to
+.Cm inherit
+value.
+.Sh Multicast
+To create the
+.Nm
+interface with multicast underlay, one must use
+.Xr ifconfig 8
+.Cm genevegroup
+instead of
+.Cm geneveremote
+and set it to a multicast address (e.g. ff08::db8:0:1, 239.0.0.1).
+One can set the outbound multicast interface with
+.Xr ifconfig 8
+.Cm genevedev
+to bind its multicast group to a specific interface.
+.Pp
+The
+.Cm ip_mroute
+kernel module for IPv4 underlay and
+.Cm ip6_mroute
+for IPv6 underlay must be loaded for
+.Xr multicast 4
+to function.
+.Sh HARDWARE
+The
+.Nm
+driver supports hardware checksum offload (receive and transmit) and TSO on the
+encapsulated traffic over physical interfaces that support these features.
+The
+.Nm
+interface examines the
+.Cm genevedev
+interface, if one is specified, or the interface hosting the
+.Cm genevelocal
+address, and configures its capabilities based on the hardware offload
+capabilities of that physical interface.
+If multiple physical interfaces will transmit or receive traffic for the
+.Nm
+then they all must have the same hardware capabilities.
+The transmit routine of a
+.Nm
+interface may fail with
+.Er ENXIO
+if an outbound physical interface does not support
+an offload that the
+.Nm
+interface is requesting.
+This can happen if there are multiple physical interfaces involved, with
+different hardware capabilities, or an interface capability was disabled after
+the
+.Nm
+interface had already started.
+.Sh EXAMPLES
+.Bd -literal
+ Host A (198.51.100.10)
+ +--------------------+
+ | VNI 100 10.1.1.0/24|
+ | VNI 200 10.2.2.0/24|
+ +---------+----------+
+ |
+ (198.51.100.0/24)
+ |
+ +---------------v---------------+
+ | Host B (203.0.113.1) |
+ | +------+-------+ |
+ | geneve0| |geneve1|
+ | +------v----+ +-----v-----+ |
+ | | bridge0 | | bridge1 | |
+ | | (VNI 100) | | (VNI 200) | |
+ | +------+----+ +----+------+ |
+ | | | |
+ +--------v-------------v--------+
+ epair0b| |epair1b
+ +------+----+ +----+------+
+ | Jail A | | Jail B |
+ | (10.1.1.x)| | (10.2.2.x)|
+ +-----------+ +-----------+
+.Ed
+Assume host A has the (external) IP address 198.51.100.10 and
+two internal addresses of 10.1.1.1/24 and 10.2.2.1/24, while
+host B has the external address of 203.0.113.1 and two jails
+with their own separate
+.Xr vnet 9 .
+The following commands will configure the tunnel:
+.Pp
+On host A, create an L2
+.Nm
+interface in unicast mode:
+.Bd -literal
+ifconfig geneve0 create geneveid 100 genevelocal 198.51.100.10 geneveremote 203.0.113.1
+ifconfig geneve1 create geneveid 200 genevelocal 198.51.100.10 geneveremote 203.0.113.1
+.Ed
+.Pp
+On host B:
+.Bd -literal
+ifconfig geneve0 create geneveid 100 genevelocal 203.0.113.1 geneveremote 198.51.100.10
+ifconfig geneve1 create geneveid 200 genevelocal 203.0.113.1 geneveremote 198.51.100.10
+ifconfig bridge0 addm geneve0 addm epair0a
+ifconfig bridge1 addm geneve1 addm epair1a
+.Ed
+.Pp
+The example below demonstrates multicast configuration with IPv6:
+.Bd -literal
+ ----------- VNI 42 -----------
+ / \\
+2001:db8::1/64 --- Host A ------ Multicast ------- Host B --- 2001:db8::2/64
+ 3fff::1 [em0] ff08::db8:1 [em0] 3fff::2
+.Ed
+.Pp
+Create a
+.Nm
+interface in multicast mode,
+with the
+.Cm genevelocal
+address of 3fff::1,
+and the
+.Cm genevegroup
+address of ff08::db8:1.
+The em0 interface will be used to transmit multicast packets.
+On host A:
+.Bd -literal
+ifconfig geneve0 create geneveid 42 genevelocal 3fff::1 genevegroup ff08::db8:1 genevedev em0
+.Ed
+.Pp
+On host B:
+.Bd -literal
+ifconfig geneve0 create geneveid 42 genevelocal 3fff::2 genevegroup ff08::db8:1 genevedev em0
+.Ed
+.Pp
+Once created, the
+.Nm
+interface can be configured with
+.Xr ifconfig 8 .
+.Pp
+The following when placed in the file
+.Pa /etc/rc.conf
+will cause a geneve interface called
+.Dq Li geneve0
+to be created, and will configure the interface in unicast mode.
+.Bd -literal
+cloned_interfaces="geneve0"
+create_args_geneve0="geneveid 108 genevelocal 192.168.100.1 geneveremote 192.168.100.2"
+.Ed
+.Sh SEE ALSO
+.Xr inet 4 ,
+.Xr inet6 4 ,
+.Xr multicast 4 ,
+.Xr rtnetlink 4 ,
+.Xr vlan 4 ,
+.Xr rc.conf 5 ,
+.Xr ifconfig 8 ,
+.Xr sysctl 8
+.Rs
+.%A "J. Gross, Ed."
+.%A "I. Ganga, Ed."
+.%A "T. Sridhar, Ed."
+.%T "Geneve: Generic Network Virtualization Encapsulation"
+.%D November 2020
+.%O "RFC 8926"
+.Re
+.Sh AUTHORS
+.An -nosplit
+The
+.Nm
+driver was written by
+.An Seyed Pouria Mousavizadeh Tehrani Aq info@spmzt.net
+.Sh BUGS
+The current geneve implementation with netlink cannot set geneve options
+other than genevemode during interface cloning in ifconfig without
+specifying the interface index.
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -880,6 +880,10 @@
# frames in UDP packets according to RFC7348.
device vxlan
+# The `geneve' device implements the GENEVE encapsulation of virtual
+# overlays according to RFC8926.
+device geneve
+
# The `wlan' device provides generic code to support 802.11
# drivers, including host AP mode; it is MANDATORY for the wi,
# and ath drivers and will eventually be required by all 802.11 drivers.
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4213,6 +4213,7 @@
net/if_tuntap.c optional tuntap
net/if_vlan.c optional vlan
net/if_vxlan.c optional vxlan inet | vxlan inet6
+net/if_geneve.c optional geneve inet | geneve inet6
net/ifdi_if.m optional ether pci iflib
net/iflib.c optional ether pci iflib
net/mp_ring.c optional ether iflib
diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -4227,6 +4227,7 @@
case PRIV_NET_SETIFVNET:
case PRIV_NET_SETIFFIB:
case PRIV_NET_OVPN:
+ case PRIV_NET_GENEVE:
case PRIV_NET_ME:
case PRIV_NET_WG:
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -168,6 +168,7 @@
if_tuntap \
if_vlan \
if_vxlan \
+ if_geneve \
${_if_wg} \
iflib \
${_igc} \
diff --git a/sys/modules/if_geneve/Makefile b/sys/modules/if_geneve/Makefile
new file mode 100644
--- /dev/null
+++ b/sys/modules/if_geneve/Makefile
@@ -0,0 +1,7 @@
+.PATH: ${SRCTOP}/sys/net
+
+KMOD= if_geneve
+SRCS= if_geneve.c
+SRCS+= opt_inet.h opt_inet6.h
+
+.include <bsd.kmod.mk>
diff --git a/sys/net/if.h b/sys/net/if.h
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -256,7 +256,9 @@
#define IFCAP_B_RXTLS4 32 /* can do TLS receive for TCP */
#define IFCAP_B_RXTLS6 33 /* can do TLS receive for TCP6 */
#define IFCAP_B_IPSEC_OFFLOAD 34 /* inline IPSEC offload */
-#define __IFCAP_B_SIZE 35
+#define IFCAP_B_GENEVE_HWCSUM 35 /* can do IFCAP_HWCSUM on GENEVE */
+#define IFCAP_B_GENEVE_HWTSO 36 /* can do IFCAP_TSO on GENEVE */
+#define __IFCAP_B_SIZE 37
#define IFCAP_B_MAX (__IFCAP_B_MAX - 1)
#define IFCAP_B_SIZE (__IFCAP_B_SIZE)
@@ -300,6 +302,8 @@
#define IFCAP2_RXTLS4 (IFCAP_B_RXTLS4 - 32)
#define IFCAP2_RXTLS6 (IFCAP_B_RXTLS6 - 32)
#define IFCAP2_IPSEC_OFFLOAD (IFCAP_B_IPSEC_OFFLOAD - 32)
+#define IFCAP2_GENEVE_HWCSUM (IFCAP_B_GENEVE_HWCSUM - 32)
+#define IFCAP2_GENEVE_HWTSO (IFCAP_B_GENEVE_HWTSO - 32)
#define IFCAP2_BIT(x) (1UL << (x))
diff --git a/sys/net/if.c b/sys/net/if.c
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -2378,6 +2378,8 @@
CAP2NV(RXTLS4),
CAP2NV(RXTLS6),
CAP2NV(IPSEC_OFFLOAD),
+ CAP2NV(GENEVE_HWCSUM),
+ CAP2NV(GENEVE_HWTSO),
{0, NULL}
};
#undef CAPNV
diff --git a/sys/net/if_clone.c b/sys/net/if_clone.c
--- a/sys/net/if_clone.c
+++ b/sys/net/if_clone.c
@@ -536,10 +536,12 @@
ifc->create_nl = req2->create_nl_f;
ifc->modify_nl = req2->modify_nl_f;
ifc->dump_nl = req2->dump_nl_f;
+ if (req2->dump_nl_f != NULL)
+ ifc->dump_nl = req2->dump_nl_f;
+ else
+ ifc->dump_nl = ifc_dump_ifp_nl_default;
}
- ifc->dump_nl = ifc_dump_ifp_nl_default;
-
if (if_clone_attach(ifc) != 0)
return (NULL);
diff --git a/sys/net/if_geneve.h b/sys/net/if_geneve.h
new file mode 100644
--- /dev/null
+++ b/sys/net/if_geneve.h
@@ -0,0 +1,87 @@
+/*-
+ * Copyright (c) 2025 Seyed Pouria Mousavizadeh Tehrani <info@spmzt.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NET_IF_GENEVE_H_
+#define _NET_IF_GENEVE_H_
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/udp.h>
+#include <netinet/in.h>
+/*
+#include <net/if.h>
+#include <net/ethernet.h>
+
+*/
+
+#define GENEVE_VERSION 0
+#define GENEVE_HDR_VNI_SHIFT 8
+
+#define GENEVE_VNI_MAX (1 << 24)
+#define GENEVE_VNI_MASK (GENEVE_VNI_MAX - 1)
+
+#define GENEVE_SO_MC_MAX_GROUPS 32
+
+/*
+ * GENEVE header according to RFC 8926.
+ *
+ * The first two bytes are described with bit-fields; the member order is
+ * reversed between the LITTLE_ENDIAN and BIG_ENDIAN variants with the
+ * intent that each field ends up at the same on-wire bit position.
+ *
+ * NOTE(review): geneve_vni is a full 32-bit word, whereas GENEVE_VNI_MASK
+ * covers 24 bits and GENEVE_HDR_VNI_SHIFT is 8. Presumably the remaining
+ * byte is the RFC's reserved field -- confirm against the encap/decap code.
+ */
+struct genevehdr {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ uint8_t geneve_optlen:6, /* Opt Len */
+ geneve_ver:2; /* version */
+ uint8_t geneve_flags:6, /* GENEVE Flags */
+ geneve_critical:1, /* critical options present */
+ geneve_control:1; /* control packets */
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ uint8_t geneve_ver:2, /* version */
+ geneve_optlen:6; /* Opt Len */
+ uint8_t geneve_control:1, /* control packets */
+ geneve_critical:1, /* critical options present */
+ geneve_flags:6; /* GENEVE Flags */
+#endif
+ uint16_t geneve_proto; /* protocol type (follows Ethertypes) */
+#define GENEVE_PROTO_ETHER 0x6558 /* Ethernet itself */
+#define GENEVE_PROTO_INHERIT 0x0 /* inherit inner layer 3 headers itself */
+ uint32_t geneve_vni; /* virtual network identifier */
+} __packed;
+
+/* A UDP header immediately followed by the fixed GENEVE header (packed). */
+struct geneveudphdr {
+ struct udphdr geneve_udp;
+ struct genevehdr geneve_hdr;
+} __packed;
+
+/*
+ * One socket address that can be viewed as a generic sockaddr, an IPv4
+ * sockaddr_in or an IPv6 sockaddr_in6.
+ *
+ * NOTE(review): the tag "sockaddr_union" is very generic for a header
+ * installed under net/; confirm it cannot clash with an identically named
+ * union declared by other kernel headers included alongside this one.
+ */
+union sockaddr_union {
+ struct sockaddr sa;
+ struct sockaddr_in sin;
+ struct sockaddr_in6 sin6;
+};
+
+/*
+ * Parameter block carrying a single 16-bit protocol value.
+ * NOTE(review): presumably one of the GENEVE_PROTO_* values handed over at
+ * interface creation time -- confirm against the users of this struct.
+ */
+struct geneve_params {
+ uint16_t ifla_proto;
+};
+
+#define GENEVE_UDPPORT 6081
+
+#endif /* _NET_IF_GENEVE_H_ */
diff --git a/sys/net/if_geneve.c b/sys/net/if_geneve.c
new file mode 100644
--- /dev/null
+++ b/sys/net/if_geneve.c
@@ -0,0 +1,4695 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Seyed Pouria Mousavizadeh Tehrani <info@spmzt.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/hash.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/nv.h>
+#include <sys/refcount.h>
+#include <sys/rmlock.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/sdt.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sockio.h>
+#include <sys/sx.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <sys/jail.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_private.h>
+#include <net/if_arp.h>
+#include <net/if_clone.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
+#include <net/if_geneve.h>
+#include <net/netisr.h>
+#include <net/route.h>
+#include <net/route/nhop.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/in_var.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/scope6_var.h>
+#include <netinet/udp.h>
+#include <netinet/udp_var.h>
+#include <netinet/in_fib.h>
+#include <netinet6/in6_fib.h>
+#include <netinet/ip_ecn.h>
+
+#include <netlink/netlink.h>
+#include <netlink/netlink_ctl.h>
+#include <netlink/netlink_var.h>
+#include <netlink/netlink_route.h>
+#include <netlink/route/route_var.h>
+
+#include <security/mac/mac_framework.h>
+
+SDT_PROVIDER_DEFINE(if_geneve);
+
+struct geneve_softc;
+LIST_HEAD(geneve_softc_head, geneve_softc);
+
+struct sx geneve_sx;
+SX_SYSINIT(geneve, &geneve_sx, "GENEVE global start/stop lock");
+
+static unsigned geneve_osd_jail_slot;
+
+/*
+ * One multicast membership slot of a geneve_socket (see gnvso_mc[]).
+ * NOTE(review): field roles below are inferred from their names (source
+ * address, group address, interface index, user count) -- confirm against
+ * the geneve_socket_mc_* helpers.
+ */
+struct geneve_socket_mc_info {
+ union sockaddr_union gnvsomc_saddr;
+ union sockaddr_union gnvsomc_gaddr;
+ int gnvsomc_ifidx;
+ int gnvsomc_users;
+};
+
+/* The maximum MTU of encapsulated geneve packet. */
+#define GENEVE_MAX_L3MTU (IP_MAXPACKET - \
+ 60 /* Maximum IPv4 header len */ - \
+ sizeof(struct udphdr) - \
+ sizeof(struct genevehdr))
+#define GENEVE_MAX_MTU (GENEVE_MAX_L3MTU - \
+ ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN)
+
+#define GENEVE_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU | IFCAP_NV)
+
+#define GENEVE_SO_MC_MAX_GROUPS 32
+
+#define GENEVE_SO_VNI_HASH_SHIFT 6
+#define GENEVE_SO_VNI_HASH_SIZE (1 << GENEVE_SO_VNI_HASH_SHIFT)
+#define GENEVE_SO_VNI_HASH(_vni) ((_vni) % GENEVE_SO_VNI_HASH_SIZE)
+
+/*
+ * State wrapped around one kernel socket used by geneve interfaces.
+ *
+ * gnvso_lock is taken through the GENEVE_SO_*LOCK macros and gnvso_refcnt is
+ * manipulated through GENEVE_SO_ACQUIRE/GENEVE_SO_RELEASE.
+ */
+struct geneve_socket {
+ struct socket *gnvso_sock; /* closed with soclose() on destroy */
+ struct rmlock gnvso_lock;
+ u_int gnvso_refcnt;
+ union sockaddr_union gnvso_laddr; /* compared in geneve_socket_lookup() */
+ LIST_ENTRY(geneve_socket) gnvso_entry; /* list linkage; removed under GENEVE_LIST_LOCK */
+ /* Softc lists in GENEVE_SO_VNI_HASH_SIZE buckets; presumably indexed by GENEVE_SO_VNI_HASH(vni). */
+ struct geneve_softc_head gnvso_vni_hash[GENEVE_SO_VNI_HASH_SIZE];
+ struct geneve_socket_mc_info gnvso_mc[GENEVE_SO_MC_MAX_GROUPS];
+ struct vnet *gnvso_vnet; /* vnet passed to geneve_socket_alloc() */
+};
+
+#define GENEVE_SO_RLOCK(_gnvso, _p) rm_rlock(&(_gnvso)->gnvso_lock, (_p))
+#define GENEVE_SO_RUNLOCK(_gnvso, _p) rm_runlock(&(_gnvso)->gnvso_lock, (_p))
+#define GENEVE_SO_WLOCK(_gnvso) rm_wlock(&(_gnvso)->gnvso_lock)
+#define GENEVE_SO_WUNLOCK(_gnvso) rm_wunlock(&(_gnvso)->gnvso_lock)
+#define GENEVE_SO_LOCK_ASSERT(_gnvso) \
+ rm_assert(&(_gnvso)->gnvso_lock, RA_LOCKED)
+#define GENEVE_SO_LOCK_WASSERT(_gnvso) \
+ rm_assert(&(_gnvso)->gnvso_lock, RA_WLOCKED)
+
+#define GENEVE_SO_ACQUIRE(_gnvso) refcount_acquire(&(_gnvso)->gnvso_refcnt)
+#define GENEVE_SO_RELEASE(_gnvso) refcount_release(&(_gnvso)->gnvso_refcnt)
+
+/* Forwarding table entry: maps an Ethernet address to a remote address. */
+struct gnv_ftable_entry {
+ LIST_ENTRY(gnv_ftable_entry) gnvfe_hash; /* linkage within one hash bucket */
+ uint16_t gnvfe_flags; /* GENEVE_FE_FLAG_* */
+ uint8_t gnvfe_mac[ETHER_ADDR_LEN]; /* lookup key */
+ union sockaddr_union gnvfe_raddr; /* remote address for this MAC */
+ time_t gnvfe_expire; /* compared against time_uptime when expiring */
+};
+
+#define GENEVE_FE_FLAG_DYNAMIC 0x01
+#define GENEVE_FE_FLAG_STATIC 0x02
+
+#define GENEVE_FE_IS_DYNAMIC(_fe) \
+ ((_fe)->gnvfe_flags & GENEVE_FE_FLAG_DYNAMIC)
+
+#define GENEVE_SC_FTABLE_SHIFT 9
+#define GENEVE_SC_FTABLE_SIZE (1 << GENEVE_SC_FTABLE_SHIFT)
+#define GENEVE_SC_FTABLE_MASK (GENEVE_SC_FTABLE_SIZE - 1)
+#define GENEVE_SC_FTABLE_HASH(_sc, _mac) \
+ (geneve_mac_hash(_sc, _mac) % GENEVE_SC_FTABLE_SIZE)
+
+LIST_HEAD(geneve_ftable_head, gnv_ftable_entry);
+
+/* Per-interface statistics kept in the softc. */
+struct geneve_statistics {
+ uint32_t ftable_nospace; /* learn attempts rejected because the table was full */
+ uint32_t ftable_lock_upgrade_failed; /* bumped each time the read lock is swapped for the write lock */
+ /* NOTE(review): presumably checksum/TSO offload event counters -- confirm at their update sites. */
+ counter_u64_t txcsum;
+ counter_u64_t tso;
+ counter_u64_t rxcsum;
+};
+
+/*
+ * Per-interface state of a geneve interface.
+ *
+ * gnv_lock is taken through the GENEVE_*LOCK macros and gnv_refcnt is
+ * manipulated through GENEVE_ACQUIRE/GENEVE_RELEASE. The forwarding table
+ * (gnv_ftable and its counters) is modified with gnv_lock write-held by the
+ * ftable helpers that assert it.
+ */
+struct geneve_softc {
+ /* NOTE(review): presumably links the softc into a geneve_socket VNI hash bucket -- confirm. */
+ LIST_ENTRY(geneve_softc) gnv_entry;
+
+ struct ifnet *gnv_ifp;
+ uint32_t gnv_flags;
+#define GENEVE_FLAG_INIT 0x0001
+#define GENEVE_FLAG_TEARDOWN 0x0002
+#define GENEVE_FLAG_LEARN 0x0004
+#define GENEVE_FLAG_USER_MTU 0x0008
+#define GENEVE_FLAG_TTL_INHERIT 0x0010
+#define GENEVE_FLAG_DSCP_INHERIT 0x0020
+#define GENEVE_FLAG_COLLECT_METADATA 0x0040
+
+ struct vnet *gnv_vnet;
+
+ int gnv_reqcap;
+ int gnv_reqcap2;
+ struct geneve_socket *gnv_sock;
+ union sockaddr_union gnv_src_addr;
+ /* The port stored here is reused for learned entries (see geneve_ftable_learn()). */
+ union sockaddr_union gnv_dst_addr;
+ uint32_t gnv_fibnum;
+ uint32_t gnv_vni;
+ uint32_t gnv_port_hash_key;
+ uint16_t gnv_proto;
+ uint16_t gnv_min_port;
+ uint16_t gnv_max_port;
+ uint8_t gnv_ttl;
+ enum ifla_geneve_df gnv_df;
+
+ /* Lookup table from MAC address to forwarding entry. */
+ uint32_t gnv_ftable_cnt; /* current number of entries */
+ uint32_t gnv_ftable_max; /* limit checked before learning a new entry */
+ uint32_t gnv_ftable_timeout; /* added to time_uptime to compute an entry's expiry */
+ uint32_t gnv_ftable_hash_key; /* seeded with arc4random() in geneve_ftable_init() */
+ struct geneve_ftable_head *gnv_ftable; /* GENEVE_SC_FTABLE_SIZE buckets */
+
+ /* Derived from gnv_dst_addr. */
+ struct gnv_ftable_entry gnv_default_fe;
+
+ struct ip_moptions *gnv_im4o;
+ struct ip6_moptions *gnv_im6o;
+
+ struct rmlock gnv_lock;
+ volatile u_int gnv_refcnt;
+
+ int gnv_so_mc_index;
+ struct geneve_statistics gnv_stats;
+ struct callout gnv_callout;
+ struct ether_addr gnv_hwaddr;
+ int gnv_mc_ifindex;
+ struct ifnet *gnv_mc_ifp;
+ struct ifmedia gnv_media;
+ char gnv_mc_ifname[IFNAMSIZ];
+
+ /* For rate limiting errors on the tx fast path. */
+ struct timeval err_time;
+ int err_pps;
+};
+
+#define GENEVE_RLOCK(_sc, _p) rm_rlock(&(_sc)->gnv_lock, (_p))
+#define GENEVE_RUNLOCK(_sc, _p) rm_runlock(&(_sc)->gnv_lock, (_p))
+#define GENEVE_WLOCK(_sc) rm_wlock(&(_sc)->gnv_lock)
+#define GENEVE_WUNLOCK(_sc) rm_wunlock(&(_sc)->gnv_lock)
+#define GENEVE_LOCK_WOWNED(_sc) rm_wowned(&(_sc)->gnv_lock)
+#define GENEVE_LOCK_ASSERT(_sc) rm_assert(&(_sc)->gnv_lock, RA_LOCKED)
+#define GENEVE_LOCK_WASSERT(_sc) rm_assert(&(_sc)->gnv_lock, RA_WLOCKED)
+#define GENEVE_UNLOCK(_sc, _p) do { \
+ if (GENEVE_LOCK_WOWNED(_sc)) \
+ GENEVE_WUNLOCK(_sc); \
+ else \
+ GENEVE_RUNLOCK(_sc, _p); \
+} while (0)
+
+#define GENEVE_ACQUIRE(_sc) refcount_acquire(&(_sc)->gnv_refcnt)
+#define GENEVE_RELEASE(_sc) refcount_release(&(_sc)->gnv_refcnt)
+
+#define SATOCONSTSIN(sa) ((const struct sockaddr_in *)(sa))
+#define SATOCONSTSIN6(sa) ((const struct sockaddr_in6 *)(sa))
+
+/*
+ * Small per-packet scratch record passed to the geneve_input_* helpers.
+ * NOTE(review): field meanings are inferred from their names (netisr
+ * protocol, inner ethertype, ECN bits, TTL) -- confirm at the fill sites.
+ */
+struct geneve_pkt_info {
+ u_int isr;
+ uint16_t ethertype;
+ uint8_t ecn;
+ uint8_t ttl;
+};
+
+/*
+ * Destination of the netlink attribute parsers for geneve interfaces.
+ *
+ * Every member is written by the callback named in the matching
+ * nlattr_parser entry, so its type must have exactly the width that the
+ * callback stores (uint8_t for nlattr_get_uint8, uint16_t for
+ * nlattr_get_uint16, and so on).
+ */
+struct nl_parsed_geneve {
+	/* essential */
+	uint32_t			ifla_vni;
+	uint16_t			ifla_proto;
+	struct sockaddr			*ifla_local;
+	struct sockaddr			*ifla_remote;
+	uint16_t			ifla_local_port;
+	uint16_t			ifla_remote_port;
+
+	/* optional */
+	struct ifla_geneve_port_range	ifla_port_range;
+	/*
+	 * Holds an enum ifla_geneve_df value.  It is declared as uint8_t
+	 * because IFLA_GENEVE_DF is parsed with nlattr_get_uint8, which
+	 * stores a single byte; an int-sized enum member would only be
+	 * partially written and would read back wrong on big-endian
+	 * machines.
+	 */
+	uint8_t				ifla_df;
+	uint8_t				ifla_ttl;
+	bool				ifla_ttl_inherit;
+	bool				ifla_dscp_inherit;
+	bool				ifla_external;
+
+	/* l2 specific */
+	bool				ifla_ftable_learn;
+	bool				ifla_ftable_flush;
+	uint32_t			ifla_ftable_max;
+	uint32_t			ifla_ftable_timeout;
+	uint32_t			ifla_ftable_count;	/* read-only */
+
+	/* multicast specific */
+	char				*ifla_mc_ifname;
+	uint32_t			ifla_mc_ifindex;	/* read-only */
+};
+
+/* The multicast-based learning parts of the code are taken from if_vxlan */
+static int geneve_ftable_addr_cmp(const uint8_t *, const uint8_t *);
+static void geneve_ftable_init(struct geneve_softc *);
+static void geneve_ftable_fini(struct geneve_softc *);
+static void geneve_ftable_flush(struct geneve_softc *, int);
+static void geneve_ftable_expire(struct geneve_softc *);
+static int geneve_ftable_update_locked(struct geneve_softc *,
+ const union sockaddr_union *, const uint8_t *,
+ struct rm_priotracker *);
+static int geneve_ftable_learn(struct geneve_softc *,
+ const struct sockaddr *, const uint8_t *);
+
+static struct gnv_ftable_entry *
+ geneve_ftable_entry_alloc(void);
+static void geneve_ftable_entry_free(struct gnv_ftable_entry *);
+static void geneve_ftable_entry_init(struct geneve_softc *,
+ struct gnv_ftable_entry *, const uint8_t *,
+ const struct sockaddr *, uint32_t);
+static void geneve_ftable_entry_destroy(struct geneve_softc *,
+ struct gnv_ftable_entry *);
+static int geneve_ftable_entry_insert(struct geneve_softc *,
+ struct gnv_ftable_entry *);
+static struct gnv_ftable_entry *
+ geneve_ftable_entry_lookup(struct geneve_softc *,
+ const uint8_t *);
+
+static struct geneve_socket *
+ geneve_socket_alloc(struct vnet *);
+static void geneve_socket_destroy(struct geneve_socket *);
+static void geneve_socket_release(struct geneve_socket *);
+static struct geneve_socket *
+ geneve_socket_lookup(union sockaddr_union *);
+static void geneve_socket_insert(struct geneve_socket *);
+static int geneve_socket_init(struct geneve_socket *, struct ifnet *);
+static int geneve_socket_bind(struct geneve_socket *, struct ifnet *);
+static int geneve_socket_create(struct ifnet *, int,
+ const union sockaddr_union *, struct geneve_socket *);
+static int geneve_socket_set_df(struct geneve_socket *, bool);
+
+static struct geneve_socket *
+ geneve_socket_mc_lookup(const union sockaddr_union *);
+static int geneve_sockaddr_mc_info_match(
+ const struct geneve_socket_mc_info *,
+ const union sockaddr_union *,
+ const union sockaddr_union *, int);
+static int geneve_socket_mc_join_group(struct geneve_socket *,
+ const union sockaddr_union *, const union sockaddr_union *,
+ int *, union sockaddr_union *);
+static int geneve_socket_mc_leave_group(struct geneve_socket *,
+ const union sockaddr_union *,
+ const union sockaddr_union *, int);
+static int geneve_socket_mc_add_group(struct geneve_socket *,
+ const union sockaddr_union *,
+ const union sockaddr_union *, int, int *);
+static void geneve_socket_mc_release_group_by_idx(struct geneve_socket *,
+ int);
+
+static struct geneve_softc *
+ geneve_socket_lookup_softc_locked(struct geneve_socket *,
+ uint32_t);
+static struct geneve_softc *
+ geneve_socket_lookup_softc(struct geneve_socket *, uint32_t);
+static int geneve_socket_insert_softc(struct geneve_socket *,
+ struct geneve_softc *);
+static void geneve_socket_remove_softc(struct geneve_socket *,
+ struct geneve_softc *);
+
+static struct ifnet *
+ geneve_multicast_if_ref(struct geneve_softc *, uint32_t);
+static void geneve_free_multicast(struct geneve_softc *);
+static int geneve_setup_multicast_interface(struct geneve_softc *);
+
+static int geneve_setup_multicast(struct geneve_softc *);
+static int geneve_setup_socket(struct geneve_softc *);
+static void geneve_setup_interface_hdrlen(struct geneve_softc *);
+static int geneve_valid_init_config(struct geneve_softc *);
+static void geneve_init_complete(struct geneve_softc *);
+static void geneve_init(void *);
+static void geneve_release(struct geneve_softc *);
+static void geneve_teardown_wait(struct geneve_softc *);
+static void geneve_teardown_complete(struct geneve_softc *);
+static void geneve_teardown_locked(struct geneve_softc *);
+static void geneve_teardown(struct geneve_softc *);
+static void geneve_timer(void *);
+
+static int geneve_set_vni(struct geneve_softc *, uint32_t);
+static int geneve_set_local_addr(struct geneve_softc *, union sockaddr_union *);
+static int geneve_set_remote_addr(struct geneve_softc *, union sockaddr_union *);
+static int geneve_set_local_port(struct geneve_softc *, uint16_t);
+static int geneve_set_remote_port(struct geneve_softc *, uint16_t);
+static int geneve_set_port_range(struct geneve_softc *, uint16_t, uint16_t);
+static int geneve_set_df(struct geneve_softc *sc, enum ifla_geneve_df df);
+static int geneve_set_ttl(struct geneve_softc *, uint8_t);
+static int geneve_set_ttl_inherit(struct geneve_softc *, bool);
+static int geneve_set_dscp_inherit(struct geneve_softc *, bool);
+static int geneve_set_collect_metadata(struct geneve_softc *, bool);
+static int geneve_set_learn(struct geneve_softc *, bool);
+static int geneve_set_ftable_timeout(struct geneve_softc *, uint32_t);
+static int geneve_set_ftable_max(struct geneve_softc *, uint32_t);
+static int geneve_set_mc_if(struct geneve_softc * , char *);
+static int geneve_flush_ftable(struct geneve_softc *, bool);
+static uint16_t geneve_get_local_port(struct geneve_softc *);
+static uint16_t geneve_get_remote_port(struct geneve_softc *);
+
+static int geneve_set_vni_nl(struct geneve_softc *, struct nl_pstate *,
+ uint32_t);
+static int geneve_set_local_addr_nl(struct geneve_softc *, struct nl_pstate *,
+ struct sockaddr *);
+static int geneve_set_remote_addr_nl(struct geneve_softc *, struct nl_pstate *,
+ struct sockaddr *);
+static int geneve_set_local_port_nl(struct geneve_softc *, struct nl_pstate *,
+ uint16_t);
+static int geneve_set_remote_port_nl(struct geneve_softc *, struct nl_pstate *,
+ uint16_t);
+static int geneve_set_port_range_nl(struct geneve_softc *, struct nl_pstate *,
+ struct ifla_geneve_port_range);
+static int geneve_set_df_nl(struct geneve_softc *, struct nl_pstate *,
+ enum ifla_geneve_df);
+static int geneve_set_ttl_nl(struct geneve_softc *, struct nl_pstate *,
+ uint8_t);
+static int geneve_set_ttl_inherit_nl(struct geneve_softc *, struct nl_pstate *,
+ bool);
+static int geneve_set_dscp_inherit_nl(struct geneve_softc *, struct nl_pstate *,
+ bool);
+static int geneve_set_collect_metadata_nl(struct geneve_softc *,
+ struct nl_pstate *, bool);
+static int geneve_set_learn_nl(struct geneve_softc *, struct nl_pstate *,
+ bool);
+static int geneve_set_ftable_max_nl(struct geneve_softc *, struct nl_pstate *,
+ uint32_t);
+static int geneve_set_ftable_timeout_nl(struct geneve_softc *,
+ struct nl_pstate *, uint32_t);
+static int geneve_set_mc_if_nl(struct geneve_softc *, struct nl_pstate *,
+ char *);
+static int geneve_flush_ftable_nl(struct geneve_softc *, struct nl_pstate *,
+ bool);
+static void geneve_get_local_addr_nl(struct geneve_softc *, struct nl_writer *);
+static void geneve_get_remote_addr_nl(struct geneve_softc *, struct nl_writer *);
+
+static int geneve_set_vni_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_local_addr_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_remote_addr_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_local_port_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_remote_port_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_port_range_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_df_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_ttl_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_ttl_inherit_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_dscp_inherit_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_learn_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_ftable_timeout_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_ftable_max_nvl(struct geneve_softc *, nvlist_t *);
+static int geneve_set_mc_if_nvl(struct geneve_softc * , nvlist_t *);
+static int geneve_flush_ftable_nvl(struct geneve_softc *, nvlist_t *);
+
+static int geneve_ioctl_get_nvl(struct geneve_softc *, struct ifreq *);
+static int geneve_ioctl_set_nvl(struct geneve_softc *, struct ifreq *);
+static int geneve_ioctl_ifflags(struct geneve_softc *);
+static int geneve_ioctl(struct ifnet *, u_long, caddr_t);
+
+#if defined(INET) || defined(INET6)
+static uint16_t geneve_pick_source_port(struct geneve_softc *, struct mbuf *);
+static void geneve_encap_header(struct geneve_softc *, struct mbuf *,
+ int, uint16_t, uint16_t, uint16_t);
+#endif
+static uint16_t geneve_get_ethertype(struct mbuf *);
+static int geneve_inherit_l3_hdr(struct mbuf *, struct geneve_softc *,
+ uint16_t, uint8_t *, uint8_t *, u_short *);
+static int geneve_encap4(struct geneve_softc *,
+ const union sockaddr_union *, struct mbuf *);
+static int geneve_encap6(struct geneve_softc *,
+ const union sockaddr_union *, struct mbuf *);
+static int geneve_transmit(struct ifnet *, struct mbuf *);
+static void geneve_qflush(struct ifnet *);
+static int geneve_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, struct route *);
+static uint32_t geneve_map_etype_to_af(uint32_t);
+static bool geneve_udp_input(struct mbuf *, int, struct inpcb *,
+ const struct sockaddr *, void *);
+static int geneve_input_ether(struct geneve_softc *, struct mbuf **,
+ const struct sockaddr *, struct geneve_pkt_info *);
+static int geneve_input_inherit(struct geneve_softc *,
+ struct mbuf **, int, struct geneve_pkt_info *);
+static int geneve_next_option(struct geneve_socket *, struct genevehdr *,
+ struct mbuf **);
+static void geneve_input_csum(struct mbuf *m, struct ifnet *ifp,
+ counter_u64_t rxcsum);
+
+static void geneve_stats_alloc(struct geneve_softc *);
+static void geneve_stats_free(struct geneve_softc *);
+static void geneve_set_default_config(struct geneve_softc *);
+static int geneve_set_reqcap(struct geneve_softc *, struct ifnet *, int,
+ int);
+static void geneve_set_hwcaps(struct geneve_softc *);
+static int geneve_clone_create(struct if_clone *, char *, size_t,
+ struct ifc_data *, struct ifnet **);
+static int geneve_clone_destroy(struct if_clone *, struct ifnet *,
+ uint32_t);
+static int geneve_clone_create_nl(struct if_clone *, char *, size_t,
+ struct ifc_data_nl *);
+static int geneve_clone_modify_nl(struct ifnet *, struct ifc_data_nl *);
+static void geneve_clone_dump_nl(struct ifnet *, struct nl_writer *);
+
+static uint32_t geneve_mac_hash(struct geneve_softc *, const uint8_t *);
+static int geneve_media_change(struct ifnet *);
+static void geneve_media_status(struct ifnet *, struct ifmediareq *);
+
+static int geneve_sockaddr_cmp(const union sockaddr_union *,
+ const struct sockaddr *);
+static void geneve_sockaddr_copy(union sockaddr_union *,
+ const struct sockaddr *);
+static int geneve_sockaddr_in_equal(const union sockaddr_union *,
+ const struct sockaddr *);
+static void geneve_sockaddr_in_copy(union sockaddr_union *,
+ const struct sockaddr *);
+static int geneve_sockaddr_supported(const union sockaddr_union *, int);
+static int geneve_sockaddr_in_any(const union sockaddr_union *);
+static int geneve_sockaddr_in6_embedscope(struct sockaddr_in6 *);
+
+static int geneve_can_change_config(struct geneve_softc *);
+static int geneve_check_proto(uint16_t);
+static int geneve_check_multicast_addr(const union sockaddr_union *);
+static int geneve_check_sockaddr(const union sockaddr_union *, const int);
+static int geneve_check_sockaddr_in(const union sockaddr_union *, const int);
+
+static int geneve_prison_remove(void *, void *);
+static void vnet_geneve_load(void);
+static void vnet_geneve_unload(void);
+static void geneve_module_init(void);
+static void geneve_module_deinit(void);
+static int geneve_modevent(module_t, int, void *);
+
+
+static const char geneve_name[] = "geneve";
+static MALLOC_DEFINE(M_GENEVE, geneve_name,
+ "Generic Network Virtualization Encapsulation Interface");
+#define MTAG_GENEVE_LOOP 0x93d66dc0 /* geneve mtag */
+
+VNET_DEFINE_STATIC(struct if_clone *, geneve_cloner);
+#define V_geneve_cloner VNET(geneve_cloner)
+
+static struct mtx geneve_list_mtx;
+#define GENEVE_LIST_LOCK() mtx_lock(&geneve_list_mtx)
+#define GENEVE_LIST_UNLOCK() mtx_unlock(&geneve_list_mtx)
+
+static LIST_HEAD(, geneve_socket) geneve_socket_list = LIST_HEAD_INITIALIZER(geneve_socket_list);
+
+/* Default maximum number of addresses in the forwarding table. */
+#ifndef GENEVE_FTABLE_MAX
+#define GENEVE_FTABLE_MAX 2000
+#endif
+
+/* Timeout (in seconds) of addresses learned in the forwarding table. */
+#ifndef GENEVE_FTABLE_TIMEOUT
+#define GENEVE_FTABLE_TIMEOUT (20 * 60)
+#endif
+
+/*
+ * Maximum timeout (in seconds) of addresses learned in the forwarding
+ * table.
+ */
+#ifndef GENEVE_FTABLE_MAX_TIMEOUT
+#define GENEVE_FTABLE_MAX_TIMEOUT (60 * 60 * 24)
+#endif
+
+/* Number of seconds between pruning attempts of the forwarding table. */
+#ifndef GENEVE_FTABLE_PRUNE
+#define GENEVE_FTABLE_PRUNE (5 * 60)
+#endif
+
+static int geneve_ftable_prune_period = GENEVE_FTABLE_PRUNE;
+
+
+#define _OUT(_field) offsetof(struct nl_parsed_geneve, _field)
+/*
+ * Attribute table behind geneve_create_parser: only IFLA_GENEVE_PROTOCOL is
+ * recognised, stored as a 16-bit value in nl_parsed_geneve.ifla_proto.
+ */
+static const struct nlattr_parser nla_p_geneve_create[] = {
+ { .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 },
+};
+#undef _OUT
+NL_DECLARE_ATTR_PARSER(geneve_create_parser, nla_p_geneve_create);
+
+#define _OUT(_field) offsetof(struct nl_parsed_geneve, _field)
+/*
+ * Attribute table behind geneve_modify_parser. Each entry names the
+ * attribute type, the offset of the destination member in
+ * struct nl_parsed_geneve and the callback that decodes the payload.
+ *
+ * NOTE(review): the width stored by each callback must match the type of
+ * the destination member (e.g. a nlattr_get_uint8 target should be one byte
+ * wide) -- re-check whenever either side changes.
+ * NOTE(review): netlink attribute tables are presumably required to be
+ * sorted by .type; confirm this order against the IFLA_GENEVE_* values.
+ */
+static const struct nlattr_parser nla_p_geneve[] = {
+ { .type = IFLA_GENEVE_ID, .off = _OUT(ifla_vni), .cb = nlattr_get_uint32 },
+ { .type = IFLA_GENEVE_PROTOCOL, .off = _OUT(ifla_proto), .cb = nlattr_get_uint16 },
+ { .type = IFLA_GENEVE_LOCAL, .off = _OUT(ifla_local), .cb = nlattr_get_ip },
+ { .type = IFLA_GENEVE_REMOTE, .off = _OUT(ifla_remote), .cb = nlattr_get_ip },
+ { .type = IFLA_GENEVE_LOCAL_PORT, .off = _OUT(ifla_local_port), .cb = nlattr_get_uint16 },
+ { .type = IFLA_GENEVE_PORT, .off = _OUT(ifla_remote_port), .cb = nlattr_get_uint16 },
+ { .type = IFLA_GENEVE_PORT_RANGE, .off = _OUT(ifla_port_range),
+ .arg = (void *)sizeof(struct ifla_geneve_port_range), .cb = nlattr_get_bytes },
+ { .type = IFLA_GENEVE_DF, .off = _OUT(ifla_df), .cb = nlattr_get_uint8 },
+ { .type = IFLA_GENEVE_TTL, .off = _OUT(ifla_ttl), .cb = nlattr_get_uint8 },
+ { .type = IFLA_GENEVE_TTL_INHERIT, .off = _OUT(ifla_ttl_inherit), .cb = nlattr_get_bool },
+ { .type = IFLA_GENEVE_DSCP_INHERIT, .off = _OUT(ifla_dscp_inherit), .cb = nlattr_get_bool },
+ { .type = IFLA_GENEVE_COLLECT_METADATA, .off = _OUT(ifla_external), .cb = nlattr_get_bool },
+ { .type = IFLA_GENEVE_FTABLE_LEARN, .off = _OUT(ifla_ftable_learn), .cb = nlattr_get_bool },
+ { .type = IFLA_GENEVE_FTABLE_FLUSH, .off = _OUT(ifla_ftable_flush), .cb = nlattr_get_bool },
+ { .type = IFLA_GENEVE_FTABLE_MAX, .off = _OUT(ifla_ftable_max), .cb = nlattr_get_uint32 },
+ { .type = IFLA_GENEVE_FTABLE_TIMEOUT, .off = _OUT(ifla_ftable_timeout), .cb = nlattr_get_uint32 },
+ { .type = IFLA_GENEVE_MC_IFNAME, .off = _OUT(ifla_mc_ifname), .cb = nlattr_get_string },
+};
+#undef _OUT
+NL_DECLARE_ATTR_PARSER(geneve_modify_parser, nla_p_geneve);
+
+static const struct nlhdr_parser *all_parsers[] = {
+ &geneve_create_parser, &geneve_modify_parser,
+};
+
+/*
+ * Compare two Ethernet addresses byte by byte.
+ *
+ * Returns 0 when all ETHER_ADDR_LEN bytes match; otherwise returns
+ * a[i] - b[i] for the first differing byte, so the sign of the result
+ * orders a relative to b.
+ */
+static int
+geneve_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
+{
+	int idx, diff;
+
+	for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
+		diff = (int)a[idx] - (int)b[idx];
+		if (diff != 0)
+			return (diff);
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate the forwarding table of sc (GENEVE_SC_FTABLE_SIZE empty buckets)
+ * and pick a random hash key for it.  The allocation uses M_WAITOK.
+ */
+static void
+geneve_ftable_init(struct geneve_softc *sc)
+{
+	struct geneve_ftable_head *table;
+	int bucket;
+
+	table = malloc(sizeof(struct geneve_ftable_head) *
+	    GENEVE_SC_FTABLE_SIZE, M_GENEVE, M_ZERO | M_WAITOK);
+	for (bucket = 0; bucket < GENEVE_SC_FTABLE_SIZE; bucket++)
+		LIST_INIT(&table[bucket]);
+
+	sc->gnv_ftable = table;
+	sc->gnv_ftable_hash_key = arc4random();
+}
+
+/*
+ * Release the forwarding table of sc.  Every bucket must already be empty
+ * and the entry count zero; both are asserted.
+ */
+static void
+geneve_ftable_fini(struct geneve_softc *sc)
+{
+	int bucket;
+
+	bucket = 0;
+	while (bucket < GENEVE_SC_FTABLE_SIZE) {
+		KASSERT(LIST_EMPTY(&sc->gnv_ftable[bucket]),
+		    ("%s: geneve %p ftable[%d] not empty", __func__, sc, bucket));
+		bucket++;
+	}
+	MPASS(sc->gnv_ftable_cnt == 0);
+
+	free(sc->gnv_ftable, M_GENEVE);
+	sc->gnv_ftable = NULL;
+}
+
+/*
+ * Remove entries from the forwarding table of sc: every entry when all is
+ * non-zero, otherwise only those flagged dynamic.
+ */
+static void
+geneve_ftable_flush(struct geneve_softc *sc, int all)
+{
+	struct geneve_ftable_head *bucket;
+	struct gnv_ftable_entry *cur, *next;
+	int idx;
+
+	for (idx = 0; idx < GENEVE_SC_FTABLE_SIZE; idx++) {
+		bucket = &sc->gnv_ftable[idx];
+		LIST_FOREACH_SAFE(cur, bucket, gnvfe_hash, next) {
+			if (!all && !GENEVE_FE_IS_DYNAMIC(cur))
+				continue;
+			geneve_ftable_entry_destroy(sc, cur);
+		}
+	}
+}
+
+/*
+ * Destroy every dynamic forwarding entry whose expiry time has been reached
+ * (time_uptime >= gnvfe_expire).  The softc write lock must be held.
+ */
+static void
+geneve_ftable_expire(struct geneve_softc *sc)
+{
+	struct geneve_ftable_head *bucket;
+	struct gnv_ftable_entry *cur, *next;
+	int idx;
+
+	GENEVE_LOCK_WASSERT(sc);
+
+	for (idx = 0; idx < GENEVE_SC_FTABLE_SIZE; idx++) {
+		bucket = &sc->gnv_ftable[idx];
+		LIST_FOREACH_SAFE(cur, bucket, gnvfe_hash, next) {
+			if (!GENEVE_FE_IS_DYNAMIC(cur))
+				continue;
+			if (time_uptime < cur->gnvfe_expire)
+				continue;
+			geneve_ftable_entry_destroy(sc, cur);
+		}
+	}
+}
+
+/*
+ * Record in the forwarding table of sc that mac is reachable through unsa.
+ *
+ * Must be called with the softc lock held, read or write.  When an entry
+ * has to be changed or created and the write lock is not owned, the read
+ * lock (tracked by tracker) is dropped, the write lock is taken and the
+ * lookup is restarted.  The function can therefore return holding the write
+ * lock instead of the read lock; geneve_ftable_learn() copes with that by
+ * releasing through GENEVE_UNLOCK().
+ *
+ * Returns 0 on success (including when nothing had to change), ENOSPC when
+ * the table already holds gnv_ftable_max entries and ENOMEM when a new
+ * entry cannot be allocated.
+ */
+static int
+geneve_ftable_update_locked(struct geneve_softc *sc,
+ const union sockaddr_union *unsa, const uint8_t *mac,
+ struct rm_priotracker *tracker)
+{
+ struct gnv_ftable_entry *fe;
+ int error __unused;
+
+ GENEVE_LOCK_ASSERT(sc);
+
+again:
+ /*
+ * A forwarding entry for this MAC address might already exist. If
+ * so, update it, otherwise create a new one. We may have to upgrade
+ * the lock if we have to change or create an entry.
+ */
+ fe = geneve_ftable_entry_lookup(sc, mac);
+ if (fe != NULL) {
+ /* The expiry is refreshed on every hit, whichever lock is held. */
+ fe->gnvfe_expire = time_uptime + sc->gnv_ftable_timeout;
+
+ /* Non-dynamic entries, and entries already pointing at unsa, stay as is. */
+ if (!GENEVE_FE_IS_DYNAMIC(fe) ||
+ geneve_sockaddr_in_equal(&fe->gnvfe_raddr, &unsa->sa))
+ return (0);
+ if (!GENEVE_LOCK_WOWNED(sc)) {
+ /* Swap read for write lock; the table may change meanwhile, so retry. */
+ GENEVE_RUNLOCK(sc, tracker);
+ GENEVE_WLOCK(sc);
+ sc->gnv_stats.ftable_lock_upgrade_failed++;
+ goto again;
+ }
+ geneve_sockaddr_in_copy(&fe->gnvfe_raddr, &unsa->sa);
+ return (0);
+ }
+
+ /* No entry yet: creating one requires the write lock. */
+ if (!GENEVE_LOCK_WOWNED(sc)) {
+ GENEVE_RUNLOCK(sc, tracker);
+ GENEVE_WLOCK(sc);
+ sc->gnv_stats.ftable_lock_upgrade_failed++;
+ goto again;
+ }
+
+ if (sc->gnv_ftable_cnt >= sc->gnv_ftable_max) {
+ sc->gnv_stats.ftable_nospace++;
+ return (ENOSPC);
+ }
+
+ fe = geneve_ftable_entry_alloc();
+ if (fe == NULL)
+ return (ENOMEM);
+
+ geneve_ftable_entry_init(sc, fe, mac, &unsa->sa, GENEVE_FE_FLAG_DYNAMIC);
+
+ /* The prior lookup failed, so the insert should not. */
+ error = geneve_ftable_entry_insert(sc, fe);
+ MPASS(error == 0);
+
+ return (0);
+}
+
+/*
+ * Learn that mac is reachable through the remote address sa.
+ *
+ * The port of sa is replaced by the port of the default destination
+ * address, since the remote host may pick its source port at random.  For
+ * IPv6 the scope is embedded first.  Returns 0 or an errno value from the
+ * scope embedding or from geneve_ftable_update_locked().
+ */
+static int
+geneve_ftable_learn(struct geneve_softc *sc, const struct sockaddr *sa,
+    const uint8_t *mac)
+{
+	struct rm_priotracker tracker;
+	union sockaddr_union raddr;
+	int error;
+
+	geneve_sockaddr_copy(&raddr, sa);
+	raddr.sin.sin_port = sc->gnv_dst_addr.sin.sin_port;
+
+	if (raddr.sa.sa_family == AF_INET6 &&
+	    (error = geneve_sockaddr_in6_embedscope(&raddr.sin6)) != 0)
+		return (error);
+
+	/* The callee may swap the read lock for the write lock. */
+	GENEVE_RLOCK(sc, &tracker);
+	error = geneve_ftable_update_locked(sc, &raddr, mac, &tracker);
+	GENEVE_UNLOCK(sc, &tracker);
+
+	return (error);
+}
+
+/*
+ * Allocate a zeroed forwarding entry without sleeping (M_NOWAIT).
+ * Returns NULL when no memory is available.
+ */
+static struct gnv_ftable_entry *
+geneve_ftable_entry_alloc(void)
+{
+
+	return (malloc(sizeof(struct gnv_ftable_entry), M_GENEVE,
+	    M_ZERO | M_NOWAIT));
+}
+
+/* Return the memory of a forwarding entry to the M_GENEVE malloc type. */
+static void
+geneve_ftable_entry_free(struct gnv_ftable_entry *fe)
+{
+	free(fe, M_GENEVE);
+}
+
+/*
+ * Fill in a forwarding entry: key it by mac, point it at the remote
+ * address sa, store flags and set the expiry to now plus the interface's
+ * forwarding table timeout.
+ */
+static void
+geneve_ftable_entry_init(struct geneve_softc *sc, struct gnv_ftable_entry *fe,
+    const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
+{
+	memcpy(fe->gnvfe_mac, mac, ETHER_ADDR_LEN);
+	geneve_sockaddr_copy(&fe->gnvfe_raddr, sa);
+	fe->gnvfe_flags = flags;
+	fe->gnvfe_expire = time_uptime + sc->gnv_ftable_timeout;
+}
+
+/*
+ * Unlink fe from its hash bucket, account for the removal in the entry
+ * count of sc and free the entry.
+ */
+static void
+geneve_ftable_entry_destroy(struct geneve_softc *sc,
+    struct gnv_ftable_entry *fe)
+{
+	LIST_REMOVE(fe, gnvfe_hash);
+	sc->gnv_ftable_cnt--;
+	geneve_ftable_entry_free(fe);
+}
+
+/*
+ * Insert fe into the hash bucket selected by its MAC address.
+ *
+ * Buckets are kept ordered by geneve_ftable_addr_cmp(): fe is placed in
+ * front of the first entry it compares greater than, or at the end of the
+ * bucket when there is none.  Returns EEXIST, without modifying anything,
+ * when an entry with the same MAC is already present; returns 0 and bumps
+ * the entry count otherwise.  The softc write lock must be held.
+ */
+static int
+geneve_ftable_entry_insert(struct geneve_softc *sc,
+    struct gnv_ftable_entry *fe)
+{
+	struct geneve_ftable_head *bucket;
+	struct gnv_ftable_entry *cur, *tail;
+	int order;
+
+	GENEVE_LOCK_WASSERT(sc);
+	bucket = &sc->gnv_ftable[GENEVE_SC_FTABLE_HASH(sc, fe->gnvfe_mac)];
+
+	tail = NULL;
+	LIST_FOREACH(cur, bucket, gnvfe_hash) {
+		order = geneve_ftable_addr_cmp(fe->gnvfe_mac, cur->gnvfe_mac);
+		if (order == 0)
+			return (EEXIST);
+		if (order > 0)
+			break;
+		tail = cur;
+	}
+
+	/* cur is NULL here when the walk ran off the end of the bucket. */
+	if (cur != NULL)
+		LIST_INSERT_BEFORE(cur, fe, gnvfe_hash);
+	else if (tail != NULL)
+		LIST_INSERT_AFTER(tail, fe, gnvfe_hash);
+	else
+		LIST_INSERT_HEAD(bucket, fe, gnvfe_hash);
+	sc->gnv_ftable_cnt++;
+
+	return (0);
+}
+
+/*
+ * Find the forwarding table entry for a MAC address.  The walk stops as
+ * soon as the wanted address compares greater than the current entry.
+ * Returns the entry, or NULL when there is no match.
+ */
+static struct gnv_ftable_entry *
+geneve_ftable_entry_lookup(struct geneve_softc *sc, const uint8_t *mac)
+{
+	struct gnv_ftable_entry *cur;
+	uint32_t bucket;
+	int order;
+
+	GENEVE_LOCK_ASSERT(sc);
+	bucket = GENEVE_SC_FTABLE_HASH(sc, mac);
+
+	LIST_FOREACH(cur, &sc->gnv_ftable[bucket], gnvfe_hash) {
+		order = geneve_ftable_addr_cmp(mac, cur->gnvfe_mac);
+		if (order > 0)
+			return (NULL);
+		if (order == 0)
+			return (cur);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Allocate and initialise a geneve_socket bound to the given vnet: zeroed
+ * memory, an rmlock, a reference count of zero and empty VNI hash chains.
+ * No network socket is created here.
+ */
+static struct geneve_socket *
+geneve_socket_alloc(struct vnet *vnet)
+{
+	struct geneve_socket *so;
+	int bucket;
+
+	so = malloc(sizeof(*so), M_GENEVE, M_WAITOK | M_ZERO);
+	so->gnvso_vnet = vnet;
+	rm_init(&so->gnvso_lock, "genevesorm");
+	refcount_init(&so->gnvso_refcnt, 0);
+
+	bucket = 0;
+	while (bucket < GENEVE_SO_VNI_HASH_SIZE) {
+		LIST_INIT(&so->gnvso_vni_hash[bucket]);
+		bucket++;
+	}
+
+	return (so);
+}
+
+/*
+ * Tear down a geneve_socket: close the network socket if one is attached,
+ * destroy the rmlock and free the structure.
+ */
+static void
+geneve_socket_destroy(struct geneve_socket *gnvso)
+{
+	struct socket *sock;
+
+	if (gnvso->gnvso_sock != NULL) {
+		/* Detach the pointer before closing the socket. */
+		sock = gnvso->gnvso_sock;
+		gnvso->gnvso_sock = NULL;
+		soclose(sock);
+	}
+
+	rm_destroy(&gnvso->gnvso_lock);
+	free(gnvso, M_GENEVE);
+}
+
+/*
+ * Drop one reference on a geneve_socket.  When GENEVE_SO_RELEASE()
+ * reports a non-zero result the socket is unlinked from the global list
+ * under the list lock and destroyed after the lock is dropped.
+ */
+static void
+geneve_socket_release(struct geneve_socket *gnvso)
+{
+	int last;
+
+	GENEVE_LIST_LOCK();
+	last = GENEVE_SO_RELEASE(gnvso);
+	if (last != 0)
+		LIST_REMOVE(gnvso, gnvso_entry);
+	GENEVE_LIST_UNLOCK();
+
+	if (last == 0)
+		return;
+
+	geneve_socket_destroy(gnvso);
+}
+
+/*
+ * Find an existing geneve_socket that is bound to the given local address
+ * and belongs to the current vnet.
+ *
+ * Both conditions must hold: a socket that merely lives in the current
+ * vnet but is bound to a different address/port, or one bound to the same
+ * address in a different vnet, must not be shared.
+ *
+ * Returns the socket with an additional reference acquired, or NULL when
+ * no socket matches.
+ */
+static struct geneve_socket *
+geneve_socket_lookup(union sockaddr_union *unsa)
+{
+	struct geneve_socket *gnvso;
+
+	GENEVE_LIST_LOCK();
+	LIST_FOREACH(gnvso, &geneve_socket_list, gnvso_entry) {
+		if ((geneve_sockaddr_cmp(&gnvso->gnvso_laddr, &unsa->sa) == 0) &&
+		    (gnvso->gnvso_vnet == curvnet)) {
+			GENEVE_SO_ACQUIRE(gnvso);
+			break;
+		}
+	}
+	GENEVE_LIST_UNLOCK();
+
+	/* LIST_FOREACH leaves gnvso NULL when the list is exhausted. */
+	return (gnvso);
+}
+
+/*
+ * Take a reference on the socket and put it at the head of the global
+ * socket list, all under the list lock.
+ */
+static void
+geneve_socket_insert(struct geneve_socket *gnvso)
+{
+
+	GENEVE_LIST_LOCK();
+	GENEVE_SO_ACQUIRE(gnvso);
+	LIST_INSERT_HEAD(&geneve_socket_list, gnvso, gnvso_entry);
+	GENEVE_LIST_UNLOCK();
+}
+
+/*
+ * Create the UDP socket for a geneve_socket (address family taken from
+ * its local address) and register geneve_udp_input as its kernel
+ * tunneling input function.  Failures are logged on ifp and returned.
+ */
+static int
+geneve_socket_init(struct geneve_socket *gnvso, struct ifnet *ifp)
+{
+	struct thread *cur;
+	int rc;
+
+	cur = curthread;
+	rc = socreate(gnvso->gnvso_laddr.sa.sa_family, &gnvso->gnvso_sock,
+	    SOCK_DGRAM, IPPROTO_UDP, cur->td_ucred, cur);
+	if (rc != 0) {
+		if_printf(ifp, "cannot create socket: %d\n", rc);
+		return (rc);
+	}
+
+	/*
+	 * XXX: When Geneve traffic shares an IP address with other UDP
+	 * listeners, tunnel endpoints SHOULD have a mechanism that steers
+	 * ICMP return traffic caused by network errors to the correct
+	 * listener.  Unfortunately, udp_set_kernel_tunneling does not
+	 * handle icmp errors from transit devices other than specified
+	 * source.
+	 */
+	rc = udp_set_kernel_tunneling(gnvso->gnvso_sock, geneve_udp_input,
+	    NULL, gnvso);
+	if (rc != 0)
+		if_printf(ifp, "cannot set tunneling function: %d\n", rc);
+
+	return (rc);
+}
+
+/*
+ * Bind the geneve_socket's network socket to a copy of its recorded local
+ * address.  Returns the sobind() result.  ifp is not used.
+ */
+static int
+geneve_socket_bind(struct geneve_socket *gnvso, struct ifnet *ifp)
+{
+	union sockaddr_union bindaddr;
+
+	bindaddr = gnvso->gnvso_laddr;
+
+	return (sobind(gnvso->gnvso_sock, &bindaddr.sa, curthread));
+}
+
+/*
+ * Set up a freshly allocated geneve_socket: record its local address,
+ * create and bind the UDP socket, and insert it into the global list.
+ *
+ * ifp is used for log messages, multicast is non-zero when the socket
+ * will be used for a multicast destination, and unsa is the requested
+ * local address.  Returns 0 or the error from socket creation/binding;
+ * on error gnvso is not inserted into the list and is not freed here.
+ */
+static int
+geneve_socket_create(struct ifnet *ifp, int multicast,
+ const union sockaddr_union *unsa, struct geneve_socket *gnvso)
+{
+ union sockaddr_union laddr;
+ int error;
+
+ laddr = *unsa;
+
+ /*
+ * If this socket will be multicast, then only the local port
+ * must be specified when binding.
+ */
+ if (multicast != 0) {
+ if (laddr.sa.sa_family == AF_INET)
+ laddr.sin.sin_addr.s_addr = INADDR_ANY;
+#ifdef INET6
+ else
+ laddr.sin6.sin6_addr = in6addr_any;
+#endif
+ }
+ /* The (possibly wildcarded) address is what the socket is bound to. */
+ gnvso->gnvso_laddr = laddr;
+
+ error = geneve_socket_init(gnvso, ifp);
+ if (error)
+ return (error);
+
+ error = geneve_socket_bind(gnvso, ifp);
+ if (error)
+ return (error);
+
+ /*
+ * There is a small window between the bind completing and
+ * inserting the socket, so that a concurrent create may fail.
+ * Let's not worry about that for now.
+ */
+ if_printf(ifp, "new geneve socket inserted to socket list\n");
+ geneve_socket_insert(gnvso);
+
+ return (0);
+}
+
+/*
+ * Look up an existing socket for multicast use: the address part of unsa
+ * is replaced by the wildcard address of its family before the regular
+ * socket lookup is performed.
+ */
+static struct geneve_socket *
+geneve_socket_mc_lookup(const union sockaddr_union *unsa)
+{
+	union sockaddr_union wild;
+
+	wild = *unsa;
+
+	if (wild.sa.sa_family == AF_INET)
+		wild.sin.sin_addr.s_addr = INADDR_ANY;
+#ifdef INET6
+	else
+		wild.sin6.sin6_addr = in6addr_any;
+#endif
+
+	return (geneve_socket_lookup(&wild));
+}
+
+/*
+ * Decide whether a multicast slot matches a (group, local, ifidx) request.
+ * A wildcard local address and an ifidx of zero match any slot value.
+ * Returns 1 on a match and 0 otherwise.
+ */
+static int
+geneve_sockaddr_mc_info_match(const struct geneve_socket_mc_info *mc,
+    const union sockaddr_union *group, const union sockaddr_union *local,
+    int ifidx)
+{
+	bool local_ok;
+
+	local_ok = geneve_sockaddr_in_any(local) ||
+	    geneve_sockaddr_in_equal(&mc->gnvsomc_saddr, &local->sa);
+	if (!local_ok)
+		return (0);
+
+	if (!geneve_sockaddr_in_equal(&mc->gnvsomc_gaddr, &group->sa))
+		return (0);
+
+	return (ifidx == 0 || ifidx == mc->gnvsomc_ifidx);
+}
+
+/*
+ * Make the socket join a multicast group.
+ *
+ * For an IPv4 group the membership is requested with IP_ADD_MEMBERSHIP
+ * using the local address as the interface selector; for an IPv6 group
+ * IPV6_JOIN_GROUP is used with *ifidx as the interface index.  *source is
+ * set to a copy of *local.  *ifidx is only read in this function.
+ *
+ * Returns 0, the sosetopt() error, or EAFNOSUPPORT for any other group
+ * address family.
+ */
+static int
+geneve_socket_mc_join_group(struct geneve_socket *gnvso,
+ const union sockaddr_union *group, const union sockaddr_union *local,
+ int *ifidx, union sockaddr_union *source)
+{
+ struct sockopt sopt;
+ int error;
+
+ *source = *local;
+
+ if (group->sa.sa_family == AF_INET) {
+ struct ip_mreq mreq;
+
+ mreq.imr_multiaddr = group->sin.sin_addr;
+ mreq.imr_interface = local->sin.sin_addr;
+
+ memset(&sopt, 0, sizeof(sopt));
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_IP;
+ sopt.sopt_name = IP_ADD_MEMBERSHIP;
+ sopt.sopt_val = &mreq;
+ sopt.sopt_valsize = sizeof(mreq);
+ error = sosetopt(gnvso->gnvso_sock, &sopt);
+ if (error)
+ return (error);
+
+ /*
+ * BMV: Ideally, there would be a formal way for us to get
+ * the local interface that was selected based on the
+ * imr_interface address. We could then update *ifidx so
+ * geneve_sockaddr_mc_info_match() would return a match for
+ * later creates that explicitly set the multicast interface.
+ *
+ * If we really need to, we can of course look in the INP's
+ * membership list:
+ * sotoinpcb(gnvso->gnvso_sock)->inp_moptions->
+ * imo_head[]->imf_inm->inm_ifp
+ * similarly to imo_match_group().
+ */
+ source->sin.sin_addr = local->sin.sin_addr;
+
+ } else if (group->sa.sa_family == AF_INET6) {
+ struct ipv6_mreq mreq;
+
+ mreq.ipv6mr_multiaddr = group->sin6.sin6_addr;
+ mreq.ipv6mr_interface = *ifidx;
+
+ memset(&sopt, 0, sizeof(sopt));
+ sopt.sopt_dir = SOPT_SET;
+ sopt.sopt_level = IPPROTO_IPV6;
+ sopt.sopt_name = IPV6_JOIN_GROUP;
+ sopt.sopt_val = &mreq;
+ sopt.sopt_valsize = sizeof(mreq);
+ error = sosetopt(gnvso->gnvso_sock, &sopt);
+ if (error)
+ return (error);
+
+ /*
+ * BMV: As with IPv4, we would really like to know what
+ * interface in6p_lookup_mcast_ifp() selected.
+ */
+ } else
+ error = EAFNOSUPPORT;
+
+ return (error);
+}
+
+/*
+ * Make the socket leave a multicast group.  IPv4 groups are dropped with
+ * IP_DROP_MEMBERSHIP using the source address as the interface selector;
+ * IPv6 groups with IPV6_LEAVE_GROUP using ifidx.  Returns the sosetopt()
+ * result, or EAFNOSUPPORT for any other group address family.
+ */
+static int
+geneve_socket_mc_leave_group(struct geneve_socket *gnvso,
+    const union sockaddr_union *group, const union sockaddr_union *source,
+    int ifidx)
+{
+	struct sockopt sopt;
+	struct ip_mreq mreq4;
+	struct ipv6_mreq mreq6;
+
+	memset(&sopt, 0, sizeof(sopt));
+	sopt.sopt_dir = SOPT_SET;
+
+	switch (group->sa.sa_family) {
+	case AF_INET:
+		mreq4.imr_multiaddr = group->sin.sin_addr;
+		mreq4.imr_interface = source->sin.sin_addr;
+
+		sopt.sopt_level = IPPROTO_IP;
+		sopt.sopt_name = IP_DROP_MEMBERSHIP;
+		sopt.sopt_val = &mreq4;
+		sopt.sopt_valsize = sizeof(mreq4);
+		break;
+	case AF_INET6:
+		mreq6.ipv6mr_multiaddr = group->sin6.sin6_addr;
+		mreq6.ipv6mr_interface = ifidx;
+
+		sopt.sopt_level = IPPROTO_IPV6;
+		sopt.sopt_name = IPV6_LEAVE_GROUP;
+		sopt.sopt_val = &mreq6;
+		sopt.sopt_valsize = sizeof(mreq6);
+		break;
+	default:
+		return (EAFNOSUPPORT);
+	}
+
+	return (sosetopt(gnvso->gnvso_sock, &sopt));
+}
+
+/*
+ * Register one user of a multicast group on the socket, joining the group
+ * if no existing slot already covers it.
+ *
+ * On success returns 0 and stores the slot number in *idx.  Returns ENOSPC
+ * when no slot is free, or the error from joining the group.
+ */
+static int
+geneve_socket_mc_add_group(struct geneve_socket *gnvso,
+ const union sockaddr_union *group, const union sockaddr_union *local,
+ int ifidx, int *idx)
+{
+ union sockaddr_union source;
+ struct geneve_socket_mc_info *mc;
+ int i, empty, error;
+
+ /*
+ * Within a socket, the same multicast group may be used by multiple
+ * interfaces, each with a different network identifier. But a socket
+ * may only join a multicast group once, so keep track of the users
+ * here.
+ */
+
+ /* Pass 1: look for a slot that already matches; count free slots. */
+ GENEVE_SO_WLOCK(gnvso);
+ for (empty = 0, i = 0; i < GENEVE_SO_MC_MAX_GROUPS; i++) {
+ mc = &gnvso->gnvso_mc[i];
+
+ /* A slot whose group address family is AF_UNSPEC is unused. */
+ if (mc->gnvsomc_gaddr.sa.sa_family == AF_UNSPEC) {
+ empty++;
+ continue;
+ }
+ /* Jumps to "out" with the socket write lock still held. */
+ if (geneve_sockaddr_mc_info_match(mc, group, local, ifidx))
+ goto out;
+ }
+ GENEVE_SO_WUNLOCK(gnvso);
+
+ if (empty == 0)
+ return (ENOSPC);
+
+ /* The join itself is performed without the socket lock held. */
+ error = geneve_socket_mc_join_group(gnvso, group, local, &ifidx, &source);
+ if (error)
+ return (error);
+
+ /*
+ * Pass 2: the lock was dropped above, so scan again for a free slot
+ * and claim the first one found.
+ */
+ GENEVE_SO_WLOCK(gnvso);
+ for (i = 0; i < GENEVE_SO_MC_MAX_GROUPS; i++) {
+ mc = &gnvso->gnvso_mc[i];
+
+ if (mc->gnvsomc_gaddr.sa.sa_family == AF_UNSPEC) {
+ geneve_sockaddr_copy(&mc->gnvsomc_gaddr, &group->sa);
+ geneve_sockaddr_copy(&mc->gnvsomc_saddr, &source.sa);
+ mc->gnvsomc_ifidx = ifidx;
+ goto out;
+ }
+ }
+ GENEVE_SO_WUNLOCK(gnvso);
+
+ /* No free slot remained: undo the join before reporting ENOSPC. */
+ error = geneve_socket_mc_leave_group(gnvso, group, &source, ifidx);
+ MPASS(error == 0);
+
+ return (ENOSPC);
+
+out:
+ /* Reached with the socket write lock held and mc/i naming the slot. */
+ mc->gnvsomc_users++;
+ GENEVE_SO_WUNLOCK(gnvso);
+
+ *idx = i;
+
+ return (0);
+}
+
+/*
+ * Drop one user of multicast slot idx.  When the user count reaches zero
+ * the slot is cleared under the socket write lock and the socket then
+ * leaves the group with the lock released.
+ */
+static void
+geneve_socket_mc_release_group_by_idx(struct geneve_socket *vso, int idx)
+{
+	union sockaddr_union gaddr, saddr;
+	struct geneve_socket_mc_info *slot;
+	int mc_ifidx;
+
+	KASSERT(idx >= 0 && idx < GENEVE_SO_MC_MAX_GROUPS,
+	    ("%s: vso %p idx %d out of bounds", __func__, vso, idx));
+
+	slot = &vso->gnvso_mc[idx];
+
+	GENEVE_SO_WLOCK(vso);
+	if (--slot->gnvsomc_users != 0) {
+		/* Other users remain; keep the membership. */
+		GENEVE_SO_WUNLOCK(vso);
+		return;
+	}
+
+	/* Snapshot the slot before wiping it; the leave runs unlocked. */
+	gaddr = slot->gnvsomc_gaddr;
+	saddr = slot->gnvsomc_saddr;
+	mc_ifidx = slot->gnvsomc_ifidx;
+	memset(slot, 0, sizeof(*slot));
+	GENEVE_SO_WUNLOCK(vso);
+
+	/*
+	 * The socket's membership in this group may already be gone if
+	 * the join went through an interface that has since been detached,
+	 * so the result is deliberately ignored.
+	 */
+	geneve_socket_mc_leave_group(vso, &gaddr, &saddr, mc_ifidx);
+}
+
+/*
+ * Find the softc attached to this socket that has the given VNI and the
+ * socket's vnet.  The socket lock must be held.  A match is returned with
+ * a reference acquired; NULL is returned when nothing matches.
+ */
+static struct geneve_softc *
+geneve_socket_lookup_softc_locked(struct geneve_socket *gnvso, uint32_t vni)
+{
+	struct geneve_softc *cand;
+	uint32_t bucket;
+
+	GENEVE_SO_LOCK_ASSERT(gnvso);
+	bucket = GENEVE_SO_VNI_HASH(vni);
+
+	LIST_FOREACH(cand, &gnvso->gnvso_vni_hash[bucket], gnv_entry) {
+		if (cand->gnv_vni != vni ||
+		    cand->gnv_vnet != gnvso->gnvso_vnet)
+			continue;
+
+		GENEVE_ACQUIRE(cand);
+		return (cand);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Read-locked wrapper around geneve_socket_lookup_softc_locked(): takes
+ * the socket read lock for the duration of the lookup.
+ */
+static struct geneve_softc *
+geneve_socket_lookup_softc(struct geneve_socket *gnvso, uint32_t vni)
+{
+	struct rm_priotracker rmtrack;
+	struct geneve_softc *found;
+
+	GENEVE_SO_RLOCK(gnvso, &rmtrack);
+	found = geneve_socket_lookup_softc_locked(gnvso, vni);
+	GENEVE_SO_RUNLOCK(gnvso, &rmtrack);
+
+	return (found);
+}
+
+/*
+ * Attach a softc to the socket's VNI hash.  Returns EEXIST when a softc
+ * with the same VNI is already attached; otherwise a reference is taken
+ * on sc, it is linked into its hash chain, and 0 is returned.
+ */
+static int
+geneve_socket_insert_softc(struct geneve_socket *gnvso, struct geneve_softc *sc)
+{
+	struct geneve_softc *dup;
+	uint32_t vni, bucket;
+
+	vni = sc->gnv_vni;
+	bucket = GENEVE_SO_VNI_HASH(vni);
+
+	GENEVE_SO_WLOCK(gnvso);
+	dup = geneve_socket_lookup_softc_locked(gnvso, vni);
+	if (dup == NULL) {
+		GENEVE_ACQUIRE(sc);
+		LIST_INSERT_HEAD(&gnvso->gnvso_vni_hash[bucket], sc,
+		    gnv_entry);
+		GENEVE_SO_WUNLOCK(gnvso);
+		return (0);
+	}
+	GENEVE_SO_WUNLOCK(gnvso);
+
+	/* The lookup took a reference on the duplicate; give it back. */
+	geneve_release(dup);
+
+	return (EEXIST);
+}
+
+/*
+ * Unlink a softc from the socket's VNI hash under the socket write lock,
+ * then release one reference on the softc.
+ */
+static void
+geneve_socket_remove_softc(struct geneve_socket *gnvso, struct geneve_softc *sc)
+{
+	GENEVE_SO_WLOCK(gnvso);
+	LIST_REMOVE(sc, gnv_entry);
+	GENEVE_SO_WUNLOCK(gnvso);
+
+	geneve_release(sc);
+}
+
+/*
+ * Return the multicast interface recorded in the softc's multicast
+ * options for the given address family, with a reference taken via
+ * if_ref().  Returns NULL when there is no such interface.
+ */
+static struct ifnet *
+geneve_multicast_if_ref(struct geneve_softc *sc, uint32_t af)
+{
+	struct ifnet *mcifp;
+
+	GENEVE_LOCK_ASSERT(sc);
+
+	mcifp = NULL;
+	switch (af) {
+	case AF_INET:
+		if (sc->gnv_im4o != NULL)
+			mcifp = sc->gnv_im4o->imo_multicast_ifp;
+		break;
+	case AF_INET6:
+		if (sc->gnv_im6o != NULL)
+			mcifp = sc->gnv_im6o->im6o_multicast_ifp;
+		break;
+	default:
+		break;
+	}
+
+	if (mcifp == NULL)
+		return (NULL);
+
+	if_ref(mcifp);
+
+	return (mcifp);
+}
+
+/*
+ * Release the softc's multicast state: drop the reference on the
+ * multicast interface and free the IPv4 and IPv6 multicast option
+ * structures.  Each pointer is reset to NULL once released.
+ */
+static void
+geneve_free_multicast(struct geneve_softc *sc)
+{
+	struct ifnet *mcifp;
+
+	mcifp = sc->gnv_mc_ifp;
+	if (mcifp != NULL) {
+		if_rele(mcifp);
+		sc->gnv_mc_ifp = NULL;
+		sc->gnv_mc_ifindex = 0;
+	}
+
+	if (sc->gnv_im4o != NULL) {
+		free(sc->gnv_im4o, M_GENEVE);
+		sc->gnv_im4o = NULL;
+	}
+
+	if (sc->gnv_im6o != NULL) {
+		free(sc->gnv_im6o, M_GENEVE);
+		sc->gnv_im6o = NULL;
+	}
+}
+
+/*
+ * Resolve the configured multicast interface name to an ifnet and record
+ * it, together with its index, in the softc.  Returns ENOENT when the
+ * interface does not exist and ENOTSUP when it lacks IFF_MULTICAST; in
+ * the latter case the reference obtained by the lookup is dropped.
+ */
+static int
+geneve_setup_multicast_interface(struct geneve_softc *sc)
+{
+	struct ifnet *mcifp;
+
+	mcifp = ifunit_ref(sc->gnv_mc_ifname);
+	if (mcifp == NULL) {
+		if_printf(sc->gnv_ifp, "multicast interface %s does "
+		    "not exist\n", sc->gnv_mc_ifname);
+		return (ENOENT);
+	}
+
+	if ((mcifp->if_flags & IFF_MULTICAST) != 0) {
+		sc->gnv_mc_ifp = mcifp;
+		sc->gnv_mc_ifindex = mcifp->if_index;
+		return (0);
+	}
+
+	if_printf(sc->gnv_ifp, "interface %s does not support "
+	    "multicast\n", sc->gnv_mc_ifname);
+	if_rele(mcifp);
+
+	return (ENOTSUP);
+}
+
+/*
+ * Prepare the softc for a multicast destination: resolve the multicast
+ * interface when a name is configured, then allocate the multicast
+ * options structure matching the destination address family.  Returns 0
+ * or the error from the interface setup.
+ */
+static int
+geneve_setup_multicast(struct geneve_softc *sc)
+{
+	int rc;
+
+	if (sc->gnv_mc_ifname[0] != '\0') {
+		rc = geneve_setup_multicast_interface(sc);
+		if (rc != 0)
+			return (rc);
+	}
+
+	/*
+	 * Build a multicast options structure populated just enough for
+	 * the respective IP output routine.  Such a structure normally
+	 * lives in the socket, but our sockets may be shared among
+	 * multiple interfaces.
+	 */
+	switch (sc->gnv_dst_addr.sa.sa_family) {
+	case AF_INET:
+		sc->gnv_im4o = malloc(sizeof(struct ip_moptions), M_GENEVE,
+		    M_ZERO | M_WAITOK);
+		sc->gnv_im4o->imo_multicast_ifp = sc->gnv_mc_ifp;
+		sc->gnv_im4o->imo_multicast_ttl = sc->gnv_ttl;
+		sc->gnv_im4o->imo_multicast_vif = -1;
+		break;
+	case AF_INET6:
+		sc->gnv_im6o = malloc(sizeof(struct ip6_moptions), M_GENEVE,
+		    M_ZERO | M_WAITOK);
+		sc->gnv_im6o->im6o_multicast_ifp = sc->gnv_mc_ifp;
+		sc->gnv_im6o->im6o_multicast_hlim = sc->gnv_ttl;
+		break;
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Attach the softc to a geneve_socket for its configured source address.
+ *
+ * A new socket is created first; if that fails, an already existing
+ * socket is looked up instead.  Afterwards the DF option and the
+ * multicast state are set up as configured, and the softc is inserted
+ * into the socket's VNI hash.
+ *
+ * Returns 0 on success.  On failure the multicast slot, the multicast
+ * state and the socket reference obtained here are released again and the
+ * error is returned.
+ */
+static int
+geneve_setup_socket(struct geneve_softc *sc)
+{
+ struct geneve_socket *gnvso;
+ struct ifnet *ifp;
+ union sockaddr_union *saddr, *daddr;
+ int multicast, error;
+
+ ifp = sc->gnv_ifp;
+ saddr = &sc->gnv_src_addr;
+ daddr = &sc->gnv_dst_addr;
+
+ multicast = geneve_check_multicast_addr(daddr);
+ MPASS(multicast != -1);
+ /* -1 marks "no multicast slot held"; the error path relies on it. */
+ sc->gnv_so_mc_index = -1;
+
+ /*
+ * Try to create the socket. If that fails, attempt to use an
+ * existing socket.
+ */
+ gnvso = geneve_socket_alloc(sc->gnv_vnet);
+ if (gnvso == NULL)
+ return (ENOMEM);
+
+ error = geneve_socket_create(ifp, multicast, saddr, gnvso);
+ if (error) {
+ /* The failed socket was never inserted; destroy it directly. */
+ geneve_socket_destroy(gnvso);
+ if_printf(ifp, "can't create new socket (error: %d)\n", error);
+
+ if (multicast != 0)
+ gnvso = geneve_socket_mc_lookup(saddr);
+ else
+ gnvso = geneve_socket_lookup(saddr);
+
+ /* error still holds the creation failure at this point. */
+ if (gnvso == NULL) {
+ if_printf(ifp, "can't find existing socket\n");
+ goto out;
+ }
+ }
+
+ if (sc->gnv_df == IFLA_GENEVE_DF_SET) {
+ error = geneve_socket_set_df(gnvso, true);
+ if (error)
+ goto out;
+ }
+
+ if (multicast != 0) {
+ error = geneve_setup_multicast(sc);
+ if (error)
+ goto out;
+
+ error = geneve_socket_mc_add_group(gnvso, daddr, saddr,
+ sc->gnv_mc_ifindex, &sc->gnv_so_mc_index);
+ if (error)
+ goto out;
+ }
+
+ sc->gnv_sock = gnvso;
+ error = geneve_socket_insert_softc(gnvso, sc);
+ if (error) {
+ sc->gnv_sock = NULL;
+ if_printf(ifp, "network identifier %d already exists\n",
+ sc->gnv_vni);
+ goto out;
+ }
+
+ return (0);
+
+out:
+ /* Undo whatever was set up above, in reverse order. */
+ if (gnvso != NULL) {
+ if (sc->gnv_so_mc_index != -1) {
+ geneve_socket_mc_release_group_by_idx(gnvso,
+ sc->gnv_so_mc_index);
+ sc->gnv_so_mc_index = -1;
+ }
+ if (multicast != 0)
+ geneve_free_multicast(sc);
+ geneve_socket_release(gnvso);
+ }
+
+ return (error);
+}
+
+/*
+ * Recompute the interface's header length from the encapsulation in use
+ * (Ethernet + Geneve/UDP header, an additional Ethernet header for the
+ * Ethernet protocol, and the IPv4 or IPv6 header).  Unless the user has
+ * set an MTU explicitly, the MTU is derived from the new header length.
+ */
+static void
+geneve_setup_interface_hdrlen(struct geneve_softc *sc)
+{
+	struct ifnet *gifp;
+
+	GENEVE_LOCK_WASSERT(sc);
+
+	gifp = sc->gnv_ifp;
+
+	gifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct geneveudphdr);
+	if (sc->gnv_proto == GENEVE_PROTO_ETHER)
+		gifp->if_hdrlen += ETHER_HDR_LEN;
+
+	/* Any destination family other than AF_INET is sized as IPv6. */
+	gifp->if_hdrlen += (sc->gnv_dst_addr.sa.sa_family == AF_INET) ?
+	    sizeof(struct ip) : sizeof(struct ip6_hdr);
+
+	if ((sc->gnv_flags & GENEVE_FLAG_USER_MTU) != 0)
+		return;
+
+	gifp->if_mtu = ETHERMTU - gifp->if_hdrlen;
+}
+
+/*
+ * Set or clear the "don't fragment" socket option on the geneve_socket,
+ * choosing IP_DONTFRAG or IPV6_DONTFRAG by the family of the socket's
+ * local address.  Returns the sosetopt() result, or EAFNOSUPPORT for a
+ * family that is not handled.
+ */
+static int
+geneve_socket_set_df(struct geneve_socket *gnvso, bool df)
+{
+	struct sockopt sopt;
+	int onoff;
+
+	memset(&sopt, 0, sizeof(sopt));
+	sopt.sopt_dir = SOPT_SET;
+
+	switch (gnvso->gnvso_laddr.sa.sa_family) {
+#ifdef INET
+	case AF_INET:
+		sopt.sopt_level = IPPROTO_IP;
+		sopt.sopt_name = IP_DONTFRAG;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		sopt.sopt_level = IPPROTO_IPV6;
+		sopt.sopt_name = IPV6_DONTFRAG;
+		break;
+#endif
+	default:
+		return (EAFNOSUPPORT);
+	}
+
+	onoff = df ? 1 : 0;
+	sopt.sopt_val = &onoff;
+	sopt.sopt_valsize = sizeof(onoff);
+
+	return (sosetopt(gnvso->gnvso_sock, &sopt));
+}
+
+/*
+ * Sanity-check the softc's configuration before the interface is
+ * initialised.
+ *
+ * Returns 0 when the configuration is usable.  Otherwise the reason is
+ * logged on the interface and EINVAL is returned.
+ */
+static int
+geneve_valid_init_config(struct geneve_softc *sc)
+{
+	const char *reason;
+
+	if (sc->gnv_vni >= GENEVE_VNI_MAX) {
+		if_printf(sc->gnv_ifp, "%u", sc->gnv_vni);
+		reason = "invalid virtual network identifier specified";
+		goto fail;
+	}
+
+	if (geneve_sockaddr_supported(&sc->gnv_src_addr, 1) == 0) {
+		reason = "source address type is not supported";
+		goto fail;
+	}
+
+	if (geneve_sockaddr_supported(&sc->gnv_dst_addr, 0) == 0) {
+		reason = "destination address type is not supported";
+		goto fail;
+	}
+
+	if (geneve_sockaddr_in_any(&sc->gnv_dst_addr) != 0) {
+		reason = "no valid destination address specified";
+		goto fail;
+	}
+
+	if (geneve_check_multicast_addr(&sc->gnv_dst_addr) == 0 &&
+	    sc->gnv_mc_ifname[0] != '\0') {
+		reason = "can only specify interface with a group address";
+		goto fail;
+	}
+
+	/*
+	 * When an explicit source address is configured, both endpoints
+	 * must use the same address family.  The family VALUES have to be
+	 * compared: the addresses of the two sa_family fields are never
+	 * equal, so comparing those would let a mixed IPv4/IPv6 setup
+	 * through.
+	 */
+	if (geneve_sockaddr_in_any(&sc->gnv_src_addr) == 0) {
+		if (sc->gnv_src_addr.sa.sa_family !=
+		    sc->gnv_dst_addr.sa.sa_family) {
+			reason = "source and destination address must both "
+			    "be either IPv4 or IPv6";
+			goto fail;
+		}
+	}
+
+	if (sc->gnv_src_addr.sin.sin_port == 0) {
+		reason = "local port not specified";
+		goto fail;
+	}
+
+	if (sc->gnv_dst_addr.sin.sin_port == 0) {
+		reason = "remote port not specified";
+		goto fail;
+	}
+
+	return (0);
+
+fail:
+	if_printf(sc->gnv_ifp, "cannot initialize interface: %s\n", reason);
+	return (EINVAL);
+}
+
+/*
+ * Mark initialisation as finished: clear GENEVE_FLAG_INIT and wake up any
+ * thread sleeping on the softc, both under the softc write lock.
+ */
+static void
+geneve_init_complete(struct geneve_softc *sc)
+{
+	GENEVE_WLOCK(sc);
+	sc->gnv_flags = sc->gnv_flags & ~GENEVE_FLAG_INIT;
+	wakeup(sc);
+	GENEVE_WUNLOCK(sc);
+}
+
+/*
+ * Bring the interface up: validate the configuration, attach to a socket,
+ * set up the default forwarding entry and prune timer (Ethernet protocol
+ * only), then mark the interface running and its link up.
+ *
+ * xsc is the softc.  Nothing is done when the interface is already
+ * running.  The whole sequence is serialised by geneve_sx, and
+ * GENEVE_FLAG_INIT is set for its duration.
+ */
+static void
+geneve_init(void *xsc)
+{
+ /* Static storage, hence all zeroes: the MAC of the default entry. */
+ static const uint8_t empty_mac[ETHER_ADDR_LEN];
+ struct geneve_softc *sc;
+ struct ifnet *ifp;
+
+ sc = xsc;
+ ifp = sc->gnv_ifp;
+
+ sx_xlock(&geneve_sx);
+ GENEVE_WLOCK(sc);
+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ GENEVE_WUNLOCK(sc);
+ sx_xunlock(&geneve_sx);
+ return;
+ }
+ sc->gnv_flags |= GENEVE_FLAG_INIT;
+ GENEVE_WUNLOCK(sc);
+
+ /* From here on every exit goes through "out" to clear the flag. */
+ if (geneve_valid_init_config(sc) != 0)
+ goto out;
+
+ if (geneve_setup_socket(sc) != 0)
+ goto out;
+
+ /* Initialize the default forwarding entry. */
+ if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
+ geneve_ftable_entry_init(sc, &sc->gnv_default_fe,
+ empty_mac, &sc->gnv_dst_addr.sa,
+ GENEVE_FE_FLAG_STATIC);
+
+ /* Arm the periodic forwarding table timer. */
+ GENEVE_WLOCK(sc);
+ callout_reset(&sc->gnv_callout, geneve_ftable_prune_period * hz,
+ geneve_timer, sc);
+ GENEVE_WUNLOCK(sc);
+ }
+ ifp->if_drv_flags |= IFF_DRV_RUNNING;
+
+ if_link_state_change(ifp, LINK_STATE_UP);
+
+out:
+ geneve_init_complete(sc);
+ sx_xunlock(&geneve_sx);
+}
+
+/*
+ * Drop one reference on the softc and, when GENEVE_RELEASE() reports a
+ * non-zero result, wake up any thread sleeping on it.
+ */
+static void
+geneve_release(struct geneve_softc *sc)
+{
+
+	/*
+	 * Once our reference is gone the softc may be destroyed at any
+	 * moment, so the wakeup cannot be serialised with the softc lock.
+	 * The sleepers use a timeout, which makes a missed wakeup
+	 * unfortunate but not fatal.
+	 */
+	if (GENEVE_RELEASE(sc) == 0)
+		return;
+
+	wakeup(sc);
+}
+
+/*
+ * Sleep, with the softc write lock held on entry, until
+ * GENEVE_FLAG_TEARDOWN is cleared.  Each sleep times out after hz ticks
+ * and the flag is re-checked.
+ */
+static void
+geneve_teardown_wait(struct geneve_softc *sc)
+{
+	GENEVE_LOCK_WASSERT(sc);
+
+	while ((sc->gnv_flags & GENEVE_FLAG_TEARDOWN) != 0) {
+		rm_sleep(sc, &sc->gnv_lock, 0, "gnvtrn", hz);
+	}
+}
+
+/*
+ * Mark teardown as finished: clear GENEVE_FLAG_TEARDOWN and wake up any
+ * thread sleeping on the softc, both under the softc write lock.
+ */
+static void
+geneve_teardown_complete(struct geneve_softc *sc)
+{
+	GENEVE_WLOCK(sc);
+	sc->gnv_flags = sc->gnv_flags & ~GENEVE_FLAG_TEARDOWN;
+	wakeup(sc);
+	GENEVE_WUNLOCK(sc);
+}
+
+/*
+ * Take the interface down.
+ *
+ * Entered with geneve_sx held exclusively, the softc write lock held and
+ * GENEVE_FLAG_TEARDOWN set.  The softc lock is dropped inside this
+ * function and is NOT held on return; geneve_sx stays held.
+ */
+static void
+geneve_teardown_locked(struct geneve_softc *sc)
+{
+ struct ifnet *ifp;
+ struct geneve_socket *gnvso;
+
+ sx_assert(&geneve_sx, SA_XLOCKED);
+ GENEVE_LOCK_WASSERT(sc);
+ MPASS(sc->gnv_flags & GENEVE_FLAG_TEARDOWN);
+
+ ifp = sc->gnv_ifp;
+ ifp->if_flags &= ~IFF_UP;
+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+
+ if (sc->gnv_proto == GENEVE_PROTO_ETHER)
+ callout_stop(&sc->gnv_callout);
+ /* Detach the socket pointer while the softc lock is still held. */
+ gnvso = sc->gnv_sock;
+ sc->gnv_sock = NULL;
+
+ GENEVE_WUNLOCK(sc);
+ if_link_state_change(ifp, LINK_STATE_DOWN);
+
+ if (gnvso != NULL) {
+ /* Unlinks sc from the socket and drops that reference. */
+ geneve_socket_remove_softc(gnvso, sc);
+
+ if (sc->gnv_so_mc_index != -1) {
+ geneve_socket_mc_release_group_by_idx(gnvso,
+ sc->gnv_so_mc_index);
+ sc->gnv_so_mc_index = -1;
+ }
+ }
+
+ /* Wait until every outstanding reference on the softc is gone. */
+ GENEVE_WLOCK(sc);
+ while (sc->gnv_refcnt != 0)
+ rm_sleep(sc, &sc->gnv_lock, 0, "gnvdrn", hz);
+ GENEVE_WUNLOCK(sc);
+
+ if (sc->gnv_proto == GENEVE_PROTO_ETHER)
+ callout_drain(&sc->gnv_callout);
+
+ geneve_free_multicast(sc);
+ if (gnvso != NULL)
+ geneve_socket_release(gnvso);
+
+ /* Clears GENEVE_FLAG_TEARDOWN and wakes up waiters. */
+ geneve_teardown_complete(sc);
+}
+
+/*
+ * Take the interface down, serialised by geneve_sx.  If a teardown is
+ * already in progress, wait for it to finish; otherwise flag the
+ * teardown and perform it.
+ */
+static void
+geneve_teardown(struct geneve_softc *sc)
+{
+
+	sx_xlock(&geneve_sx);
+	GENEVE_WLOCK(sc);
+	if ((sc->gnv_flags & GENEVE_FLAG_TEARDOWN) == 0) {
+		sc->gnv_flags |= GENEVE_FLAG_TEARDOWN;
+		/* geneve_teardown_locked() returns with the lock dropped. */
+		geneve_teardown_locked(sc);
+	} else {
+		geneve_teardown_wait(sc);
+		GENEVE_WUNLOCK(sc);
+	}
+	sx_xunlock(&geneve_sx);
+}
+
+/*
+ * Callout handler: expire forwarding table entries and re-arm the callout
+ * for the next prune period.  Runs with the softc write lock held.
+ */
+static void
+geneve_timer(void *xsc)
+{
+	struct geneve_softc *sc = xsc;
+
+	GENEVE_LOCK_WASSERT(sc);
+
+	geneve_ftable_expire(sc);
+	callout_schedule(&sc->gnv_callout, geneve_ftable_prune_period * hz);
+}
+
+/*
+ * React to a change of the interface flags: initialise the interface
+ * when it is marked up but not running, tear it down when it is running
+ * but no longer marked up.  Always returns 0.
+ */
+static int
+geneve_ioctl_ifflags(struct geneve_softc *sc)
+{
+	struct ifnet *gifp;
+	bool up, running;
+
+	gifp = sc->gnv_ifp;
+	up = (gifp->if_flags & IFF_UP) != 0;
+	running = (gifp->if_drv_flags & IFF_DRV_RUNNING) != 0;
+
+	if (up && !running)
+		geneve_init(sc);
+	else if (!up && running)
+		geneve_teardown(sc);
+
+	return (0);
+}
+
+/*
+ * Set the virtual network identifier.  Returns EINVAL when vni is not
+ * below GENEVE_VNI_MAX and EBUSY when the configuration cannot be
+ * changed at the moment.
+ */
+static int
+geneve_set_vni(struct geneve_softc *sc, uint32_t vni)
+{
+	int rc;
+
+	if (vni >= GENEVE_VNI_MAX)
+		return (EINVAL);
+
+	rc = EBUSY;
+	GENEVE_WLOCK(sc);
+	if (geneve_can_change_config(sc)) {
+		sc->gnv_vni = vni;
+		rc = 0;
+	}
+	GENEVE_WUNLOCK(sc);
+
+	return (rc);
+}
+
+/*
+ * Set the tunnel's local (source) address.  Multicast addresses are
+ * rejected with EINVAL; for IPv6 the scope is embedded first.  Returns
+ * EBUSY when the configuration cannot be changed at the moment.
+ */
+static int
+geneve_set_local_addr(struct geneve_softc *sc, union sockaddr_union *sa)
+{
+	int rc;
+
+	if (geneve_check_multicast_addr(sa) != 0)
+		return (EINVAL);
+
+	if (sa->sa.sa_family == AF_INET6) {
+		rc = geneve_sockaddr_in6_embedscope(&sa->sin6);
+		if (rc != 0)
+			return (rc);
+	}
+
+	rc = 0;
+	GENEVE_WLOCK(sc);
+	if (!geneve_can_change_config(sc)) {
+		rc = EBUSY;
+	} else {
+		geneve_sockaddr_in_copy(&sc->gnv_src_addr, &sa->sa);
+		geneve_set_hwcaps(sc);
+	}
+	GENEVE_WUNLOCK(sc);
+
+	return (rc);
+}
+
+/*
+ * Set the tunnel's remote (destination) address and recompute the
+ * interface header length.  For IPv6 the scope is embedded first.
+ * Returns EBUSY when the configuration cannot be changed at the moment.
+ */
+static int
+geneve_set_remote_addr(struct geneve_softc *sc, union sockaddr_union *sa)
+{
+	int rc;
+
+	if (sa->sa.sa_family == AF_INET6) {
+		rc = geneve_sockaddr_in6_embedscope(&sa->sin6);
+		if (rc != 0)
+			return (rc);
+	}
+
+	rc = 0;
+	GENEVE_WLOCK(sc);
+	if (!geneve_can_change_config(sc)) {
+		rc = EBUSY;
+	} else {
+		geneve_sockaddr_in_copy(&sc->gnv_dst_addr, &sa->sa);
+		geneve_setup_interface_hdrlen(sc);
+	}
+	GENEVE_WUNLOCK(sc);
+
+	return (rc);
+}
+
+/*
+ * Set the local UDP port (given in host byte order) in the source
+ * address.  Returns EINVAL for port 0 and EBUSY when the configuration
+ * cannot be changed at the moment.  The port is stored only when the
+ * source address family is AF_INET or AF_INET6.
+ */
+static int
+geneve_set_local_port(struct geneve_softc *sc, uint16_t port)
+{
+	int rc;
+
+	if (port == 0 || port > UINT16_MAX)
+		return (EINVAL);
+
+	rc = 0;
+	GENEVE_WLOCK(sc);
+	if (geneve_can_change_config(sc) == 0) {
+		rc = EBUSY;
+	} else if (sc->gnv_src_addr.sa.sa_family == AF_INET) {
+		sc->gnv_src_addr.sin.sin_port = htons(port);
+	} else if (sc->gnv_src_addr.sa.sa_family == AF_INET6) {
+		sc->gnv_src_addr.sin6.sin6_port = htons(port);
+	}
+	GENEVE_WUNLOCK(sc);
+
+	return (rc);
+}
+
+/*
+ * Set the remote UDP port (given in host byte order) in the destination
+ * address.  Returns EINVAL for port 0 and EBUSY when the configuration
+ * cannot be changed at the moment.  The port is stored only when the
+ * destination address family is AF_INET or AF_INET6.
+ */
+static int
+geneve_set_remote_port(struct geneve_softc *sc, uint16_t port)
+{
+	int rc;
+
+	if (port == 0 || port > UINT16_MAX)
+		return (EINVAL);
+
+	rc = 0;
+	GENEVE_WLOCK(sc);
+	if (geneve_can_change_config(sc) == 0) {
+		rc = EBUSY;
+	} else if (sc->gnv_dst_addr.sa.sa_family == AF_INET) {
+		sc->gnv_dst_addr.sin.sin_port = htons(port);
+	} else if (sc->gnv_dst_addr.sa.sa_family == AF_INET6) {
+		sc->gnv_dst_addr.sin6.sin6_port = htons(port);
+	}
+	GENEVE_WUNLOCK(sc);
+
+	return (rc);
+}
+
+/*
+ * Set the softc's port range to [min, max].  Returns EINVAL when min is
+ * 0 or max is below min, and EBUSY when the configuration cannot be
+ * changed at the moment.
+ */
+static int
+geneve_set_port_range(struct geneve_softc *sc, uint16_t min, uint16_t max)
+{
+	int rc;
+
+	if (min == 0 || max > UINT16_MAX || max < min)
+		return (EINVAL);
+
+	rc = EBUSY;
+	GENEVE_WLOCK(sc);
+	if (geneve_can_change_config(sc)) {
+		sc->gnv_min_port = min;
+		sc->gnv_max_port = max;
+		rc = 0;
+	}
+	GENEVE_WUNLOCK(sc);
+
+	return (rc);
+}
+
+/*
+ * Store the "don't fragment" policy in the softc.  Returns EBUSY when
+ * the configuration cannot be changed at the moment.
+ */
+static int
+geneve_set_df(struct geneve_softc *sc, enum ifla_geneve_df df)
+{
+	int rc;
+
+	rc = EBUSY;
+	GENEVE_WLOCK(sc);
+	if (geneve_can_change_config(sc)) {
+		sc->gnv_df = df;
+		rc = 0;
+	}
+	GENEVE_WUNLOCK(sc);
+
+	return (rc);
+}
+
+/*
+ * Set the TTL and propagate it to the IPv4/IPv6 multicast option
+ * structures when they exist.  Always returns 0.
+ */
+static int
+geneve_set_ttl(struct geneve_softc *sc, uint8_t ttl)
+{
+
+	GENEVE_WLOCK(sc);
+	sc->gnv_ttl = ttl;
+	if (sc->gnv_im4o != NULL)
+		sc->gnv_im4o->imo_multicast_ttl = sc->gnv_ttl;
+	if (sc->gnv_im6o != NULL)
+		sc->gnv_im6o->im6o_multicast_hlim = sc->gnv_ttl;
+	GENEVE_WUNLOCK(sc);
+
+	return (0);
+}
+
+/*
+ * Set or clear GENEVE_FLAG_TTL_INHERIT under the softc write lock.
+ * Always returns 0.
+ */
+static int
+geneve_set_ttl_inherit(struct geneve_softc *sc, bool inherit)
+{
+	GENEVE_WLOCK(sc);
+	if (!inherit)
+		sc->gnv_flags &= ~GENEVE_FLAG_TTL_INHERIT;
+	else
+		sc->gnv_flags |= GENEVE_FLAG_TTL_INHERIT;
+	GENEVE_WUNLOCK(sc);
+
+	return (0);
+}
+
+/*
+ * Set or clear GENEVE_FLAG_DSCP_INHERIT under the softc write lock.
+ * Always returns 0.
+ */
+static int
+geneve_set_dscp_inherit(struct geneve_softc *sc, bool inherit)
+{
+	GENEVE_WLOCK(sc);
+	if (!inherit)
+		sc->gnv_flags &= ~GENEVE_FLAG_DSCP_INHERIT;
+	else
+		sc->gnv_flags |= GENEVE_FLAG_DSCP_INHERIT;
+	GENEVE_WUNLOCK(sc);
+
+	return (0);
+}
+
+/*
+ * Set or clear GENEVE_FLAG_COLLECT_METADATA under the softc write lock.
+ * Always returns 0.
+ */
+static int
+geneve_set_collect_metadata(struct geneve_softc *sc, bool external)
+{
+	GENEVE_WLOCK(sc);
+	if (!external)
+		sc->gnv_flags &= ~GENEVE_FLAG_COLLECT_METADATA;
+	else
+		sc->gnv_flags |= GENEVE_FLAG_COLLECT_METADATA;
+	GENEVE_WUNLOCK(sc);
+
+	return (0);
+}
+
+/*
+ * Set or clear GENEVE_FLAG_LEARN under the softc write lock.  Always
+ * returns 0.
+ */
+static int
+geneve_set_learn(struct geneve_softc *sc, bool learn)
+{
+	GENEVE_WLOCK(sc);
+	if (!learn)
+		sc->gnv_flags &= ~GENEVE_FLAG_LEARN;
+	else
+		sc->gnv_flags |= GENEVE_FLAG_LEARN;
+	GENEVE_WUNLOCK(sc);
+
+	return (0);
+}
+
+/*
+ * Set the forwarding table entry timeout.  Returns EINVAL when the value
+ * exceeds GENEVE_FTABLE_MAX_TIMEOUT.
+ */
+static int
+geneve_set_ftable_timeout(struct geneve_softc *sc, uint32_t timeout)
+{
+	int rc;
+
+	rc = EINVAL;
+	GENEVE_WLOCK(sc);
+	if (timeout <= GENEVE_FTABLE_MAX_TIMEOUT) {
+		sc->gnv_ftable_timeout = timeout;
+		rc = 0;
+	}
+	GENEVE_WUNLOCK(sc);
+
+	return (rc);
+}
+
+/*
+ * Set the maximum number of forwarding table entries.  Returns EINVAL
+ * when the value exceeds GENEVE_FTABLE_MAX.
+ */
+static int
+geneve_set_ftable_max(struct geneve_softc *sc, uint32_t max)
+{
+	int rc;
+
+	rc = EINVAL;
+	GENEVE_WLOCK(sc);
+	if (max <= GENEVE_FTABLE_MAX) {
+		sc->gnv_ftable_max = max;
+		rc = 0;
+	}
+	GENEVE_WUNLOCK(sc);
+
+	return (rc);
+}
+
+/*
+ * Record the name of the multicast interface (truncated to IFNAMSIZ by
+ * strlcpy) and refresh the hardware capabilities.  Returns EBUSY when
+ * the configuration cannot be changed at the moment.
+ */
+static int
+geneve_set_mc_if(struct geneve_softc * sc, char *ifname)
+{
+	int rc;
+
+	rc = EBUSY;
+	GENEVE_WLOCK(sc);
+	if (geneve_can_change_config(sc)) {
+		strlcpy(sc->gnv_mc_ifname, ifname, IFNAMSIZ);
+		geneve_set_hwcaps(sc);
+		rc = 0;
+	}
+	GENEVE_WUNLOCK(sc);
+
+	return (rc);
+}
+
+/*
+ * Flush the forwarding table under the softc write lock; the flush
+ * argument is handed through to geneve_ftable_flush().  Always returns 0.
+ */
+static int
+geneve_flush_ftable(struct geneve_softc *sc, bool flush)
+{
+	GENEVE_WLOCK(sc);
+	geneve_ftable_flush(sc, flush);
+	GENEVE_WUNLOCK(sc);
+
+	return (0);
+}
+
+
+/* nvlist helpers */
+/*
+ * nvlist front end for geneve_set_vni(): reads the "vni" number from nvl.
+ *
+ * Returns EINVAL when the key is missing or the value does not fit in 32
+ * bits; otherwise the result of geneve_set_vni().
+ */
+static int
+geneve_set_vni_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+	uint64_t vni;
+
+	if (!nvlist_exists_number(nvl, "vni"))
+		return (EINVAL);
+
+	vni = nvlist_get_number(nvl, "vni");
+
+	/*
+	 * nvlist numbers are 64 bits wide.  Reject anything that would be
+	 * truncated on the way into the 32-bit setter, so that an
+	 * oversized value cannot alias a valid identifier.
+	 */
+	if (vni > UINT32_MAX)
+		return (EINVAL);
+
+	return (geneve_set_vni(sc, (uint32_t)vni));
+}
+
+/*
+ * nvlist front end for geneve_set_local_addr(): takes the "local_sa"
+ * binary from nvl, validates it with geneve_check_sockaddr_in() and
+ * applies it.  The taken buffer is always freed.  Returns EINVAL for a
+ * NULL nvlist, a missing key or a buffer that fails validation.
+ */
+static int
+geneve_set_local_addr_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+	union sockaddr_union *addr;
+	size_t addrlen;
+	int rc;
+
+	if (nvl == NULL || !nvlist_exists_binary(nvl, "local_sa"))
+		return (EINVAL);
+
+	addr = nvlist_take_binary(nvl, "local_sa", &addrlen);
+
+	if (geneve_check_sockaddr_in(addr, addrlen))
+		rc = EINVAL;
+	else
+		rc = geneve_set_local_addr(sc, addr);
+
+	free(addr, M_NVLIST);
+
+	return (rc);
+}
+
+/*
+ * nvlist front end for geneve_set_remote_addr(): takes the "remote_sa"
+ * binary from nvl, validates it with geneve_check_sockaddr_in() and
+ * applies it.  The taken buffer is always freed.  Returns EINVAL for a
+ * NULL nvlist, a missing key or a buffer that fails validation.
+ */
+static int
+geneve_set_remote_addr_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+	union sockaddr_union *addr;
+	size_t addrlen;
+	int rc;
+
+	if (nvl == NULL || !nvlist_exists_binary(nvl, "remote_sa"))
+		return (EINVAL);
+
+	addr = nvlist_take_binary(nvl, "remote_sa", &addrlen);
+
+	if (geneve_check_sockaddr_in(addr, addrlen))
+		rc = EINVAL;
+	else
+		rc = geneve_set_remote_addr(sc, addr);
+
+	free(addr, M_NVLIST);
+
+	return (rc);
+}
+
+/*
+ * nvlist front end for geneve_set_local_port(): reads the "local_port"
+ * number from nvl.
+ *
+ * Returns EINVAL when the key is missing or the value is not a valid
+ * non-zero 16-bit port; otherwise the result of geneve_set_local_port().
+ */
+static int
+geneve_set_local_port_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+	uint64_t port;
+
+	if (!nvlist_exists_number(nvl, "local_port"))
+		return (EINVAL);
+
+	port = nvlist_get_number(nvl, "local_port");
+
+	/*
+	 * The setter takes a uint16_t, so the range has to be checked
+	 * while the value is still 64 bits wide; otherwise e.g. 65536 + N
+	 * would silently be accepted as port N.
+	 */
+	if (port == 0 || port > UINT16_MAX)
+		return (EINVAL);
+
+	return (geneve_set_local_port(sc, (uint16_t)port));
+}
+
+/*
+ * nvlist front end for geneve_set_remote_port(): reads the "remote_port"
+ * number from nvl.
+ *
+ * Returns EINVAL when the key is missing or the value is not a valid
+ * non-zero 16-bit port; otherwise the result of geneve_set_remote_port().
+ */
+static int
+geneve_set_remote_port_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+	uint64_t port;
+
+	if (!nvlist_exists_number(nvl, "remote_port"))
+		return (EINVAL);
+
+	port = nvlist_get_number(nvl, "remote_port");
+
+	/*
+	 * The setter takes a uint16_t, so the range has to be checked
+	 * while the value is still 64 bits wide; otherwise e.g. 65536 + N
+	 * would silently be accepted as port N.
+	 */
+	if (port == 0 || port > UINT16_MAX)
+		return (EINVAL);
+
+	return (geneve_set_remote_port(sc, (uint16_t)port));
+}
+
+/*
+ * nvlist front end for geneve_set_port_range(): reads the "min_port" and
+ * "max_port" numbers from nvl.
+ *
+ * Returns EINVAL for a NULL nvlist, a missing key, or a bound that does
+ * not fit in 16 bits; otherwise the result of geneve_set_port_range().
+ */
+static int
+geneve_set_port_range_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+	uint64_t min, max;
+
+	if (nvl == NULL)
+		return (EINVAL);
+
+	if (!nvlist_exists_number(nvl, "min_port") ||
+	    !nvlist_exists_number(nvl, "max_port"))
+		return (EINVAL);
+
+	min = nvlist_get_number(nvl, "min_port");
+	max = nvlist_get_number(nvl, "max_port");
+
+	/*
+	 * The setter takes uint16_t bounds; check the width here so that
+	 * oversized values are rejected instead of being truncated into
+	 * an apparently valid range.
+	 */
+	if (min > UINT16_MAX || max > UINT16_MAX)
+		return (EINVAL);
+
+	return (geneve_set_port_range(sc, (uint16_t)min, (uint16_t)max));
+}
+
+/*
+ * nvlist front end for geneve_set_df(): reads the optional "df" number
+ * from nvl.  A missing key is not an error and returns 0.  Returns
+ * EINVAL when the value is not below IFLA_GENEVE_DF_MAX.
+ */
+static int
+geneve_set_df_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+	uint64_t raw;
+
+	if (!nvlist_exists_number(nvl, "df"))
+		return (0);
+
+	raw = nvlist_get_number(nvl, "df");
+	if (raw >= IFLA_GENEVE_DF_MAX)
+		return (EINVAL);
+
+	return (geneve_set_df(sc, (enum ifla_geneve_df)raw));
+}
+
+/*
+ * nvlist front end for geneve_set_ttl(): reads the "ttl" number from
+ * nvl.  Returns EINVAL when the key is missing or the value exceeds
+ * MAXTTL.
+ */
+static int
+geneve_set_ttl_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+	uint64_t raw;
+
+	if (!nvlist_exists_number(nvl, "ttl"))
+		return (EINVAL);
+
+	raw = nvlist_get_number(nvl, "ttl");
+	if (raw > MAXTTL)
+		return (EINVAL);
+
+	return (geneve_set_ttl(sc, (uint8_t)raw));
+}
+
+/*
+ * nvlist front end for geneve_set_ttl_inherit(): reads the "inherit_ttl"
+ * boolean from nvl.  Returns EINVAL when the key is missing.
+ */
+static int
+geneve_set_ttl_inherit_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+
+	if (!nvlist_exists_bool(nvl, "inherit_ttl"))
+		return (EINVAL);
+
+	return (geneve_set_ttl_inherit(sc,
+	    nvlist_get_bool(nvl, "inherit_ttl")));
+}
+
+/*
+ * nvlist front end for geneve_set_dscp_inherit(): reads the
+ * "inherit_dscp" boolean from nvl.  Returns EINVAL when the key is
+ * missing.
+ */
+static int
+geneve_set_dscp_inherit_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+
+	if (!nvlist_exists_bool(nvl, "inherit_dscp"))
+		return (EINVAL);
+
+	return (geneve_set_dscp_inherit(sc,
+	    nvlist_get_bool(nvl, "inherit_dscp")));
+}
+
+/*
+ * nvlist front end for geneve_set_collect_metadata(): reads the
+ * "collect_metadata" boolean from nvl.  Returns EINVAL when the key is
+ * missing.
+ */
+static int
+geneve_set_collect_metadata_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+
+	if (!nvlist_exists_bool(nvl, "collect_metadata"))
+		return (EINVAL);
+
+	return (geneve_set_collect_metadata(sc,
+	    nvlist_get_bool(nvl, "collect_metadata")));
+}
+
+/*
+ * nvlist front end for geneve_set_learn(): reads the "learn" boolean
+ * from nvl.  Returns EINVAL when the key is missing.
+ */
+static int
+geneve_set_learn_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+
+	if (!nvlist_exists_bool(nvl, "learn"))
+		return (EINVAL);
+
+	return (geneve_set_learn(sc, nvlist_get_bool(nvl, "learn")));
+}
+
+/*
+ * nvlist front end for geneve_set_ftable_timeout(): reads the
+ * "ftable_timeout" number from nvl.  Returns EINVAL when the key is
+ * missing or the value does not fit in 32 bits.
+ */
+static int
+geneve_set_ftable_timeout_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+	uint64_t raw;
+
+	if (!nvlist_exists_number(nvl, "ftable_timeout"))
+		return (EINVAL);
+
+	raw = nvlist_get_number(nvl, "ftable_timeout");
+	if (raw > UINT32_MAX)
+		return (EINVAL);
+
+	return (geneve_set_ftable_timeout(sc, raw));
+}
+
+/*
+ * nvlist front end for geneve_set_ftable_max(): reads the "ftable_max"
+ * number from nvl.  Returns EINVAL when the key is missing or the value
+ * does not fit in 32 bits.
+ */
+static int
+geneve_set_ftable_max_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+	uint64_t raw;
+
+	if (!nvlist_exists_number(nvl, "ftable_max"))
+		return (EINVAL);
+
+	raw = nvlist_get_number(nvl, "ftable_max");
+	if (raw > UINT32_MAX)
+		return (EINVAL);
+
+	return (geneve_set_ftable_max(sc, raw));
+}
+
+/*
+ * Apply the "mc_ifname" string carried in an nvlist request.
+ *
+ * The string is taken out of the nvlist, handed to geneve_set_mc_if()
+ * and then released with free(9) against M_NVLIST.  Returns EINVAL when
+ * the key is missing, otherwise the result of geneve_set_mc_if().
+ */
+static int
+geneve_set_mc_if_nvl(struct geneve_softc * sc, nvlist_t *nvl)
+{
+	char *name;
+	int rc;
+
+	if (nvlist_exists_string(nvl, "mc_ifname")) {
+		name = nvlist_take_string(nvl, "mc_ifname");
+		rc = geneve_set_mc_if(sc, name);
+		free(name, M_NVLIST);
+	} else
+		rc = EINVAL;
+
+	return (rc);
+}
+
+/*
+ * Handle the "flush_ftable" boolean carried in an nvlist request.
+ * Returns EINVAL when the key is missing, otherwise the result of
+ * geneve_flush_ftable().
+ */
+static int
+geneve_flush_ftable_nvl(struct geneve_softc *sc, nvlist_t *nvl)
+{
+
+	if (!nvlist_exists_bool(nvl, "flush_ftable"))
+		return (EINVAL);
+
+	return (geneve_flush_ftable(sc,
+	    nvlist_get_bool(nvl, "flush_ftable")));
+}
+
+/*
+ * Return the configured local (source) UDP port in host byte order, or 0
+ * when the source address family is neither AF_INET nor AF_INET6.
+ * The softc lock must be held (asserted).
+ */
+static uint16_t
+geneve_get_local_port(struct geneve_softc *sc)
+{
+
+	GENEVE_LOCK_ASSERT(sc);
+
+	if (sc->gnv_src_addr.sa.sa_family == AF_INET)
+		return (ntohs(sc->gnv_src_addr.sin.sin_port));
+	if (sc->gnv_src_addr.sa.sa_family == AF_INET6)
+		return (ntohs(sc->gnv_src_addr.sin6.sin6_port));
+
+	return (0);
+}
+
+/*
+ * Return the configured remote (destination) UDP port in host byte
+ * order, or 0 when the destination address family is neither AF_INET
+ * nor AF_INET6.  The softc lock must be held (asserted).
+ */
+static uint16_t
+geneve_get_remote_port(struct geneve_softc *sc)
+{
+
+	GENEVE_LOCK_ASSERT(sc);
+
+	if (sc->gnv_dst_addr.sa.sa_family == AF_INET)
+		return (ntohs(sc->gnv_dst_addr.sin.sin_port));
+	if (sc->gnv_dst_addr.sa.sa_family == AF_INET6)
+		return (ntohs(sc->gnv_dst_addr.sin6.sin6_port));
+
+	return (0);
+}
+
+/* Netlink Helpers */
+/*
+ * Netlink wrapper around geneve_set_vni(): forwards the VNI and, for
+ * EINVAL and EBUSY, attaches a human readable message to the netlink
+ * parser state.  The error code itself is returned unchanged.
+ */
+static int
+geneve_set_vni_nl(struct geneve_softc *sc, struct nl_pstate *npt, uint32_t vni)
+{
+	int rc;
+
+	rc = geneve_set_vni(sc, vni);
+	switch (rc) {
+	case EINVAL:
+		nlmsg_report_err_msg(npt, "geneve vni is invalid: %u", vni);
+		break;
+	case EBUSY:
+		nlmsg_report_err_msg(npt, "geneve interface is busy.");
+		break;
+	default:
+		break;
+	}
+
+	return (rc);
+}
+
+/*
+ * Netlink wrapper for setting the local tunnel address.
+ *
+ * The sockaddr is first validated with geneve_check_sockaddr() and, if
+ * that succeeds, applied with geneve_set_local_addr().  EINVAL,
+ * EAFNOSUPPORT and EBUSY each get a message attached to the netlink
+ * parser state; the error code is returned unchanged.
+ */
+static int
+geneve_set_local_addr_nl(struct geneve_softc *sc, struct nl_pstate *npt,
+    struct sockaddr *sa)
+{
+	union sockaddr_union *addr;
+	int rc;
+
+	addr = (union sockaddr_union *)sa;
+	rc = geneve_check_sockaddr(addr, sa->sa_len);
+	if (rc == 0)
+		rc = geneve_set_local_addr(sc, addr);
+
+	switch (rc) {
+	case EINVAL:
+		nlmsg_report_err_msg(npt, "local address is invalid.");
+		break;
+	case EAFNOSUPPORT:
+		nlmsg_report_err_msg(npt, "address family is not supported.");
+		break;
+	case EBUSY:
+		nlmsg_report_err_msg(npt, "geneve interface is busy.");
+		break;
+	default:
+		break;
+	}
+
+	return (rc);
+}
+
+/*
+ * Netlink wrapper for setting the remote tunnel address.
+ *
+ * The sockaddr is first validated with geneve_check_sockaddr() and, if
+ * that succeeds, applied with geneve_set_remote_addr().  EINVAL,
+ * EAFNOSUPPORT and EBUSY each get a message attached to the netlink
+ * parser state; the error code is returned unchanged.
+ */
+static int
+geneve_set_remote_addr_nl(struct geneve_softc *sc, struct nl_pstate *npt,
+    struct sockaddr *sa)
+{
+	union sockaddr_union *addr;
+	int rc;
+
+	addr = (union sockaddr_union *)sa;
+	rc = geneve_check_sockaddr(addr, sa->sa_len);
+	if (rc == 0)
+		rc = geneve_set_remote_addr(sc, addr);
+
+	switch (rc) {
+	case EINVAL:
+		nlmsg_report_err_msg(npt, "address is invalid.");
+		break;
+	case EAFNOSUPPORT:
+		nlmsg_report_err_msg(npt, "address family is not supported.");
+		break;
+	case EBUSY:
+		nlmsg_report_err_msg(npt, "geneve interface is busy.");
+		break;
+	default:
+		break;
+	}
+
+	return (rc);
+}
+
+/*
+ * Netlink wrapper around geneve_set_local_port(); EINVAL and EBUSY get
+ * a message attached to the netlink parser state.  The error code is
+ * returned unchanged.
+ */
+static int
+geneve_set_local_port_nl(struct geneve_softc *sc, struct nl_pstate *npt,
+    uint16_t port)
+{
+	int rc;
+
+	rc = geneve_set_local_port(sc, port);
+	switch (rc) {
+	case EINVAL:
+		nlmsg_report_err_msg(npt, "local port is invalid: %u", port);
+		break;
+	case EBUSY:
+		nlmsg_report_err_msg(npt, "geneve interface is busy.");
+		break;
+	default:
+		break;
+	}
+
+	return (rc);
+}
+
+/*
+ * Netlink wrapper around geneve_set_remote_port(); EINVAL and EBUSY get
+ * a message attached to the netlink parser state.  The error code is
+ * returned unchanged.
+ */
+static int
+geneve_set_remote_port_nl(struct geneve_softc *sc, struct nl_pstate *npt,
+    uint16_t port)
+{
+	int rc;
+
+	rc = geneve_set_remote_port(sc, port);
+	switch (rc) {
+	case EINVAL:
+		nlmsg_report_err_msg(npt, "remote port is invalid: %u", port);
+		break;
+	case EBUSY:
+		nlmsg_report_err_msg(npt, "geneve interface is busy.");
+		break;
+	default:
+		break;
+	}
+
+	return (rc);
+}
+
+/*
+ * Netlink wrapper around geneve_set_port_range(): unpacks the low/high
+ * pair from the attribute and reports EINVAL and EBUSY through the
+ * netlink parser state.  The error code is returned unchanged.
+ */
+static int
+geneve_set_port_range_nl(struct geneve_softc *sc, struct nl_pstate *npt,
+    struct ifla_geneve_port_range port_range)
+{
+	uint16_t lo = port_range.low;
+	uint16_t hi = port_range.high;
+	int rc;
+
+	rc = geneve_set_port_range(sc, lo, hi);
+	switch (rc) {
+	case EINVAL:
+		nlmsg_report_err_msg(npt, "port range is invalid: %u-%u",
+		    lo, hi);
+		break;
+	case EBUSY:
+		nlmsg_report_err_msg(npt, "geneve interface is busy.");
+		break;
+	default:
+		break;
+	}
+
+	return (rc);
+}
+
+/*
+ * Netlink wrapper around geneve_set_df(); EBUSY gets a message attached
+ * to the netlink parser state.  The error code is returned unchanged.
+ */
+static int
+geneve_set_df_nl(struct geneve_softc *sc, struct nl_pstate *npt,
+    enum ifla_geneve_df df)
+{
+	int rc = geneve_set_df(sc, df);
+
+	if (rc == EBUSY)
+		nlmsg_report_err_msg(npt, "geneve interface is busy.");
+
+	return (rc);
+}
+
+/* Netlink wrapper: hands the TTL to geneve_set_ttl(); npt is not used. */
+static int
+geneve_set_ttl_nl(struct geneve_softc *sc, struct nl_pstate *npt __unused,
+    uint8_t ttl)
+{
+	int rc;
+
+	rc = geneve_set_ttl(sc, ttl);
+	return (rc);
+}
+
+/*
+ * Netlink wrapper: hands the flag to geneve_set_ttl_inherit(); npt is
+ * not used.
+ */
+static int
+geneve_set_ttl_inherit_nl(struct geneve_softc *sc,
+    struct nl_pstate *npt __unused, bool inherit)
+{
+	int rc;
+
+	rc = geneve_set_ttl_inherit(sc, inherit);
+	return (rc);
+}
+
+/*
+ * Netlink wrapper: hands the flag to geneve_set_dscp_inherit(); npt is
+ * not used.
+ */
+static int
+geneve_set_dscp_inherit_nl(struct geneve_softc *sc,
+    struct nl_pstate *npt __unused, bool inherit)
+{
+	int rc;
+
+	rc = geneve_set_dscp_inherit(sc, inherit);
+	return (rc);
+}
+
+/*
+ * Netlink wrapper: hands the flag to geneve_set_collect_metadata(); npt
+ * is not used.
+ */
+static int
+geneve_set_collect_metadata_nl(struct geneve_softc *sc,
+    struct nl_pstate *npt __unused, bool external)
+{
+	int rc;
+
+	rc = geneve_set_collect_metadata(sc, external);
+	return (rc);
+}
+
+/*
+ * Netlink wrapper: hands the flag to geneve_set_learn().
+ *
+ * npt is not referenced, so it is marked __unused like the other
+ * pass-through netlink wrappers in this file.
+ */
+static int
+geneve_set_learn_nl(struct geneve_softc *sc, struct nl_pstate *npt __unused,
+    bool learn)
+{
+
+	return (geneve_set_learn(sc, learn));
+}
+
+/*
+ * Netlink wrapper around geneve_set_ftable_max(); on EINVAL the upper
+ * bound GENEVE_FTABLE_MAX is reported through the netlink parser state.
+ * The error code is returned unchanged.
+ */
+static int
+geneve_set_ftable_max_nl(struct geneve_softc *sc, struct nl_pstate *npt,
+    uint32_t max)
+{
+	int rc = geneve_set_ftable_max(sc, max);
+
+	if (rc != EINVAL)
+		return (rc);
+
+	nlmsg_report_err_msg(npt,
+	    "maximum number of entries in the table can not be more than %u",
+	    GENEVE_FTABLE_MAX);
+	return (rc);
+}
+
+/*
+ * Netlink wrapper around geneve_set_ftable_timeout(); on EINVAL the
+ * upper bound GENEVE_FTABLE_MAX_TIMEOUT is reported through the netlink
+ * parser state.  The error code is returned unchanged.
+ */
+static int
+geneve_set_ftable_timeout_nl(struct geneve_softc *sc,
+    struct nl_pstate *npt, uint32_t timeout)
+{
+	int rc = geneve_set_ftable_timeout(sc, timeout);
+
+	if (rc != EINVAL)
+		return (rc);
+
+	nlmsg_report_err_msg(npt,
+	    "maximum timeout for stale entries in the table can not be more than %u",
+	    GENEVE_FTABLE_MAX_TIMEOUT);
+	return (rc);
+}
+
+/*
+ * Netlink wrapper around geneve_set_mc_if(); EBUSY gets a message
+ * attached to the netlink parser state.  The error code is returned
+ * unchanged.
+ */
+static int
+geneve_set_mc_if_nl(struct geneve_softc *sc, struct nl_pstate *npt,
+    char *ifname)
+{
+	int rc = geneve_set_mc_if(sc, ifname);
+
+	if (rc == EBUSY)
+		nlmsg_report_err_msg(npt, "geneve interface is busy.");
+
+	return (rc);
+}
+
+/*
+ * Netlink wrapper: hands the flag to geneve_flush_ftable().
+ *
+ * npt is not referenced, so it is marked __unused like the other
+ * pass-through netlink wrappers in this file.
+ */
+static int
+geneve_flush_ftable_nl(struct geneve_softc *sc,
+    struct nl_pstate *npt __unused, bool flush)
+{
+
+	return (geneve_flush_ftable(sc, flush));
+}
+
+/*
+ * Emit the local tunnel address as an IFLA_GENEVE_LOCAL netlink
+ * attribute.  Nothing is written when the address family is neither
+ * AF_INET nor AF_INET6.  The softc lock must be held (asserted).
+ */
+static void
+geneve_get_local_addr_nl(struct geneve_softc *sc, struct nl_writer *nw)
+{
+	struct sockaddr *addr;
+
+	GENEVE_LOCK_ASSERT(sc);
+
+	addr = &sc->gnv_src_addr.sa;
+	switch (addr->sa_family) {
+	case AF_INET:
+		nlattr_add_in_addr(nw, IFLA_GENEVE_LOCAL,
+		    &SATOCONSTSIN(addr)->sin_addr);
+		break;
+	case AF_INET6:
+		nlattr_add_in6_addr(nw, IFLA_GENEVE_LOCAL,
+		    &SATOCONSTSIN6(addr)->sin6_addr);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Emit the remote tunnel address as an IFLA_GENEVE_REMOTE netlink
+ * attribute.  Nothing is written when the address family is neither
+ * AF_INET nor AF_INET6.  The softc lock must be held (asserted).
+ */
+static void
+geneve_get_remote_addr_nl(struct geneve_softc *sc, struct nl_writer *nw)
+{
+	struct sockaddr *addr;
+
+	GENEVE_LOCK_ASSERT(sc);
+
+	addr = &sc->gnv_dst_addr.sa;
+	switch (addr->sa_family) {
+	case AF_INET:
+		nlattr_add_in_addr(nw, IFLA_GENEVE_REMOTE,
+		    &SATOCONSTSIN(addr)->sin_addr);
+		break;
+	case AF_INET6:
+		nlattr_add_in6_addr(nw, IFLA_GENEVE_REMOTE,
+		    &SATOCONSTSIN6(addr)->sin6_addr);
+		break;
+	default:
+		break;
+	}
+}
+
+
+/*
+ * SIOCGDRVSPEC backend: export the interface configuration as a packed
+ * nvlist and copy it out to the buffer described by ifr->ifr_cap_nv.
+ *
+ * The softc is snapshotted into the nvlist under the read lock.  With
+ * INET6 the two address blobs are then taken back out, have their IPv6
+ * scope recovered on the private copies, and are re-inserted, so the
+ * softc is neither modified nor read after the lock is dropped.
+ *
+ * Returns 0 on success, ENOMEM if the nvlist cannot be created or
+ * packed, E2BIG if the packed size reaches IFR_CAP_NV_MAXBUFSIZE, or
+ * the copyout(9) error.
+ *
+ * NOTE(review): the packed length is not compared against the
+ * caller-supplied ifr_cap_nv.buf_length before copyout(); confirm the
+ * userland side always provides a large enough buffer.
+ */
+static int
+geneve_ioctl_get_nvl(struct geneve_softc *sc, struct ifreq *ifr)
+{
+	nvlist_t *nvl = NULL;
+	struct rm_priotracker tracker;
+	size_t len;
+	void *buf;
+	int error;
+
+	nvl = nvlist_create(0);
+	if (nvl == NULL)
+		return (ENOMEM);
+
+	GENEVE_RLOCK(sc, &tracker);
+	nvlist_add_binary(nvl, "local_sa", &sc->gnv_src_addr,
+	    sizeof(union sockaddr_union));
+	nvlist_add_binary(nvl, "remote_sa", &sc->gnv_dst_addr,
+	    sizeof(union sockaddr_union));
+	nvlist_add_string(nvl, "mc_ifname", sc->gnv_mc_ifname);
+	nvlist_add_number(nvl, "mc_ifindex", sc->gnv_mc_ifindex);
+	nvlist_add_number(nvl, "vni", sc->gnv_vni);
+	nvlist_add_number(nvl, "proto", sc->gnv_proto);
+	nvlist_add_number(nvl, "min_port", sc->gnv_min_port);
+	nvlist_add_number(nvl, "max_port", sc->gnv_max_port);
+	nvlist_add_number(nvl, "ttl", sc->gnv_ttl);
+	nvlist_add_number(nvl, "df", sc->gnv_df);
+	nvlist_add_number(nvl, "ftable_cnt", sc->gnv_ftable_cnt);
+	nvlist_add_number(nvl, "ftable_max", sc->gnv_ftable_max);
+	nvlist_add_number(nvl, "ftable_timeout", sc->gnv_ftable_timeout);
+	nvlist_add_bool(nvl, "inherit_ttl",
+	    (sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT) != 0);
+	nvlist_add_bool(nvl, "inherit_dscp",
+	    (sc->gnv_flags & GENEVE_FLAG_DSCP_INHERIT) != 0);
+	nvlist_add_bool(nvl, "learn", (sc->gnv_flags & GENEVE_FLAG_LEARN) != 0);
+	GENEVE_RUNLOCK(sc, &tracker);
+
+#ifdef INET6
+	union sockaddr_union *sa, *dsa;
+	size_t slen, dlen;
+
+	sa = nvlist_take_binary(nvl, "local_sa", &slen);
+	dsa = nvlist_take_binary(nvl, "remote_sa", &dlen);
+
+	if (sa->sa.sa_family == AF_INET6)
+		sa6_recoverscope(&sa->sin6);
+	if (dsa->sa.sa_family == AF_INET6)
+		sa6_recoverscope(&dsa->sin6);
+
+	/*
+	 * Re-insert the scope-recovered copies.  Adding the softc fields
+	 * again here would throw the recovery away and read the softc
+	 * without the lock.
+	 */
+	nvlist_add_binary(nvl, "local_sa", sa, slen);
+	nvlist_add_binary(nvl, "remote_sa", dsa, dlen);
+
+	free(sa, M_NVLIST);
+	free(dsa, M_NVLIST);
+#endif
+
+	buf = nvlist_pack(nvl, &len);
+	nvlist_destroy(nvl);
+	if (buf == NULL)
+		return (ENOMEM);
+
+	if (len >= IFR_CAP_NV_MAXBUFSIZE) {
+		/* The packed buffer is ours; do not leak it. */
+		free(buf, M_NVLIST);
+		return (E2BIG);
+	}
+
+	ifr->ifr_cap_nv.length = len;
+	error = copyout(buf, ifr->ifr_cap_nv.buffer, len);
+
+	free(buf, M_NVLIST);
+
+	return (error);
+}
+
+/*
+ * SIOCSDRVSPEC backend: copy in a packed nvlist from the buffer
+ * described by ifr->ifr_cap_nv, unpack it and apply every recognised
+ * key in a fixed order, stopping at the first failure.
+ *
+ * Returns EINVAL for a NULL request, a zero length or an nvlist that
+ * fails to unpack; E2BIG when either the declared buffer size or the
+ * payload length exceeds IFR_CAP_NV_MAXBUFSIZE; the copyin(9) error; or
+ * the first error reported by one of the per-key setters.
+ */
+static int
+geneve_ioctl_set_nvl(struct geneve_softc *sc, struct ifreq *ifr)
+{
+	nvlist_t *nvl;
+	void *buf;
+	int error;
+
+	if (ifr == NULL)
+		return (EINVAL);
+
+	if (ifr->ifr_cap_nv.length == 0)
+		return (EINVAL);
+
+	/*
+	 * 'length' is what sizes the allocation and the copyin below, so
+	 * it has to be bounded as well as 'buf_length'.
+	 */
+	if (ifr->ifr_cap_nv.buf_length > IFR_CAP_NV_MAXBUFSIZE ||
+	    ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE)
+		return (E2BIG);
+
+	buf = malloc(ifr->ifr_cap_nv.length, M_GENEVE, M_WAITOK);
+	error = copyin(ifr->ifr_cap_nv.buffer, buf, ifr->ifr_cap_nv.length);
+	if (error != 0) {
+		free(buf, M_GENEVE);
+		return (error);
+	}
+
+	/*
+	 * nvlist_unpack() returns a fresh nvlist; no nvlist is created
+	 * beforehand, because it would be overwritten here and leaked.
+	 */
+	nvl = nvlist_unpack(buf, ifr->ifr_cap_nv.length, 0);
+	free(buf, M_GENEVE);
+	if (nvl == NULL)
+		return (EINVAL);
+
+	if (error == 0 && nvlist_exists_number(nvl, "vni"))
+		error = geneve_set_vni_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_binary(nvl, "local_sa"))
+		error = geneve_set_local_addr_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_binary(nvl, "remote_sa"))
+		error = geneve_set_remote_addr_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_number(nvl, "local_port"))
+		error = geneve_set_local_port_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_number(nvl, "remote_port"))
+		error = geneve_set_remote_port_nvl(sc, nvl);
+
+	if (error == 0 && (nvlist_exists_number(nvl, "min_port") ||
+	    nvlist_exists_number(nvl, "max_port")))
+		error = geneve_set_port_range_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_number(nvl, "ttl"))
+		error = geneve_set_ttl_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_number(nvl, "df"))
+		error = geneve_set_df_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_bool(nvl, "inherit_ttl"))
+		error = geneve_set_ttl_inherit_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_bool(nvl, "inherit_dscp"))
+		error = geneve_set_dscp_inherit_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_bool(nvl, "collect_metadata"))
+		error = geneve_set_collect_metadata_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_bool(nvl, "learn"))
+		error = geneve_set_learn_nvl(sc, nvl);
+
+	/*
+	 * geneve_flush_ftable_nvl() looks the flag up as "flush_ftable",
+	 * so test for that same key here.
+	 * NOTE(review): confirm the userland side sends "flush_ftable".
+	 */
+	if (error == 0 && nvlist_exists_bool(nvl, "flush_ftable"))
+		error = geneve_flush_ftable_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_number(nvl, "ftable_timeout"))
+		error = geneve_set_ftable_timeout_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_number(nvl, "ftable_max"))
+		error = geneve_set_ftable_max_nvl(sc, nvl);
+
+	if (error == 0 && nvlist_exists_string(nvl, "mc_ifname"))
+		error = geneve_set_mc_if_nvl(sc, nvl);
+
+	nvlist_destroy(nvl);
+	return (error);
+}
+
+/*
+ * Interface ioctl entry point.
+ *
+ * A first switch performs the privilege check for SIOCSDRVSPEC and
+ * returns early when it fails; the second switch dispatches the command.
+ * Commands that are not handled explicitly go to ether_ioctl() when the
+ * interface carries Ethernet frames and fail with EINVAL otherwise.
+ * Returns 0 or an errno value.
+ */
+static int
+geneve_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+ struct rm_priotracker tracker;
+ struct geneve_softc *sc;
+ struct siocsifcapnv_driver_data *drv_ioctl_data, drv_ioctl_data_d;
+ struct ifreq *ifr;
+ int max;
+ int error;
+
+ CURVNET_ASSERT_SET();
+
+ sc = ifp->if_softc;
+ ifr = (struct ifreq *)data;
+
+ error = 0;
+
+ /* Pre-dispatch checks: only SIOCSDRVSPEC requires a privilege here. */
+ switch (cmd) {
+ case SIOCADDMULTI:
+ case SIOCDELMULTI:
+ break;
+
+ case SIOCGDRVSPEC:
+ break;
+ case SIOCSDRVSPEC:
+ error = priv_check(curthread, PRIV_NET_GENEVE);
+ if (error)
+ return (error);
+ break;
+ }
+
+ switch (cmd) {
+ case SIOCGDRVSPEC:
+ error = geneve_ioctl_get_nvl(sc, ifr);
+ break;
+
+ case SIOCSDRVSPEC:
+ error = geneve_ioctl_set_nvl(sc, ifr);
+ break;
+
+ case SIOCSIFFLAGS:
+ error = geneve_ioctl_ifflags(sc);
+ break;
+
+ case SIOCSIFMEDIA:
+ case SIOCGIFMEDIA:
+ /* Media requests are only served in Ethernet mode. */
+ if (sc->gnv_proto == GENEVE_PROTO_ETHER)
+ error = ifmedia_ioctl(ifp, ifr, &sc->gnv_media, cmd);
+ else
+ error = EINVAL;
+ break;
+
+ case SIOCSIFMTU:
+ /* The upper MTU bound depends on the inner protocol mode. */
+ if (sc->gnv_proto == GENEVE_PROTO_ETHER)
+ max = GENEVE_MAX_MTU;
+ else
+ max = GENEVE_MAX_L3MTU;
+
+ if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > max) {
+ error = EINVAL;
+ } else {
+ GENEVE_WLOCK(sc);
+ ifp->if_mtu = ifr->ifr_mtu;
+ /* Record that the MTU was set explicitly. */
+ sc->gnv_flags |= GENEVE_FLAG_USER_MTU;
+ GENEVE_WUNLOCK(sc);
+ }
+ break;
+
+ case SIOCGIFCAPNV:
+ error = 0;
+ break;
+
+ case SIOCSIFCAP:
+ /*
+  * Build a local driver-data request from the legacy ifreq so the
+  * SIOCSIFCAPNV code below can serve both commands.
+  */
+ drv_ioctl_data = &drv_ioctl_data_d;
+ drv_ioctl_data->reqcap = ifr->ifr_reqcap;
+ drv_ioctl_data->reqcap2 = if_getcapenable2(ifp);
+ drv_ioctl_data->nvcap = NULL;
+ /* FALLTHROUGH */
+ case SIOCSIFCAPNV:
+ if (cmd == SIOCSIFCAPNV)
+ drv_ioctl_data = (struct siocsifcapnv_driver_data *) data;
+
+ GENEVE_WLOCK(sc);
+ error = geneve_set_reqcap(sc, ifp, drv_ioctl_data->reqcap,
+ drv_ioctl_data->reqcap2);
+ if (error == 0)
+ geneve_set_hwcaps(sc);
+ GENEVE_WUNLOCK(sc);
+ break;
+
+ case SIOCGTUNFIB:
+ GENEVE_RLOCK(sc, &tracker);
+ ifr->ifr_fib = sc->gnv_fibnum;
+ GENEVE_RUNLOCK(sc, &tracker);
+ break;
+
+ case SIOCSTUNFIB:
+ if ((error = priv_check(curthread, PRIV_NET_GENEVE)) != 0)
+ break;
+
+ if (ifr->ifr_fib >= rt_numfibs)
+ error = EINVAL;
+ else {
+ GENEVE_WLOCK(sc);
+ sc->gnv_fibnum = ifr->ifr_fib;
+ GENEVE_WUNLOCK(sc);
+ }
+ break;
+
+ case SIOCSIFADDR:
+ /* Assigning an address marks the interface up. */
+ ifp->if_flags |= IFF_UP;
+ /* FALLTHROUGH */
+ case SIOCGIFADDR:
+ /* Outside Ethernet mode there is nothing more to do; error stays 0. */
+ if (sc->gnv_proto == GENEVE_PROTO_ETHER)
+ error = ether_ioctl(ifp, cmd, data);
+ break;
+
+ default:
+ if (sc->gnv_proto == GENEVE_PROTO_ETHER)
+ error = ether_ioctl(ifp, cmd, data);
+ else
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+
+#if defined(INET) || defined(INET6)
+/*
+ * Choose the outer UDP source port for a packet (host byte order).
+ *
+ * The mbuf flow id is used as the hash when one is present; otherwise
+ * the first ETHER_HDR_LEN bytes of packet data are hashed with the
+ * per-interface key.  The hash is folded into the configured
+ * [gnv_min_port, gnv_max_port] range.
+ */
+static uint16_t
+geneve_pick_source_port(struct geneve_softc *sc, struct mbuf *m)
+{
+	uint32_t hash;
+	int range;
+
+	/* RFC 8926 Section 3.3-2.2.1 */
+	hash = M_HASHTYPE_ISHASH(m) ? m->m_pkthdr.flowid :
+	    jenkins_hash(m->m_data, ETHER_HDR_LEN, sc->gnv_port_hash_key);
+
+	range = sc->gnv_max_port - sc->gnv_min_port + 1;
+
+	return (sc->gnv_min_port + (hash % range));
+}
+
+/*
+ * Fill in the UDP and Geneve headers located ipoff bytes into the mbuf.
+ *
+ * srcport, dstport and proto are stored as given, so callers pass them
+ * in network byte order.  The UDP length covers everything from ipoff
+ * to the end of the packet; the UDP checksum is left at zero.  No
+ * Geneve options are emitted.
+ */
+static void
+geneve_encap_header(struct geneve_softc *sc, struct mbuf *m, int ipoff,
+    uint16_t srcport, uint16_t dstport, uint16_t proto)
+{
+	struct geneveudphdr *encap;
+	struct genevehdr *gh;
+	struct udphdr *uh;
+	int udplen;
+
+	udplen = m->m_pkthdr.len - ipoff;
+	MPASS(udplen >= sizeof(struct geneveudphdr));
+
+	encap = mtodo(m, ipoff);
+	uh = &encap->geneve_udp;
+	gh = &encap->geneve_hdr;
+
+	uh->uh_sport = srcport;
+	uh->uh_dport = dstport;
+	uh->uh_ulen = htons(udplen);
+	uh->uh_sum = 0;
+
+	gh->geneve_ver = 0;
+	gh->geneve_optlen = 0;
+	gh->geneve_critical = 0;
+	gh->geneve_control = 0;
+	gh->geneve_flags = 0;
+	gh->geneve_proto = proto;
+	gh->geneve_vni = htonl(sc->gnv_vni << GENEVE_HDR_VNI_SHIFT);
+}
+#endif
+
+#if defined(INET6) || defined(INET)
+/*
+ * Translate outer CSUM_* offload requests into their CSUM_INNER_*
+ * counterparts, starting from the 'encap' bits supplied by the caller.
+ */
+static uint32_t
+csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap)
+{
+	const uint32_t ipv4_bits = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP;
+	uint32_t inner = encap;
+
+	/*
+	 * csum_flags can request either v4 or v6 offload but not both.
+	 * tcp_output always sets CSUM_TSO (both CSUM_IP_TSO and CSUM_IP6_TSO)
+	 * so those bits are no good to detect the IP version.  Other bits are
+	 * always set with CSUM_TSO and we use those to figure out the IP
+	 * version.
+	 */
+	if ((csum_flags_in & ipv4_bits) == 0) {
+#ifdef INVARIANTS
+		const uint32_t ipv6_bits = CSUM_IP6_UDP | CSUM_IP6_TCP;
+		MPASS((csum_flags_in & ipv6_bits) != 0);
+#endif
+		if (csum_flags_in & CSUM_IP6_UDP)
+			inner |= CSUM_INNER_IP6_UDP;
+		if (csum_flags_in & CSUM_IP6_TCP)
+			inner |= CSUM_INNER_IP6_TCP;
+		if (csum_flags_in & CSUM_IP6_TSO)
+			inner |= CSUM_INNER_IP6_TSO;
+
+		return (inner);
+	}
+
+	if (csum_flags_in & CSUM_IP)
+		inner |= CSUM_INNER_IP;
+	if (csum_flags_in & CSUM_IP_UDP)
+		inner |= CSUM_INNER_IP_UDP;
+	if (csum_flags_in & CSUM_IP_TCP)
+		inner |= CSUM_INNER_IP_TCP;
+	if (csum_flags_in & CSUM_IP_TSO)
+		inner |= CSUM_INNER_IP_TSO;
+
+	return (inner);
+}
+#endif
+
+/*
+ * Guess the ethertype of a packet from the IP version found at the
+ * start of the mbuf data.  Returns ETHERTYPE_IP, ETHERTYPE_IPV6, or 0
+ * when the version matches neither.
+ */
+static uint16_t
+geneve_get_ethertype(struct mbuf *m)
+{
+	struct ip *v4hdr;
+	struct ip6_hdr *v6hdr;
+
+	/*
+	 * We should pullup, but we're only interested in the first byte, so
+	 * that'll always be contiguous.
+	 */
+	v4hdr = mtod(m, struct ip *);
+	v6hdr = mtod(m, struct ip6_hdr *);
+
+	if (v4hdr->ip_v == IPVERSION)
+		return (ETHERTYPE_IP);
+	if ((v6hdr->ip6_vfc & IPV6_VERSION_MASK) == IPV6_VERSION)
+		return (ETHERTYPE_IPV6);
+
+	return (0);
+}
+
+/*
+ * RFC 8926 Section 4.4.2. DSCP, ECN, and TTL
+ *
+ * Derive the outer header's TOS, TTL and DF setting from the interface
+ * configuration and, where inheritance is enabled, from the inner IP
+ * header.
+ *
+ * 'proto' is either GENEVE_PROTO_ETHER (the inner IP header, if any,
+ * follows the Ethernet header) or the ethertype of a bare L3 payload.
+ * On return *tos holds the inner TOS / traffic class (0 if the payload
+ * is not IP), *ttl the configured or inherited TTL, and *ip_off either
+ * 0 or htons(IP_DF).
+ *
+ * Returns 0 on success.  Returns EINVAL when an Ethernet frame claims
+ * to carry IPv4/IPv6 but is too short for that header; in that case the
+ * mbuf has already been freed.
+ */
+static int
+geneve_inherit_l3_hdr(struct mbuf *m, struct geneve_softc *sc, uint16_t proto,
+    uint8_t *tos, uint8_t *ttl, u_short *ip_off)
+{
+	struct ether_header *eh;
+	struct ip *ip_inner, iphdr;
+	struct ip6_hdr *ip6_inner, ip6hdr;
+	int offset;
+
+	*tos = 0;
+	*ttl = sc->gnv_ttl;
+	if (sc->gnv_df == IFLA_GENEVE_DF_SET)
+		*ip_off = htons(IP_DF);
+	else
+		*ip_off = 0;
+
+	/* Set offset and address family if proto is ethernet */
+	if (proto == GENEVE_PROTO_ETHER) {
+		eh = mtod(m, struct ether_header *);
+		if (eh->ether_type == htons(ETHERTYPE_IP)) {
+			if (m->m_pkthdr.len <
+			    ETHER_HDR_LEN + sizeof(struct ip)) {
+				m_freem(m);
+				return (EINVAL);
+			}
+			proto = ETHERTYPE_IP;
+		} else if (eh->ether_type == htons(ETHERTYPE_IPV6)) {
+			if (m->m_pkthdr.len <
+			    ETHER_HDR_LEN + sizeof(struct ip6_hdr)) {
+				m_freem(m);
+				return (EINVAL);
+			}
+			proto = ETHERTYPE_IPV6;
+		} else
+			return (0);
+
+		offset = ETHER_HDR_LEN;
+	} else
+		offset = 0;
+
+	switch (proto) {
+	case (ETHERTYPE_IP):
+		/* Copy the header out if it is not contiguous. */
+		if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
+			m_copydata(m, offset, sizeof(struct ip),
+			    (caddr_t)&iphdr);
+			ip_inner = &iphdr;
+		} else
+			ip_inner = mtodo(m, offset);
+
+		*tos = ip_inner->ip_tos;
+		if (sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT)
+			*ttl = ip_inner->ip_ttl;
+		/*
+		 * Inherit only the DF bit.  ip_off also carries the MF
+		 * flag and the fragment offset of the inner packet, which
+		 * must not leak into the outer header.
+		 */
+		if (sc->gnv_df == IFLA_GENEVE_DF_INHERIT)
+			*ip_off = ip_inner->ip_off & htons(IP_DF);
+		break;
+
+	case (ETHERTYPE_IPV6):
+		/* Copy the header out if it is not contiguous. */
+		if (__predict_false(m->m_len <
+		    offset + sizeof(struct ip6_hdr))) {
+			m_copydata(m, offset, sizeof(struct ip6_hdr),
+			    (caddr_t)&ip6hdr);
+			ip6_inner = &ip6hdr;
+		} else
+			ip6_inner = mtodo(m, offset);
+
+		*tos = IPV6_TRAFFIC_CLASS(ip6_inner);
+		if (sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT)
+			*ttl = ip6_inner->ip6_hlim;
+		break;
+
+	default:
+		/* Not an IP payload: keep the configured defaults. */
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Encapsulate a packet in IPv4/UDP/Geneve and hand it to ip_output().
+ *
+ * 'funsa' supplies the destination address and (network order) port.
+ * The mbuf is consumed on the paths visible here: it is freed on the
+ * route-lookup and hardware-capability failures below, and
+ * geneve_inherit_l3_hdr() frees it when it returns an error.
+ * Interface output counters are updated according to the outcome.
+ *
+ * Without INET the packet is dropped and ENOTSUP is returned.
+ */
+static int
+geneve_encap4(struct geneve_softc *sc, const union sockaddr_union *funsa,
+ struct mbuf *m)
+{
+#ifdef INET
+ struct ifnet *ifp;
+ struct ip *ip;
+ struct in_addr srcaddr, dstaddr;
+ uint16_t srcport, dstport, proto;
+ uint8_t tos, ecn, ttl;
+ u_short ip_off;
+ int plen, mcast, error;
+ struct route route, *ro;
+ struct sockaddr_in *sin;
+ uint32_t csum_flags;
+
+ NET_EPOCH_ASSERT();
+
+ ifp = sc->gnv_ifp;
+ srcaddr = sc->gnv_src_addr.sin.sin_addr;
+ srcport = htons(geneve_pick_source_port(sc, m));
+ dstaddr = funsa->sin.sin_addr;
+ dstport = funsa->sin.sin_port;
+ /* Inner packet length, recorded before headers are prepended. */
+ plen = m->m_pkthdr.len;
+
+ if (sc->gnv_proto == GENEVE_PROTO_ETHER)
+ proto = sc->gnv_proto;
+ else
+ proto = geneve_get_ethertype(m);
+
+ error = geneve_inherit_l3_hdr(m, sc, proto, &tos, &ttl, &ip_off);
+ if (error) {
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (error);
+ }
+
+ M_PREPEND(m, sizeof(struct ip) + sizeof(struct geneveudphdr), M_NOWAIT);
+ if (m == NULL) {
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (ENOBUFS);
+ }
+
+ ip = mtod(m, struct ip *);
+
+ ecn = (tos & IPTOS_ECN_MASK);
+ /* RFC 6040 MUST be followed for IP packets encapsulated in geneve */
+ ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &ecn);
+
+ /* Only the DSCP bits of the inner TOS are inherited. */
+ if (sc->gnv_flags & GENEVE_FLAG_DSCP_INHERIT)
+ ip->ip_tos |= (tos & ~IPTOS_ECN_MASK);
+
+ ip->ip_len = htons(m->m_pkthdr.len);
+ ip->ip_off = ip_off;
+ ip->ip_ttl = ttl;
+ ip->ip_p = IPPROTO_UDP;
+ ip->ip_sum = 0;
+ ip->ip_src = srcaddr;
+ ip->ip_dst = dstaddr;
+
+ geneve_encap_header(sc, m, sizeof(struct ip), srcport, dstport,
+ htons(proto));
+
+ /* Remember the inner multicast/broadcast state, then clear the flags. */
+ mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
+ m->m_flags &= ~(M_MCAST | M_BCAST);
+
+ m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
+ if (m->m_pkthdr.csum_flags != 0) {
+ /*
+ * HW checksum (L3 and/or L4) or TSO has been requested. Look
+ * up the ifnet for the outbound route and verify that the
+ * outbound ifnet can perform the requested operation on the
+ * inner frame.
+ */
+ memset(&route, 0, sizeof(route));
+ ro = &route;
+ sin = (struct sockaddr_in *)&ro->ro_dst;
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = ip->ip_dst;
+ ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE, 0);
+ if (ro->ro_nh == NULL) {
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (EHOSTUNREACH);
+ }
+
+ csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
+ CSUM_ENCAP_GENEVE);
+ if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
+ csum_flags) {
+ /* Rate-limited diagnostic about the missing capabilities. */
+ if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
+ const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;
+
+ if_printf(ifp, "interface %s is missing hwcaps "
+ "0x%08x, csum_flags 0x%08x -> 0x%08x, "
+ "hwassist 0x%08x\n", nh_ifp->if_xname,
+ csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
+ m->m_pkthdr.csum_flags, csum_flags,
+ (uint32_t)nh_ifp->if_hwassist);
+ }
+ m_freem(m);
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ return (ENXIO);
+ }
+ m->m_pkthdr.csum_flags = csum_flags;
+ if (csum_flags & (CSUM_INNER_IP | CSUM_INNER_IP_UDP |
+ CSUM_INNER_IP6_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
+ counter_u64_add(sc->gnv_stats.txcsum, 1);
+ if (csum_flags & CSUM_INNER_TSO)
+ counter_u64_add(sc->gnv_stats.tso, 1);
+ }
+ } else
+ ro = NULL;
+
+ error = ip_output(m, NULL, ro, 0, sc->gnv_im4o, NULL);
+ if (error == 0) {
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+ if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+ if (mcast != 0)
+ if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+ } else
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+
+ return (error);
+#else
+ m_freem(m);
+ return (ENOTSUP);
+#endif
+}
+
+/*
+ * Encapsulate a packet in IPv6/UDP/Geneve and hand it to ip6_output().
+ *
+ * 'funsa' supplies the destination address and (network order) port.
+ * The mbuf is consumed on the paths visible here: it is freed on the
+ * route-lookup and hardware-capability failures below, and
+ * geneve_inherit_l3_hdr() frees it when it returns an error.
+ * Interface output counters are updated according to the outcome.
+ *
+ * Without INET6 the packet is dropped and ENOTSUP is returned.
+ */
+static int
+geneve_encap6(struct geneve_softc *sc, const union sockaddr_union *funsa,
+    struct mbuf *m)
+{
+#ifdef INET6
+	struct ifnet *ifp;
+	struct ip6_hdr *ip6;
+	const struct in6_addr *srcaddr, *dstaddr;
+	uint16_t srcport, dstport, proto;
+	uint8_t tos, ecn, etos, ttl;
+	u_short ip6_df;
+	int plen, mcast, error;
+	struct route_in6 route, *ro;
+	struct sockaddr_in6 *sin6;
+	uint32_t csum_flags;
+	struct ip6_pktopts opts;
+
+	NET_EPOCH_ASSERT();
+
+	ifp = sc->gnv_ifp;
+	srcaddr = &sc->gnv_src_addr.sin6.sin6_addr;
+	srcport = htons(geneve_pick_source_port(sc, m));
+	dstaddr = &funsa->sin6.sin6_addr;
+	dstport = funsa->sin6.sin6_port;
+	/* Inner packet length, recorded before headers are prepended. */
+	plen = m->m_pkthdr.len;
+
+	if (sc->gnv_proto == GENEVE_PROTO_ETHER)
+		proto = sc->gnv_proto;
+	else
+		proto = geneve_get_ethertype(m);
+
+	error = geneve_inherit_l3_hdr(m, sc, proto, &tos, &ttl, &ip6_df);
+	if (error) {
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		return (error);
+	}
+
+	/* A non-zero DF result maps onto the IPv6 "don't fragment" option. */
+	ip6_initpktopts(&opts);
+	if (ip6_df)
+		opts.ip6po_flags = IP6PO_DONTFRAG;
+
+	M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct geneveudphdr),
+	    M_NOWAIT);
+	if (m == NULL) {
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		return (ENOBUFS);
+	}
+
+	ip6 = mtod(m, struct ip6_hdr *);
+	ip6->ip6_flow = 0;
+	ip6->ip6_vfc = IPV6_VERSION;
+
+	ecn = (tos & IPTOS_ECN_MASK);
+	/* RFC 6040 MUST be followed for IP packets encapsulated in geneve */
+	ip_ecn_ingress(ECN_ALLOWED, &etos, &ecn);
+	ip6->ip6_flow |= htonl((u_int32_t)etos << IPV6_FLOWLABEL_LEN);
+
+	/*
+	 * Inherit the DSCP bits only, as geneve_encap4() does.  OR-ing in
+	 * the full inner TOS would overwrite the ECN field that
+	 * ip_ecn_ingress() has just computed.
+	 */
+	if (sc->gnv_flags & GENEVE_FLAG_DSCP_INHERIT)
+		ip6->ip6_flow |= htonl(
+		    (u_int32_t)(tos & ~IPTOS_ECN_MASK) << IPV6_FLOWLABEL_LEN);
+
+	ip6->ip6_plen = 0;
+	ip6->ip6_nxt = IPPROTO_UDP;
+	ip6->ip6_hlim = ttl;
+	ip6->ip6_src = *srcaddr;
+	ip6->ip6_dst = *dstaddr;
+
+	geneve_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport,
+	    htons(proto));
+
+	/* Remember the inner multicast/broadcast state, then clear it. */
+	mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
+	m->m_flags &= ~(M_MCAST | M_BCAST);
+
+	ro = NULL;
+	m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
+	if (mcast != 0 ||
+	    m->m_pkthdr.csum_flags != 0) {
+		/*
+		 * HW checksum (L3 and/or L4) or TSO has been requested. Look
+		 * up the ifnet for the outbound route and verify that the
+		 * outbound ifnet can perform the requested operation on the
+		 * inner frame.
+		 * XXX: There's a rare scenario with ipv6 over multicast
+		 * underlay where, when mc_ifname is set, it causes panics
+		 * inside a jail. We'll force geneve to select its own outbound
+		 * interface to avoid this.
+		 */
+		memset(&route, 0, sizeof(route));
+		ro = &route;
+		sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_len = sizeof(*sin6);
+		sin6->sin6_addr = ip6->ip6_dst;
+		ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0,
+		    NHR_NONE, 0);
+		if (ro->ro_nh == NULL) {
+			m_freem(m);
+			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+			return (EHOSTUNREACH);
+		}
+	}
+	if (m->m_pkthdr.csum_flags != 0) {
+		/* ro is non-NULL here: the lookup above ran for this case. */
+		csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
+		    CSUM_ENCAP_GENEVE);
+		if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
+		    csum_flags) {
+			/* Rate-limited diagnostic about the missing caps. */
+			if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
+				const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;
+
+				if_printf(ifp, "interface %s is missing hwcaps "
+				    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
+				    "hwassist 0x%08x\n", nh_ifp->if_xname,
+				    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
+				    m->m_pkthdr.csum_flags, csum_flags,
+				    (uint32_t)nh_ifp->if_hwassist);
+			}
+			m_freem(m);
+			if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+			return (ENXIO);
+		}
+		m->m_pkthdr.csum_flags = csum_flags;
+		if (csum_flags &
+		    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
+		    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
+			counter_u64_add(sc->gnv_stats.txcsum, 1);
+			if (csum_flags & CSUM_INNER_TSO)
+				counter_u64_add(sc->gnv_stats.tso, 1);
+		}
+	} else if (ntohs(dstport) != V_zero_checksum_port) {
+		/*
+		 * No inner offload requested: seed the outer UDP checksum
+		 * with the pseudo-header sum and request CSUM_UDP_IPV6,
+		 * unless the destination is the zero-checksum port.
+		 */
+		struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));
+
+		hdr->uh_sum = in6_cksum_pseudo(ip6,
+		    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
+		m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
+		m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
+	}
+	error = ip6_output(m, &opts, ro, 0, sc->gnv_im6o, NULL, NULL);
+	if (error == 0) {
+		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
+		if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
+		if (mcast != 0)
+			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
+	} else
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+
+	return (error);
+#else
+	m_freem(m);
+	return (ENOTSUP);
+#endif
+}
+
+/*
+ * if_transmit method: pick the tunnel endpoint for the packet and pass
+ * it to the IPv4 or IPv6 encapsulation routine.
+ *
+ * In Ethernet mode unicast frames are looked up in the forwarding table
+ * by destination MAC; anything not found (and all broadcast/multicast
+ * frames) goes to the default entry.  In L3 mode the configured remote
+ * address is always used.
+ *
+ * The mbuf is always consumed.  Returns ENETDOWN when the interface is
+ * not running, ELOOP when the nesting check fails, EAFNOSUPPORT when
+ * the endpoint is neither IPv4 nor IPv6, or the encapsulation result.
+ */
+static int
+geneve_transmit(struct ifnet *ifp, struct mbuf *m)
+{
+	struct rm_priotracker tracker;
+	union sockaddr_union unsa;
+	struct geneve_softc *sc;
+	struct gnv_ftable_entry *fe;
+	struct ifnet *mcifp;
+	struct ether_header *eh;
+	uint32_t af;
+	int error;
+
+	mcifp = NULL;
+	sc = ifp->if_softc;
+	GENEVE_RLOCK(sc, &tracker);
+	M_SETFIB(m, sc->gnv_fibnum);
+
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+		GENEVE_RUNLOCK(sc, &tracker);
+		m_freem(m);
+		return (ENETDOWN);
+	}
+	if (__predict_false(if_tunnel_check_nesting(ifp, m,
+	    MTAG_GENEVE_LOOP, 1) != 0)) {
+		GENEVE_RUNLOCK(sc, &tracker);
+		m_freem(m);
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		return (ELOOP);
+	}
+
+	if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
+		fe = NULL;
+		eh = mtod(m, struct ether_header *);
+
+		ETHER_BPF_MTAP(ifp, m);
+		if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
+			fe = geneve_ftable_entry_lookup(sc, eh->ether_dhost);
+		if (fe == NULL)
+			fe = &sc->gnv_default_fe;
+		geneve_sockaddr_copy(&unsa, &fe->gnvfe_raddr.sa);
+	} else
+		geneve_sockaddr_copy(&unsa, &sc->gnv_dst_addr.sa);
+
+	af = unsa.sa.sa_family;
+	/* Keep the multicast interface referenced across the send. */
+	if (geneve_check_multicast_addr(&unsa) != 0)
+		mcifp = geneve_multicast_if_ref(sc, af);
+
+	/* Hold a softc reference so the read lock can be dropped. */
+	GENEVE_ACQUIRE(sc);
+	GENEVE_RUNLOCK(sc, &tracker);
+
+	if (af == AF_INET)
+		error = geneve_encap4(sc, &unsa, m);
+	else if (af == AF_INET6)
+		error = geneve_encap6(sc, &unsa, m);
+	else {
+		/*
+		 * Neither encapsulation routine took the packet, so it
+		 * has to be dropped and accounted for here; otherwise
+		 * the mbuf would leak.
+		 */
+		m_freem(m);
+		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+		error = EAFNOSUPPORT;
+	}
+
+	geneve_release(sc);
+	if (mcifp != NULL)
+		if_rele(mcifp);
+
+	return (error);
+}
+
+/*
+ * if_output method: tap the packet for BPF with its address family and
+ * pass it to the interface's if_transmit routine.
+ *
+ * With MAC, a failed mac_ifnet_check_transmit() drops the packet and
+ * returns that error.  Any transmit failure is reported to the caller
+ * as ENOBUFS; success returns 0.
+ */
+static int
+geneve_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+    struct route *ro)
+{
+	u_int32_t af;
+	int error;
+
+#ifdef MAC
+	error = mac_ifnet_check_transmit(ifp, m);
+	if (error) {
+		m_freem(m);
+		return (error);
+	}
+#endif
+
+	/* BPF writes need to be handled specially. */
+	switch (dst->sa_family) {
+	case AF_UNSPEC:
+	case pseudo_AF_HDRCMPLT:
+		memmove(&af, dst->sa_data, sizeof(af));
+		break;
+	default:
+		af = RO_GET_FAMILY(ro, dst);
+		break;
+	}
+
+	BPF_MTAP2(ifp, &af, sizeof(af), m);
+
+	error = (ifp->if_transmit)(ifp, m);
+
+	return (error != 0 ? ENOBUFS : 0);
+}
+
+/*
+ * Decide whether a received packet may be accepted given the option
+ * related bits of its Geneve header.
+ *
+ * Returns EINVAL when the control (O) bit is set, or when options are
+ * present and the critical (C) bit is set; returns 0 otherwise.  The
+ * options themselves are not parsed; 'gnvso' and 'm0' are currently
+ * unused.
+ */
+static int
+geneve_next_option(struct geneve_socket *gnvso, struct genevehdr *gnvh,
+    struct mbuf **m0)
+{
+
+	/*
+	 * We MUST NOT forward the packet if control (O) bit is set
+	 * and currently there is no standard specification for it.
+	 * Therefore, we drop it.
+	 */
+	if (gnvh->geneve_control)
+		return (EINVAL);
+
+	/* No options: nothing further to validate. */
+	if (gnvh->geneve_optlen == 0)
+		return (0);
+
+	/*
+	 * XXX: Geneve options processing
+	 * We MUST drop the packet if there are options to process
+	 * and we are not able to process it.
+	 */
+	if (gnvh->geneve_critical)
+		return (EINVAL);
+
+	return (0);
+}
+
+/* if_qflush method: intentionally empty, there is nothing to flush here. */
+static void
+geneve_qflush(struct ifnet *ifp __unused)
+{
+}
+
+/*
+ * Rewrite the receive checksum flags of a decapsulated packet.
+ *
+ * When the matching receive-checksum capability is enabled on the
+ * interface (IFCAP_RXCSUM if the inner L3 checksum was calculated,
+ * IFCAP_RXCSUM_IPV6 otherwise), the CSUM_INNER_* results are translated
+ * to their plain CSUM_* equivalents and the rxcsum counter is bumped.
+ * Otherwise all checksum state on the mbuf is cleared.
+ */
+static void
+geneve_input_csum(struct mbuf *m, struct ifnet *ifp, counter_u64_t rxcsum)
+{
+	const uint32_t inner = m->m_pkthdr.csum_flags;
+	uint32_t translated;
+	bool enabled;
+
+	if ((inner & CSUM_INNER_L3_CALC) != 0)
+		enabled = (ifp->if_capenable & IFCAP_RXCSUM) != 0;
+	else
+		enabled = (ifp->if_capenable & IFCAP_RXCSUM_IPV6) != 0;
+
+	if (!enabled) {
+		/* clear everything */
+		m->m_pkthdr.csum_flags = 0;
+		m->m_pkthdr.csum_data = 0;
+		return;
+	}
+
+	translated = 0;
+	if (inner & CSUM_INNER_L3_CALC)
+		translated |= CSUM_L3_CALC;
+	if (inner & CSUM_INNER_L3_VALID)
+		translated |= CSUM_L3_VALID;
+	if (inner & CSUM_INNER_L4_CALC)
+		translated |= CSUM_L4_CALC;
+	if (inner & CSUM_INNER_L4_VALID)
+		translated |= CSUM_L4_VALID;
+
+	m->m_pkthdr.csum_flags = translated;
+	counter_u64_add(rxcsum, 1);
+}
+
+/*
+ * Map an ethertype to an address family: IPv4 -> AF_INET,
+ * IPv6 -> AF_INET6, ARP -> AF_LINK, anything else -> 0.
+ */
+static uint32_t
+geneve_map_etype_to_af(uint32_t ethertype)
+{
+
+	switch (ethertype) {
+	case ETHERTYPE_IP:
+		return (AF_INET);
+	case ETHERTYPE_IPV6:
+		return (AF_INET6);
+	case ETHERTYPE_ARP:
+		return (AF_LINK);
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Input handler for UDP datagrams received on a geneve socket.
+ *
+ * m       - received packet; offset is the offset of the UDP header in it.
+ * srcsa   - address of the sender, used for forwarding table learning.
+ * xgnvso  - the struct geneve_socket the datagram arrived on.
+ *
+ * The geneve header is validated, the outer headers and geneve options are
+ * stripped, the interface is looked up by VNI and the inner packet is
+ * injected: Ethernet payloads go through if_input, everything else is tapped
+ * to BPF and dispatched to the netisr chosen by geneve_input_inherit().
+ *
+ * Returns false when the datagram is rejected during header validation or
+ * option processing, and true once this function has taken ownership of the
+ * mbuf (delivered or freed).
+ * NOTE(review): presumably "false" tells the UDP layer that the datagram was
+ * not consumed -- confirm against the udp tunneling callback contract.
+ */
+static bool
+geneve_udp_input(struct mbuf *m, int offset, struct inpcb *inpcb,
+    const struct sockaddr *srcsa, void *xgnvso)
+{
+	struct geneve_socket *gnvso;
+	struct geneve_pkt_info info;
+	struct genevehdr *gnvh, gnvhdr;
+	struct geneve_softc *sc;
+	struct ip *iphdr;
+	struct ip6_hdr *ip6hdr;
+	uint32_t vni;
+	uint16_t optlen, proto;
+	int32_t plen, af;
+	struct ifnet *ifp;
+	int error;
+
+	M_ASSERTPKTHDR(m);
+	plen = m->m_pkthdr.len;
+	gnvso = xgnvso;
+	memset(&info, 0, sizeof(info));
+
+	if (m->m_pkthdr.len < offset + sizeof(struct geneveudphdr))
+		return (false);
+
+	/* Get ECN and TTL values for future processing */
+	info.ethertype = geneve_get_ethertype(m);
+	if (info.ethertype == ETHERTYPE_IP) {
+		iphdr = mtodo(m, offset - sizeof(struct ip));
+		info.ecn = (iphdr->ip_tos & IPTOS_ECN_MASK);
+		info.ttl = iphdr->ip_ttl;
+	} else if (info.ethertype == ETHERTYPE_IPV6) {
+		ip6hdr = mtodo(m, offset - sizeof(struct ip6_hdr));
+		info.ecn = IPV6_ECN(ip6hdr);
+		info.ttl = ip6hdr->ip6_hlim;
+	}
+
+	offset += sizeof(struct udphdr);
+
+	/*
+	 * Get geneve header.  Use a stack copy when the header is not
+	 * contiguous in the first mbuf.
+	 */
+	if (__predict_false(m->m_len < offset + sizeof(struct genevehdr))) {
+		m_copydata(m, offset, sizeof(struct genevehdr),
+		    (caddr_t)&gnvhdr);
+		gnvh = &gnvhdr;
+	} else
+		gnvh = mtodo(m, offset);
+
+	/*
+	 * Drop if there is a reserved bit or unknown version set in the header.
+	 * As defined in RFC 8926 3.4
+	 */
+	if (gnvh->geneve_ver != htons(GENEVE_VERSION) ||
+	    gnvh->geneve_vni & ~GENEVE_VNI_MASK)
+		return (false);
+
+	/*
+	 * The length of the option fields, expressed in 4-byte multiples, not
+	 * including the 8-byte fixed tunnel header.
+	 */
+	optlen = ntohs(gnvh->geneve_optlen) * 4;
+	error = geneve_next_option(gnvso, gnvh, &m);
+	if (error != 0)
+		return (false);
+
+	proto = ntohs(gnvh->geneve_proto);
+	vni = ntohl(gnvh->geneve_vni) >> GENEVE_HDR_VNI_SHIFT;
+
+	/* Strip everything up to the start of the inner packet. */
+	m_adj(m, offset + sizeof(struct genevehdr) + optlen);
+
+	sc = geneve_socket_lookup_softc(gnvso, vni);
+	if (sc == NULL) {
+		/*
+		 * The outer headers are already gone, so the mbuf can no
+		 * longer be handed back as an unconsumed UDP datagram.
+		 * Drop it here instead.
+		 */
+		m_freem(m);
+		return (true);
+	}
+
+	ifp = sc->gnv_ifp;
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		goto out;
+
+	/* if next protocol is ethernet, check its ethertype and learn it */
+	if (proto == GENEVE_PROTO_ETHER) {
+		offset = ETHER_HDR_LEN;
+		error = geneve_input_ether(sc, &m, srcsa, &info);
+		if (error != 0)
+			goto out;
+	} else {
+		/* af is only consumed on the non-Ethernet delivery path. */
+		info.ethertype = proto;
+		af = geneve_map_etype_to_af(info.ethertype);
+		offset = 0;
+	}
+
+	error = geneve_input_inherit(sc, &m, offset, &info);
+	if (error != 0)
+		goto out;
+
+	if (ifp == m->m_pkthdr.rcvif)
+		/* XXX Does not catch more complex loops. */
+		goto out;
+
+	m_clrprotoflags(m);
+	m->m_pkthdr.rcvif = ifp;
+	M_SETFIB(m, ifp->if_fib);
+	geneve_input_csum(m, ifp, sc->gnv_stats.rxcsum);
+	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
+	if_inc_counter(ifp, IFCOUNTER_IBYTES, plen);
+	if (sc->gnv_mc_ifp != NULL)
+		if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1);
+
+	MPASS(m != NULL);
+
+	if (proto == GENEVE_PROTO_ETHER)
+		(*ifp->if_input)(ifp, m);
+	else {
+		BPF_MTAP2(ifp, &af, sizeof(af), m);
+		netisr_dispatch_src(info.isr, (uintptr_t)xgnvso, m);
+	}
+
+	/* Ownership of the mbuf moved to the stack. */
+	m = NULL;
+out:
+	geneve_release(sc);
+	if (m != NULL) {
+		if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
+		m_freem(m);
+	}
+
+	return (true);
+}
+
+/*
+ * Prepare a decapsulated Ethernet frame for input.
+ *
+ * Makes the Ethernet header contiguous, records the inner ethertype in
+ * info->ethertype and, when learning is enabled, feeds the source MAC and
+ * the sender address sa to the forwarding table.
+ *
+ * Returns 0 on success with *m0 updated.  EPROTOTYPE (interface is not in
+ * Ethernet mode) and EINVAL (frame too short) leave *m0 untouched; ENOBUFS
+ * means m_pullup() failed and *m0 has been set to NULL.
+ */
+static int
+geneve_input_ether(struct geneve_softc *sc, struct mbuf **m0,
+    const struct sockaddr *sa, struct geneve_pkt_info *info)
+{
+	struct ether_header *ehdr;
+	struct mbuf *frame = *m0;
+
+	if (sc->gnv_proto != GENEVE_PROTO_ETHER)
+		return (EPROTOTYPE);
+	if (frame->m_pkthdr.len < ETHER_HDR_LEN)
+		return (EINVAL);
+
+	if (frame->m_len < ETHER_HDR_LEN) {
+		frame = m_pullup(frame, ETHER_HDR_LEN);
+		if (frame == NULL) {
+			*m0 = NULL;
+			return (ENOBUFS);
+		}
+	}
+
+	ehdr = mtod(frame, struct ether_header *);
+	info->ethertype = ntohs(ehdr->ether_type);
+
+	if ((sc->gnv_flags & GENEVE_FLAG_LEARN) != 0)
+		geneve_ftable_learn(sc, sa, ehdr->ether_shost);
+
+	*m0 = frame;
+	return (0);
+}
+
+/*
+ * Apply outer-header inheritance (ECN, optionally TTL/hop limit) to the
+ * inner packet and select the netisr queue for it.
+ *
+ * offset is the offset of the inner network header inside *m0 and
+ * info->ethertype selects the inner protocol.  On success 0 is returned,
+ * *m0 points at the (possibly replaced) mbuf and info->isr is set.
+ *
+ * On failure an errno is returned.  *m0 is set to NULL whenever the mbuf
+ * has already been disposed of here (m_pullup() failure, ECN drop); in all
+ * other error cases *m0 is left untouched and the caller still owns it.
+ */
+static int
+geneve_input_inherit(struct geneve_softc *sc, struct mbuf **m0,
+    int offset, struct geneve_pkt_info *info)
+{
+	struct mbuf *m;
+	struct ip *iphdr;
+	struct ip6_hdr *ip6hdr;
+	uint8_t itos;
+
+	m = *m0;
+
+	switch (info->ethertype) {
+	case ETHERTYPE_IP:
+		offset += sizeof(struct ip);
+		if (m->m_pkthdr.len < offset)
+			return (EINVAL);
+
+		if (m->m_len < offset &&
+		    (m = m_pullup(m, offset)) == NULL) {
+			*m0 = NULL;
+			return (ENOBUFS);
+		}
+		iphdr = mtodo(m, offset - sizeof(struct ip));
+
+		/*
+		 * XXX: RFC 6040 MUST be followed for IP packets encapsulated in geneve
+		 * therefore should be ECN_COMPLETE of D53516
+		 */
+		if (ip_ecn_egress(ECN_ALLOWED, &info->ecn, &iphdr->ip_tos) == 0) {
+			/*
+			 * The packet must be dropped.  Free it here: the
+			 * caller only sees NULL and would otherwise leak it.
+			 */
+			m_freem(m);
+			*m0 = NULL;
+			return (ENOBUFS);
+		}
+
+		if ((sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT) &&
+		    (info->ttl > 0))
+			iphdr->ip_ttl = info->ttl;
+
+		info->isr = NETISR_IP;
+		break;
+
+	case ETHERTYPE_IPV6:
+		offset += sizeof(struct ip6_hdr);
+		if (m->m_pkthdr.len < offset)
+			return (EINVAL);
+
+		if (m->m_len < offset &&
+		    (m = m_pullup(m, offset)) == NULL) {
+			*m0 = NULL;
+			return (ENOBUFS);
+		}
+		ip6hdr = mtodo(m, offset - sizeof(struct ip6_hdr));
+
+		/*
+		 * XXX: RFC 6040 MUST be followed for IP packets encapsulated in geneve
+		 * therefore should be ECN_COMPLETE of D53516
+		 */
+		itos = (ntohl(ip6hdr->ip6_flow) >> IPV6_FLOWLABEL_LEN) & 0xff;
+		if (ip_ecn_egress(ECN_ALLOWED, &info->ecn, &itos) == 0) {
+			/* Same as the IPv4 case: free before reporting NULL. */
+			m_freem(m);
+			*m0 = NULL;
+			return (ENOBUFS);
+		}
+		/*
+		 * Replace the traffic class instead of OR-ing into it, so
+		 * that bits cleared by ip_ecn_egress() are honoured as well.
+		 */
+		ip6hdr->ip6_flow &= ~htonl((uint32_t)0xff << IPV6_FLOWLABEL_LEN);
+		ip6hdr->ip6_flow |= htonl((uint32_t)itos << IPV6_FLOWLABEL_LEN);
+
+		if ((sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT) && (info->ttl > 0))
+			ip6hdr->ip6_hlim = info->ttl;
+
+		info->isr = NETISR_IPV6;
+		break;
+
+	case ETHERTYPE_ARP:
+		/* ARP is only accepted when the interface is not in inherit mode. */
+		if (sc->gnv_proto == GENEVE_PROTO_INHERIT)
+			return (EINVAL);
+
+		offset += sizeof(struct arphdr);
+		if (m->m_pkthdr.len < offset)
+			return (EINVAL);
+
+		if (m->m_len < offset &&
+		    (m = m_pullup(m, offset)) == NULL) {
+			*m0 = NULL;
+			return (ENOBUFS);
+		}
+		info->isr = NETISR_ARP;
+		break;
+
+	default:
+		if_inc_counter(sc->gnv_ifp, IFCOUNTER_NOPROTO, 1);
+		return (EINVAL);
+	}
+
+	*m0 = m;
+	return (0);
+}
+
+/*
+ * Allocate the per-interface txcsum, tso and rxcsum counters (M_WAITOK).
+ */
+static void
+geneve_stats_alloc(struct geneve_softc *sc)
+{
+	sc->gnv_stats.txcsum = counter_u64_alloc(M_WAITOK);
+	sc->gnv_stats.tso = counter_u64_alloc(M_WAITOK);
+	sc->gnv_stats.rxcsum = counter_u64_alloc(M_WAITOK);
+}
+
+/*
+ * Release the counters set up by geneve_stats_alloc().
+ */
+static void
+geneve_stats_free(struct geneve_softc *sc)
+{
+	counter_u64_free(sc->gnv_stats.txcsum);
+	counter_u64_free(sc->gnv_stats.tso);
+	counter_u64_free(sc->gnv_stats.rxcsum);
+}
+
+/*
+ * Fill a freshly allocated softc with the driver defaults: Ethernet mode
+ * with learning enabled, the default geneve UDP port on both ends, the
+ * system default TTL and the default forwarding table limits.
+ */
+static void
+geneve_set_default_config(struct geneve_softc *sc)
+{
+	/* Operating mode. */
+	sc->gnv_proto = GENEVE_PROTO_ETHER;
+	sc->gnv_flags |= GENEVE_FLAG_LEARN;
+
+	/* Tunnel identity and outer header defaults. */
+	sc->gnv_vni = GENEVE_VNI_MAX;
+	sc->gnv_ttl = V_ip_defttl;
+	sc->gnv_src_addr.sin.sin_port = htons(GENEVE_UDPPORT);
+	sc->gnv_dst_addr.sin.sin_port = htons(GENEVE_UDPPORT);
+
+	/*
+	 * RFC 8926 Section 3.3, the entire 16-bit range MAY
+	 * be used to maximize entropy.
+	 */
+	sc->gnv_min_port = V_ipport_firstauto;
+	sc->gnv_max_port = V_ipport_lastauto;
+
+	/* Forwarding table limits. */
+	sc->gnv_ftable_max = GENEVE_FTABLE_MAX;
+	sc->gnv_ftable_timeout = GENEVE_FTABLE_TIMEOUT;
+}
+
+/*
+ * Validate and record the capabilities requested for the interface.
+ *
+ * Switching tx checksum offload off also switches the matching TSO off (a
+ * message is printed).  Switching TSO on while the matching tx checksum
+ * offload is off is refused with EAGAIN.  On success the resulting request
+ * is stored in gnv_reqcap/gnv_reqcap2 and 0 is returned.
+ */
+static int
+geneve_set_reqcap(struct geneve_softc *sc, struct ifnet *ifp, int reqcap,
+    int reqcap2)
+{
+	const int changed = reqcap ^ ifp->if_capenable;
+
+	/* Disable TSO if tx checksums are disabled. */
+	if ((changed & IFCAP_TXCSUM) != 0 && (reqcap & IFCAP_TXCSUM) == 0 &&
+	    (reqcap & IFCAP_TSO4) != 0) {
+		reqcap &= ~IFCAP_TSO4;
+		if_printf(ifp, "tso4 disabled due to -txcsum.\n");
+	}
+	if ((changed & IFCAP_TXCSUM_IPV6) != 0 &&
+	    (reqcap & IFCAP_TXCSUM_IPV6) == 0 &&
+	    (reqcap & IFCAP_TSO6) != 0) {
+		reqcap &= ~IFCAP_TSO6;
+		if_printf(ifp, "tso6 disabled due to -txcsum6.\n");
+	}
+
+	/* Do not enable TSO if tx checksums are disabled. */
+	if ((changed & IFCAP_TSO4) != 0 && (reqcap & IFCAP_TSO4) != 0 &&
+	    (reqcap & IFCAP_TXCSUM) == 0) {
+		if_printf(ifp, "enable txcsum first.\n");
+		return (EAGAIN);
+	}
+	if ((changed & IFCAP_TSO6) != 0 && (reqcap & IFCAP_TSO6) != 0 &&
+	    (reqcap & IFCAP_TXCSUM_IPV6) == 0) {
+		if_printf(ifp, "enable txcsum6 first.\n");
+		return (EAGAIN);
+	}
+
+	sc->gnv_reqcap = reqcap;
+	sc->gnv_reqcap2 = reqcap2;
+	return (0);
+}
+
+/*
+ * A GENEVE interface inherits the capabilities of the genevedev or the interface
+ * hosting the genevelocal address.
+ */
+/*
+ * Recompute if_capabilities, if_capenable and if_hwassist of the geneve
+ * interface from the underlying interface "p".
+ *
+ * p is the interface named by gnv_mc_ifname when that is set, otherwise the
+ * interface owning the configured (non-wildcard) source address.  When no
+ * such interface is found only the GENEVE_BASIC_IFCAPS bits remain and
+ * if_hwassist stays 0.
+ */
+static void
+geneve_set_hwcaps(struct geneve_softc *sc)
+{
+ struct epoch_tracker et;
+ struct ifnet *p;
+ struct ifaddr *ifa;
+ u_long hwa;
+ int cap, ena;
+ bool rel;
+ struct ifnet *ifp = sc->gnv_ifp;
+
+ /* reset caps */
+ ifp->if_capabilities &= GENEVE_BASIC_IFCAPS;
+ ifp->if_capenable &= GENEVE_BASIC_IFCAPS;
+ ifp->if_hwassist = 0;
+
+ NET_EPOCH_ENTER(et);
+ CURVNET_SET(ifp->if_vnet);
+
+ /* rel records that p came from ifunit_ref() and must be if_rele()'d. */
+ rel = false;
+ p = NULL;
+ if (sc->gnv_mc_ifname[0] != '\0') {
+ rel = true;
+ p = ifunit_ref(sc->gnv_mc_ifname);
+ } else if (geneve_sockaddr_in_any(&sc->gnv_src_addr) == 0) {
+ /* Look the address up with the port cleared in a local copy. */
+ if (sc->gnv_src_addr.sa.sa_family == AF_INET) {
+ struct sockaddr_in in4 = sc->gnv_src_addr.sin;
+
+ in4.sin_port = 0;
+ ifa = ifa_ifwithaddr((struct sockaddr *)&in4);
+ if (ifa != NULL)
+ p = ifa->ifa_ifp;
+ } else if (sc->gnv_src_addr.sa.sa_family == AF_INET6) {
+ struct sockaddr_in6 in6 = sc->gnv_src_addr.sin6;
+
+ in6.sin6_port = 0;
+ ifa = ifa_ifwithaddr((struct sockaddr *)&in6);
+ if (ifa != NULL)
+ p = ifa->ifa_ifp;
+ }
+ }
+ if (p == NULL) {
+ CURVNET_RESTORE();
+ NET_EPOCH_EXIT(et);
+ return;
+ }
+
+ cap = ena = hwa = 0;
+
+ /* checksum offload */
+ if ((p->if_capabilities2 & IFCAP2_BIT(IFCAP2_GENEVE_HWCSUM)) != 0)
+ cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
+ if ((p->if_capenable2 & IFCAP2_BIT(IFCAP2_GENEVE_HWCSUM)) != 0) {
+ /* Enabled = requested on geneve AND enabled on the parent. */
+ ena |= sc->gnv_reqcap & p->if_capenable &
+ (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
+ /* The parent's CSUM_INNER_* assists become plain CSUM_* here. */
+ if (ena & IFCAP_TXCSUM) {
+ if (p->if_hwassist & CSUM_INNER_IP)
+ hwa |= CSUM_IP;
+ if (p->if_hwassist & CSUM_INNER_IP_UDP)
+ hwa |= CSUM_IP_UDP;
+ if (p->if_hwassist & CSUM_INNER_IP_TCP)
+ hwa |= CSUM_IP_TCP;
+ }
+ if (ena & IFCAP_TXCSUM_IPV6) {
+ if (p->if_hwassist & CSUM_INNER_IP6_UDP)
+ hwa |= CSUM_IP6_UDP;
+ if (p->if_hwassist & CSUM_INNER_IP6_TCP)
+ hwa |= CSUM_IP6_TCP;
+ }
+ }
+
+ /* hardware TSO */
+ if ((p->if_capabilities2 & IFCAP2_BIT(IFCAP2_GENEVE_HWTSO)) != 0) {
+ cap |= p->if_capabilities & IFCAP_TSO;
+ /* Cap the TSO size at IP_MAXPACKET minus this interface's header length. */
+ if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen)
+ ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen;
+ else
+ ifp->if_hw_tsomax = p->if_hw_tsomax;
+ /* One segment fewer than the parent -- presumably kept for the encapsulation headers; TODO confirm. */
+ ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1;
+ ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize;
+ }
+ if ((p->if_capenable2 & IFCAP2_BIT(IFCAP2_GENEVE_HWTSO)) != 0) {
+ ena |= sc->gnv_reqcap & p->if_capenable & IFCAP_TSO;
+ if (ena & IFCAP_TSO) {
+ if (p->if_hwassist & CSUM_INNER_IP_TSO)
+ hwa |= CSUM_IP_TSO;
+ if (p->if_hwassist & CSUM_INNER_IP6_TSO)
+ hwa |= CSUM_IP6_TSO;
+ }
+ }
+
+ ifp->if_capabilities |= cap;
+ ifp->if_capenable |= ena;
+ ifp->if_hwassist |= hwa;
+ if (rel)
+ if_rele(p);
+
+ CURVNET_RESTORE();
+ NET_EPOCH_EXIT(et);
+}
+
+#ifdef VIMAGE
+/*
+ * if_reassign handler (VIMAGE only), installed by geneve_clone_create().
+ *
+ * For Ethernet-mode interfaces the forwarding table callout is drained, the
+ * forwarding table is flushed and the work is handed on to
+ * ether_reassign().  Interfaces in any other mode are left untouched.
+ */
+static void
+geneve_reassign(struct ifnet *ifp, struct vnet *new_vnet, char *unused)
+{
+ struct geneve_softc *sc;
+
+ sc = if_getsoftc(ifp);
+ GENEVE_WLOCK(sc);
+
+ if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
+ /*
+  * NOTE(review): callout_drain() runs with the softc write lock held,
+  * and geneve_clone_create() binds this callout to that same lock --
+  * confirm this cannot sleep or deadlock here.
+  */
+ callout_drain(&sc->gnv_callout);
+ geneve_ftable_flush(sc, 1);
+ ether_reassign(ifp, new_vnet, unused);
+ }
+
+ GENEVE_WUNLOCK(sc);
+}
+#endif
+
+/*
+ * Netlink create handler for the geneve cloner.
+ *
+ * Requires IFLA_LINKINFO with nested data, parses it with
+ * geneve_create_parser and rejects protocols that geneve_check_proto() does
+ * not accept (ENOTSUP in both cases).  The validated protocol is then passed
+ * to geneve_clone_create() as kernel-space creation parameters.
+ */
+static int
+geneve_clone_create_nl(struct if_clone *ifc, char *name, size_t len,
+    struct ifc_data_nl *ifd)
+{
+	struct nl_parsed_geneve attrs = {};
+	struct nl_parsed_link *link = ifd->lattrs;
+	struct nl_pstate *npt = ifd->npt;
+	struct geneve_params gnvp = {};
+	struct ifc_data ifd_new = {};
+	int error;
+
+	if (link->ifla_idata == NULL || !nl_has_attr(ifd->bm, IFLA_LINKINFO)) {
+		nlmsg_report_err_msg(npt, "geneve protocol is required");
+		return (ENOTSUP);
+	}
+
+	error = nl_parse_nested(link->ifla_idata, &geneve_create_parser, npt,
+	    &attrs);
+	if (error != 0)
+		return (error);
+
+	if (geneve_check_proto(attrs.ifla_proto) != 0) {
+		nlmsg_report_err_msg(npt, "Unsupported ethertype: 0x%04X",
+		    attrs.ifla_proto);
+		return (ENOTSUP);
+	}
+
+	/* Hand the parsed protocol over as in-kernel clone parameters. */
+	gnvp.ifla_proto = attrs.ifla_proto;
+	ifd_new.flags = IFC_F_SYSSPACE;
+	ifd_new.unit = ifd->unit;
+	ifd_new.params = &gnvp;
+
+	return (geneve_clone_create(ifc, name, len, &ifd_new, &ifd->ifp));
+}
+
+/*
+ * Netlink modify handler for geneve interfaces.
+ *
+ * Requests without IFLA_LINKINFO data are passed to the generic
+ * nl_modify_ifp_generic().  Otherwise PRIV_NET_GENEVE is required, the
+ * nested attributes are parsed with geneve_modify_parser and every attribute
+ * present in the request is applied through its setter, in the fixed order
+ * below.  Processing stops at the first error, which is returned; nothing
+ * in this function undoes changes made by earlier setters.
+ */
+static int
+geneve_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd)
+{
+ struct geneve_softc *sc = ifp->if_softc;
+ struct nl_parsed_link *lattrs = ifd->lattrs;
+ struct nl_pstate *npt = ifd->npt;
+ struct nl_parsed_geneve params;
+ struct nlattr *attrs = lattrs->ifla_idata;
+ struct nlattr_bmask bm;
+ int error = 0;
+
+ /* No geneve-specific data: treat as a generic link modification. */
+ if ((attrs == NULL) ||
+ (nl_has_attr(ifd->bm, IFLA_LINKINFO) == 0)) {
+ error = nl_modify_ifp_generic(ifp, lattrs, ifd->bm, npt);
+ return (error);
+ }
+
+ error = priv_check(curthread, PRIV_NET_GENEVE);
+ if (error)
+ return (error);
+
+ /* make sure ignored attributes by nl_parse will not cause panics */
+ memset(&params, 0, sizeof(params));
+
+ /* bm records which nested attributes the request actually carries. */
+ nl_get_attrs_bmask_raw(NLA_DATA(attrs), NLA_DATA_LEN(attrs), &bm);
+ error = nl_parse_nested(attrs, &geneve_modify_parser, npt, &params);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_ID))
+ error = geneve_set_vni_nl(sc, npt, params.ifla_vni);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_LOCAL))
+ error = geneve_set_local_addr_nl(sc, npt, params.ifla_local);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_REMOTE))
+ error = geneve_set_remote_addr_nl(sc, npt, params.ifla_remote);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_LOCAL_PORT))
+ error = geneve_set_local_port_nl(sc, npt, params.ifla_local_port);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_PORT))
+ error = geneve_set_remote_port_nl(sc, npt, params.ifla_remote_port);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_PORT_RANGE))
+ error = geneve_set_port_range_nl(sc, npt, params.ifla_port_range);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_DF))
+ error = geneve_set_df_nl(sc, npt, params.ifla_df);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_TTL))
+ error = geneve_set_ttl_nl(sc, npt, params.ifla_ttl);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_TTL_INHERIT))
+ error = geneve_set_ttl_inherit_nl(sc, npt, params.ifla_ttl_inherit);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_DSCP_INHERIT))
+ error = geneve_set_dscp_inherit_nl(sc, npt, params.ifla_dscp_inherit);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_COLLECT_METADATA))
+ error = geneve_set_collect_metadata_nl(sc, npt, params.ifla_external);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_LEARN))
+ error = geneve_set_learn_nl(sc, npt, params.ifla_ftable_learn);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_FLUSH))
+ error = geneve_flush_ftable_nl(sc, npt, params.ifla_ftable_flush);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_MAX))
+ error = geneve_set_ftable_max_nl(sc, npt, params.ifla_ftable_max);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_FTABLE_TIMEOUT))
+ error = geneve_set_ftable_timeout_nl(sc, npt, params.ifla_ftable_timeout);
+
+ if (error == 0 && nl_has_attr(&bm, IFLA_GENEVE_MC_IFNAME))
+ error = geneve_set_mc_if_nl(sc, npt, params.ifla_mc_ifname);
+
+ return (error);
+}
+
+/*
+ * Netlink dump handler: write the interface's geneve configuration and
+ * statistics to nw.
+ *
+ * Emits IFLA_LINK and IFLA_IFNAME, then an IFLA_LINKINFO nest holding
+ * IFLA_INFO_KIND "geneve" and an IFLA_INFO_DATA nest with the IFLA_GENEVE_*
+ * attributes.  The softc is read under the read lock.  If either nest
+ * cannot be opened the function returns early without the geneve data.
+ */
+static void
+geneve_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw)
+{
+ struct geneve_softc *sc;
+ struct rm_priotracker tracker;
+
+ nlattr_add_u32(nw, IFLA_LINK, ifp->if_index);
+ nlattr_add_string(nw, IFLA_IFNAME, ifp->if_xname);
+
+ int off = nlattr_add_nested(nw, IFLA_LINKINFO);
+ if (off == 0)
+ return;
+
+ nlattr_add_string(nw, IFLA_INFO_KIND, "geneve");
+ int off2 = nlattr_add_nested(nw, IFLA_INFO_DATA);
+ if (off2 == 0) {
+ /* Finish the outer nest with what was written so far. */
+ nlattr_set_len(nw, off);
+ return;
+ }
+
+ sc = ifp->if_softc;
+ GENEVE_RLOCK(sc, &tracker);
+
+ /* Tunnel identity and endpoints. */
+ nlattr_add_u32(nw, IFLA_GENEVE_ID, sc->gnv_vni);
+ nlattr_add_u16(nw, IFLA_GENEVE_PROTOCOL, sc->gnv_proto);
+ geneve_get_local_addr_nl(sc, nw);
+ geneve_get_remote_addr_nl(sc, nw);
+ nlattr_add_u16(nw, IFLA_GENEVE_LOCAL_PORT, geneve_get_local_port(sc));
+ nlattr_add_u16(nw, IFLA_GENEVE_PORT, geneve_get_remote_port(sc));
+
+ const struct ifla_geneve_port_range port_range = {
+ .low = sc->gnv_min_port,
+ .high = sc->gnv_max_port
+ };
+ nlattr_add(nw, IFLA_GENEVE_PORT_RANGE, sizeof(port_range), &port_range);
+
+ /* Outer header behaviour. */
+ nlattr_add_u8(nw, IFLA_GENEVE_DF, sc->gnv_df);
+ nlattr_add_u8(nw, IFLA_GENEVE_TTL, sc->gnv_ttl);
+ nlattr_add_bool(nw, IFLA_GENEVE_TTL_INHERIT,
+ sc->gnv_flags & GENEVE_FLAG_TTL_INHERIT);
+ nlattr_add_bool(nw, IFLA_GENEVE_DSCP_INHERIT,
+ sc->gnv_flags & GENEVE_FLAG_DSCP_INHERIT);
+ nlattr_add_bool(nw, IFLA_GENEVE_COLLECT_METADATA,
+ sc->gnv_flags & GENEVE_FLAG_COLLECT_METADATA);
+
+ /* Forwarding table settings and counters. */
+ nlattr_add_bool(nw, IFLA_GENEVE_FTABLE_LEARN,
+ sc->gnv_flags & GENEVE_FLAG_LEARN);
+ nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_MAX, sc->gnv_ftable_max);
+ nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_TIMEOUT, sc->gnv_ftable_timeout);
+ nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_COUNT, sc->gnv_ftable_cnt);
+ nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_NOSPACE_CNT, sc->gnv_stats.ftable_nospace);
+ nlattr_add_u32(nw, IFLA_GENEVE_FTABLE_LOCK_UP_FAIL_CNT,
+ sc->gnv_stats.ftable_lock_upgrade_failed);
+
+ /* Multicast interface. */
+ nlattr_add_string(nw, IFLA_GENEVE_MC_IFNAME, sc->gnv_mc_ifname);
+ nlattr_add_u32(nw, IFLA_GENEVE_MC_IFINDEX, sc->gnv_mc_ifindex);
+
+ /* Offload statistics. */
+ nlattr_add_u64(nw, IFLA_GENEVE_TXCSUM_CNT,
+ counter_u64_fetch(sc->gnv_stats.txcsum));
+ nlattr_add_u64(nw, IFLA_GENEVE_TSO_CNT,
+ counter_u64_fetch(sc->gnv_stats.tso));
+ nlattr_add_u64(nw, IFLA_GENEVE_RXCSUM_CNT,
+ counter_u64_fetch(sc->gnv_stats.rxcsum));
+
+ /* Finish the inner nest first, then the outer one. */
+ nlattr_set_len(nw, off2);
+ nlattr_set_len(nw, off);
+
+ GENEVE_RUNLOCK(sc, &tracker);
+}
+
+/*
+ * Cloner create handler: allocate and attach a new geneve interface.
+ *
+ * ifd->unit selects the unit number.  ifd->params, when present, is a
+ * struct geneve_params whose ifla_proto picks the operating mode; without
+ * parameters the default from geneve_set_default_config() is used.
+ * Ethernet-mode interfaces are attached through ether_ifattach(), inherit
+ * mode ones as IFT_TUNNEL with a DLT_NULL bpf tap.
+ *
+ * Returns 0 and stores the new interface in *ifpp, or an errno when the
+ * parameters cannot be copied in or name an unsupported protocol.
+ */
+static int
+geneve_clone_create(struct if_clone *ifc, char *name, size_t len,
+    struct ifc_data *ifd, struct ifnet **ifpp)
+{
+	struct geneve_softc *sc;
+	struct geneve_params gnvp;
+	struct ifnet *ifp;
+	int error;
+
+	sc = malloc(sizeof(struct geneve_softc), M_GENEVE, M_WAITOK | M_ZERO);
+	sc->gnv_fibnum = curthread->td_proc->p_fibnum;
+	geneve_set_default_config(sc);
+
+	if (ifd->params != NULL) {
+		error = ifc_copyin(ifd, &gnvp, sizeof(gnvp));
+		/* Report a rejected protocol as an error, never as success. */
+		if (error == 0)
+			error = geneve_check_proto(gnvp.ifla_proto);
+		if (error != 0) {
+			free(sc, M_GENEVE);
+			return (error);
+		}
+
+		sc->gnv_proto = gnvp.ifla_proto;
+	}
+
+	/*
+	 * The lock has to exist before the callout below is bound to it.
+	 */
+	rm_init(&sc->gnv_lock, "geneverm");
+
+	if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
+		ifp = if_alloc(IFT_ETHER);
+		ifp->if_flags |= IFF_SIMPLEX | IFF_BROADCAST;
+		geneve_ftable_init(sc);
+		callout_init_rw(&sc->gnv_callout, &sc->gnv_lock, 0);
+	} else if (sc->gnv_proto == GENEVE_PROTO_INHERIT) {
+		ifp = if_alloc(IFT_TUNNEL);
+		ifp->if_flags |= IFF_NOARP;
+	} else {
+		rm_destroy(&sc->gnv_lock);
+		free(sc, M_GENEVE);
+		return (EINVAL);
+	}
+
+	geneve_stats_alloc(sc);
+	sc->gnv_ifp = ifp;
+	sc->gnv_vnet = ifp->if_vnet;
+	sc->gnv_port_hash_key = arc4random();
+
+	ifp->if_softc = sc;
+	if_initname(ifp, geneve_name, ifd->unit);
+	ifp->if_flags |= IFF_MULTICAST;
+	ifp->if_init = geneve_init;
+	ifp->if_ioctl = geneve_ioctl;
+	ifp->if_transmit = geneve_transmit;
+	ifp->if_qflush = geneve_qflush;
+	ifp->if_capabilities = GENEVE_BASIC_IFCAPS;
+	ifp->if_capenable = GENEVE_BASIC_IFCAPS;
+	/* Request every capability until the user says otherwise. */
+	sc->gnv_reqcap = -1;
+	geneve_set_hwcaps(sc);
+
+	if (sc->gnv_proto == GENEVE_PROTO_ETHER) {
+		ifmedia_init(&sc->gnv_media, 0, geneve_media_change,
+		    geneve_media_status);
+		ifmedia_add(&sc->gnv_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+		ifmedia_set(&sc->gnv_media, IFM_ETHER | IFM_AUTO);
+
+		ether_gen_addr(ifp, &sc->gnv_hwaddr);
+		ether_ifattach(ifp, sc->gnv_hwaddr.octet);
+
+		ifp->if_baudrate = 0;
+	} else {
+		ifp->if_output = geneve_output;
+
+		if_attach(ifp);
+		bpfattach(ifp, DLT_NULL, sizeof(u_int32_t));
+	}
+
+#ifdef VIMAGE
+	/* Set after attach so it replaces whatever the attach installed. */
+	ifp->if_reassign = geneve_reassign;
+#endif
+
+	GENEVE_WLOCK(sc);
+	geneve_setup_interface_hdrlen(sc);
+	GENEVE_WUNLOCK(sc);
+	*ifpp = ifp;
+
+	return (0);
+}
+
+/*
+ * Cloner destroy handler: tear the tunnel down, detach and free the
+ * interface, then release everything owned by the softc.  Always returns 0.
+ */
+static int
+geneve_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
+{
+	struct geneve_softc *sc = if_getsoftc(ifp);
+
+	geneve_teardown(sc);
+	sc->gnv_vnet = NULL;
+
+	if (sc->gnv_proto != GENEVE_PROTO_ETHER) {
+		/* Inherit mode: plain interface with a bpf tap. */
+		bpfdetach(ifp);
+		if_detach(ifp);
+		if_free(ifp);
+	} else {
+		/* Ethernet mode: also drop forwarding table and media. */
+		geneve_ftable_flush(sc, 1);
+
+		ether_ifdetach(ifp);
+		if_free(ifp);
+		ifmedia_removeall(&sc->gnv_media);
+
+		geneve_ftable_fini(sc);
+	}
+
+	rm_destroy(&sc->gnv_lock);
+	geneve_stats_free(sc);
+	free(sc, M_GENEVE);
+
+	return (0);
+}
+
+/* BMV: Taken from if_bridge. */
+/*
+ * Hash a 6-byte MAC address, seeded with the softc's gnv_ftable_hash_key.
+ * addr must point at six readable bytes.  Returns the 32-bit hash value.
+ */
+static uint32_t
+geneve_mac_hash(struct geneve_softc *sc, const uint8_t *addr)
+{
+	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->gnv_ftable_hash_key;
+
+	/*
+	 * Widen each byte before shifting: a uint8_t promotes to signed int
+	 * and shifting it into the sign bit would be undefined.
+	 */
+	b += (uint32_t)addr[5] << 8;
+	b += (uint32_t)addr[4];
+	a += (uint32_t)addr[3] << 24;
+	a += (uint32_t)addr[2] << 16;
+	a += (uint32_t)addr[1] << 8;
+	a += (uint32_t)addr[0];
+
+/*
+ * The following hash function is adapted from "Hash Functions" by Bob Jenkins
+ * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
+ */
+#define	mix(a, b, c)							\
+do {									\
+	a -= b; a -= c; a ^= (c >> 13);					\
+	b -= c; b -= a; b ^= (a << 8);					\
+	c -= a; c -= b; c ^= (b >> 13);					\
+	a -= b; a -= c; a ^= (c >> 12);					\
+	b -= c; b -= a; b ^= (a << 16);					\
+	c -= a; c -= b; c ^= (b >> 5);					\
+	a -= b; a -= c; a ^= (c >> 3);					\
+	b -= c; b -= a; b ^= (a << 10);					\
+	c -= a; c -= b; c ^= (b >> 15);					\
+} while (0)
+
+	mix(a, b, c);
+
+#undef mix
+
+	return (c);
+}
+
+/*
+ * ifmedia change callback.  Media changes are accepted and ignored.
+ */
+static int
+geneve_media_change(struct ifnet *ifp)
+{
+	(void)ifp;
+
+	return (0);
+}
+
+/*
+ * ifmedia status callback: always reports an active, valid, full-duplex
+ * Ethernet medium.
+ */
+static void
+geneve_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
+{
+	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
+	ifmr->ifm_status = IFM_ACTIVE | IFM_AVALID;
+}
+
+/*
+ * Byte-wise comparison of unsa against sa over unsa's sa_len bytes.
+ * Returns the memcmp() result, i.e. 0 when they match.
+ */
+static int
+geneve_sockaddr_cmp(const union sockaddr_union *unsa,
+    const struct sockaddr *sa)
+{
+	const size_t cmplen = unsa->sa.sa_len;
+
+	return (memcmp(&unsa->sa, sa, cmplen));
+}
+
+/*
+ * Copy a complete IPv4 or IPv6 socket address into dst.  dst is zeroed
+ * first and its length field is forced to the size of the matching
+ * sockaddr type.
+ */
+static void
+geneve_sockaddr_copy(union sockaddr_union *dst,
+    const struct sockaddr *sa)
+{
+	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
+	memset(dst, 0, sizeof(*dst));
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		dst->sin = *SATOCONSTSIN(sa);
+		dst->sin.sin_len = sizeof(struct sockaddr_in);
+		break;
+	case AF_INET6:
+		dst->sin6 = *SATOCONSTSIN6(sa);
+		dst->sin6.sin6_len = sizeof(struct sockaddr_in6);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Compare only the IP address of sa (selected by sa's family) with the one
+ * stored in unsa.  Returns non-zero when they are equal and 0 otherwise,
+ * including for families other than AF_INET/AF_INET6.
+ */
+static int
+geneve_sockaddr_in_equal(const union sockaddr_union *unsa,
+    const struct sockaddr *sa)
+{
+	switch (sa->sa_family) {
+	case AF_INET: {
+		const struct in_addr *v4 = &SATOCONSTSIN(sa)->sin_addr;
+
+		return (v4->s_addr == unsa->sin.sin_addr.s_addr);
+	}
+	case AF_INET6: {
+		const struct in6_addr *v6 = &SATOCONSTSIN6(sa)->sin6_addr;
+
+		return (IN6_ARE_ADDR_EQUAL(v6, &unsa->sin6.sin6_addr));
+	}
+	default:
+		return (0);
+	}
+}
+
+/*
+ * Copy family, length and IP address from sa into dst.  Unlike
+ * geneve_sockaddr_copy() the remaining fields of dst (such as the port) are
+ * left as they are.
+ */
+static void
+geneve_sockaddr_in_copy(union sockaddr_union *dst,
+    const struct sockaddr *sa)
+{
+	MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		dst->sin.sin_family = AF_INET;
+		dst->sin.sin_len = sizeof(struct sockaddr_in);
+		dst->sin.sin_addr = SATOCONSTSIN(sa)->sin_addr;
+		break;
+	case AF_INET6:
+		dst->sin6.sin6_family = AF_INET6;
+		dst->sin6.sin6_len = sizeof(struct sockaddr_in6);
+		dst->sin6.sin6_addr = SATOCONSTSIN6(sa)->sin6_addr;
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Report whether the address family of gnvaddr is usable in this kernel.
+ * AF_UNSPEC is accepted only when unspec is non-zero; AF_INET and AF_INET6
+ * only when the kernel is built with INET and INET6 respectively.
+ * Returns 1 when supported, 0 otherwise.
+ */
+static int
+geneve_sockaddr_supported(const union sockaddr_union *gnvaddr, int unspec)
+{
+	const int family = gnvaddr->sa.sa_family;
+
+	if (family == AF_UNSPEC && unspec != 0)
+		return (1);
+#ifdef INET
+	if (family == AF_INET)
+		return (1);
+#endif
+#ifdef INET6
+	if (family == AF_INET6)
+		return (1);
+#endif
+	return (0);
+}
+
+/*
+ * Test whether gnvaddr holds the wildcard address of its family.
+ * Returns non-zero for INADDR_ANY / the IPv6 unspecified address, 0 for any
+ * other IPv4/IPv6 address and -1 for other address families.
+ */
+static int
+geneve_sockaddr_in_any(const union sockaddr_union *gnvaddr)
+{
+	const struct sockaddr *sa = &gnvaddr->sa;
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		return (SATOCONSTSIN(sa)->sin_addr.s_addr == INADDR_ANY);
+	case AF_INET6:
+		return (IN6_IS_ADDR_UNSPECIFIED(&SATOCONSTSIN6(sa)->sin6_addr));
+	default:
+		return (-1);
+	}
+}
+
+/*
+ * Run sa6_embedscope() on an IPv6 socket address using the V_ip6_use_defzone
+ * setting.  Returns its result, or EAFNOSUPPORT in kernels without INET6.
+ */
+static int
+geneve_sockaddr_in6_embedscope(struct sockaddr_in6 *sin6)
+{
+	MPASS(sin6->sin6_family == AF_INET6);
+#ifdef INET6
+	return (sa6_embedscope(sin6, V_ip6_use_defzone));
+#else
+	return (EAFNOSUPPORT);
+#endif
+}
+
+/*
+ * Tell whether the tunnel configuration may be changed right now.
+ * Returns 0 while the interface is running, while init or teardown is in
+ * progress, or while collect-metadata mode is set; 1 otherwise.
+ * The softc lock must be held.
+ */
+static int
+geneve_can_change_config(struct geneve_softc *sc)
+{
+	const struct ifnet *ifp = sc->gnv_ifp;
+	const int blocking = GENEVE_FLAG_INIT | GENEVE_FLAG_TEARDOWN |
+	    GENEVE_FLAG_COLLECT_METADATA;
+
+	GENEVE_LOCK_ASSERT(sc);
+
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 ||
+	    (sc->gnv_flags & blocking) != 0)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Validate an operating-mode protocol value.
+ * Returns 0 for GENEVE_PROTO_ETHER and GENEVE_PROTO_INHERIT, EAFNOSUPPORT
+ * for anything else.
+ */
+static int
+geneve_check_proto(uint16_t proto)
+{
+	if (proto == GENEVE_PROTO_ETHER || proto == GENEVE_PROTO_INHERIT)
+		return (0);
+
+	return (EAFNOSUPPORT);
+}
+
+/*
+ * Test whether sa holds a multicast address.
+ * Returns non-zero for IPv4/IPv6 multicast, 0 for other IPv4/IPv6 addresses
+ * and -1 for other address families.
+ */
+static int
+geneve_check_multicast_addr(const union sockaddr_union *sa)
+{
+	switch (sa->sa.sa_family) {
+	case AF_INET: {
+		const struct in_addr *v4 = &SATOCONSTSIN(sa)->sin_addr;
+
+		return (IN_MULTICAST(ntohl(v4->s_addr)));
+	}
+	case AF_INET6: {
+		const struct in6_addr *v6 = &SATOCONSTSIN6(sa)->sin6_addr;
+
+		return (IN6_IS_ADDR_MULTICAST(v6));
+	}
+	default:
+		return (-1);
+	}
+}
+
+/*
+ * Basic sanity check of a socket address received with its length.
+ *
+ * Returns EAFNOSUPPORT unless the family is AF_INET or AF_INET6, EINVAL
+ * when len is negative or smaller than a struct sockaddr, and 0 otherwise.
+ */
+static int
+geneve_check_sockaddr(const union sockaddr_union *sa, const int len)
+{
+	switch (sa->sa.sa_family) {
+	case AF_INET:
+	case AF_INET6:
+		/*
+		 * Check the sign first: comparing a negative int against a
+		 * size_t would convert it to a huge value and pass.
+		 */
+		if (len < 0 || (size_t)len < sizeof(struct sockaddr))
+			return (EINVAL);
+		return (0);
+
+	default:
+		return (EAFNOSUPPORT);
+	}
+}
+
+/*
+ * Strict check of an IPv4/IPv6 socket address: len has to be exactly the
+ * size of the sockaddr type matching the family.  Returns 0 when it is and
+ * EINVAL otherwise, including for any other address family.
+ */
+static int
+geneve_check_sockaddr_in(const union sockaddr_union *sa, const int len)
+{
+	switch (sa->sa.sa_family) {
+	case AF_INET:
+		return (len != sizeof(struct sockaddr_in) ? EINVAL : 0);
+	case AF_INET6:
+		return (len != sizeof(struct sockaddr_in6) ? EINVAL : 0);
+	default:
+		return (EINVAL);
+	}
+}
+
+/*
+ * Jail OSD PR_METHOD_REMOVE callback.  With VIMAGE, when the prison owns
+ * its vnet, the geneve cloner of that vnet is detached and the pointer
+ * cleared.  Always returns 0.
+ */
+static int
+geneve_prison_remove(void *obj, void *data __unused)
+{
+#ifdef VIMAGE
+	struct prison *pr = obj;
+
+	if (!prison_owns_vnet(pr))
+		return (0);
+
+	CURVNET_SET(pr->pr_vnet);
+	if (V_geneve_cloner != NULL) {
+		ifc_detach_cloner(V_geneve_cloner);
+		V_geneve_cloner = NULL;
+	}
+	CURVNET_RESTORE();
+#endif
+	return (0);
+}
+
+/*
+ * Per-vnet initialisation: register the geneve cloner (version 2 request
+ * with automatic unit numbers and netlink create/modify/dump handlers) and
+ * remember it in V_geneve_cloner.
+ */
+static void
+vnet_geneve_load(void)
+{
+	struct if_clone_addreq_v2 addreq = {
+		.version = 2,
+		.flags = IFC_F_AUTOUNIT,
+		.match_f = NULL,
+		.create_f = geneve_clone_create,
+		.destroy_f = geneve_clone_destroy,
+		.create_nl_f = geneve_clone_create_nl,
+		.modify_nl_f = geneve_clone_modify_nl,
+		.dump_nl_f = geneve_clone_dump_nl,
+	};
+
+	V_geneve_cloner = ifc_attach_cloner(geneve_name,
+	    (struct if_clone_addreq *)&addreq);
+}
+VNET_SYSINIT(vnet_geneve_load, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
+    vnet_geneve_load, NULL);
+
+/*
+ * Per-vnet teardown: detach the geneve cloner if it is still registered.
+ * The pointer is cleared afterwards, as the other detach paths in this file
+ * do, so that no stale cloner pointer is left behind.
+ */
+static void
+vnet_geneve_unload(void)
+{
+
+	if (V_geneve_cloner != NULL) {
+		ifc_detach_cloner(V_geneve_cloner);
+		V_geneve_cloner = NULL;
+	}
+}
+VNET_SYSUNINIT(vnet_geneve_unload, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_geneve_unload, NULL);
+
+/*
+ * MOD_LOAD work: set up the socket list mutex, register the jail OSD slot
+ * with the prison-remove method and verify the netlink parsers.
+ */
+static void
+geneve_module_init(void)
+{
+	osd_method_t jail_methods[PR_MAXMETHOD] = {
+		[PR_METHOD_REMOVE] = geneve_prison_remove,
+	};
+
+	mtx_init(&geneve_list_mtx, "geneve list", NULL, MTX_DEF);
+	geneve_osd_jail_slot = osd_jail_register(NULL, jail_methods);
+	NL_VERIFY_PARSERS(all_parsers);
+}
+
+/*
+ * MOD_UNLOAD work: detach the cloner of every vnet, wait for the network
+ * epoch, then destroy the socket list mutex and drop the jail OSD slot.
+ * The socket list is expected to be empty by then.
+ */
+static void
+geneve_module_deinit(void)
+{
+	struct if_clone *cloner;
+	VNET_ITERATOR_DECL(vnet_iter);
+
+	VNET_LIST_RLOCK();
+	VNET_FOREACH(vnet_iter) {
+		cloner = VNET_VNET(vnet_iter, geneve_cloner);
+		if (cloner == NULL)
+			continue;
+		ifc_detach_cloner(cloner);
+		VNET_VNET(vnet_iter, geneve_cloner) = NULL;
+	}
+	VNET_LIST_RUNLOCK();
+
+	NET_EPOCH_WAIT();
+	MPASS(LIST_EMPTY(&geneve_socket_list));
+	mtx_destroy(&geneve_list_mtx);
+
+	if (geneve_osd_jail_slot != 0)
+		osd_jail_deregister(geneve_osd_jail_slot);
+}
+
+/*
+ * Module event handler.  MOD_LOAD and MOD_UNLOAD run the module init and
+ * deinit routines and return 0; every other event returns ENOTSUP.
+ */
+static int
+geneve_modevent(module_t mod, int type, void *unused)
+{
+	if (type == MOD_LOAD) {
+		geneve_module_init();
+		return (0);
+	}
+	if (type == MOD_UNLOAD) {
+		geneve_module_deinit();
+		return (0);
+	}
+
+	return (ENOTSUP);
+}
+
+/* Module glue: name, event handler and (unused) private data. */
+static moduledata_t geneve_mod = {
+	.name = "if_geneve",
+	.evhand = geneve_modevent,
+	.priv = NULL,
+};
+
+DECLARE_MODULE(if_geneve, geneve_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
+MODULE_VERSION(if_geneve, 1);
diff --git a/sys/net/if_strings.h b/sys/net/if_strings.h
--- a/sys/net/if_strings.h
+++ b/sys/net/if_strings.h
@@ -61,10 +61,14 @@
#define IFCAP_RXTLS4_NAME "RXTLS4"
#define IFCAP_RXTLS6_NAME "RXTLS6"
#define IFCAP_IPSEC_OFFLOAD_NAME "IPSEC"
+#define IFCAP_GENEVE_HWCSUM_NAME "GENEVE_HWCSUM"
+#define IFCAP_GENEVE_HWTSO_NAME "GENEVE_HWTSO"
-#define IFCAP2_RXTLS4_NAME IFCAP_RXTLS4_NAME
-#define IFCAP2_RXTLS6_NAME IFCAP_RXTLS6_NAME
-#define IFCAP2_IPSEC_OFFLOAD_NAME IFCAP_IPSEC_OFFLOAD_NAME
+#define IFCAP2_RXTLS4_NAME IFCAP_RXTLS4_NAME
+#define IFCAP2_RXTLS6_NAME IFCAP_RXTLS6_NAME
+#define IFCAP2_IPSEC_OFFLOAD_NAME IFCAP_IPSEC_OFFLOAD_NAME
+#define IFCAP2_GENEVE_HWCSUM_NAME IFCAP_GENEVE_HWCSUM_NAME
+#define IFCAP2_GENEVE_HWTSO_NAME IFCAP_GENEVE_HWTSO_NAME
#ifdef _WANT_IFCAP_BIT_NAMES
static const char *ifcap_bit_names[] = {
@@ -103,6 +107,8 @@
IFCAP_RXTLS4_NAME,
IFCAP_RXTLS6_NAME,
IFCAP_IPSEC_OFFLOAD_NAME,
+ IFCAP_GENEVE_HWCSUM_NAME,
+ IFCAP_GENEVE_HWTSO_NAME,
};
#ifdef IFCAP_B_SIZE
diff --git a/sys/netlink/route/interface.h b/sys/netlink/route/interface.h
--- a/sys/netlink/route/interface.h
+++ b/sys/netlink/route/interface.h
@@ -263,4 +263,48 @@
uint32_t mask;
};
+/* IFLA_INFO_DATA geneve attributes */
+/*
+ * Payload types below are the ones the geneve driver uses when it dumps an
+ * interface.  The numeric values are part of the netlink ABI: append new
+ * attributes right before __IFLA_GENEVE_MAX only.
+ */
+enum {
+ IFLA_GENEVE_UNSPEC,
+ IFLA_GENEVE_ID, /* u32: virtual network identifier */
+ IFLA_GENEVE_PROTOCOL, /* u16: operating mode protocol */
+ IFLA_GENEVE_LOCAL, /* local tunnel address */
+ IFLA_GENEVE_REMOTE, /* remote tunnel address */
+ IFLA_GENEVE_LOCAL_PORT, /* u16 */
+ IFLA_GENEVE_PORT, /* u16: remote port */
+ IFLA_GENEVE_PORT_RANGE, /* struct ifla_geneve_port_range */
+ IFLA_GENEVE_DF, /* u8 */
+ IFLA_GENEVE_TTL, /* u8 */
+ IFLA_GENEVE_TTL_INHERIT, /* bool */
+ IFLA_GENEVE_DSCP_INHERIT, /* bool */
+ IFLA_GENEVE_COLLECT_METADATA, /* bool */
+ IFLA_GENEVE_FTABLE_LEARN, /* bool */
+ IFLA_GENEVE_FTABLE_FLUSH, /* request only, never dumped */
+ IFLA_GENEVE_FTABLE_MAX, /* u32 */
+ IFLA_GENEVE_FTABLE_TIMEOUT, /* u32 */
+ IFLA_GENEVE_FTABLE_COUNT, /* u32, dump only */
+ IFLA_GENEVE_FTABLE_NOSPACE_CNT, /* u32, dump only */
+ IFLA_GENEVE_FTABLE_LOCK_UP_FAIL_CNT, /* u32, dump only */
+ IFLA_GENEVE_MC_IFNAME, /* string */
+ IFLA_GENEVE_MC_IFINDEX, /* u32, dump only */
+ IFLA_GENEVE_TXCSUM_CNT, /* u64, dump only */
+ IFLA_GENEVE_TSO_CNT, /* u64, dump only */
+ IFLA_GENEVE_RXCSUM_CNT, /* u64, dump only */
+ __IFLA_GENEVE_MAX,
+};
+#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
+
+/*
+ * NOTE(review): presumably the values carried by IFLA_GENEVE_DF (leave the
+ * outer DF bit clear, set it, or inherit it from the inner packet) --
+ * confirm against the driver's DF handling.
+ */
+enum ifla_geneve_df {
+ IFLA_GENEVE_DF_UNSET,
+ IFLA_GENEVE_DF_SET,
+ IFLA_GENEVE_DF_INHERIT,
+ __IFLA_GENEVE_DF_MAX,
+};
+#define IFLA_GENEVE_DF_MAX (__IFLA_GENEVE_DF_MAX - 1)
+
+/* Payload of IFLA_GENEVE_PORT_RANGE: the interface's min/max port pair. */
+struct ifla_geneve_port_range {
+ uint16_t low;
+ uint16_t high;
+};
+
#endif
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -676,7 +676,7 @@
#define CSUM_INNER_IP_TSO 0x00020000
#define CSUM_ENCAP_VXLAN 0x00040000 /* VXLAN outer encapsulation */
-#define CSUM_ENCAP_RSVD1 0x00080000
+#define CSUM_ENCAP_GENEVE 0x00080000 /* GENEVE outer encapsulation */
/* Flags used to indicate that the checksum was verified by hardware. */
#define CSUM_INNER_L3_CALC 0x00100000
@@ -698,7 +698,7 @@
CSUM_INNER_IP6_TSO | CSUM_IP6_UDP | CSUM_IP6_TCP | CSUM_IP6_SCTP | \
CSUM_IP6_TSO | CSUM_IP6_ISCSI | CSUM_INNER_IP | CSUM_INNER_IP_UDP | \
CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_ENCAP_VXLAN | \
- CSUM_ENCAP_RSVD1 | CSUM_SND_TAG)
+ CSUM_ENCAP_GENEVE | CSUM_SND_TAG)
#define CSUM_FLAGS_RX (CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID | \
CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID | CSUM_L3_CALC | CSUM_L3_VALID | \
@@ -714,7 +714,7 @@
"\11CSUM_INNER_IP6_TSO\12CSUM_IP6_UDP\13CSUM_IP6_TCP\14CSUM_IP6_SCTP" \
"\15CSUM_IP6_TSO\16CSUM_IP6_ISCSI\17CSUM_INNER_IP\20CSUM_INNER_IP_UDP" \
"\21CSUM_INNER_IP_TCP\22CSUM_INNER_IP_TSO\23CSUM_ENCAP_VXLAN" \
- "\24CSUM_ENCAP_RSVD1\25CSUM_INNER_L3_CALC\26CSUM_INNER_L3_VALID" \
+ "\24CSUM_ENCAP_GENEVE\25CSUM_INNER_L3_CALC\26CSUM_INNER_L3_VALID" \
"\27CSUM_INNER_L4_CALC\30CSUM_INNER_L4_VALID\31CSUM_L3_CALC" \
"\32CSUM_L3_VALID\33CSUM_L4_CALC\34CSUM_L4_VALID\35CSUM_L5_CALC" \
"\36CSUM_L5_VALID\37CSUM_COALESCED\40CSUM_SND_TAG"
diff --git a/sys/sys/priv.h b/sys/sys/priv.h
--- a/sys/sys/priv.h
+++ b/sys/sys/priv.h
@@ -355,6 +355,7 @@
#define PRIV_NET_OVPN 422 /* Administer OpenVPN DCO. */
#define PRIV_NET_ME 423 /* Administer ME interface. */
#define PRIV_NET_WG 424 /* Administer WireGuard interface. */
+#define PRIV_NET_GENEVE 425 /* Administer geneve. */
/*
* 802.11-related privileges.
diff --git a/tests/atf_python/sys/netlink/netlink_route.py b/tests/atf_python/sys/netlink/netlink_route.py
--- a/tests/atf_python/sys/netlink/netlink_route.py
+++ b/tests/atf_python/sys/netlink/netlink_route.py
@@ -381,6 +381,34 @@
IFLA_VLAN_PROTOCOL = auto()
+# Geneve IFLA_INFO_DATA attribute numbers.  The order has to stay identical
+# to the IFLA_GENEVE_* enum in sys/netlink/route/interface.h, because auto()
+# numbers the members sequentially starting after IFLA_GENEVE_UNSPEC = 0.
+class IfLinkInfoDataGeneve(Enum):
+ IFLA_GENEVE_UNSPEC = 0
+ IFLA_GENEVE_ID = auto()
+ IFLA_GENEVE_PROTOCOL = auto()
+ IFLA_GENEVE_LOCAL = auto()
+ IFLA_GENEVE_REMOTE = auto()
+ IFLA_GENEVE_LOCAL_PORT = auto()
+ IFLA_GENEVE_PORT = auto()
+ IFLA_GENEVE_PORT_RANGE = auto()
+ IFLA_GENEVE_DF = auto()
+ IFLA_GENEVE_TTL = auto()
+ IFLA_GENEVE_TTL_INHERIT = auto()
+ IFLA_GENEVE_DSCP_INHERIT = auto()
+ IFLA_GENEVE_COLLECT_METADATA = auto()
+ IFLA_GENEVE_FTABLE_LEARN = auto()
+ IFLA_GENEVE_FTABLE_FLUSH = auto()
+ IFLA_GENEVE_FTABLE_MAX = auto()
+ IFLA_GENEVE_FTABLE_TIMEOUT = auto()
+ IFLA_GENEVE_FTABLE_COUNT = auto()
+ IFLA_GENEVE_FTABLE_NOSPACE_CNT = auto()
+ IFLA_GENEVE_FTABLE_LOCK_UP_FAIL_CNT = auto()
+ IFLA_GENEVE_MC_IFNAME = auto()
+ IFLA_GENEVE_MC_IFINDEX = auto()
+ IFLA_GENEVE_TXCSUM_CNT = auto()
+ IFLA_GENEVE_TSO_CNT = auto()
+ IFLA_GENEVE_RXCSUM_CNT = auto()
+
+
class IfaddrMsg(Structure):
_fields_ = [
("ifa_family", c_ubyte),
diff --git a/tests/ci/tools/ci.conf b/tests/ci/tools/ci.conf
--- a/tests/ci/tools/ci.conf
+++ b/tests/ci/tools/ci.conf
@@ -74,6 +74,7 @@
kld_list="\${kld_list} if_bridge" # sys/net/if_bridge_test
kld_list="\${kld_list} if_enc" # sys/netpfil/pf
kld_list="\${kld_list} if_epair" # sys/net/if_epair_test
+kld_list="\${kld_list} if_geneve" # sys/net/if_geneve
kld_list="\${kld_list} if_ovpn" # sys/net/if_ovpn
kld_list="\${kld_list} if_stf" # sys/net/if_stf
kld_list="\${kld_list} ipdivert" # sys/netinet (loads ipdivert)
diff --git a/tests/sys/net/Makefile b/tests/sys/net/Makefile
--- a/tests/sys/net/Makefile
+++ b/tests/sys/net/Makefile
@@ -15,6 +15,7 @@
ATF_TESTS_SH+= if_tun_test
ATF_TESTS_SH+= if_vlan
ATF_TESTS_SH+= if_wg
+ATF_TESTS_SH+= if_geneve
TESTS_SUBDIRS+= bpf
TESTS_SUBDIRS+= if_ovpn
diff --git a/tests/sys/net/if_geneve.sh b/tests/sys/net/if_geneve.sh
new file mode 100755
--- /dev/null
+++ b/tests/sys/net/if_geneve.sh
@@ -0,0 +1,1000 @@
+#
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Copyright (c) 2025 Seyed Pouria Mousavizadeh Tehrani <info@spmzt.net>
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+. $(atf_get_srcdir)/../common/vnet.subr
+
+# ether_ipv4: two vnet jails joined by an epair each create a geneve
+# interface in l2 mode over IPv4 endpoints; pings between the IPv4 and IPv6
+# addresses assigned to the geneve interfaces must succeed both ways.
+atf_test_case "ether_ipv4" "cleanup"
+ether_ipv4_head()
+{
+	atf_set descr 'Create a geneve(4) l2 tunnel over an ipv4 underlay using epair and pass traffic between jails'
+	atf_set require.user root
+}
+
+ether_ipv4_body()
+{
+	local epair vni1 endpoint1 endpoint2
+	local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+	endpoint1=192.168.2.1
+	endpoint2=192.168.2.2
+	v4tunnel1=169.254.0.1
+	v4tunnel2=169.254.0.2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=1
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet ${endpoint1}/24 up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet ${endpoint2}/24 up
+
+	# One geneve interface per jail, each pointing at the other endpoint.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 geneveremote ${endpoint2} genevelocal ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 geneveremote ${endpoint1} genevelocal ${endpoint2} up
+
+	# Addresses on the geneve interfaces.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+}
+
+ether_ipv4_cleanup()
+{
+	vnet_cleanup
+}
+
+# ether_ipv6: two vnet jails joined by an epair each create a geneve
+# interface in l2 mode over IPv6 endpoints; pings between the IPv4 and IPv6
+# addresses assigned to the geneve interfaces must succeed both ways.
+atf_test_case "ether_ipv6" "cleanup"
+ether_ipv6_head()
+{
+	atf_set descr 'Create a geneve(4) l2 tunnel over an ipv6 underlay using epair and pass traffic between jails'
+	atf_set require.user root
+}
+
+ether_ipv6_body()
+{
+	local epair vni1 endpoint1 endpoint2
+	local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+	endpoint1=3fff::1
+	endpoint2=3fff::2
+	v4tunnel1=169.254.0.1
+	v4tunnel2=169.254.0.2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=1
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet6 ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet6 ${endpoint2} up
+
+	# One geneve interface per jail, each pointing at the other endpoint.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 geneveremote ${endpoint2} genevelocal ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 geneveremote ${endpoint1} genevelocal ${endpoint2} up
+
+	# Addresses on the geneve interfaces.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+}
+
+ether_ipv6_cleanup()
+{
+	vnet_cleanup
+}
+
+# inherit_ipv4: two vnet jails joined by an epair each create a geneve
+# interface in l3 mode over IPv4 endpoints; pings between the IPv4 and IPv6
+# addresses assigned to the geneve interfaces must succeed both ways.
+atf_test_case "inherit_ipv4" "cleanup"
+inherit_ipv4_head()
+{
+	atf_set descr 'Create a geneve(4) l3 tunnel over an ipv4 underlay using epair and pass traffic between jails'
+	atf_set require.user root
+}
+
+inherit_ipv4_body()
+{
+	local epair vni1 endpoint1 endpoint2
+	local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+	endpoint1=192.168.2.1
+	endpoint2=192.168.2.2
+	v4tunnel1=169.254.0.1
+	v4tunnel2=169.254.0.2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=2
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet ${endpoint1}/24 up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet ${endpoint2}/24 up
+
+	# One geneve interface per jail, each pointing at the other endpoint.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l3 debug \
+	    geneveid $vni1 geneveremote ${endpoint2} genevelocal ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l3 debug \
+	    geneveid $vni1 geneveremote ${endpoint1} genevelocal ${endpoint2} up
+
+	# Addresses on the geneve interfaces.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+}
+
+inherit_ipv4_cleanup()
+{
+	vnet_cleanup
+}
+
+# inherit_ipv6: two vnet jails joined by an epair each create a geneve
+# interface in l3 mode over IPv6 endpoints; pings between the IPv4 and IPv6
+# addresses assigned to the geneve interfaces must succeed both ways.
+atf_test_case "inherit_ipv6" "cleanup"
+inherit_ipv6_head()
+{
+	atf_set descr 'Create a geneve(4) l3 tunnel over an ipv6 underlay using epair and pass traffic between jails'
+	atf_set require.user root
+}
+
+inherit_ipv6_body()
+{
+	local epair vni1 endpoint1 endpoint2
+	local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+	endpoint1=3fff::1
+	endpoint2=3fff::2
+	v4tunnel1=169.254.0.1
+	v4tunnel2=169.254.0.2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=1
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet6 ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet6 ${endpoint2} up
+
+	# One geneve interface per jail, each pointing at the other endpoint.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l3 debug \
+	    geneveid $vni1 geneveremote ${endpoint2} genevelocal ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l3 debug \
+	    geneveid $vni1 geneveremote ${endpoint1} genevelocal ${endpoint2} up
+
+	# Addresses on the geneve interfaces.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+}
+
+inherit_ipv6_cleanup()
+{
+	vnet_cleanup
+}
+
+# ether_ipv6_blind_options: build an l2 geneve tunnel over IPv6 endpoints,
+# then set the geneve options one after another on genevetest1 and check
+# that `ifconfig -v geneve1` prints the expected text for each of them.
+atf_test_case "ether_ipv6_blind_options" "cleanup"
+ether_ipv6_blind_options_head()
+{
+	atf_set descr 'Create a geneve(4) l2 ipv6 tunnel and test geneve options'
+	atf_set require.user root
+}
+
+ether_ipv6_blind_options_body()
+{
+	local epair vni1 endpoint1 endpoint2
+	local v6tunnel1 v6tunnel2
+
+	endpoint1=3fff::1
+	endpoint2=3fff::2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=1
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet6 ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet6 ${endpoint2} up
+
+	# One geneve interface per jail, each pointing at the other endpoint.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 geneveremote ${endpoint2} genevelocal ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 geneveremote ${endpoint1} genevelocal ${endpoint2} up
+
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	# Traffic before touching any option; the "count: 1" check further
+	# down runs after this ping.
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+
+	# Forwarding table limits and learning.
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 genevemaxaddr 1000
+	atf_check -s exit:0 -o match:"max: 1000" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 genevetimeout 1000
+	atf_check -s exit:0 -o match:"timeout: 1000" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 -genevelearn
+	atf_check -s exit:0 -o match:"mode: nolearning" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 genevelearn
+	atf_check -s exit:0 -o match:" learning" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o match:"count: 1" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 geneveflush
+	atf_check -s exit:0 -o match:"count: 0" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 geneveflushall
+
+	# TTL, DF and DSCP handling.
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 genevettl inherit
+	atf_check -s exit:0 -o match:"ttl: inherit" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 genevettl 1
+	atf_check -s exit:0 -o match:"ttl: 1" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 down genevedf set up
+	atf_check -s exit:0 -o match:"df: set" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 down genevedf inherit up
+	atf_check -s exit:0 -o match:"df: inherit" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 down genevedf unset up
+	atf_check -s exit:0 -o match:"df: unset" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 genevedscpinherit
+	atf_check -s exit:0 -o match:"dscp: inherit" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 -genevedscpinherit
+
+	# External mode and source port range.
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 geneveexternal
+	atf_check -s exit:0 -o match:" external" ifconfig -j genevetest1 -v geneve1
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 -geneveexternal
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 down geneveportrange 11000 62000 up
+	atf_check -s exit:0 -o match:"portrange: 11000-62000" ifconfig -j genevetest1 -v geneve1
+
+	# The tunnel must still pass traffic after all the option changes.
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+}
+
+ether_ipv6_blind_options_cleanup()
+{
+	vnet_cleanup
+}
+
+# ether_ipv6_external: build an l2 geneve tunnel over IPv6 endpoints and
+# check that changing geneveid is refused (ifconfig exit status 16) while
+# geneveexternal is set, and accepted again once it is cleared.
+atf_test_case "ether_ipv6_external" "cleanup"
+ether_ipv6_external_head()
+{
+	atf_set descr 'Create a geneve(4) l2 ipv6 tunnel and test geneve collect metadata'
+	atf_set require.user root
+}
+
+ether_ipv6_external_body()
+{
+	local epair vni1 endpoint1 endpoint2
+	local v6tunnel1 v6tunnel2
+
+	endpoint1=3fff::1
+	endpoint2=3fff::2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=1
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet6 ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet6 ${endpoint2} up
+
+	# One geneve interface per jail, each pointing at the other endpoint.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 geneveremote ${endpoint2} genevelocal ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 geneveremote ${endpoint1} genevelocal ${endpoint2} up
+
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+
+	# geneveid change: expected to fail with external set, succeed without.
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 geneveexternal
+	atf_check -s exit:16 -e ignore ifconfig -j genevetest1 geneve1 down geneveid 10 up
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 -geneveexternal
+	atf_check -s exit:0 -o ignore ifconfig -j genevetest1 geneve1 down geneveid 10 up
+}
+
+ether_ipv6_external_cleanup()
+{
+	vnet_cleanup
+}
+
+# ether_ipv4_multicast: l2 geneve interfaces in two vnet jails are configured
+# with an IPv4 multicast group (genevegroup) and an explicit genevedev; the
+# group must show up in ifmcstat on each epair end and pings between the
+# geneve addresses must succeed both ways.
+atf_test_case "ether_ipv4_multicast" "cleanup"
+ether_ipv4_multicast_head()
+{
+	atf_set descr 'Create a geneve(4) l2 ipv4 multicast tunnel using epair and pass traffic between jails'
+	atf_set require.user root
+}
+
+ether_ipv4_multicast_body()
+{
+	local epair vni1 endpoint1 endpoint2 mc_group
+	local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+	endpoint1=192.168.2.1
+	endpoint2=192.168.2.2
+	mc_group=239.0.0.1
+	v4tunnel1=169.254.0.1
+	v4tunnel2=169.254.0.2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=1
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+	if ! kldstat -q -m ip_mroute; then
+		atf_skip "This test requires ip_mroute"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet ${endpoint1}/24 up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet ${endpoint2}/24 up
+
+	# manually add the multicast routes to epairs
+	route -j genevetest1 add -net 239.0.0.0/8 -interface ${epair}a
+	route -j genevetest2 add -net 239.0.0.0/8 -interface ${epair}b
+
+	# No geneveremote here: both sides use the multicast group instead.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 genevelocal ${endpoint1} \
+	    genevegroup ${mc_group} genevedev ${epair}a up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 genevelocal ${endpoint2} \
+	    genevegroup ${mc_group} genevedev ${epair}b up
+
+	# Addresses on the geneve interfaces.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	# The group must be listed on the underlay interface of each jail.
+	atf_check -s exit:0 -o match:"group 239.0.0.1" jexec genevetest1 ifmcstat -i ${epair}a -f inet
+	atf_check -s exit:0 -o match:"group 239.0.0.1" jexec genevetest2 ifmcstat -i ${epair}b -f inet
+
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+}
+
+ether_ipv4_multicast_cleanup()
+{
+	vnet_cleanup
+}
+
+# ether_ipv6_multicast: l2 geneve interfaces in two vnet jails are configured
+# with an IPv6 multicast group (genevegroup) and an explicit genevedev; pings
+# between the geneve addresses must succeed both ways.
+atf_test_case "ether_ipv6_multicast" "cleanup"
+ether_ipv6_multicast_head()
+{
+	atf_set descr 'Create a geneve(4) l2 ipv6 multicast tunnel using epair and pass traffic between jails'
+	atf_set require.user root
+}
+
+ether_ipv6_multicast_body()
+{
+	local epair vni1 endpoint1 endpoint2 mc_group
+	local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+	endpoint1=3fff::1
+	endpoint2=3fff::2
+	mc_group=ff08::db8:0:1
+	v4tunnel1=169.254.0.1
+	v4tunnel2=169.254.0.2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=1
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+	if ! kldstat -q -m ip6_mroute; then
+		atf_skip "This test requires ip6_mroute"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet6 ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet6 ${endpoint2} up
+
+	# manually add the multicast routes to epairs
+	route -j genevetest1 -n6 add -net ff08::db8:0:1/96 -interface ${epair}a
+	route -j genevetest2 -n6 add -net ff08::db8:0:1/96 -interface ${epair}b
+
+	# No geneveremote here: both sides use the multicast group instead.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 genevelocal ${endpoint1} \
+	    genevegroup ${mc_group} genevedev ${epair}a up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 genevelocal ${endpoint2} \
+	    genevegroup ${mc_group} genevedev ${epair}b up
+
+	# Addresses on the geneve interfaces.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+}
+
+ether_ipv6_multicast_cleanup()
+{
+	vnet_cleanup
+}
+
+# ether_ipv4_multicast_without_dev: same setup as the l2 IPv4 multicast case
+# but genevedev is not given; the group must still show up in ifmcstat on
+# each epair end and pings between the geneve addresses must succeed.
+atf_test_case "ether_ipv4_multicast_without_dev" "cleanup"
+ether_ipv4_multicast_without_dev_head()
+{
+	atf_set descr 'Create a geneve(4) l2 ipv4 multicast tunnel without specifying genevedev using epair and pass traffic between jails'
+	atf_set require.user root
+}
+
+ether_ipv4_multicast_without_dev_body()
+{
+	local epair vni1 endpoint1 endpoint2 mc_group
+	local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+	endpoint1=192.168.2.1
+	endpoint2=192.168.2.2
+	mc_group=239.0.0.1
+	v4tunnel1=169.254.0.1
+	v4tunnel2=169.254.0.2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=1
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+	if ! kldstat -q -m ip_mroute; then
+		atf_skip "This test requires ip_mroute"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet ${endpoint1}/24 up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet ${endpoint2}/24 up
+
+	# manually add the multicast routes to epairs
+	route -j genevetest1 add -net 239.0.0.0/8 -interface ${epair}a
+	route -j genevetest2 add -net 239.0.0.0/8 -interface ${epair}b
+
+	# Multicast group only: neither geneveremote nor genevedev is passed.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 genevelocal ${endpoint1} genevegroup ${mc_group} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 genevelocal ${endpoint2} genevegroup ${mc_group} up
+
+	# Addresses on the geneve interfaces.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	# The group must be listed on the underlay interface of each jail.
+	atf_check -s exit:0 -o match:"group 239.0.0.1" jexec genevetest1 ifmcstat -i ${epair}a -f inet
+	atf_check -s exit:0 -o match:"group 239.0.0.1" jexec genevetest2 ifmcstat -i ${epair}b -f inet
+
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+}
+
+ether_ipv4_multicast_without_dev_cleanup()
+{
+	vnet_cleanup
+}
+
+
+# ether_ipv6_multicast_without_dev: same setup as the l2 IPv6 multicast case
+# but genevedev is not given; pings between the geneve addresses must
+# succeed both ways.
+atf_test_case "ether_ipv6_multicast_without_dev" "cleanup"
+ether_ipv6_multicast_without_dev_head()
+{
+	atf_set descr 'Create a geneve(4) l2 ipv6 multicast tunnel without specifying genevedev using epair and pass traffic between jails'
+	atf_set require.user root
+}
+
+ether_ipv6_multicast_without_dev_body()
+{
+	local epair vni1 endpoint1 endpoint2 mc_group
+	local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+	endpoint1=3fff::1
+	endpoint2=3fff::2
+	mc_group=ff08::db8:0:1
+	v4tunnel1=169.254.0.1
+	v4tunnel2=169.254.0.2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=1
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+	if ! kldstat -q -m ip6_mroute; then
+		atf_skip "This test requires ip6_mroute"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet6 ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet6 ${endpoint2} up
+
+	# manually add the multicast routes to epairs
+	route -j genevetest1 -n6 add -net ff08::db8:0:1/96 -interface ${epair}a
+	route -j genevetest2 -n6 add -net ff08::db8:0:1/96 -interface ${epair}b
+
+	# Multicast group only: neither geneveremote nor genevedev is passed.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 genevelocal ${endpoint1} genevegroup ${mc_group} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l2 debug \
+	    geneveid $vni1 genevelocal ${endpoint2} genevegroup ${mc_group} up
+
+	# Addresses on the geneve interfaces.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/24
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+}
+
+ether_ipv6_multicast_without_dev_cleanup()
+{
+	vnet_cleanup
+}
+
+# inherit_ipv4_multicast: l3 geneve interfaces in two vnet jails are
+# configured with an IPv4 multicast group (genevegroup) and an explicit
+# genevedev; the group must show up in ifmcstat on each epair end and pings
+# between the geneve addresses must succeed both ways.
+atf_test_case "inherit_ipv4_multicast" "cleanup"
+inherit_ipv4_multicast_head()
+{
+	atf_set descr 'Create a geneve(4) l3 ipv4 multicast tunnel using epair and pass traffic between jails'
+	atf_set require.user root
+}
+
+inherit_ipv4_multicast_body()
+{
+	local epair vni1 endpoint1 endpoint2 mc_group
+	local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+	endpoint1=192.168.2.1
+	endpoint2=192.168.2.2
+	mc_group=239.0.0.1
+	v4tunnel1=169.254.0.1
+	v4tunnel2=169.254.0.2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=1
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+	if ! kldstat -q -m ip_mroute; then
+		atf_skip "This test requires ip_mroute"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet ${endpoint1}/24 up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet ${endpoint2}/24 up
+
+	# manually add the multicast routes to epairs
+	route -j genevetest1 add -net 239.0.0.0/8 -interface ${epair}a
+	route -j genevetest2 add -net 239.0.0.0/8 -interface ${epair}b
+
+	# No geneveremote here: both sides use the multicast group instead.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l3 debug \
+	    geneveid $vni1 genevelocal ${endpoint1} \
+	    genevegroup ${mc_group} genevedev ${epair}a up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l3 debug \
+	    geneveid $vni1 genevelocal ${endpoint2} \
+	    genevegroup ${mc_group} genevedev ${epair}b up
+
+	# Addresses on the geneve interfaces.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/30
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/30
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	atf_check -s exit:0 -o ignore sysctl -j genevetest1 net.inet.icmp.bmcastecho=1
+	atf_check -s exit:0 -o ignore sysctl -j genevetest2 net.inet.icmp.bmcastecho=1
+
+	# The group must be listed on the underlay interface of each jail.
+	atf_check -s exit:0 -o match:"group 239.0.0.1" jexec genevetest1 ifmcstat -i ${epair}a -f inet
+	atf_check -s exit:0 -o match:"group 239.0.0.1" jexec genevetest2 ifmcstat -i ${epair}b -f inet
+
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+}
+
+inherit_ipv4_multicast_cleanup()
+{
+	vnet_cleanup
+}
+
+# inherit_ipv6_multicast: l3 geneve interfaces in two vnet jails are
+# configured with an IPv6 multicast group (genevegroup) and an explicit
+# genevedev; pings between the geneve addresses must succeed both ways.
+atf_test_case "inherit_ipv6_multicast" "cleanup"
+inherit_ipv6_multicast_head()
+{
+	atf_set descr 'Create a geneve(4) l3 ipv6 multicast tunnel using epair and pass traffic between jails'
+	atf_set require.user root
+}
+
+inherit_ipv6_multicast_body()
+{
+	local epair vni1 endpoint1 endpoint2 mc_group
+	local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+	endpoint1=3fff::1
+	endpoint2=3fff::2
+	mc_group=ff08::db8:0:1
+	v4tunnel1=169.254.0.1
+	v4tunnel2=169.254.0.2
+	v6tunnel1=2001:db8::1
+	v6tunnel2=2001:db8::2
+	vni1=1
+
+	if ! kldstat -q -m if_geneve; then
+		atf_skip "This test requires if_geneve"
+	fi
+	if ! kldstat -q -m ip6_mroute; then
+		atf_skip "This test requires ip6_mroute"
+	fi
+
+	vnet_init
+	epair=$(vnet_mkepair)
+	vnet_mkjail genevetest1 ${epair}a
+	vnet_mkjail genevetest2 ${epair}b
+
+	# Underlay addresses; checked so that a setup failure is reported here
+	# instead of surfacing later as a tunnel or ping failure.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 ${epair}a inet6 ${endpoint1} up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 ${epair}b inet6 ${endpoint2} up
+
+	# manually add the multicast routes to epairs
+	route -j genevetest1 -n6 add -net ff08::db8:0:1/96 -interface ${epair}a
+	route -j genevetest2 -n6 add -net ff08::db8:0:1/96 -interface ${epair}b
+
+	# No geneveremote here: both sides use the multicast group instead.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 create genevemode l3 debug \
+	    geneveid $vni1 genevelocal ${endpoint1} \
+	    genevegroup ${mc_group} genevedev ${epair}a up
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 create genevemode l3 debug \
+	    geneveid $vni1 genevelocal ${endpoint2} \
+	    genevegroup ${mc_group} genevedev ${epair}b up
+
+	# Addresses on the geneve interfaces.
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/30
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/30
+	atf_check -s exit:0 -o ignore \
+	    ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+	atf_check -s exit:0 -o ignore sysctl -j genevetest1 net.inet.icmp.bmcastecho=1
+	atf_check -s exit:0 -o ignore sysctl -j genevetest2 net.inet.icmp.bmcastecho=1
+
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+	atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+	atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+}
+
+inherit_ipv6_multicast_cleanup()
+{
+	vnet_cleanup
+}
+
+atf_test_case "inherit_ipv4_multicast_without_dev" "cleanup"
+inherit_ipv4_multicast_without_dev_head()
+{
+ atf_set descr 'Create a geneve(4) l3 ipv4 multicast tunnel without specifying genevedev using epair and pass traffic between jails'
+ atf_set require.user root
+}
+
+inherit_ipv4_multicast_without_dev_body()
+{
+ local epair geneve1 geneve2 vni1 endpoint1 endpoint2 mc_group
+ local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+ endpoint1=192.168.2.1
+ endpoint2=192.168.2.2
+ mc_group=239.0.0.1
+ v4tunnel1=169.254.0.1
+ v4tunnel2=169.254.0.2
+ v6tunnel1=2001:db8::1
+ v6tunnel2=2001:db8::2
+ vni1=1
+
+ if ! kldstat -q -m if_geneve; then
+ atf_skip "This test requires if_geneve"
+ fi
+ if ! kldstat -q -m ip_mroute; then
+ atf_skip "This test requires ip_mroute"
+ fi
+
+ vnet_init
+ epair=$(vnet_mkepair)
+ vnet_mkjail genevetest1 ${epair}a
+ vnet_mkjail genevetest2 ${epair}b
+
+ ifconfig -j genevetest1 ${epair}a inet ${endpoint1}/24 up
+ ifconfig -j genevetest2 ${epair}b inet ${endpoint2}/24 up
+
+ # manually add the multicast routes to epairs
+ route -j genevetest1 add -net 239.0.0.0/8 -interface ${epair}a
+ route -j genevetest2 add -net 239.0.0.0/8 -interface ${epair}b
+
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest1 geneve1 create genevemode l3 debug \
+ geneveid $vni1 genevelocal ${endpoint1} genevegroup ${mc_group} up
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest2 geneve1 create genevemode l3 debug \
+ geneveid $vni1 genevelocal ${endpoint2} genevegroup ${mc_group} up
+
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/30
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/30
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+ atf_check -s exit:0 -o ignore sysctl -j genevetest1 net.inet.icmp.bmcastecho=1
+ atf_check -s exit:0 -o ignore sysctl -j genevetest2 net.inet.icmp.bmcastecho=1
+
+ atf_check -s exit:0 -o match:"group 239.0.0.1" jexec genevetest1 ifmcstat -i ${epair}a -f inet
+ atf_check -s exit:0 -o match:"group 239.0.0.1" jexec genevetest2 ifmcstat -i ${epair}b -f inet
+
+ atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+ atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+ atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+ atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+
+}
+
+# Cleanup function for the inherit_ipv4_multicast_without_dev test case.
+# All teardown is delegated to vnet_cleanup, a helper defined outside this
+# file section (presumably it removes the jails and epair created through
+# vnet_mkjail/vnet_mkepair in the body -- confirm in the vnet helper library).
+inherit_ipv4_multicast_without_dev_cleanup()
+{
+ vnet_cleanup
+}
+
+
+# Declare the test case together with its cleanup stage, then its metadata.
+atf_test_case inherit_ipv6_multicast_without_dev cleanup
+inherit_ipv6_multicast_without_dev_head()
+{
+	# The body runs privileged commands (jails, ifconfig, route).
+	atf_set "require.user" "root"
+	atf_set "descr" "Create a geneve(4) l3 ipv6 multicast tunnel without specifying genevedev using epair and pass traffic between jails"
+}
+
+inherit_ipv6_multicast_without_dev_body()
+{
+ local epair geneve1 geneve2 vni1 endpoint1 endpoint2 mc_group
+ local v4tunnel1 v4tunnel2 v6tunnel1 v6tunnel2
+
+ endpoint1=3fff::1
+ endpoint2=3fff::2
+ mc_group=ff08::db8:0:1
+ v4tunnel1=169.254.0.1
+ v4tunnel2=169.254.0.2
+ v6tunnel1=2001:db8::1
+ v6tunnel2=2001:db8::2
+ vni1=1
+
+ if ! kldstat -q -m if_geneve; then
+ atf_skip "This test requires if_geneve"
+ fi
+ if ! kldstat -q -m ip6_mroute; then
+ atf_skip "This test requires ip6_mroute"
+ fi
+
+ vnet_init
+ epair=$(vnet_mkepair)
+ vnet_mkjail genevetest1 ${epair}a
+ vnet_mkjail genevetest2 ${epair}b
+
+ ifconfig -j genevetest1 ${epair}a inet6 ${endpoint1} up
+ ifconfig -j genevetest2 ${epair}b inet6 ${endpoint2} up
+
+ # manually add the multicast routes to epairs
+ route -j genevetest1 -n6 add -net ff08::db8:0:1/96 -interface ${epair}a
+ route -j genevetest2 -n6 add -net ff08::db8:0:1/96 -interface ${epair}b
+
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest1 geneve1 create genevemode l3 debug \
+ geneveid $vni1 genevelocal ${endpoint1} genevegroup ${mc_group} up
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest2 geneve1 create genevemode l3 debug \
+ geneveid $vni1 genevelocal ${endpoint2} genevegroup ${mc_group} up
+
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest1 geneve1 inet ${v4tunnel1}/30
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest1 geneve1 inet6 ${v6tunnel1}
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest2 geneve1 inet ${v4tunnel2}/30
+ atf_check -s exit:0 -o ignore \
+ ifconfig -j genevetest2 geneve1 inet6 ${v6tunnel2}
+
+ atf_check -s exit:0 -o ignore sysctl -j genevetest1 net.inet.icmp.bmcastecho=1
+ atf_check -s exit:0 -o ignore sysctl -j genevetest2 net.inet.icmp.bmcastecho=1
+
+ atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v6tunnel2
+ atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v6tunnel1
+ atf_check -s exit:0 -o ignore jexec genevetest1 ping -nc 1 -t 1 $v4tunnel2
+ atf_check -s exit:0 -o ignore jexec genevetest2 ping -nc 1 -t 1 $v4tunnel1
+
+}
+
+# Cleanup function for the inherit_ipv6_multicast_without_dev test case.
+# All teardown is delegated to vnet_cleanup, a helper defined outside this
+# file section (presumably it removes the jails and epair created through
+# vnet_mkjail/vnet_mkepair in the body -- confirm in the vnet helper library).
+inherit_ipv6_multicast_without_dev_cleanup()
+{
+ vnet_cleanup
+}
+
+
+# Register the fourteen geneve test cases listed below with ATF, in order:
+# the ether_* cases first, then the inherit_* cases.
+atf_init_test_cases()
+{
+	local geneve_tc
+
+	for geneve_tc in \
+	    ether_ipv4 \
+	    ether_ipv4_multicast \
+	    ether_ipv4_multicast_without_dev \
+	    ether_ipv6 \
+	    ether_ipv6_blind_options \
+	    ether_ipv6_external \
+	    ether_ipv6_multicast \
+	    ether_ipv6_multicast_without_dev \
+	    inherit_ipv4 \
+	    inherit_ipv4_multicast \
+	    inherit_ipv4_multicast_without_dev \
+	    inherit_ipv6 \
+	    inherit_ipv6_multicast \
+	    inherit_ipv6_multicast_without_dev
+	do
+		atf_add_test_case "${geneve_tc}"
+	done
+}

File Metadata

Mime Type
text/plain
Expires
Fri, Dec 12, 7:39 AM (6 h, 19 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
26891750
Default Alt Text
D54172.diff (227 KB)

Event Timeline