diff --git a/contrib/tcpdump/print-pfsync.c b/contrib/tcpdump/print-pfsync.c index e4f11930816c..e7885c4a9e00 100644 --- a/contrib/tcpdump/print-pfsync.c +++ b/contrib/tcpdump/print-pfsync.c @@ -1,472 +1,484 @@ /* * Copyright (c) 2012 Gleb Smirnoff * Copyright (c) 2002 Michael Shalayeff * Copyright (c) 2001 Daniel Hartmeier * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * $OpenBSD: print-pfsync.c,v 1.38 2012/09/19 13:50:36 mikeb Exp $ * $OpenBSD: pf_print_state.c,v 1.11 2012/07/08 17:48:37 lteo Exp $ */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #ifndef HAVE_NET_PFVAR_H #error "No pf headers available" #endif #include <sys/endian.h> #include <net/if.h> #include <net/pfvar.h> #include <net/if_pfsync.h> #define TCPSTATES #include <netinet/tcp_fsm.h> #include <netdissect-stdinc.h> #include <string.h> #include "netdissect.h" #include "interface.h" #include "addrtoname.h" static void pfsync_print(netdissect_options *, struct pfsync_header *, const u_char *, u_int); static void print_src_dst(netdissect_options *, const struct pf_state_peer_export *, const struct pf_state_peer_export *, uint8_t); static void print_state(netdissect_options *, union pfsync_state_union *, int); void pfsync_if_print(netdissect_options *ndo, const struct pcap_pkthdr *h, register const u_char *p) { u_int caplen = h->caplen; ts_print(ndo, &h->ts); if (caplen < PFSYNC_HDRLEN) { ND_PRINT("[|pfsync]"); goto out; } pfsync_print(ndo, (struct pfsync_header *)p, p + sizeof(struct pfsync_header), caplen - sizeof(struct pfsync_header)); out: if (ndo->ndo_xflag) { hex_print(ndo, "\n\t", p, caplen); } fn_print_char(ndo, '\n'); return; } void pfsync_ip_print(netdissect_options *ndo , const u_char *bp, u_int len) { struct pfsync_header *hdr = (struct pfsync_header *)bp; if (len < PFSYNC_HDRLEN || !ND_TTEST_LEN(bp, len)) ND_PRINT("[|pfsync]"); else pfsync_print(ndo, hdr, bp + sizeof(struct pfsync_header), len - sizeof(struct pfsync_header)); } struct pfsync_actions { const char *name; size_t len; void (*print)(netdissect_options *, const void *); }; static void pfsync_print_clr(netdissect_options *, const void *); static void pfsync_print_state_1301(netdissect_options *, const void *); static void pfsync_print_state_1400(netdissect_options *, const void *); +static void pfsync_print_state_1500(netdissect_options *, const void *); static void pfsync_print_ins_ack(netdissect_options *, const void *); static void
pfsync_print_upd_c(netdissect_options *, const void *); static void pfsync_print_upd_req(netdissect_options *, const void *); static void pfsync_print_del_c(netdissect_options *, const void *); static void pfsync_print_bus(netdissect_options *, const void *); static void pfsync_print_tdb(netdissect_options *, const void *); struct pfsync_actions actions[] = { { "clear all", sizeof(struct pfsync_clr), pfsync_print_clr }, { "insert 13.1", sizeof(struct pfsync_state_1301), pfsync_print_state_1301 }, { "insert ack", sizeof(struct pfsync_ins_ack), pfsync_print_ins_ack }, { "update 13.1", sizeof(struct pfsync_state_1301), pfsync_print_state_1301 }, { "update compressed", sizeof(struct pfsync_upd_c), pfsync_print_upd_c }, { "request uncompressed", sizeof(struct pfsync_upd_req), pfsync_print_upd_req }, { "delete", sizeof(struct pfsync_state_1301), pfsync_print_state_1301 }, { "delete compressed", sizeof(struct pfsync_del_c), pfsync_print_del_c }, { "frag insert", 0, NULL }, { "frag delete", 0, NULL }, { "bulk update status", sizeof(struct pfsync_bus), pfsync_print_bus }, { "tdb", 0, pfsync_print_tdb }, { "eof", 0, NULL }, { "insert", sizeof(struct pfsync_state_1400), pfsync_print_state_1400 }, { "update", sizeof(struct pfsync_state_1400), pfsync_print_state_1400 }, + { "insert", sizeof(struct pfsync_state_1500), pfsync_print_state_1500 }, + { "update", sizeof(struct pfsync_state_1500), pfsync_print_state_1500 }, }; static void pfsync_print(netdissect_options *ndo, struct pfsync_header *hdr, const u_char *bp, u_int len) { struct pfsync_subheader *subh; int count, plen, i; u_int alen; plen = ntohs(hdr->len); ND_PRINT("PFSYNCv%d len %d", hdr->version, plen); if (hdr->version != PFSYNC_VERSION) return; plen -= sizeof(*hdr); while (plen > 0) { if (len < sizeof(*subh)) break; subh = (struct pfsync_subheader *)bp; bp += sizeof(*subh); len -= sizeof(*subh); plen -= sizeof(*subh); if (subh->action >= PFSYNC_ACT_MAX) { ND_PRINT("\n act UNKNOWN id %d", subh->action); return; } count = ntohs(subh->count); ND_PRINT("\n %s count %d", actions[subh->action].name, count); alen = actions[subh->action].len; if (subh->action == PFSYNC_ACT_EOF) return; if (actions[subh->action].print == NULL) { ND_PRINT("\n unimplemented action %hhu", subh->action); return; } for (i = 0; i < count; i++) { if (len < alen) { len = 0; break; } if (ndo->ndo_vflag) actions[subh->action].print(ndo, bp); bp += alen; len -= alen; plen -= alen; } } if (plen > 0) { ND_PRINT("\n ..."); return; } if (plen < 0) { ND_PRINT("\n invalid header length"); return; } if (len > 0) ND_PRINT("\n invalid packet length"); } static void pfsync_print_clr(netdissect_options *ndo, const void *bp) { const struct pfsync_clr *clr = bp; ND_PRINT("\n\tcreatorid: %08x", htonl(clr->creatorid)); if (clr->ifname[0] != '\0') ND_PRINT(" interface: %s", clr->ifname); } static void pfsync_print_state_1301(netdissect_options *ndo, const void *bp) { struct pfsync_state_1301 *st = (struct pfsync_state_1301 *)bp; fn_print_char(ndo, '\n'); print_state(ndo, (union pfsync_state_union *)st, PFSYNC_MSG_VERSION_1301); } static void pfsync_print_state_1400(netdissect_options *ndo, const void *bp) { - struct pfsync_state_1301 *st = (struct pfsync_state_1301 *)bp; + struct pfsync_state_1400 *st = (struct pfsync_state_1400 *)bp; fn_print_char(ndo, '\n'); print_state(ndo, (union pfsync_state_union *)st, PFSYNC_MSG_VERSION_1400); } +static void +pfsync_print_state_1500(netdissect_options *ndo, const void *bp) +{ + struct pfsync_state_1500 *st = (struct pfsync_state_1500 *)bp; + + 
fn_print_char(ndo, '\n'); + print_state(ndo, (union pfsync_state_union *)st, PFSYNC_MSG_VERSION_1500); +} + static void pfsync_print_ins_ack(netdissect_options *ndo, const void *bp) { const struct pfsync_ins_ack *iack = bp; ND_PRINT("\n\tid: %016jx creatorid: %08x", (uintmax_t)be64toh(iack->id), ntohl(iack->creatorid)); } static void pfsync_print_upd_c(netdissect_options *ndo, const void *bp) { const struct pfsync_upd_c *u = bp; ND_PRINT("\n\tid: %016jx creatorid: %08x", (uintmax_t)be64toh(u->id), ntohl(u->creatorid)); if (ndo->ndo_vflag > 2) { ND_PRINT("\n\tTCP? :"); print_src_dst(ndo, &u->src, &u->dst, IPPROTO_TCP); } } static void pfsync_print_upd_req(netdissect_options *ndo, const void *bp) { const struct pfsync_upd_req *ur = bp; ND_PRINT("\n\tid: %016jx creatorid: %08x", (uintmax_t)be64toh(ur->id), ntohl(ur->creatorid)); } static void pfsync_print_del_c(netdissect_options *ndo, const void *bp) { const struct pfsync_del_c *d = bp; ND_PRINT("\n\tid: %016jx creatorid: %08x", (uintmax_t)be64toh(d->id), ntohl(d->creatorid)); } static void pfsync_print_bus(netdissect_options *ndo, const void *bp) { const struct pfsync_bus *b = bp; uint32_t endtime; int min, sec; const char *status; endtime = ntohl(b->endtime); sec = endtime % 60; endtime /= 60; min = endtime % 60; endtime /= 60; switch (b->status) { case PFSYNC_BUS_START: status = "start"; break; case PFSYNC_BUS_END: status = "end"; break; default: status = "UNKNOWN"; break; } ND_PRINT("\n\tcreatorid: %08x age: %.2u:%.2u:%.2u status: %s", htonl(b->creatorid), endtime, min, sec, status); } static void pfsync_print_tdb(netdissect_options *ndo, const void *bp) { const struct pfsync_tdb *t = bp; ND_PRINT("\n\tspi: 0x%08x rpl: %ju cur_bytes: %ju", ntohl(t->spi), (uintmax_t )be64toh(t->rpl), (uintmax_t )be64toh(t->cur_bytes)); } static void print_host(netdissect_options *ndo, struct pf_addr *addr, uint16_t port, sa_family_t af, const char *proto) { char buf[48]; if (inet_ntop(af, addr, buf, sizeof(buf)) == NULL) ND_PRINT("?"); else ND_PRINT("%s", buf); if (port) ND_PRINT(".%hu", ntohs(port)); } static void print_seq(netdissect_options *ndo, const struct pf_state_peer_export *p) { if (p->seqdiff) ND_PRINT("[%u + %u](+%u)", ntohl(p->seqlo), ntohl(p->seqhi) - ntohl(p->seqlo), ntohl(p->seqdiff)); else ND_PRINT("[%u + %u]", ntohl(p->seqlo), ntohl(p->seqhi) - ntohl(p->seqlo)); } static void print_src_dst(netdissect_options *ndo, const struct pf_state_peer_export *src, const struct pf_state_peer_export *dst, uint8_t proto) { if (proto == IPPROTO_TCP) { if (src->state <= TCPS_TIME_WAIT && dst->state <= TCPS_TIME_WAIT) ND_PRINT(" %s:%s", tcpstates[src->state], tcpstates[dst->state]); else if (src->state == PF_TCPS_PROXY_SRC || dst->state == PF_TCPS_PROXY_SRC) ND_PRINT(" PROXY:SRC"); else if (src->state == PF_TCPS_PROXY_DST || dst->state == PF_TCPS_PROXY_DST) ND_PRINT(" PROXY:DST"); else ND_PRINT(" <BAD STATE LEVELS %u:%u>", src->state, dst->state); if (ndo->ndo_vflag > 1) { ND_PRINT("\n\t"); print_seq(ndo, src); if (src->wscale && dst->wscale) ND_PRINT(" wscale %u", src->wscale & PF_WSCALE_MASK); ND_PRINT(" "); print_seq(ndo, dst); if (src->wscale && dst->wscale) ND_PRINT(" wscale %u", dst->wscale & PF_WSCALE_MASK); } } else if (proto == IPPROTO_UDP && src->state < PFUDPS_NSTATES && dst->state < PFUDPS_NSTATES) { const char *states[] = PFUDPS_NAMES; ND_PRINT(" %s:%s", states[src->state], states[dst->state]); } else if (proto != IPPROTO_ICMP && src->state < PFOTHERS_NSTATES && dst->state < PFOTHERS_NSTATES) { /* XXX ICMP doesn't really have state levels */ const char *states[] =
PFOTHERS_NAMES; ND_PRINT(" %s:%s", states[src->state], states[dst->state]); } else { ND_PRINT(" %u:%u", src->state, dst->state); } } static void print_state(netdissect_options *ndo, union pfsync_state_union *s, int version) { struct pf_state_peer_export *src, *dst; struct pfsync_state_key *sk, *nk; int min, sec; if (s->pfs_1301.direction == PF_OUT) { src = &s->pfs_1301.src; dst = &s->pfs_1301.dst; sk = &s->pfs_1301.key[PF_SK_STACK]; nk = &s->pfs_1301.key[PF_SK_WIRE]; if (s->pfs_1301.proto == IPPROTO_ICMP || s->pfs_1301.proto == IPPROTO_ICMPV6) sk->port[0] = nk->port[0]; } else { src = &s->pfs_1301.dst; dst = &s->pfs_1301.src; sk = &s->pfs_1301.key[PF_SK_WIRE]; nk = &s->pfs_1301.key[PF_SK_STACK]; if (s->pfs_1301.proto == IPPROTO_ICMP || s->pfs_1301.proto == IPPROTO_ICMPV6) sk->port[1] = nk->port[1]; } ND_PRINT("\t%s ", s->pfs_1301.ifname); ND_PRINT("proto %u ", s->pfs_1301.proto); print_host(ndo, &nk->addr[1], nk->port[1], s->pfs_1301.af, NULL); if (PF_ANEQ(&nk->addr[1], &sk->addr[1], s->pfs_1301.af) || nk->port[1] != sk->port[1]) { ND_PRINT((" (")); print_host(ndo, &sk->addr[1], sk->port[1], s->pfs_1301.af, NULL); ND_PRINT(")"); } if (s->pfs_1301.direction == PF_OUT) ND_PRINT((" -> ")); else ND_PRINT((" <- ")); print_host(ndo, &nk->addr[0], nk->port[0], s->pfs_1301.af, NULL); if (PF_ANEQ(&nk->addr[0], &sk->addr[0], s->pfs_1301.af) || nk->port[0] != sk->port[0]) { ND_PRINT((" (")); print_host(ndo, &sk->addr[0], sk->port[0], s->pfs_1301.af, NULL); ND_PRINT((")")); } print_src_dst(ndo, src, dst, s->pfs_1301.proto); if (ndo->ndo_vflag > 1) { uint64_t packets[2]; uint64_t bytes[2]; uint32_t creation = ntohl(s->pfs_1301.creation); uint32_t expire = ntohl(s->pfs_1301.expire); sec = creation % 60; creation /= 60; min = creation % 60; creation /= 60; ND_PRINT("\n\tage %.2u:%.2u:%.2u", creation, min, sec); sec = expire % 60; expire /= 60; min = expire % 60; expire /= 60; ND_PRINT(", expires in %.2u:%.2u:%.2u", expire, min, sec); bcopy(s->pfs_1301.packets[0], &packets[0], sizeof(uint64_t)); bcopy(s->pfs_1301.packets[1], &packets[1], sizeof(uint64_t)); bcopy(s->pfs_1301.bytes[0], &bytes[0], sizeof(uint64_t)); bcopy(s->pfs_1301.bytes[1], &bytes[1], sizeof(uint64_t)); ND_PRINT(", %ju:%ju pkts, %ju:%ju bytes", be64toh(packets[0]), be64toh(packets[1]), be64toh(bytes[0]), be64toh(bytes[1])); if (s->pfs_1301.anchor != ntohl(-1)) ND_PRINT(", anchor %u", ntohl(s->pfs_1301.anchor)); if (s->pfs_1301.rule != ntohl(-1)) ND_PRINT(", rule %u", ntohl(s->pfs_1301.rule)); } if (ndo->ndo_vflag > 1) { uint64_t id; bcopy(&s->pfs_1301.id, &id, sizeof(uint64_t)); ND_PRINT("\n\tid: %016jx creatorid: %08x", (uintmax_t )be64toh(id), ntohl(s->pfs_1301.creatorid)); } } diff --git a/share/man/man4/pfsync.4 b/share/man/man4/pfsync.4 index cc9c350ea875..c12bad74831f 100644 --- a/share/man/man4/pfsync.4 +++ b/share/man/man4/pfsync.4 @@ -1,294 +1,296 @@ .\" $OpenBSD: pfsync.4,v 1.28 2009/02/17 10:05:18 dlg Exp $ .\" .\" Copyright (c) 2002 Michael Shalayeff .\" Copyright (c) 2003-2004 Ryan McBride .\" All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. 
.\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR .\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES .\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. .\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, .\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT .\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF MIND, .\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY .\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT .\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF .\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .\" .Dd November 08, 2023 .Dt PFSYNC 4 .Os .Sh NAME .Nm pfsync .Nd packet filter state table synchronisation interface .Sh SYNOPSIS .Cd "device pfsync" .Pp In .Xr loader.conf 5 : .Cd net.pfsync.pfsync_buckets .Pp In .Xr sysctl.conf 5 : .Cd net.pfsync.carp_demotion_factor .Sh DESCRIPTION The .Nm interface is a pseudo-device which exposes certain changes to the state table used by .Xr pf 4 . State changes can be viewed by invoking .Xr tcpdump 1 on the .Nm interface. If configured with a physical synchronisation interface, .Nm will also send state changes out on that interface, and insert state changes received on that interface from other systems into the state table. .Pp By default, all local changes to the state table are exposed via .Nm . State changes from packets received by .Nm over the network are not rebroadcast. Updates to states created by a rule marked with the .Ar no-sync keyword are ignored by the .Nm interface (see .Xr pf.conf 5 for details). .Pp The .Nm interface will attempt to collapse multiple state updates into a single packet where possible. The maximum number of times a single state can be updated before a .Nm packet will be sent out is controlled by the .Ar maxupd parameter to ifconfig (see .Xr ifconfig 8 and the example below for more details). The sending out of a .Nm packet will be delayed by a maximum of one second. .Sh NETWORK SYNCHRONISATION States can be synchronised between two or more firewalls using this interface, by specifying a synchronisation interface using .Xr ifconfig 8 . For example, the following command sets fxp0 as the synchronisation interface: .Bd -literal -offset indent # ifconfig pfsync0 syncdev fxp0 .Ed .Pp By default, state change messages are sent out on the synchronisation interface using IP multicast packets to the 224.0.0.240 group address. An alternative destination address for .Nm packets can be specified using the .Ic syncpeer keyword. This can be used in combination with .Xr ipsec 4 to protect the synchronisation traffic. In such a configuration, the syncdev should be set to the .Xr enc 4 interface, as this is where the traffic arrives when it is decapsulated, e.g.: .Bd -literal -offset indent # ifconfig pfsync0 syncpeer 10.0.0.2 syncdev enc0 .Ed .Pp It is important that the pfsync traffic be well secured as there is no authentication on the protocol and it would be trivial to spoof packets which create states, bypassing the pf ruleset. Either run the pfsync protocol on a trusted network \- ideally a network dedicated to pfsync messages such as a crossover cable between two firewalls, or specify a peer address and protect the traffic with .Xr ipsec 4 . .Pp Support for .Nm transport over IPv6 was introduced in .Fx 14.0 . 
To set up .Nm using multicast with IPv6 link-local addresses, the .Ic syncpeer must be set to the .Nm multicast address and the .Ic syncdev to the interface where .Nm traffic is expected. .Bd -literal -offset indent # ifconfig pfsync0 syncpeer ff12::f0 syncdev vtnet0 .Ed .Pp When new features are introduced to .Xr pf 4 , the format of messages used by .Nm might change. .Nm will by default use the latest format. If synchronisation with a peer running an older version of FreeBSD is needed, the .Ar version parameter can be used. E.g.: .Bd -literal -offset indent # ifconfig pfsync0 version 1301 .Ed .Pp Currently the following versions are supported: .Bl -tag -width indent .It Cm 1301 FreeBSD releases 13.2 and older. Compatibility with FreeBSD 13.1 has been verified. .It Cm 1400 FreeBSD release 14.0. +.It Cm 1500 +FreeBSD release 15.0. .El .Sh SYSCTL VARIABLES The following variables can be entered at the .Xr loader 8 prompt, set in .Xr loader.conf 5 , or changed at runtime with .Xr sysctl 8 : .Bl -tag -width indent .It Va net.pfsync.carp_demotion_factor Value added to .Va net.inet.carp.demotion while .Nm tries to perform its bulk update. See .Xr carp 4 for more information. Default value is 240. .El .Sh LOADER TUNABLES The following tunable may be set in .Xr loader.conf 5 or at the .Xr loader 8 prompt: .Bl -tag -width indent .It Va net.pfsync.pfsync_buckets The number of .Nm buckets. This affects the performance and memory tradeoff. Defaults to twice the number of CPUs. Change only if benchmarks show this helps on your workload. .El .Sh EXAMPLES .Nm and .Xr carp 4 can be used together to provide automatic failover of a pair of firewalls configured in parallel. One firewall will handle all traffic until it dies, is shut down, or is manually demoted, at which point the second firewall will take over automatically. .Pp Both firewalls in this example have three .Xr sis 4 interfaces. sis0 is the external interface, on the 10.0.0.0/24 subnet; sis1 is the internal interface, on the 192.168.0.0/24 subnet; and sis2 is the .Nm interface, using the 192.168.254.0/24 subnet. A crossover cable connects the two firewalls via their sis2 interfaces. On all three interfaces, firewall A uses the .254 address, while firewall B uses .253. The interfaces are configured as follows (firewall A unless otherwise indicated): .Pp Interfaces configuration in .Pa /etc/rc.conf : .Bd -literal -offset indent network_interfaces="lo0 sis0 sis1 sis2" ifconfig_sis0="10.0.0.254/24" ifconfig_sis0_alias0="inet 10.0.0.1/24 vhid 1 pass foo" ifconfig_sis1="192.168.0.254/24" ifconfig_sis1_alias0="inet 192.168.0.1/24 vhid 2 pass bar" ifconfig_sis2="192.168.254.254/24" pfsync_enable="YES" pfsync_syncdev="sis2" .Ed .Pp .Xr pf 4 must also be configured to allow .Nm and .Xr carp 4 traffic through. The following should be added to the top of .Pa /etc/pf.conf : .Bd -literal -offset indent pass quick on { sis2 } proto pfsync keep state (no-sync) pass on { sis0 sis1 } proto carp keep state (no-sync) .Ed .Pp It is preferable that one firewall handle the forwarding of all the traffic, therefore the .Ar advskew on the backup firewall's .Xr carp 4 vhids should be set to something higher than the primary's.
For example, if firewall B is the backup, its vhid 2 configuration on sis1 would look like this: .Bd -literal -offset indent ifconfig_sis1_alias0="inet 192.168.0.1/24 vhid 2 pass bar advskew 100" .Ed .Pp The following must also be added to .Pa /etc/sysctl.conf : .Bd -literal -offset indent net.inet.carp.preempt=1 .Ed .Sh SEE ALSO .Xr tcpdump 1 , .Xr bpf 4 , .Xr carp 4 , .Xr enc 4 , .Xr inet 4 , .Xr inet6 4 , .Xr ipsec 4 , .Xr netintro 4 , .Xr pf 4 , .Xr pf.conf 5 , .Xr protocols 5 , .Xr rc.conf 5 , .Xr ifconfig 8 .Sh HISTORY The .Nm device first appeared in .Ox 3.3 . It was first imported to .Fx 5.3 . .Pp The .Nm protocol and kernel implementation were significantly modified in .Fx 9.0 . The newer protocol is not compatible with the older one and will not interoperate with it. diff --git a/sys/net/if_pfsync.h b/sys/net/if_pfsync.h index e99df0b85ccf..7b3177e1137d 100644 --- a/sys/net/if_pfsync.h +++ b/sys/net/if_pfsync.h @@ -1,299 +1,302 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Michael Shalayeff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2008 David Gwynne * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/ /* * $OpenBSD: if_pfsync.h,v 1.35 2008/06/29 08:42:15 mcbride Exp $ */ #ifndef _NET_IF_PFSYNC_H_ #define _NET_IF_PFSYNC_H_ #include #include #include #include #define PFSYNC_VERSION 5 #define PFSYNC_DFLTTL 255 enum pfsync_msg_versions { PFSYNC_MSG_VERSION_UNSPECIFIED = 0, PFSYNC_MSG_VERSION_1301 = 1301, PFSYNC_MSG_VERSION_1400 = 1400, + PFSYNC_MSG_VERSION_1500 = 1500, }; -#define PFSYNC_MSG_VERSION_DEFAULT PFSYNC_MSG_VERSION_1400 +#define PFSYNC_MSG_VERSION_DEFAULT PFSYNC_MSG_VERSION_1500 #define PFSYNC_ACT_CLR 0 /* clear all states */ #define PFSYNC_ACT_INS_1301 1 /* insert state */ #define PFSYNC_ACT_INS_ACK 2 /* ack of inserted state */ #define PFSYNC_ACT_UPD_1301 3 /* update state */ #define PFSYNC_ACT_UPD_C 4 /* "compressed" update state */ #define PFSYNC_ACT_UPD_REQ 5 /* request "uncompressed" state */ #define PFSYNC_ACT_DEL 6 /* delete state */ #define PFSYNC_ACT_DEL_C 7 /* "compressed" delete state */ #define PFSYNC_ACT_INS_F 8 /* insert fragment */ #define PFSYNC_ACT_DEL_F 9 /* delete fragments */ #define PFSYNC_ACT_BUS 10 /* bulk update status */ #define PFSYNC_ACT_TDB 11 /* TDB replay counter update */ #define PFSYNC_ACT_EOF 12 /* end of frame */ #define PFSYNC_ACT_INS_1400 13 /* insert state */ #define PFSYNC_ACT_UPD_1400 14 /* update state */ -#define PFSYNC_ACT_MAX 15 +#define PFSYNC_ACT_INS_1500 15 /* insert state */ +#define PFSYNC_ACT_UPD_1500 16 /* update state */ +#define PFSYNC_ACT_MAX 17 /* * A pfsync frame is built from a header followed by several sections which * are all prefixed with their own subheaders. Frames must be terminated with * an EOF subheader. * * | ... | * | IP header | * +============================+ * | pfsync_header | * +----------------------------+ * | pfsync_subheader | * +----------------------------+ * | first action fields | * | ... | * +----------------------------+ * | pfsync_subheader | * +----------------------------+ * | second action fields | * | ... 
| * +----------------------------+ * | EOF pfsync_subheader | * +----------------------------+ * | HMAC | * +============================+ */ /* * Frame header */ struct pfsync_header { u_int8_t version; u_int8_t _pad; u_int16_t len; u_int8_t pfcksum[PF_MD5_DIGEST_LENGTH]; } __packed; /* * Frame region subheader */ struct pfsync_subheader { u_int8_t action; u_int8_t _pad; u_int16_t count; } __packed; /* * CLR */ struct pfsync_clr { char ifname[IFNAMSIZ]; u_int32_t creatorid; } __packed; /* * INS, UPD, DEL */ /* these use struct pfsync_state in pfvar.h */ /* * INS_ACK */ struct pfsync_ins_ack { u_int64_t id; u_int32_t creatorid; } __packed; /* * UPD_C */ struct pfsync_upd_c { u_int64_t id; struct pf_state_peer_export src; struct pf_state_peer_export dst; u_int32_t creatorid; u_int32_t expire; u_int8_t timeout; u_int8_t _pad[3]; } __packed; /* * UPD_REQ */ struct pfsync_upd_req { u_int64_t id; u_int32_t creatorid; } __packed; /* * DEL_C */ struct pfsync_del_c { u_int64_t id; u_int32_t creatorid; } __packed; /* * INS_F, DEL_F */ /* not implemented (yet) */ /* * BUS */ struct pfsync_bus { u_int32_t creatorid; u_int32_t endtime; u_int8_t status; #define PFSYNC_BUS_START 1 #define PFSYNC_BUS_END 2 u_int8_t _pad[3]; } __packed; /* * TDB */ struct pfsync_tdb { u_int32_t spi; union sockaddr_union dst; u_int32_t rpl; u_int64_t cur_bytes; u_int8_t sproto; u_int8_t updates; u_int8_t _pad[2]; } __packed; #define PFSYNC_HDRLEN sizeof(struct pfsync_header) struct pfsyncstats { u_int64_t pfsyncs_ipackets; /* total input packets, IPv4 */ u_int64_t pfsyncs_ipackets6; /* total input packets, IPv6 */ u_int64_t pfsyncs_badif; /* not the right interface */ u_int64_t pfsyncs_badttl; /* TTL is not PFSYNC_DFLTTL */ u_int64_t pfsyncs_hdrops; /* packets shorter than hdr */ u_int64_t pfsyncs_badver; /* bad (incl unsupp) version */ u_int64_t pfsyncs_badact; /* bad action */ u_int64_t pfsyncs_badlen; /* data length does not match */ u_int64_t pfsyncs_badauth; /* bad authentication */ u_int64_t pfsyncs_stale; /* stale state */ u_int64_t pfsyncs_badval; /* bad values */ u_int64_t pfsyncs_badstate; /* insert/lookup failed */ u_int64_t pfsyncs_opackets; /* total output packets, IPv4 */ u_int64_t pfsyncs_opackets6; /* total output packets, IPv6 */ u_int64_t pfsyncs_onomem; /* no memory for an mbuf */ u_int64_t pfsyncs_oerrors; /* ip output error */ u_int64_t pfsyncs_iacts[PFSYNC_ACT_MAX]; u_int64_t pfsyncs_oacts[PFSYNC_ACT_MAX]; }; /* * Configuration structure for SIOCSETPFSYNC SIOCGETPFSYNC */ struct pfsyncreq { char pfsyncr_syncdev[IFNAMSIZ]; struct in_addr pfsyncr_syncpeer; int pfsyncr_maxupdates; #define PFSYNCF_OK 0x00000001 #define PFSYNCF_DEFER 0x00000002 int pfsyncr_defer; }; struct pfsync_kstatus { char syncdev[IFNAMSIZ]; struct sockaddr_storage syncpeer; int maxupdates; int version; int flags; }; struct pfsyncioc_nv { void *data; size_t len; /* The length of the nvlist data. */ size_t size; /* The total size of the data buffer. */ }; #define SIOCSETPFSYNC _IOW('i', 247, struct ifreq) #define SIOCGETPFSYNC _IOWR('i', 248, struct ifreq) #define SIOCSETPFSYNCNV _IOW('i', 249, struct ifreq) #define SIOCGETPFSYNCNV _IOWR('i', 250, struct ifreq) #ifdef _KERNEL /* * this shows where a pf state is with respect to the syncing. 
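 * (i.e. on which pfsync transmit queue, if any, the state currently
 * sits; PFSYNC_S_NONE marks a state that is not queued at all)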
* pf_kstate->sync_state */ #define PFSYNC_S_INS 0x00 #define PFSYNC_S_IACK 0x01 #define PFSYNC_S_UPD 0x02 #define PFSYNC_S_UPD_C 0x03 #define PFSYNC_S_DEL_C 0x04 #define PFSYNC_S_DEFER 0xfe #define PFSYNC_S_NONE 0xff #define PFSYNC_SI_IOCTL 0x01 #define PFSYNC_SI_CKSUM 0x02 #define PFSYNC_SI_ACK 0x04 #endif /* _KERNEL */ #endif /* _NET_IF_PFSYNC_H_ */ diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h index f73420494000..8c0e9b057a4f 100644 --- a/sys/net/pfvar.h +++ b/sys/net/pfvar.h @@ -1,2724 +1,2780 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Daniel Hartmeier * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $OpenBSD: pfvar.h,v 1.282 2009/01/29 15:12:28 pyr Exp $ */ #ifndef _NET_PFVAR_H_ #define _NET_PFVAR_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _KERNEL #include #include #include #include #include #include #endif #include #include #include #ifdef _KERNEL #define PF_PFIL_NOREFRAGMENT 0x80000000 #if defined(__arm__) #define PF_WANT_32_TO_64_COUNTER #endif /* * A hybrid of 32-bit and 64-bit counters which can be used on platforms where * counter(9) is very expensive. * * As 32-bit counters are expected to overflow, a periodic job sums them up to * a saved 64-bit state. Fetching the value still walks all CPUs to get the most * current snapshot. 
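 *
 * A minimal usage sketch (hypothetical consumer, not part of this file;
 * assumes a sleepable context so M_WAITOK is safe):
 *
 *	struct pf_counter_u64 c;
 *
 *	if (pf_counter_u64_init(&c, M_WAITOK) != 0)
 *		return (ENOMEM);
 *	pf_counter_u64_add(&c, 1);
 *	printf("%ju\n", (uintmax_t)pf_counter_u64_fetch(&c));
 *	pf_counter_u64_deinit(&c);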
*/ #ifdef PF_WANT_32_TO_64_COUNTER struct pf_counter_u64_pcpu { u_int32_t current; u_int32_t snapshot; }; struct pf_counter_u64 { struct pf_counter_u64_pcpu *pfcu64_pcpu; u_int64_t pfcu64_value; seqc_t pfcu64_seqc; }; static inline int pf_counter_u64_init(struct pf_counter_u64 *pfcu64, int flags) { pfcu64->pfcu64_value = 0; pfcu64->pfcu64_seqc = 0; pfcu64->pfcu64_pcpu = uma_zalloc_pcpu(pcpu_zone_8, flags | M_ZERO); if (__predict_false(pfcu64->pfcu64_pcpu == NULL)) return (ENOMEM); return (0); } static inline void pf_counter_u64_deinit(struct pf_counter_u64 *pfcu64) { uma_zfree_pcpu(pcpu_zone_8, pfcu64->pfcu64_pcpu); } static inline void pf_counter_u64_critical_enter(void) { critical_enter(); } static inline void pf_counter_u64_critical_exit(void) { critical_exit(); } static inline void pf_counter_u64_rollup_protected(struct pf_counter_u64 *pfcu64, uint64_t n) { MPASS(curthread->td_critnest > 0); pfcu64->pfcu64_value += n; } static inline void pf_counter_u64_add_protected(struct pf_counter_u64 *pfcu64, uint32_t n) { struct pf_counter_u64_pcpu *pcpu; u_int32_t val; MPASS(curthread->td_critnest > 0); pcpu = zpcpu_get(pfcu64->pfcu64_pcpu); val = atomic_load_int(&pcpu->current); atomic_store_int(&pcpu->current, val + n); } static inline void pf_counter_u64_add(struct pf_counter_u64 *pfcu64, uint32_t n) { critical_enter(); pf_counter_u64_add_protected(pfcu64, n); critical_exit(); } static inline u_int64_t pf_counter_u64_periodic(struct pf_counter_u64 *pfcu64) { struct pf_counter_u64_pcpu *pcpu; u_int64_t sum; u_int32_t val; int cpu; MPASS(curthread->td_critnest > 0); seqc_write_begin(&pfcu64->pfcu64_seqc); sum = pfcu64->pfcu64_value; CPU_FOREACH(cpu) { pcpu = zpcpu_get_cpu(pfcu64->pfcu64_pcpu, cpu); val = atomic_load_int(&pcpu->current); sum += (uint32_t)(val - pcpu->snapshot); pcpu->snapshot = val; } pfcu64->pfcu64_value = sum; seqc_write_end(&pfcu64->pfcu64_seqc); return (sum); } static inline u_int64_t pf_counter_u64_fetch(const struct pf_counter_u64 *pfcu64) { struct pf_counter_u64_pcpu *pcpu; u_int64_t sum; seqc_t seqc; int cpu; for (;;) { seqc = seqc_read(&pfcu64->pfcu64_seqc); sum = 0; CPU_FOREACH(cpu) { pcpu = zpcpu_get_cpu(pfcu64->pfcu64_pcpu, cpu); sum += (uint32_t)(atomic_load_int(&pcpu->current) -pcpu->snapshot); } sum += pfcu64->pfcu64_value; if (seqc_consistent(&pfcu64->pfcu64_seqc, seqc)) break; } return (sum); } static inline void pf_counter_u64_zero_protected(struct pf_counter_u64 *pfcu64) { struct pf_counter_u64_pcpu *pcpu; int cpu; MPASS(curthread->td_critnest > 0); seqc_write_begin(&pfcu64->pfcu64_seqc); CPU_FOREACH(cpu) { pcpu = zpcpu_get_cpu(pfcu64->pfcu64_pcpu, cpu); pcpu->snapshot = atomic_load_int(&pcpu->current); } pfcu64->pfcu64_value = 0; seqc_write_end(&pfcu64->pfcu64_seqc); } static inline void pf_counter_u64_zero(struct pf_counter_u64 *pfcu64) { critical_enter(); pf_counter_u64_zero_protected(pfcu64); critical_exit(); } #else struct pf_counter_u64 { counter_u64_t counter; }; static inline int pf_counter_u64_init(struct pf_counter_u64 *pfcu64, int flags) { pfcu64->counter = counter_u64_alloc(flags); if (__predict_false(pfcu64->counter == NULL)) return (ENOMEM); return (0); } static inline void pf_counter_u64_deinit(struct pf_counter_u64 *pfcu64) { counter_u64_free(pfcu64->counter); } static inline void pf_counter_u64_critical_enter(void) { } static inline void pf_counter_u64_critical_exit(void) { } static inline void pf_counter_u64_rollup_protected(struct pf_counter_u64 *pfcu64, uint64_t n) { counter_u64_add(pfcu64->counter, n); } static inline void 
pf_counter_u64_add_protected(struct pf_counter_u64 *pfcu64, uint32_t n) { counter_u64_add(pfcu64->counter, n); } static inline void pf_counter_u64_add(struct pf_counter_u64 *pfcu64, uint32_t n) { pf_counter_u64_add_protected(pfcu64, n); } static inline u_int64_t pf_counter_u64_fetch(const struct pf_counter_u64 *pfcu64) { return (counter_u64_fetch(pfcu64->counter)); } static inline void pf_counter_u64_zero_protected(struct pf_counter_u64 *pfcu64) { counter_u64_zero(pfcu64->counter); } static inline void pf_counter_u64_zero(struct pf_counter_u64 *pfcu64) { pf_counter_u64_zero_protected(pfcu64); } #endif #define pf_get_timestamp(prule)({ \ uint32_t _ts = 0; \ uint32_t __ts; \ int cpu; \ CPU_FOREACH(cpu) { \ __ts = *zpcpu_get_cpu(prule->timestamp, cpu); \ if (__ts > _ts) \ _ts = __ts; \ } \ _ts; \ }) #define pf_update_timestamp(prule) \ do { \ critical_enter(); \ *zpcpu_get((prule)->timestamp) = time_second; \ critical_exit(); \ } while (0) #define pf_timestamp_pcpu_zone (sizeof(time_t) == 4 ? pcpu_zone_4 : pcpu_zone_8) _Static_assert(sizeof(time_t) == 4 || sizeof(time_t) == 8, "unexpected time_t size"); SYSCTL_DECL(_net_pf); MALLOC_DECLARE(M_PF); MALLOC_DECLARE(M_PFHASH); MALLOC_DECLARE(M_PF_RULE_ITEM); SDT_PROVIDER_DECLARE(pf); SDT_PROBE_DECLARE(pf, , test, reason_set); SDT_PROBE_DECLARE(pf, , log, log); #define DPFPRINTF(n, fmt, x...) \ do { \ SDT_PROBE2(pf, , log, log, (n), fmt); \ if (V_pf_status.debug >= (n)) \ printf(fmt "\n", ##x); \ } while (0) struct pfi_dynaddr { TAILQ_ENTRY(pfi_dynaddr) entry; struct pf_addr pfid_addr4; struct pf_addr pfid_mask4; struct pf_addr pfid_addr6; struct pf_addr pfid_mask6; struct pfr_ktable *pfid_kt; struct pfi_kkif *pfid_kif; int pfid_net; /* mask or 128 */ int pfid_acnt4; /* address count IPv4 */ int pfid_acnt6; /* address count IPv6 */ sa_family_t pfid_af; /* rule af */ u_int8_t pfid_iflags; /* PFI_AFLAG_* */ }; #define PF_NAME "pf" #define PF_HASHROW_ASSERT(h) mtx_assert(&(h)->lock, MA_OWNED) #define PF_HASHROW_LOCK(h) mtx_lock(&(h)->lock) #define PF_HASHROW_UNLOCK(h) mtx_unlock(&(h)->lock) #ifdef INVARIANTS #define PF_STATE_LOCK(s) \ do { \ struct pf_kstate *_s = (s); \ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(_s)]; \ MPASS(_s->lock == &_ih->lock); \ mtx_lock(_s->lock); \ } while (0) #define PF_STATE_UNLOCK(s) \ do { \ struct pf_kstate *_s = (s); \ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(_s)]; \ MPASS(_s->lock == &_ih->lock); \ mtx_unlock(_s->lock); \ } while (0) #else #define PF_STATE_LOCK(s) mtx_lock((s)->lock) #define PF_STATE_UNLOCK(s) mtx_unlock((s)->lock) #endif #ifdef INVARIANTS #define PF_STATE_LOCK_ASSERT(s) \ do { \ struct pf_kstate *_s = (s); \ struct pf_idhash *_ih = &V_pf_idhash[PF_IDHASH(_s)]; \ MPASS(_s->lock == &_ih->lock); \ PF_HASHROW_ASSERT(_ih); \ } while (0) #else /* !INVARIANTS */ #define PF_STATE_LOCK_ASSERT(s) do {} while (0) #endif /* INVARIANTS */ #ifdef INVARIANTS #define PF_SRC_NODE_LOCK(sn) \ do { \ struct pf_ksrc_node *_sn = (sn); \ struct pf_srchash *_sh = &V_pf_srchash[ \ pf_hashsrc(&_sn->addr, _sn->af)]; \ MPASS(_sn->lock == &_sh->lock); \ mtx_lock(_sn->lock); \ } while (0) #define PF_SRC_NODE_UNLOCK(sn) \ do { \ struct pf_ksrc_node *_sn = (sn); \ struct pf_srchash *_sh = &V_pf_srchash[ \ pf_hashsrc(&_sn->addr, _sn->af)]; \ MPASS(_sn->lock == &_sh->lock); \ mtx_unlock(_sn->lock); \ } while (0) #else #define PF_SRC_NODE_LOCK(sn) mtx_lock((sn)->lock) #define PF_SRC_NODE_UNLOCK(sn) mtx_unlock((sn)->lock) #endif #ifdef INVARIANTS #define PF_SRC_NODE_LOCK_ASSERT(sn) \ do { \ struct pf_ksrc_node *_sn = (sn); \ 
struct pf_srchash *_sh = &V_pf_srchash[ \ pf_hashsrc(&_sn->addr, _sn->af)]; \ MPASS(_sn->lock == &_sh->lock); \ PF_HASHROW_ASSERT(_sh); \ } while (0) #else /* !INVARIANTS */ #define PF_SRC_NODE_LOCK_ASSERT(sn) do {} while (0) #endif /* INVARIANTS */ extern struct mtx_padalign pf_unlnkdrules_mtx; #define PF_UNLNKDRULES_LOCK() mtx_lock(&pf_unlnkdrules_mtx) #define PF_UNLNKDRULES_UNLOCK() mtx_unlock(&pf_unlnkdrules_mtx) #define PF_UNLNKDRULES_ASSERT() mtx_assert(&pf_unlnkdrules_mtx, MA_OWNED) extern struct sx pf_config_lock; #define PF_CONFIG_LOCK() sx_xlock(&pf_config_lock) #define PF_CONFIG_UNLOCK() sx_xunlock(&pf_config_lock) #define PF_CONFIG_ASSERT() sx_assert(&pf_config_lock, SA_XLOCKED) VNET_DECLARE(struct rmlock, pf_rules_lock); #define V_pf_rules_lock VNET(pf_rules_lock) #define PF_RULES_RLOCK_TRACKER struct rm_priotracker _pf_rules_tracker #define PF_RULES_RLOCK() rm_rlock(&V_pf_rules_lock, &_pf_rules_tracker) #define PF_RULES_RUNLOCK() rm_runlock(&V_pf_rules_lock, &_pf_rules_tracker) #define PF_RULES_WLOCK() rm_wlock(&V_pf_rules_lock) #define PF_RULES_WUNLOCK() rm_wunlock(&V_pf_rules_lock) #define PF_RULES_WOWNED() rm_wowned(&V_pf_rules_lock) #define PF_RULES_ASSERT() rm_assert(&V_pf_rules_lock, RA_LOCKED) #define PF_RULES_RASSERT() rm_assert(&V_pf_rules_lock, RA_RLOCKED) #define PF_RULES_WASSERT() rm_assert(&V_pf_rules_lock, RA_WLOCKED) +VNET_DECLARE(struct rmlock, pf_tags_lock); +#define V_pf_tags_lock VNET(pf_tags_lock) + +#define PF_TAGS_RLOCK_TRACKER struct rm_priotracker _pf_tags_tracker +#define PF_TAGS_RLOCK() rm_rlock(&V_pf_tags_lock, &_pf_tags_tracker) +#define PF_TAGS_RUNLOCK() rm_runlock(&V_pf_tags_lock, &_pf_tags_tracker) +#define PF_TAGS_WLOCK() rm_wlock(&V_pf_tags_lock) +#define PF_TAGS_WUNLOCK() rm_wunlock(&V_pf_tags_lock) +#define PF_TAGS_WASSERT() rm_assert(&V_pf_tags_lock, RA_WLOCKED) + extern struct mtx_padalign pf_table_stats_lock; #define PF_TABLE_STATS_LOCK() mtx_lock(&pf_table_stats_lock) #define PF_TABLE_STATS_UNLOCK() mtx_unlock(&pf_table_stats_lock) #define PF_TABLE_STATS_OWNED() mtx_owned(&pf_table_stats_lock) #define PF_TABLE_STATS_ASSERT() mtx_assert(&pf_table_stats_lock, MA_OWNED) extern struct sx pf_end_lock; #define PF_MODVER 1 #define PFLOG_MODVER 1 #define PFSYNC_MODVER 1 #define PFLOG_MINVER 1 #define PFLOG_PREFVER PFLOG_MODVER #define PFLOG_MAXVER 1 #define PFSYNC_MINVER 1 #define PFSYNC_PREFVER PFSYNC_MODVER #define PFSYNC_MAXVER 1 #ifdef INET #ifndef INET6 #define PF_INET_ONLY #endif /* ! INET6 */ #endif /* INET */ #ifdef INET6 #ifndef INET #define PF_INET6_ONLY #endif /* ! 
INET */ #endif /* INET6 */ #ifdef INET #ifdef INET6 #define PF_INET_INET6 #endif /* INET6 */ #endif /* INET */ #else #define PF_INET_INET6 #endif /* _KERNEL */ /* Both IPv4 and IPv6 */ #ifdef PF_INET_INET6 #define PF_AEQ(a, b, c) \ ((c == AF_INET && (a)->addr32[0] == (b)->addr32[0]) || \ (c == AF_INET6 && (a)->addr32[3] == (b)->addr32[3] && \ (a)->addr32[2] == (b)->addr32[2] && \ (a)->addr32[1] == (b)->addr32[1] && \ (a)->addr32[0] == (b)->addr32[0])) \ #define PF_ANEQ(a, b, c) \ ((c == AF_INET && (a)->addr32[0] != (b)->addr32[0]) || \ (c == AF_INET6 && ((a)->addr32[0] != (b)->addr32[0] || \ (a)->addr32[1] != (b)->addr32[1] || \ (a)->addr32[2] != (b)->addr32[2] || \ (a)->addr32[3] != (b)->addr32[3]))) \ #define PF_AZERO(a, c) \ ((c == AF_INET && !(a)->addr32[0]) || \ (c == AF_INET6 && !(a)->addr32[0] && !(a)->addr32[1] && \ !(a)->addr32[2] && !(a)->addr32[3] )) \ #else /* Just IPv6 */ #ifdef PF_INET6_ONLY #define PF_AEQ(a, b, c) \ ((a)->addr32[3] == (b)->addr32[3] && \ (a)->addr32[2] == (b)->addr32[2] && \ (a)->addr32[1] == (b)->addr32[1] && \ (a)->addr32[0] == (b)->addr32[0]) \ #define PF_ANEQ(a, b, c) \ ((a)->addr32[3] != (b)->addr32[3] || \ (a)->addr32[2] != (b)->addr32[2] || \ (a)->addr32[1] != (b)->addr32[1] || \ (a)->addr32[0] != (b)->addr32[0]) \ #define PF_AZERO(a, c) \ (!(a)->addr32[0] && \ !(a)->addr32[1] && \ !(a)->addr32[2] && \ !(a)->addr32[3] ) \ #else /* Just IPv4 */ #ifdef PF_INET_ONLY #define PF_AEQ(a, b, c) \ ((a)->addr32[0] == (b)->addr32[0]) #define PF_ANEQ(a, b, c) \ ((a)->addr32[0] != (b)->addr32[0]) #define PF_AZERO(a, c) \ (!(a)->addr32[0]) #endif /* PF_INET_ONLY */ #endif /* PF_INET6_ONLY */ #endif /* PF_INET_INET6 */ #ifdef _KERNEL void unhandled_af(int) __dead2; static void inline pf_addrcpy(struct pf_addr *dst, const struct pf_addr *src, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: memcpy(&dst->v4, &src->v4, sizeof(dst->v4)); break; #endif /* INET */ #ifdef INET6 case AF_INET6: memcpy(&dst->v6, &src->v6, sizeof(dst->v6)); break; #endif /* INET6 */ default: unhandled_af(af); } } #endif /* * XXX callers not FIB-aware in our version of pf yet. * OpenBSD fixed it later it seems, 2010/05/07 13:33:16 claudio. */ #define PF_MISMATCHAW(aw, x, af, neg, ifp, rtid) \ ( \ (((aw)->type == PF_ADDR_NOROUTE && \ pf_routable((x), (af), NULL, (rtid))) || \ (((aw)->type == PF_ADDR_URPFFAILED && (ifp) != NULL && \ pf_routable((x), (af), (ifp), (rtid))) || \ ((aw)->type == PF_ADDR_TABLE && \ !pfr_match_addr((aw)->p.tbl, (x), (af))) || \ ((aw)->type == PF_ADDR_DYNIFTL && \ !pfi_match_addr((aw)->p.dyn, (x), (af))) || \ ((aw)->type == PF_ADDR_RANGE && \ !pf_match_addr_range(&(aw)->v.a.addr, \ &(aw)->v.a.mask, (x), (af))) || \ ((aw)->type == PF_ADDR_ADDRMASK && \ !PF_AZERO(&(aw)->v.a.mask, (af)) && \ !pf_match_addr(0, &(aw)->v.a.addr, \ &(aw)->v.a.mask, (x), (af))))) != \ (neg) \ ) #define PF_ALGNMNT(off) (((off) % 2) == 0) /* * At the moment there are no rules which have both NAT and RDR actions, * apart from af-to rules, but those don't do source tracking for address * translation. And the r->rdr pool is used for both NAT and RDR. * So there is no PF_SN_RDR.
*/ enum pf_sn_types { PF_SN_LIMIT, PF_SN_NAT, PF_SN_ROUTE, PF_SN_MAX }; typedef enum pf_sn_types pf_sn_types_t; #define PF_SN_TYPE_NAMES { \ "limit source-track", \ "NAT/RDR sticky-address", \ "route sticky-address", \ NULL \ } #ifdef _KERNEL struct pf_kpooladdr { struct pf_addr_wrap addr; TAILQ_ENTRY(pf_kpooladdr) entries; char ifname[IFNAMSIZ]; sa_family_t af; struct pfi_kkif *kif; }; TAILQ_HEAD(pf_kpalist, pf_kpooladdr); struct pf_kpool { struct mtx mtx; struct pf_kpalist list; struct pf_kpooladdr *cur; struct pf_poolhashkey key; struct pf_addr counter; struct pf_mape_portset mape; int tblidx; u_int16_t proxy_port[2]; u_int8_t opts; sa_family_t ipv6_nexthop_af; }; struct pf_rule_actions { struct pf_addr rt_addr; struct pfi_kkif *rt_kif; int32_t rtableid; uint32_t flags; uint16_t qid; uint16_t pqid; uint16_t max_mss; uint16_t dnpipe; uint16_t dnrpipe; /* Reverse direction pipe */ sa_family_t rt_af; uint8_t log; uint8_t set_tos; uint8_t min_ttl; uint8_t set_prio[2]; uint8_t rt; uint8_t allow_opts; uint16_t max_pkt_size; }; union pf_keth_rule_ptr { struct pf_keth_rule *ptr; uint32_t nr; }; struct pf_keth_rule_addr { uint8_t addr[ETHER_ADDR_LEN]; uint8_t mask[ETHER_ADDR_LEN]; bool neg; uint8_t isset; }; struct pf_keth_anchor; TAILQ_HEAD(pf_keth_ruleq, pf_keth_rule); struct pf_keth_ruleset { struct pf_keth_ruleq rules[2]; struct pf_keth_rules { struct pf_keth_ruleq *rules; int open; uint32_t ticket; } active, inactive; struct vnet *vnet; struct pf_keth_anchor *anchor; }; RB_HEAD(pf_keth_anchor_global, pf_keth_anchor); RB_HEAD(pf_keth_anchor_node, pf_keth_anchor); struct pf_keth_anchor { RB_ENTRY(pf_keth_anchor) entry_node; RB_ENTRY(pf_keth_anchor) entry_global; struct pf_keth_anchor *parent; struct pf_keth_anchor_node children; char name[PF_ANCHOR_NAME_SIZE]; char path[MAXPATHLEN]; struct pf_keth_ruleset ruleset; int refcnt; /* anchor rules */ uint8_t anchor_relative; uint8_t anchor_wildcard; }; RB_PROTOTYPE(pf_keth_anchor_node, pf_keth_anchor, entry_node, pf_keth_anchor_compare); RB_PROTOTYPE(pf_keth_anchor_global, pf_keth_anchor, entry_global, pf_keth_anchor_compare); struct pf_keth_rule { #define PFE_SKIP_IFP 0 #define PFE_SKIP_DIR 1 #define PFE_SKIP_PROTO 2 #define PFE_SKIP_SRC_ADDR 3 #define PFE_SKIP_DST_ADDR 4 #define PFE_SKIP_SRC_IP_ADDR 5 #define PFE_SKIP_DST_IP_ADDR 6 #define PFE_SKIP_COUNT 7 union pf_keth_rule_ptr skip[PFE_SKIP_COUNT]; TAILQ_ENTRY(pf_keth_rule) entries; struct pf_keth_anchor *anchor; u_int8_t anchor_relative; u_int8_t anchor_wildcard; uint32_t nr; bool quick; /* Filter */ char ifname[IFNAMSIZ]; struct pfi_kkif *kif; bool ifnot; uint8_t direction; uint16_t proto; struct pf_keth_rule_addr src, dst; struct pf_rule_addr ipsrc, ipdst; char match_tagname[PF_TAG_NAME_SIZE]; uint16_t match_tag; bool match_tag_not; /* Stats */ counter_u64_t evaluations; counter_u64_t packets[2]; counter_u64_t bytes[2]; time_t *timestamp; /* Action */ char qname[PF_QNAME_SIZE]; int qid; char tagname[PF_TAG_NAME_SIZE]; uint16_t tag; char bridge_to_name[IFNAMSIZ]; struct pfi_kkif *bridge_to; uint8_t action; uint16_t dnpipe; uint32_t dnflags; char label[PF_RULE_MAX_LABEL_COUNT][PF_RULE_LABEL_SIZE]; uint32_t ridentifier; }; struct pf_kthreshold { uint32_t limit; uint32_t seconds; struct counter_rate *cr; }; RB_HEAD(pf_krule_global, pf_krule); RB_PROTOTYPE(pf_krule_global, pf_krule, entry_global, pf_krule_compare); struct pf_krule { struct pf_rule_addr src; struct pf_rule_addr dst; struct pf_krule *skip[PF_SKIP_COUNT]; char label[PF_RULE_MAX_LABEL_COUNT][PF_RULE_LABEL_SIZE]; uint32_t ridentifier; 
char ifname[IFNAMSIZ]; char rcv_ifname[IFNAMSIZ]; char qname[PF_QNAME_SIZE]; char pqname[PF_QNAME_SIZE]; char tagname[PF_TAG_NAME_SIZE]; char match_tagname[PF_TAG_NAME_SIZE]; char overload_tblname[PF_TABLE_NAME_SIZE]; TAILQ_ENTRY(pf_krule) entries; struct pf_kpool nat; struct pf_kpool rdr; struct pf_kpool route; struct pf_kthreshold pktrate; struct pf_counter_u64 evaluations; struct pf_counter_u64 packets[2]; struct pf_counter_u64 bytes[2]; time_t *timestamp; struct pfi_kkif *kif; struct pfi_kkif *rcv_kif; struct pf_kanchor *anchor; struct pfr_ktable *overload_tbl; pf_osfp_t os_fingerprint; int32_t rtableid; u_int32_t timeout[PFTM_MAX]; u_int32_t max_states; u_int32_t max_src_nodes; u_int32_t max_src_states; u_int32_t max_src_conn; struct { u_int32_t limit; u_int32_t seconds; } max_src_conn_rate; uint16_t max_pkt_size; u_int16_t qid; u_int16_t pqid; u_int16_t dnpipe; u_int16_t dnrpipe; u_int32_t free_flags; u_int32_t nr; u_int32_t prob; uid_t cuid; pid_t cpid; counter_u64_t states_cur; counter_u64_t states_tot; counter_u64_t src_nodes[PF_SN_MAX]; u_int16_t return_icmp; u_int16_t return_icmp6; u_int16_t max_mss; u_int16_t tag; u_int16_t match_tag; u_int16_t scrub_flags; struct pf_rule_uid uid; struct pf_rule_gid gid; u_int32_t rule_flag; uint32_t rule_ref; u_int8_t action; u_int8_t direction; u_int8_t log; u_int8_t logif; u_int8_t quick; u_int8_t ifnot; u_int8_t match_tag_not; u_int8_t natpass; u_int8_t keep_state; sa_family_t af; u_int8_t proto; uint16_t type; uint16_t code; u_int8_t flags; u_int8_t flagset; u_int8_t min_ttl; u_int8_t allow_opts; u_int8_t rt; u_int8_t return_ttl; u_int8_t tos; u_int8_t set_tos; u_int8_t anchor_relative; u_int8_t anchor_wildcard; u_int8_t flush; u_int8_t prio; u_int8_t set_prio[2]; sa_family_t naf; u_int8_t rcvifnot; struct { struct pf_addr addr; u_int16_t port; } divert; u_int8_t md5sum[PF_MD5_DIGEST_LENGTH]; RB_ENTRY(pf_krule) entry_global; #ifdef PF_WANT_32_TO_64_COUNTER LIST_ENTRY(pf_krule) allrulelist; bool allrulelinked; #endif }; struct pf_krule_item { SLIST_ENTRY(pf_krule_item) entry; struct pf_krule *r; }; SLIST_HEAD(pf_krule_slist, pf_krule_item); struct pf_ksrc_node { LIST_ENTRY(pf_ksrc_node) entry; struct pf_addr addr; struct pf_addr raddr; struct pf_krule_slist match_rules; struct pf_krule *rule; struct pfi_kkif *rkif; counter_u64_t bytes[2]; counter_u64_t packets[2]; u_int32_t states; u_int32_t conn; struct pf_kthreshold conn_rate; u_int32_t creation; u_int32_t expire; sa_family_t af; sa_family_t raf; u_int8_t ruletype; pf_sn_types_t type; struct mtx *lock; }; #endif struct pf_state_scrub { struct timeval pfss_last; /* time received last packet */ u_int32_t pfss_tsecr; /* last echoed timestamp */ u_int32_t pfss_tsval; /* largest timestamp */ u_int32_t pfss_tsval0; /* original timestamp */ u_int16_t pfss_flags; #define PFSS_TIMESTAMP 0x0001 /* modulate timestamp */ #define PFSS_PAWS 0x0010 /* stricter PAWS checks */ #define PFSS_PAWS_IDLED 0x0020 /* was idle too long. 
no PAWS */ #define PFSS_DATA_TS 0x0040 /* timestamp on data packets */ #define PFSS_DATA_NOTS 0x0080 /* no timestamp on data packets */ u_int8_t pfss_ttl; /* stashed TTL */ u_int8_t pad; union { u_int32_t pfss_ts_mod; /* timestamp modulation */ u_int32_t pfss_v_tag; /* SCTP verification tag */ }; }; struct pf_state_host { struct pf_addr addr; u_int16_t port; u_int16_t pad; }; struct pf_state_peer { struct pf_state_scrub *scrub; /* state is scrubbed */ u_int32_t seqlo; /* Max sequence number sent */ u_int32_t seqhi; /* Max the other end ACKd + win */ u_int32_t seqdiff; /* Sequence number modulator */ u_int16_t max_win; /* largest window (pre scaling) */ u_int16_t mss; /* Maximum segment size option */ u_int8_t state; /* active state level */ u_int8_t wscale; /* window scaling factor */ u_int8_t tcp_est; /* Did we reach TCPS_ESTABLISHED */ u_int8_t pad[1]; }; /* Keep synced with struct pf_udp_endpoint. */ struct pf_udp_endpoint_cmp { struct pf_addr addr; uint16_t port; sa_family_t af; uint8_t pad[1]; }; struct pf_udp_endpoint { struct pf_addr addr; uint16_t port; sa_family_t af; uint8_t pad[1]; struct pf_udp_mapping *mapping; LIST_ENTRY(pf_udp_endpoint) entry; }; struct pf_udp_mapping { struct pf_udp_endpoint endpoints[2]; u_int refs; }; /* Keep synced with struct pf_state_key. */ struct pf_state_key_cmp { struct pf_addr addr[2]; u_int16_t port[2]; sa_family_t af; u_int8_t proto; u_int8_t pad[2]; }; struct pf_state_key { struct pf_addr addr[2]; u_int16_t port[2]; sa_family_t af; u_int8_t proto; u_int8_t pad[2]; LIST_ENTRY(pf_state_key) entry; TAILQ_HEAD(, pf_kstate) states[2]; }; #define PF_REVERSED_KEY(state, family) \ (((state)->key[PF_SK_WIRE]->af != (state)->key[PF_SK_STACK]->af) && \ ((state)->key[PF_SK_WIRE]->af != (family)) && \ ((state)->direction == PF_IN)) /* Keep synced with struct pf_kstate. 
*/ struct pf_state_cmp { u_int64_t id; u_int32_t creatorid; u_int8_t direction; u_int8_t pad[3]; }; struct pf_state_scrub_export { uint16_t pfss_flags; uint8_t pfss_ttl; /* stashed TTL */ #define PF_SCRUB_FLAG_VALID 0x01 uint8_t scrub_flag; uint32_t pfss_ts_mod; /* timestamp modulation */ } __packed; struct pf_state_key_export { struct pf_addr addr[2]; uint16_t port[2]; }; struct pf_state_peer_export { struct pf_state_scrub_export scrub; /* state is scrubbed */ uint32_t seqlo; /* Max sequence number sent */ uint32_t seqhi; /* Max the other end ACKd + win */ uint32_t seqdiff; /* Sequence number modulator */ uint16_t max_win; /* largest window (pre scaling) */ uint16_t mss; /* Maximum segment size option */ uint8_t state; /* active state level */ uint8_t wscale; /* window scaling factor */ uint8_t dummy[6]; } __packed; _Static_assert(sizeof(struct pf_state_peer_export) == 32, "size incorrect"); struct pf_state_export { uint64_t version; #define PF_STATE_VERSION 20230404 uint64_t id; char ifname[IFNAMSIZ]; char orig_ifname[IFNAMSIZ]; struct pf_state_key_export key[2]; struct pf_state_peer_export src; struct pf_state_peer_export dst; struct pf_addr rt_addr; uint32_t rule; uint32_t anchor; uint32_t nat_rule; uint32_t creation; uint32_t expire; uint32_t spare0; uint64_t packets[2]; uint64_t bytes[2]; uint32_t creatorid; uint32_t spare1; sa_family_t af; uint8_t proto; uint8_t direction; uint8_t log; uint8_t state_flags_compat; uint8_t timeout; uint8_t sync_flags; uint8_t updates; uint16_t state_flags; uint16_t qid; uint16_t pqid; uint16_t dnpipe; uint16_t dnrpipe; int32_t rtableid; uint8_t min_ttl; uint8_t set_tos; uint16_t max_mss; uint8_t set_prio[2]; uint8_t rt; char rt_ifname[IFNAMSIZ]; uint8_t spare[72]; }; _Static_assert(sizeof(struct pf_state_export) == 384, "size incorrect"); #ifdef _KERNEL struct pf_kstate { /* * Area shared with pf_state_cmp */ u_int64_t id; u_int32_t creatorid; u_int8_t direction; u_int8_t pad[3]; /* * end of the area */ u_int16_t state_flags; u_int8_t timeout; u_int8_t sync_state; /* PFSYNC_S_x */ u_int8_t sync_updates; u_int refs; struct mtx *lock; TAILQ_ENTRY(pf_kstate) sync_list; TAILQ_ENTRY(pf_kstate) key_list[2]; LIST_ENTRY(pf_kstate) entry; struct pf_state_peer src; struct pf_state_peer dst; struct pf_krule_slist match_rules; struct pf_krule *rule; struct pf_krule *anchor; struct pf_krule *nat_rule; struct pf_state_key *key[2]; /* addresses stack and wire */ struct pf_udp_mapping *udp_mapping; struct pfi_kkif *kif; struct pfi_kkif *orig_kif; /* The real kif, even if we're a floating state (i.e. if == V_pfi_all). */ struct pf_ksrc_node *sns[PF_SN_MAX];/* source nodes */ u_int64_t packets[2]; u_int64_t bytes[2]; u_int64_t creation; u_int64_t expire; u_int32_t pfsync_time; struct pf_rule_actions act; u_int16_t tag; u_int16_t if_index_in; u_int16_t if_index_out; }; /* * 6 cache lines per struct, 10 structs per page. * Try to not grow the struct beyond that. 
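 * (384 bytes = 6 * 64-byte cache lines; floor(4096 / 384) = 10
 * structs in a 4 KB page)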
*/ _Static_assert(sizeof(struct pf_kstate) <= 384, "pf_kstate size crosses 384 bytes"); enum pf_test_status { PF_TEST_FAIL = -1, PF_TEST_OK, PF_TEST_QUICK }; struct pf_test_ctx { enum pf_test_status test_status; struct pf_pdesc *pd; struct pf_rule_actions act; uint8_t icmpcode; uint8_t icmptype; int icmp_dir; int state_icmp; int tag; int rewrite; u_short reason; struct pf_src_node *sns[PF_SN_MAX]; struct pf_krule_slist rules; struct pf_krule *nr; struct pf_krule *tr; struct pf_krule **rm; struct pf_krule *a; struct pf_krule **am; struct pf_kruleset **rsm; struct pf_kruleset *arsm; struct pf_kruleset *aruleset; struct pf_state_key *sk; struct pf_state_key *nk; struct tcphdr *th; struct pf_udp_mapping *udp_mapping; struct pf_kpool *nat_pool; uint16_t virtual_type; uint16_t virtual_id; int depth; }; #define PF_ANCHOR_STACK_MAX 32 #endif /* * Unified state structures for pulling states out of the kernel * used by pfsync(4) and the pf(4) ioctl. */ struct pfsync_state_key { struct pf_addr addr[2]; u_int16_t port[2]; }; struct pfsync_state_1301 { u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; struct pf_state_peer_export src; struct pf_state_peer_export dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; u_int32_t nat_rule; u_int32_t creation; u_int32_t expire; u_int32_t packets[2][2]; u_int32_t bytes[2][2]; u_int32_t creatorid; sa_family_t af; u_int8_t proto; u_int8_t direction; u_int8_t __spare[2]; u_int8_t log; u_int8_t state_flags; u_int8_t timeout; u_int8_t sync_flags; - u_int8_t updates; + u_int8_t updates; /* unused */ } __packed; struct pfsync_state_1400 { - /* The beginning of the struct is compatible with previous versions */ + /* The beginning of the struct is compatible with pfsync_state_1301 */ u_int64_t id; char ifname[IFNAMSIZ]; struct pfsync_state_key key[2]; struct pf_state_peer_export src; struct pf_state_peer_export dst; struct pf_addr rt_addr; u_int32_t rule; u_int32_t anchor; u_int32_t nat_rule; u_int32_t creation; u_int32_t expire; u_int32_t packets[2][2]; u_int32_t bytes[2][2]; u_int32_t creatorid; sa_family_t af; u_int8_t proto; u_int8_t direction; u_int16_t state_flags; u_int8_t log; u_int8_t __spare; u_int8_t timeout; u_int8_t sync_flags; - u_int8_t updates; + u_int8_t updates; /* unused */ /* The rest is not */ u_int16_t qid; u_int16_t pqid; u_int16_t dnpipe; u_int16_t dnrpipe; int32_t rtableid; u_int8_t min_ttl; u_int8_t set_tos; u_int16_t max_mss; u_int8_t set_prio[2]; u_int8_t rt; char rt_ifname[IFNAMSIZ]; +} __packed; +struct pfsync_state_1500 { + /* The beginning of the struct is compatible with pfsync_state_1301 */ + u_int64_t id; + char ifname[IFNAMSIZ]; + struct pfsync_state_key key[2]; + struct pf_state_peer_export src; + struct pf_state_peer_export dst; + struct pf_addr rt_addr; + u_int32_t rule; + u_int32_t anchor; + u_int32_t nat_rule; + u_int32_t creation; + u_int32_t expire; + u_int32_t packets[2][2]; + u_int32_t bytes[2][2]; + u_int32_t creatorid; + /* The rest is not, use the opportunity to fix alignment */ + char tagname[PF_TAG_NAME_SIZE]; + char rt_ifname[IFNAMSIZ]; + char orig_ifname[IFNAMSIZ]; + int32_t rtableid; + u_int16_t state_flags; + u_int16_t qid; + u_int16_t pqid; + u_int16_t dnpipe; + u_int16_t dnrpipe; + u_int16_t max_mss; + sa_family_t wire_af; + sa_family_t stack_af; + sa_family_t rt_af; + u_int8_t wire_proto; + u_int8_t stack_proto; + u_int8_t log; + u_int8_t timeout; + u_int8_t direction; + u_int8_t rt; + u_int8_t min_ttl; + u_int8_t set_tos; + u_int8_t set_prio[2]; + u_int8_t spare[3]; /* Improve struct 
union pfsync_state_union { struct pfsync_state_1301 pfs_1301; struct pfsync_state_1400 pfs_1400; + struct pfsync_state_1500 pfs_1500; } __packed; #ifdef _KERNEL /* pfsync */ typedef int pfsync_state_import_t(union pfsync_state_union *, int, int); typedef void pfsync_insert_state_t(struct pf_kstate *); typedef void pfsync_update_state_t(struct pf_kstate *); typedef void pfsync_delete_state_t(struct pf_kstate *); typedef void pfsync_clear_states_t(u_int32_t, const char *); typedef int pfsync_defer_t(struct pf_kstate *, struct mbuf *); typedef void pfsync_detach_ifnet_t(struct ifnet *); typedef void pflow_export_state_t(const struct pf_kstate *); typedef bool pf_addr_filter_func_t(const sa_family_t, const struct pf_addr *); VNET_DECLARE(pfsync_state_import_t *, pfsync_state_import_ptr); #define V_pfsync_state_import_ptr VNET(pfsync_state_import_ptr) VNET_DECLARE(pfsync_insert_state_t *, pfsync_insert_state_ptr); #define V_pfsync_insert_state_ptr VNET(pfsync_insert_state_ptr) VNET_DECLARE(pfsync_update_state_t *, pfsync_update_state_ptr); #define V_pfsync_update_state_ptr VNET(pfsync_update_state_ptr) VNET_DECLARE(pfsync_delete_state_t *, pfsync_delete_state_ptr); #define V_pfsync_delete_state_ptr VNET(pfsync_delete_state_ptr) VNET_DECLARE(pfsync_clear_states_t *, pfsync_clear_states_ptr); #define V_pfsync_clear_states_ptr VNET(pfsync_clear_states_ptr) VNET_DECLARE(pfsync_defer_t *, pfsync_defer_ptr); #define V_pfsync_defer_ptr VNET(pfsync_defer_ptr) VNET_DECLARE(pflow_export_state_t *, pflow_export_state_ptr); #define V_pflow_export_state_ptr VNET(pflow_export_state_ptr) extern pfsync_detach_ifnet_t *pfsync_detach_ifnet_ptr; void pfsync_state_export(union pfsync_state_union *, struct pf_kstate *, int); void pf_state_export(struct pf_state_export *, struct pf_kstate *); /* pflog */ struct pf_kruleset; struct pf_pdesc; typedef int pflog_packet_t(uint8_t, u_int8_t, struct pf_krule *, struct pf_krule *, struct pf_kruleset *, struct pf_pdesc *, int, struct pf_krule *); extern pflog_packet_t *pflog_packet_ptr; #endif /* _KERNEL */ #define PFSYNC_FLAG_SRCNODE 0x04 #define PFSYNC_FLAG_NATSRCNODE 0x08 /* for copies to/from network byte order */ /* ioctl interface also uses network byte order */ void pf_state_peer_hton(const struct pf_state_peer *, struct pf_state_peer_export *); void pf_state_peer_ntoh(const struct pf_state_peer_export *, struct pf_state_peer *); #define pf_state_counter_hton(s,d) do { \ d[0] = htonl((s>>32)&0xffffffff); \ d[1] = htonl(s&0xffffffff); \ } while (0) #define pf_state_counter_from_pfsync(s) \ (((u_int64_t)(s[0])<<32) | (u_int64_t)(s[1])) #define pf_state_counter_ntoh(s,d) do { \ d = ntohl(s[0]); \ d = d<<32; \ d += ntohl(s[1]); \ } while (0) TAILQ_HEAD(pf_krulequeue, pf_krule); struct pf_kanchor; struct pf_kruleset { struct { struct pf_krulequeue queues[2]; struct { struct pf_krulequeue *ptr; u_int32_t rcount; u_int32_t ticket; int open; struct pf_krule_global *tree; } active, inactive; } rules[PF_RULESET_MAX]; struct pf_kanchor *anchor; u_int32_t tticket; int tables; int topen; }; RB_HEAD(pf_kanchor_global, pf_kanchor); RB_HEAD(pf_kanchor_node, pf_kanchor); struct pf_kanchor { RB_ENTRY(pf_kanchor) entry_global; RB_ENTRY(pf_kanchor) entry_node; struct pf_kanchor *parent; struct pf_kanchor_node children; char name[PF_ANCHOR_NAME_SIZE]; char path[MAXPATHLEN]; struct pf_kruleset ruleset; int refcnt; /* anchor rules */ }; RB_PROTOTYPE(pf_kanchor_global, pf_kanchor, entry_global, pf_anchor_compare); RB_PROTOTYPE(pf_kanchor_node, pf_kanchor, entry_node,
pf_kanchor_compare); #define PF_RESERVED_ANCHOR "_pf" #define PFR_TFLAG_PERSIST 0x00000001 #define PFR_TFLAG_CONST 0x00000002 #define PFR_TFLAG_ACTIVE 0x00000004 #define PFR_TFLAG_INACTIVE 0x00000008 #define PFR_TFLAG_REFERENCED 0x00000010 #define PFR_TFLAG_REFDANCHOR 0x00000020 #define PFR_TFLAG_COUNTERS 0x00000040 /* Adjust masks below when adding flags. */ #define PFR_TFLAG_USRMASK (PFR_TFLAG_PERSIST | \ PFR_TFLAG_CONST | \ PFR_TFLAG_COUNTERS) #define PFR_TFLAG_SETMASK (PFR_TFLAG_ACTIVE | \ PFR_TFLAG_INACTIVE | \ PFR_TFLAG_REFERENCED | \ PFR_TFLAG_REFDANCHOR) #define PFR_TFLAG_ALLMASK (PFR_TFLAG_PERSIST | \ PFR_TFLAG_CONST | \ PFR_TFLAG_ACTIVE | \ PFR_TFLAG_INACTIVE | \ PFR_TFLAG_REFERENCED | \ PFR_TFLAG_REFDANCHOR | \ PFR_TFLAG_COUNTERS) struct pf_keth_anchor_stackframe; struct pfr_table { char pfrt_anchor[MAXPATHLEN]; char pfrt_name[PF_TABLE_NAME_SIZE]; u_int32_t pfrt_flags; u_int8_t pfrt_fback; }; enum { PFR_FB_NONE, PFR_FB_MATCH, PFR_FB_ADDED, PFR_FB_DELETED, PFR_FB_CHANGED, PFR_FB_CLEARED, PFR_FB_DUPLICATE, PFR_FB_NOTMATCH, PFR_FB_CONFLICT, PFR_FB_NOCOUNT, PFR_FB_MAX }; struct pfr_addr { union { struct in_addr _pfra_ip4addr; struct in6_addr _pfra_ip6addr; } pfra_u; u_int8_t pfra_af; u_int8_t pfra_net; u_int8_t pfra_not; u_int8_t pfra_fback; }; #define pfra_ip4addr pfra_u._pfra_ip4addr #define pfra_ip6addr pfra_u._pfra_ip6addr enum { PFR_DIR_IN, PFR_DIR_OUT, PFR_DIR_MAX }; enum { PFR_OP_BLOCK, PFR_OP_PASS, PFR_OP_ADDR_MAX, PFR_OP_TABLE_MAX }; enum { PFR_TYPE_PACKETS, PFR_TYPE_BYTES, PFR_TYPE_MAX }; #define PFR_NUM_COUNTERS (PFR_DIR_MAX * PFR_OP_ADDR_MAX * PFR_TYPE_MAX) #define PFR_OP_XPASS PFR_OP_ADDR_MAX struct pfr_astats { struct pfr_addr pfras_a; u_int64_t pfras_packets[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; u_int64_t pfras_bytes[PFR_DIR_MAX][PFR_OP_ADDR_MAX]; time_t pfras_tzero; }; enum { PFR_REFCNT_RULE, PFR_REFCNT_ANCHOR, PFR_REFCNT_MAX }; struct pfr_tstats { struct pfr_table pfrts_t; u_int64_t pfrts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; u_int64_t pfrts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; u_int64_t pfrts_match; u_int64_t pfrts_nomatch; time_t pfrts_tzero; int pfrts_cnt; int pfrts_refcnt[PFR_REFCNT_MAX]; }; #ifdef _KERNEL struct pfr_kstate_counter { counter_u64_t pkc_pcpu; u_int64_t pkc_zero; }; static inline int pfr_kstate_counter_init(struct pfr_kstate_counter *pfrc, int flags) { pfrc->pkc_zero = 0; pfrc->pkc_pcpu = counter_u64_alloc(flags); if (pfrc->pkc_pcpu == NULL) return (ENOMEM); return (0); } static inline void pfr_kstate_counter_deinit(struct pfr_kstate_counter *pfrc) { counter_u64_free(pfrc->pkc_pcpu); } static inline u_int64_t pfr_kstate_counter_fetch(struct pfr_kstate_counter *pfrc) { u_int64_t c; c = counter_u64_fetch(pfrc->pkc_pcpu); c -= pfrc->pkc_zero; return (c); } static inline void pfr_kstate_counter_zero(struct pfr_kstate_counter *pfrc) { u_int64_t c; c = counter_u64_fetch(pfrc->pkc_pcpu); pfrc->pkc_zero = c; } static inline void pfr_kstate_counter_add(struct pfr_kstate_counter *pfrc, int64_t n) { counter_u64_add(pfrc->pkc_pcpu, n); } struct pfr_ktstats { struct pfr_table pfrts_t; struct pfr_kstate_counter pfrkts_packets[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; struct pfr_kstate_counter pfrkts_bytes[PFR_DIR_MAX][PFR_OP_TABLE_MAX]; struct pfr_kstate_counter pfrkts_match; struct pfr_kstate_counter pfrkts_nomatch; time_t pfrkts_tzero; int pfrkts_cnt; int pfrkts_refcnt[PFR_REFCNT_MAX]; }; #endif /* _KERNEL */ #define pfrts_name pfrts_t.pfrt_name #define pfrts_flags pfrts_t.pfrt_flags #ifndef _SOCKADDR_UNION_DEFINED #define _SOCKADDR_UNION_DEFINED union sockaddr_union { 
struct sockaddr sa; struct sockaddr_in sin; struct sockaddr_in6 sin6; }; #endif /* _SOCKADDR_UNION_DEFINED */ struct pfr_kcounters { counter_u64_t pfrkc_counters; time_t pfrkc_tzero; }; #define pfr_kentry_counter(kc, dir, op, t) \ ((kc)->pfrkc_counters + \ (dir) * PFR_OP_ADDR_MAX * PFR_TYPE_MAX + (op) * PFR_TYPE_MAX + (t))
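pfr_kentry_counter() flattens a (direction, operation, type) triple into an index into the counters behind pfrkc_counters; with PFR_DIR_MAX, PFR_OP_ADDR_MAX and PFR_TYPE_MAX all equal to 2 per the enums above, the index is dir*4 + op*2 + t, spanning exactly PFR_NUM_COUNTERS == 8 slots. A hedged usage sketch (ke and pktlen are hypothetical; struct pfr_kentry is defined just below, and the call shape mirrors how such per-CPU counters are bumped with counter_u64_add()):

/* charge one passed outbound packet's bytes: index 1*4 + 1*2 + 1 == 7 */
counter_u64_add(pfr_kentry_counter(&ke->pfrke_counters, PFR_DIR_OUT, PFR_OP_PASS, PFR_TYPE_BYTES), pktlen);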
#ifdef _KERNEL SLIST_HEAD(pfr_kentryworkq, pfr_kentry); struct pfr_kentry { struct radix_node pfrke_node[2]; union sockaddr_union pfrke_sa; SLIST_ENTRY(pfr_kentry) pfrke_workq; struct pfr_kcounters pfrke_counters; u_int8_t pfrke_af; u_int8_t pfrke_net; u_int8_t pfrke_not; u_int8_t pfrke_mark; }; SLIST_HEAD(pfr_ktableworkq, pfr_ktable); RB_HEAD(pfr_ktablehead, pfr_ktable); struct pfr_ktable { struct pfr_ktstats pfrkt_kts; RB_ENTRY(pfr_ktable) pfrkt_tree; SLIST_ENTRY(pfr_ktable) pfrkt_workq; struct radix_node_head *pfrkt_ip4; struct radix_node_head *pfrkt_ip6; struct pfr_ktable *pfrkt_shadow; struct pfr_ktable *pfrkt_root; struct pf_kruleset *pfrkt_rs; long pfrkt_larg; int pfrkt_nflags; }; #define pfrkt_t pfrkt_kts.pfrts_t #define pfrkt_name pfrkt_t.pfrt_name #define pfrkt_anchor pfrkt_t.pfrt_anchor #define pfrkt_ruleset pfrkt_t.pfrt_ruleset #define pfrkt_flags pfrkt_t.pfrt_flags #define pfrkt_cnt pfrkt_kts.pfrkts_cnt #define pfrkt_refcnt pfrkt_kts.pfrkts_refcnt #define pfrkt_packets pfrkt_kts.pfrkts_packets #define pfrkt_bytes pfrkt_kts.pfrkts_bytes #define pfrkt_match pfrkt_kts.pfrkts_match #define pfrkt_nomatch pfrkt_kts.pfrkts_nomatch #define pfrkt_tzero pfrkt_kts.pfrkts_tzero #endif #ifdef _KERNEL struct pfi_kkif { char pfik_name[IFNAMSIZ]; union { RB_ENTRY(pfi_kkif) _pfik_tree; LIST_ENTRY(pfi_kkif) _pfik_list; } _pfik_glue; #define pfik_tree _pfik_glue._pfik_tree #define pfik_list _pfik_glue._pfik_list struct pf_counter_u64 pfik_packets[2][2][2]; struct pf_counter_u64 pfik_bytes[2][2][2]; time_t pfik_tzero; u_int pfik_flags; struct ifnet *pfik_ifp; struct ifg_group *pfik_group; u_int pfik_rulerefs; TAILQ_HEAD(, pfi_dynaddr) pfik_dynaddrs; #ifdef PF_WANT_32_TO_64_COUNTER LIST_ENTRY(pfi_kkif) pfik_allkiflist; #endif }; #endif #define PFI_IFLAG_REFS 0x0001 /* has state references */ #define PFI_IFLAG_SKIP 0x0100 /* skip filtering on interface */ #define PFI_IFLAG_ANY 0x0200 /* match any non-loopback interface */ #ifdef _KERNEL struct pf_sctp_multihome_job; TAILQ_HEAD(pf_sctp_multihome_jobs, pf_sctp_multihome_job); struct pf_pdesc { struct { int done; uid_t uid; gid_t gid; } lookup; u_int64_t tot_len; /* Make Mickey money */ union pf_headers { struct tcphdr tcp; struct udphdr udp; struct sctphdr sctp; struct icmp icmp; #ifdef INET6 struct icmp6_hdr icmp6; #endif /* INET6 */ char any[0]; } hdr; struct pf_addr nsaddr; /* src address after NAT */ struct pf_addr ndaddr; /* dst address after NAT */ struct pfi_kkif *kif; /* incoming interface */ struct mbuf *m; struct pf_addr *src; /* src address */ struct pf_addr *dst; /* dst address */ struct pf_addr osrc; struct pf_addr odst; u_int16_t *pcksum; /* proto cksum */ u_int16_t *sport; u_int16_t *dport; u_int16_t osport; u_int16_t odport; u_int16_t nsport; /* src port after NAT */ u_int16_t ndport; /* dst port after NAT */ struct pf_mtag *pf_mtag; struct pf_rule_actions act; u_int32_t off; /* protocol header offset */ bool df; /* IPv4 Don't fragment flag. */ u_int32_t hdrlen; /* protocol header length */ u_int32_t p_len; /* total length of protocol payload */ u_int32_t extoff; /* extension header offset */ u_int32_t fragoff; /* fragment header offset */ u_int32_t jumbolen; /* length from v6 jumbo header */ u_int32_t badopts; /* v4 options or v6 routing headers */ #define PF_OPT_OTHER 0x0001 #define PF_OPT_JUMBO 0x0002 #define PF_OPT_ROUTER_ALERT 0x0004 u_int16_t *ip_sum; u_int16_t flags; /* Let SCRUB trigger behavior in * state code. Easier than tags */ #define PFDESC_TCP_NORM 0x0001 /* TCP shall be statefully scrubbed */ u_int16_t virtual_proto; #define PF_VPROTO_FRAGMENT 256 sa_family_t af; sa_family_t naf; u_int8_t proto; u_int8_t tos; u_int8_t ttl; u_int8_t dir; /* direction */ u_int8_t sidx; /* key index for source */ u_int8_t didx; /* key index for destination */ #define PFDESC_SCTP_INIT 0x0001 #define PFDESC_SCTP_INIT_ACK 0x0002 #define PFDESC_SCTP_COOKIE 0x0004 #define PFDESC_SCTP_COOKIE_ACK 0x0008 #define PFDESC_SCTP_ABORT 0x0010 #define PFDESC_SCTP_SHUTDOWN 0x0020 #define PFDESC_SCTP_SHUTDOWN_COMPLETE 0x0040 #define PFDESC_SCTP_DATA 0x0080 #define PFDESC_SCTP_ASCONF 0x0100 #define PFDESC_SCTP_HEARTBEAT 0x0200 #define PFDESC_SCTP_HEARTBEAT_ACK 0x0400 #define PFDESC_SCTP_OTHER 0x0800 #define PFDESC_SCTP_ADD_IP 0x1000 u_int16_t sctp_flags; u_int32_t sctp_initiate_tag; u_int16_t sctp_dummy_sum; struct pf_krule *related_rule; struct pf_sctp_multihome_jobs sctp_multihome_jobs; }; struct pf_sctp_multihome_job { TAILQ_ENTRY(pf_sctp_multihome_job) next; struct pf_pdesc pd; struct pf_addr src; struct pf_addr dst; int op; }; #endif /* flags for RDR options */ #define PF_DPORT_RANGE 0x01 /* Dest port uses range */ #define PF_RPORT_RANGE 0x02 /* RDR'ed port uses range */ /* UDP state enumeration */ #define PFUDPS_NO_TRAFFIC 0 #define PFUDPS_SINGLE 1 #define PFUDPS_MULTIPLE 2 #define PFUDPS_NSTATES 3 /* number of state levels */ #define PFUDPS_NAMES { \ "NO_TRAFFIC", \ "SINGLE", \ "MULTIPLE", \ NULL \ } /* Other protocol state enumeration */ #define PFOTHERS_NO_TRAFFIC 0 #define PFOTHERS_SINGLE 1 #define PFOTHERS_MULTIPLE 2 #define PFOTHERS_NSTATES 3 /* number of state levels */ #define PFOTHERS_NAMES { \ "NO_TRAFFIC", \ "SINGLE", \ "MULTIPLE", \ NULL \ } #define ACTION_SET(a, x) \ do { \ if ((a) != NULL) \ *(a) = (x); \ } while (0) #define REASON_SET(a, x) \ do { \ SDT_PROBE2(pf, , test, reason_set, x, __LINE__); \ if ((a) != NULL) \ *(a) = (x); \ if (x < PFRES_MAX) \ counter_u64_add(V_pf_status.counters[x], 1); \ } while (0) enum pf_syncookies_mode { PF_SYNCOOKIES_NEVER = 0, PF_SYNCOOKIES_ALWAYS = 1, PF_SYNCOOKIES_ADAPTIVE = 2, PF_SYNCOOKIES_MODE_MAX = PF_SYNCOOKIES_ADAPTIVE }; #define PF_SYNCOOKIES_HIWATPCT 25 #define PF_SYNCOOKIES_LOWATPCT (PF_SYNCOOKIES_HIWATPCT / 2) #ifdef _KERNEL struct pf_kstatus { counter_u64_t counters[PFRES_MAX]; /* reason for passing/dropping */ counter_u64_t lcounters[KLCNT_MAX]; /* limit counters */ struct pf_counter_u64 fcounters[FCNT_MAX]; /* state operation counters */ counter_u64_t scounters[SCNT_MAX]; /* src_node operation counters */ uint32_t states; uint32_t src_nodes; uint32_t running; uint32_t since; uint32_t debug; uint32_t hostid; char ifname[IFNAMSIZ]; uint8_t pf_chksum[PF_MD5_DIGEST_LENGTH]; bool keep_counters; enum pf_syncookies_mode syncookies_mode; bool syncookies_active; uint64_t syncookies_inflight[2]; uint32_t states_halfopen; uint32_t reass; }; #endif struct pf_divert { union { struct in_addr ipv4; struct in6_addr ipv6; } addr; u_int16_t port; }; #define PFFRAG_FRENT_HIWAT 5000 /* Number of
fragment entries */ #define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ struct pf_fragment_tag { uint16_t ft_hdrlen; /* header length of reassembled pkt */ uint16_t ft_extoff; /* last extension header offset or 0 */ uint16_t ft_maxlen; /* maximum fragment payload length */ uint32_t ft_id; /* fragment id */ }; /* * Limit the length of the fragment queue traversal. Remember * search entry points based on the fragment offset. */ #define PF_FRAG_ENTRY_POINTS 16 /* * The number of entries in the fragment queue must be limited * to avoid DoS by linear searching. Instead of a global limit, * use a limit per entry point. For large packets these sum up. */ #define PF_FRAG_ENTRY_LIMIT 64 /* * ioctl parameter structures */ struct pfioc_pooladdr { u_int32_t action; u_int32_t ticket; u_int32_t nr; u_int32_t r_num; u_int8_t r_action; u_int8_t r_last; u_int8_t af; char anchor[MAXPATHLEN]; struct pf_pooladdr addr; }; struct pfioc_rule { u_int32_t action; u_int32_t ticket; u_int32_t pool_ticket; u_int32_t nr; char anchor[MAXPATHLEN]; char anchor_call[MAXPATHLEN]; struct pf_rule rule; }; struct pfioc_natlook { struct pf_addr saddr; struct pf_addr daddr; struct pf_addr rsaddr; struct pf_addr rdaddr; u_int16_t sport; u_int16_t dport; u_int16_t rsport; u_int16_t rdport; sa_family_t af; u_int8_t proto; u_int8_t direction; }; struct pfioc_state { struct pfsync_state_1301 state; }; struct pfioc_src_node_kill { sa_family_t psnk_af; struct pf_rule_addr psnk_src; struct pf_rule_addr psnk_dst; u_int psnk_killed; }; #ifdef _KERNEL struct pf_kstate_kill { struct pf_state_cmp psk_pfcmp; sa_family_t psk_af; int psk_proto; struct pf_rule_addr psk_src; struct pf_rule_addr psk_dst; struct pf_rule_addr psk_rt_addr; char psk_ifname[IFNAMSIZ]; char psk_label[PF_RULE_LABEL_SIZE]; u_int psk_killed; bool psk_kill_match; bool psk_nat; }; #endif struct pfioc_state_kill { struct pf_state_cmp psk_pfcmp; sa_family_t psk_af; int psk_proto; struct pf_rule_addr psk_src; struct pf_rule_addr psk_dst; char psk_ifname[IFNAMSIZ]; char psk_label[PF_RULE_LABEL_SIZE]; u_int psk_killed; }; struct pfioc_states { int ps_len; union { void *ps_buf; struct pfsync_state_1301 *ps_states; }; }; struct pfioc_states_v2 { int ps_len; uint64_t ps_req_version; union { void *ps_buf; struct pf_state_export *ps_states; }; }; struct pfioc_src_nodes { int psn_len; union { void *psn_buf; struct pf_src_node *psn_src_nodes; }; }; struct pfioc_if { char ifname[IFNAMSIZ]; }; struct pfioc_tm { int timeout; int seconds; }; struct pfioc_limit { int index; unsigned limit; }; struct pfioc_altq_v0 { u_int32_t action; u_int32_t ticket; u_int32_t nr; struct pf_altq_v0 altq; }; struct pfioc_altq_v1 { u_int32_t action; u_int32_t ticket; u_int32_t nr; /* * Placed here so code that only uses the above parameters can be * written entirely in terms of the v0 or v1 type. */ u_int32_t version; struct pf_altq_v1 altq; }; /* * Latest version of struct pfioc_altq_vX. This must move in lock-step with * the latest version of struct pf_altq_vX as it has that struct as a * member. */ #define PFIOC_ALTQ_VERSION PF_ALTQ_VERSION struct pfioc_qstats_v0 { u_int32_t ticket; u_int32_t nr; void *buf; int nbytes; u_int8_t scheduler; }; struct pfioc_qstats_v1 { u_int32_t ticket; u_int32_t nr; void *buf; int nbytes; u_int8_t scheduler; /* * Placed here so code that only uses the above parameters can be * written entirely in terms of the v0 or v1 type. 
*/ u_int32_t version; /* Requested version of stats struct */ }; /* Latest version of struct pfioc_qstats_vX */ #define PFIOC_QSTATS_VERSION 1 struct pfioc_ruleset { u_int32_t nr; char path[MAXPATHLEN]; char name[PF_ANCHOR_NAME_SIZE]; }; #define PF_RULESET_ALTQ (PF_RULESET_MAX) #define PF_RULESET_TABLE (PF_RULESET_MAX+1) #define PF_RULESET_ETH (PF_RULESET_MAX+2) struct pfioc_trans { int size; /* number of elements */ int esize; /* size of each element in bytes */ struct pfioc_trans_e { int rs_num; char anchor[MAXPATHLEN]; u_int32_t ticket; } *array; }; #define PFR_FLAG_ATOMIC 0x00000001 /* unused */ #define PFR_FLAG_DUMMY 0x00000002 #define PFR_FLAG_FEEDBACK 0x00000004 #define PFR_FLAG_CLSTATS 0x00000008 #define PFR_FLAG_ADDRSTOO 0x00000010 #define PFR_FLAG_REPLACE 0x00000020 #define PFR_FLAG_ALLRSETS 0x00000040 #define PFR_FLAG_ALLMASK 0x0000007F #ifdef _KERNEL #define PFR_FLAG_USERIOCTL 0x10000000 #endif struct pfioc_table { struct pfr_table pfrio_table; void *pfrio_buffer; int pfrio_esize; int pfrio_size; int pfrio_size2; int pfrio_nadd; int pfrio_ndel; int pfrio_nchange; int pfrio_flags; u_int32_t pfrio_ticket; }; #define pfrio_exists pfrio_nadd #define pfrio_nzero pfrio_nadd #define pfrio_nmatch pfrio_nadd #define pfrio_naddr pfrio_size2 #define pfrio_setflag pfrio_size2 #define pfrio_clrflag pfrio_nadd struct pfioc_iface { char pfiio_name[IFNAMSIZ]; void *pfiio_buffer; int pfiio_esize; int pfiio_size; int pfiio_nzero; int pfiio_flags; }; /* * ioctl operations */ #define DIOCSTART _IO ('D', 1) #define DIOCSTOP _IO ('D', 2) #define DIOCADDRULE _IOWR('D', 4, struct pfioc_rule) #define DIOCADDRULENV _IOWR('D', 4, struct pfioc_nv) #define DIOCGETRULES _IOWR('D', 6, struct pfioc_rule) #define DIOCGETRULENV _IOWR('D', 7, struct pfioc_nv) #define DIOCCLRSTATESNV _IOWR('D', 18, struct pfioc_nv) #define DIOCGETSTATE _IOWR('D', 19, struct pfioc_state) #define DIOCGETSTATENV _IOWR('D', 19, struct pfioc_nv) #define DIOCSETSTATUSIF _IOWR('D', 20, struct pfioc_if) #define DIOCGETSTATUSNV _IOWR('D', 21, struct pfioc_nv) #define DIOCCLRSTATUS _IO ('D', 22) #define DIOCNATLOOK _IOWR('D', 23, struct pfioc_natlook) #define DIOCSETDEBUG _IOWR('D', 24, u_int32_t) #ifdef COMPAT_FREEBSD14 #define DIOCGETSTATES _IOWR('D', 25, struct pfioc_states) #endif #define DIOCCHANGERULE _IOWR('D', 26, struct pfioc_rule) #define DIOCSETTIMEOUT _IOWR('D', 29, struct pfioc_tm) #define DIOCGETTIMEOUT _IOWR('D', 30, struct pfioc_tm) #define DIOCADDSTATE _IOWR('D', 37, struct pfioc_state) #define DIOCCLRRULECTRS _IO ('D', 38) #define DIOCGETLIMIT _IOWR('D', 39, struct pfioc_limit) #define DIOCSETLIMIT _IOWR('D', 40, struct pfioc_limit) #define DIOCKILLSTATESNV _IOWR('D', 41, struct pfioc_nv) #define DIOCSTARTALTQ _IO ('D', 42) #define DIOCSTOPALTQ _IO ('D', 43) #define DIOCADDALTQV0 _IOWR('D', 45, struct pfioc_altq_v0) #define DIOCADDALTQV1 _IOWR('D', 45, struct pfioc_altq_v1) #define DIOCGETALTQSV0 _IOWR('D', 47, struct pfioc_altq_v0) #define DIOCGETALTQSV1 _IOWR('D', 47, struct pfioc_altq_v1) #define DIOCGETALTQV0 _IOWR('D', 48, struct pfioc_altq_v0) #define DIOCGETALTQV1 _IOWR('D', 48, struct pfioc_altq_v1) #define DIOCCHANGEALTQV0 _IOWR('D', 49, struct pfioc_altq_v0) #define DIOCCHANGEALTQV1 _IOWR('D', 49, struct pfioc_altq_v1) #define DIOCGETQSTATSV0 _IOWR('D', 50, struct pfioc_qstats_v0) #define DIOCGETQSTATSV1 _IOWR('D', 50, struct pfioc_qstats_v1) #define DIOCBEGINADDRS _IOWR('D', 51, struct pfioc_pooladdr) #define DIOCADDADDR _IOWR('D', 52, struct pfioc_pooladdr) #define DIOCGETADDRS _IOWR('D', 53, struct 
pfioc_pooladdr) #define DIOCGETADDR _IOWR('D', 54, struct pfioc_pooladdr) #define DIOCCHANGEADDR _IOWR('D', 55, struct pfioc_pooladdr) #define DIOCGETRULESETS _IOWR('D', 58, struct pfioc_ruleset) #define DIOCGETRULESET _IOWR('D', 59, struct pfioc_ruleset) #define DIOCRCLRTABLES _IOWR('D', 60, struct pfioc_table) #define DIOCRADDTABLES _IOWR('D', 61, struct pfioc_table) #define DIOCRDELTABLES _IOWR('D', 62, struct pfioc_table) #define DIOCRGETTABLES _IOWR('D', 63, struct pfioc_table) #define DIOCRGETTSTATS _IOWR('D', 64, struct pfioc_table) #define DIOCRCLRTSTATS _IOWR('D', 65, struct pfioc_table) #define DIOCRCLRADDRS _IOWR('D', 66, struct pfioc_table) #define DIOCRADDADDRS _IOWR('D', 67, struct pfioc_table) #define DIOCRDELADDRS _IOWR('D', 68, struct pfioc_table) #define DIOCRSETADDRS _IOWR('D', 69, struct pfioc_table) #define DIOCRGETADDRS _IOWR('D', 70, struct pfioc_table) #define DIOCRGETASTATS _IOWR('D', 71, struct pfioc_table) #define DIOCRCLRASTATS _IOWR('D', 72, struct pfioc_table) #define DIOCRTSTADDRS _IOWR('D', 73, struct pfioc_table) #define DIOCRSETTFLAGS _IOWR('D', 74, struct pfioc_table) #define DIOCRINADEFINE _IOWR('D', 77, struct pfioc_table) #define DIOCOSFPFLUSH _IO('D', 78) #define DIOCOSFPADD _IOWR('D', 79, struct pf_osfp_ioctl) #define DIOCOSFPGET _IOWR('D', 80, struct pf_osfp_ioctl) #define DIOCXBEGIN _IOWR('D', 81, struct pfioc_trans) #define DIOCXCOMMIT _IOWR('D', 82, struct pfioc_trans) #define DIOCXROLLBACK _IOWR('D', 83, struct pfioc_trans) #define DIOCGETSRCNODES _IOWR('D', 84, struct pfioc_src_nodes) #define DIOCCLRSRCNODES _IO('D', 85) #define DIOCSETHOSTID _IOWR('D', 86, u_int32_t) #define DIOCIGETIFACES _IOWR('D', 87, struct pfioc_iface) #define DIOCSETIFFLAG _IOWR('D', 89, struct pfioc_iface) #define DIOCCLRIFFLAG _IOWR('D', 90, struct pfioc_iface) #define DIOCKILLSRCNODES _IOWR('D', 91, struct pfioc_src_node_kill) #define DIOCGIFSPEEDV0 _IOWR('D', 92, struct pf_ifspeed_v0) #define DIOCGIFSPEEDV1 _IOWR('D', 92, struct pf_ifspeed_v1) #ifdef COMPAT_FREEBSD14 #define DIOCGETSTATESV2 _IOWR('D', 93, struct pfioc_states_v2) #endif #define DIOCGETSYNCOOKIES _IOWR('D', 94, struct pfioc_nv) #define DIOCSETSYNCOOKIES _IOWR('D', 95, struct pfioc_nv) #define DIOCKEEPCOUNTERS _IOWR('D', 96, struct pfioc_nv) #define DIOCKEEPCOUNTERS_FREEBSD13 _IOWR('D', 92, struct pfioc_nv) #define DIOCADDETHRULE _IOWR('D', 97, struct pfioc_nv) #define DIOCGETETHRULE _IOWR('D', 98, struct pfioc_nv) #define DIOCGETETHRULES _IOWR('D', 99, struct pfioc_nv) #define DIOCGETETHRULESETS _IOWR('D', 100, struct pfioc_nv) #define DIOCGETETHRULESET _IOWR('D', 101, struct pfioc_nv) #define DIOCSETREASS _IOWR('D', 102, u_int32_t) struct pf_ifspeed_v0 { char ifname[IFNAMSIZ]; u_int32_t baudrate; }; struct pf_ifspeed_v1 { char ifname[IFNAMSIZ]; u_int32_t baudrate32; /* layout identical to struct pf_ifspeed_v0 up to this point */ u_int64_t baudrate; }; /* Latest version of struct pf_ifspeed_vX */ #define PF_IFSPEED_VERSION 1
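The _v0/_v1 pattern keeps old binaries working: where layouts differ, each version gets its own ioctl command even under the same code number (DIOCGIFSPEEDV0 and DIOCGIFSPEEDV1 both use 'D'/92, but _IOWR() encodes the argument size, so the resulting command words differ). A hedged userland sketch of querying an interface speed with the latest layout (error handling abbreviated; pf_fd is a hypothetical descriptor from open("/dev/pf", O_RDONLY)):

struct pf_ifspeed_v1 ifs = { .ifname = "ix0" };
if (ioctl(pf_fd, DIOCGIFSPEEDV1, &ifs) == -1)
        err(1, "DIOCGIFSPEEDV1");
printf("%s: %ju bit/s\n", ifs.ifname, (uintmax_t)ifs.baudrate);

Consumers that define PFIOC_USE_LATEST before including this header get the same effect through the unversioned DIOCGIFSPEED alias defined below.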
/* * Compatibility and convenience macros */ #ifndef _KERNEL #ifdef PFIOC_USE_LATEST /* * Maintaining in-tree consumers of the ioctl interface is easier when that * code can be written in terms of old names that refer to the latest interface * version as that reduces the required changes in the consumers to those * that are functionally necessary to accommodate a new interface version. */ #define pfioc_altq __CONCAT(pfioc_altq_v, PFIOC_ALTQ_VERSION) #define pfioc_qstats __CONCAT(pfioc_qstats_v, PFIOC_QSTATS_VERSION) #define pf_ifspeed __CONCAT(pf_ifspeed_v, PF_IFSPEED_VERSION) #define DIOCADDALTQ __CONCAT(DIOCADDALTQV, PFIOC_ALTQ_VERSION) #define DIOCGETALTQS __CONCAT(DIOCGETALTQSV, PFIOC_ALTQ_VERSION) #define DIOCGETALTQ __CONCAT(DIOCGETALTQV, PFIOC_ALTQ_VERSION) #define DIOCCHANGEALTQ __CONCAT(DIOCCHANGEALTQV, PFIOC_ALTQ_VERSION) #define DIOCGETQSTATS __CONCAT(DIOCGETQSTATSV, PFIOC_QSTATS_VERSION) #define DIOCGIFSPEED __CONCAT(DIOCGIFSPEEDV, PF_IFSPEED_VERSION) #else /* * When building out-of-tree code that is written for the old interface, * such as may exist in ports for example, resolve the old struct tags and * ioctl command names to the v0 versions. */ #define pfioc_altq __CONCAT(pfioc_altq_v, 0) #define pfioc_qstats __CONCAT(pfioc_qstats_v, 0) #define pf_ifspeed __CONCAT(pf_ifspeed_v, 0) #define DIOCADDALTQ __CONCAT(DIOCADDALTQV, 0) #define DIOCGETALTQS __CONCAT(DIOCGETALTQSV, 0) #define DIOCGETALTQ __CONCAT(DIOCGETALTQV, 0) #define DIOCCHANGEALTQ __CONCAT(DIOCCHANGEALTQV, 0) #define DIOCGETQSTATS __CONCAT(DIOCGETQSTATSV, 0) #define DIOCGIFSPEED __CONCAT(DIOCGIFSPEEDV, 0) #endif /* PFIOC_USE_LATEST */ #endif /* _KERNEL */ #ifdef _KERNEL LIST_HEAD(pf_ksrc_node_list, pf_ksrc_node); struct pf_srchash { struct pf_ksrc_node_list nodes; struct mtx lock; }; struct pf_keyhash { LIST_HEAD(, pf_state_key) keys; struct mtx lock; }; struct pf_idhash { LIST_HEAD(, pf_kstate) states; struct mtx lock; }; struct pf_udpendpointhash { LIST_HEAD(, pf_udp_endpoint) endpoints; /* refcount is synchronized on the source endpoint's row lock */ struct mtx lock; }; extern u_long pf_ioctl_maxcount; VNET_DECLARE(u_long, pf_hashmask); #define V_pf_hashmask VNET(pf_hashmask) VNET_DECLARE(u_long, pf_srchashmask); #define V_pf_srchashmask VNET(pf_srchashmask) VNET_DECLARE(u_long, pf_udpendpointhashmask); #define V_pf_udpendpointhashmask VNET(pf_udpendpointhashmask) #define PF_HASHSIZ (131072) #define PF_SRCHASHSIZ (PF_HASHSIZ/4) #define PF_UDPENDHASHSIZ (PF_HASHSIZ/4) VNET_DECLARE(struct pf_keyhash *, pf_keyhash); VNET_DECLARE(struct pf_idhash *, pf_idhash); VNET_DECLARE(struct pf_udpendpointhash *, pf_udpendpointhash); #define V_pf_keyhash VNET(pf_keyhash) #define V_pf_idhash VNET(pf_idhash) #define V_pf_udpendpointhash VNET(pf_udpendpointhash) VNET_DECLARE(struct pf_srchash *, pf_srchash); #define V_pf_srchash VNET(pf_srchash) #define PF_IDHASHID(id) (be64toh(id) % (V_pf_hashmask + 1)) #define PF_IDHASH(s) PF_IDHASHID((s)->id) VNET_DECLARE(void *, pf_swi_cookie); #define V_pf_swi_cookie VNET(pf_swi_cookie) VNET_DECLARE(struct intr_event *, pf_swi_ie); #define V_pf_swi_ie VNET(pf_swi_ie) VNET_DECLARE(struct unrhdr64, pf_stateid); #define V_pf_stateid VNET(pf_stateid) TAILQ_HEAD(pf_altqqueue, pf_altq); VNET_DECLARE(struct pf_altqqueue, pf_altqs[4]); #define V_pf_altqs VNET(pf_altqs) VNET_DECLARE(struct pf_kpalist, pf_pabuf[3]); #define V_pf_pabuf VNET(pf_pabuf) VNET_DECLARE(u_int32_t, ticket_altqs_active); #define V_ticket_altqs_active VNET(ticket_altqs_active) VNET_DECLARE(u_int32_t, ticket_altqs_inactive); #define V_ticket_altqs_inactive VNET(ticket_altqs_inactive) VNET_DECLARE(int, altqs_inactive_open); #define V_altqs_inactive_open VNET(altqs_inactive_open) VNET_DECLARE(u_int32_t, ticket_pabuf); #define V_ticket_pabuf VNET(ticket_pabuf) VNET_DECLARE(struct pf_altqqueue *, pf_altqs_active); #define V_pf_altqs_active VNET(pf_altqs_active) VNET_DECLARE(struct pf_altqqueue *,
pf_altq_ifs_active); #define V_pf_altq_ifs_active VNET(pf_altq_ifs_active) VNET_DECLARE(struct pf_altqqueue *, pf_altqs_inactive); #define V_pf_altqs_inactive VNET(pf_altqs_inactive) VNET_DECLARE(struct pf_altqqueue *, pf_altq_ifs_inactive); #define V_pf_altq_ifs_inactive VNET(pf_altq_ifs_inactive) VNET_DECLARE(struct pf_krulequeue, pf_unlinked_rules); #define V_pf_unlinked_rules VNET(pf_unlinked_rules) #ifdef PF_WANT_32_TO_64_COUNTER LIST_HEAD(allkiflist_head, pfi_kkif); VNET_DECLARE(struct allkiflist_head, pf_allkiflist); #define V_pf_allkiflist VNET(pf_allkiflist) VNET_DECLARE(size_t, pf_allkifcount); #define V_pf_allkifcount VNET(pf_allkifcount) VNET_DECLARE(struct pfi_kkif *, pf_kifmarker); #define V_pf_kifmarker VNET(pf_kifmarker) LIST_HEAD(allrulelist_head, pf_krule); VNET_DECLARE(struct allrulelist_head, pf_allrulelist); #define V_pf_allrulelist VNET(pf_allrulelist) VNET_DECLARE(size_t, pf_allrulecount); #define V_pf_allrulecount VNET(pf_allrulecount) VNET_DECLARE(struct pf_krule *, pf_rulemarker); #define V_pf_rulemarker VNET(pf_rulemarker) #endif int pf_start(void); int pf_stop(void); void pf_initialize(void); void pf_mtag_initialize(void); void pf_mtag_cleanup(void); void pf_cleanup(void); struct pf_mtag *pf_get_mtag(struct mbuf *); extern void pf_calc_skip_steps(struct pf_krulequeue *); #ifdef ALTQ extern void pf_altq_ifnet_event(struct ifnet *, int); #endif VNET_DECLARE(uma_zone_t, pf_state_z); #define V_pf_state_z VNET(pf_state_z) VNET_DECLARE(uma_zone_t, pf_state_key_z); #define V_pf_state_key_z VNET(pf_state_key_z) VNET_DECLARE(uma_zone_t, pf_udp_mapping_z); #define V_pf_udp_mapping_z VNET(pf_udp_mapping_z) VNET_DECLARE(uma_zone_t, pf_state_scrub_z); #define V_pf_state_scrub_z VNET(pf_state_scrub_z) VNET_DECLARE(uma_zone_t, pf_anchor_z); #define V_pf_anchor_z VNET(pf_anchor_z) VNET_DECLARE(uma_zone_t, pf_eth_anchor_z); #define V_pf_eth_anchor_z VNET(pf_eth_anchor_z) extern void pf_purge_thread(void *); extern void pf_unload_vnet_purge(void); extern void pf_intr(void *); extern void pf_purge_expired_src_nodes(void); extern int pf_remove_state(struct pf_kstate *); extern int pf_state_insert(struct pfi_kkif *, struct pfi_kkif *, struct pf_state_key *, struct pf_state_key *, struct pf_kstate *); extern struct pf_kstate *pf_alloc_state(int); extern void pf_free_state(struct pf_kstate *); extern void pf_killstates(struct pf_kstate_kill *, unsigned int *); extern unsigned int pf_clear_states(const struct pf_kstate_kill *); static __inline void pf_ref_state(struct pf_kstate *s) { refcount_acquire(&s->refs); } static __inline int pf_release_state(struct pf_kstate *s) { if (refcount_release(&s->refs)) { pf_free_state(s); return (1); } else return (0); } static __inline int pf_release_staten(struct pf_kstate *s, u_int n) { if (refcount_releasen(&s->refs, n)) { pf_free_state(s); return (1); } else return (0); } static __inline uint64_t pf_get_uptime(void) { struct timeval t; microuptime(&t); return ((t.tv_sec * 1000) + (t.tv_usec / 1000)); } static __inline uint64_t pf_get_time(void) { struct timeval t; microtime(&t); return ((t.tv_sec * 1000) + (t.tv_usec / 1000)); } extern struct pf_kstate *pf_find_state_byid(uint64_t, uint32_t); extern struct pf_kstate *pf_find_state_all( const struct pf_state_key_cmp *, u_int, int *); extern bool pf_find_state_all_exists( const struct pf_state_key_cmp *, u_int); extern struct pf_udp_mapping *pf_udp_mapping_find(struct pf_udp_endpoint_cmp *endpoint); extern struct pf_udp_mapping *pf_udp_mapping_create(sa_family_t af, struct pf_addr *src_addr, 
uint16_t src_port, struct pf_addr *nat_addr, uint16_t nat_port); extern int pf_udp_mapping_insert(struct pf_udp_mapping *mapping); extern void pf_udp_mapping_release(struct pf_udp_mapping *mapping); uint32_t pf_hashsrc(struct pf_addr *, sa_family_t); extern bool pf_src_node_exists(struct pf_ksrc_node **, struct pf_srchash *); extern struct pf_ksrc_node *pf_find_src_node(struct pf_addr *, struct pf_krule *, sa_family_t, struct pf_srchash **, pf_sn_types_t, bool); extern void pf_unlink_src_node(struct pf_ksrc_node *); extern u_int pf_free_src_nodes(struct pf_ksrc_node_list *); extern void pf_print_state(struct pf_kstate *); extern void pf_print_flags(uint16_t); extern int pf_addr_wrap_neq(struct pf_addr_wrap *, struct pf_addr_wrap *); extern u_int16_t pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t, u_int8_t); extern u_int16_t pf_proto_cksum_fixup(struct mbuf *, u_int16_t, u_int16_t, u_int16_t, u_int8_t); VNET_DECLARE(struct ifnet *, sync_ifp); #define V_sync_ifp VNET(sync_ifp); VNET_DECLARE(struct pf_krule, pf_default_rule); #define V_pf_default_rule VNET(pf_default_rule) extern void pf_addrcpy(struct pf_addr *, const struct pf_addr *, sa_family_t); void pf_free_rule(struct pf_krule *); int pf_test_eth(int, int, struct ifnet *, struct mbuf **, struct inpcb *); int pf_scan_sctp(struct pf_pdesc *); #if defined(INET) || defined(INET6) int pf_test(sa_family_t, int, int, struct ifnet *, struct mbuf **, struct inpcb *, struct pf_rule_actions *); #endif #ifdef INET int pf_normalize_ip(u_short *, struct pf_pdesc *); #endif /* INET */ void pf_poolmask(struct pf_addr *, struct pf_addr*, struct pf_addr *, struct pf_addr *, sa_family_t); void pf_addr_inc(struct pf_addr *, sa_family_t); #ifdef INET6 int pf_normalize_ip6(int, u_short *, struct pf_pdesc *); int pf_max_frag_size(struct mbuf *); int pf_refragment6(struct ifnet *, struct mbuf **, struct m_tag *, struct ifnet *, bool); #endif /* INET6 */ int pf_multihome_scan_init(int, int, struct pf_pdesc *); int pf_multihome_scan_asconf(int, int, struct pf_pdesc *); u_int32_t pf_new_isn(struct pf_kstate *); void *pf_pull_hdr(const struct mbuf *, int, void *, int, u_short *, u_short *, sa_family_t); void pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t); void pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t, u_int8_t); void pf_change_tcp_a(struct mbuf *, void *, u_int16_t *, u_int32_t); int pf_patch_16(struct pf_pdesc *, void *, u_int16_t, bool); int pf_patch_32(struct pf_pdesc *, void *, u_int32_t, bool); void pf_send_deferred_syn(struct pf_kstate *); int pf_match_addr(u_int8_t, const struct pf_addr *, const struct pf_addr *, const struct pf_addr *, sa_family_t); int pf_match_addr_range(const struct pf_addr *, const struct pf_addr *, const struct pf_addr *, sa_family_t); int pf_match_port(u_int8_t, u_int16_t, u_int16_t, u_int16_t); void pf_normalize_init(void); void pf_normalize_cleanup(void); int pf_normalize_tcp(struct pf_pdesc *); void pf_normalize_tcp_cleanup(struct pf_kstate *); int pf_normalize_tcp_init(struct pf_pdesc *, struct tcphdr *, struct pf_state_peer *); int pf_normalize_tcp_stateful(struct pf_pdesc *, u_short *, struct tcphdr *, struct pf_kstate *, struct pf_state_peer *, struct pf_state_peer *, int *); int pf_normalize_sctp_init(struct pf_pdesc *, struct pf_state_peer *, struct pf_state_peer *); int pf_normalize_sctp(struct pf_pdesc *); u_int32_t pf_state_expires(const struct pf_kstate *); void pf_purge_expired_fragments(void); void pf_purge_fragments(uint32_t); int pf_routable(struct pf_addr *addr, sa_family_t af, struct 
pfi_kkif *, int); int pf_socket_lookup(struct pf_pdesc *); struct pf_state_key *pf_alloc_state_key(int); int pf_translate(struct pf_pdesc *, struct pf_addr *, u_int16_t, struct pf_addr *, u_int16_t, u_int16_t, int); int pf_translate_af(struct pf_pdesc *); bool pf_init_threshold(struct pf_kthreshold *, uint32_t, uint32_t); +uint16_t pf_tagname2tag(const char *); +#ifdef ALTQ +uint16_t pf_qname2qid(const char *, bool); +#endif /* ALTQ */ void pfr_initialize(void); void pfr_cleanup(void); int pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t); void pfr_update_stats(struct pfr_ktable *, struct pf_addr *, sa_family_t, u_int64_t, int, int, int); int pfr_pool_get(struct pfr_ktable *, int *, struct pf_addr *, sa_family_t, pf_addr_filter_func_t, bool); void pfr_dynaddr_update(struct pfr_ktable *, struct pfi_dynaddr *); struct pfr_ktable * pfr_attach_table(struct pf_kruleset *, char *); struct pfr_ktable * pfr_eth_attach_table(struct pf_keth_ruleset *, char *); void pfr_detach_table(struct pfr_ktable *); int pfr_clr_tables(struct pfr_table *, int *, int); int pfr_add_tables(struct pfr_table *, int, int *, int); int pfr_del_tables(struct pfr_table *, int, int *, int); int pfr_table_count(struct pfr_table *, int); int pfr_get_tables(struct pfr_table *, struct pfr_table *, int *, int); int pfr_get_tstats(struct pfr_table *, struct pfr_tstats *, int *, int); int pfr_clr_tstats(struct pfr_table *, int, int *, int); int pfr_set_tflags(struct pfr_table *, int, int, int, int *, int *, int); int pfr_clr_addrs(struct pfr_table *, int *, int); int pfr_insert_kentry(struct pfr_ktable *, struct pfr_addr *, time_t); int pfr_add_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_del_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_set_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int *, int *, int *, int, u_int32_t); int pfr_get_addrs(struct pfr_table *, struct pfr_addr *, int *, int); int pfr_get_astats(struct pfr_table *, struct pfr_astats *, int *, int); int pfr_clr_astats(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_tst_addrs(struct pfr_table *, struct pfr_addr *, int, int *, int); int pfr_ina_begin(struct pfr_table *, u_int32_t *, int *, int); int pfr_ina_rollback(struct pfr_table *, u_int32_t, int *, int); int pfr_ina_commit(struct pfr_table *, u_int32_t, int *, int *, int); int pfr_ina_define(struct pfr_table *, struct pfr_addr *, int, int *, int *, u_int32_t, int); struct pfr_ktable *pfr_ktable_select_active(struct pfr_ktable *); MALLOC_DECLARE(PFI_MTYPE); VNET_DECLARE(struct pfi_kkif *, pfi_all); #define V_pfi_all VNET(pfi_all) void pfi_initialize(void); void pfi_initialize_vnet(void); void pfi_cleanup(void); void pfi_cleanup_vnet(void); void pfi_kkif_ref(struct pfi_kkif *); void pfi_kkif_unref(struct pfi_kkif *); struct pfi_kkif *pfi_kkif_find(const char *); struct pfi_kkif *pfi_kkif_attach(struct pfi_kkif *, const char *); int pfi_kkif_match(struct pfi_kkif *, struct pfi_kkif *); void pfi_kkif_purge(void); int pfi_match_addr(struct pfi_dynaddr *, struct pf_addr *, sa_family_t); int pfi_dynaddr_setup(struct pf_addr_wrap *, sa_family_t); void pfi_dynaddr_remove(struct pfi_dynaddr *); void pfi_dynaddr_copyout(struct pf_addr_wrap *); void pfi_update_status(const char *, struct pf_status *); void pfi_get_ifaces(const char *, struct pfi_kif *, int *); int pfi_set_flags(const char *, int); int pfi_clear_flags(const char *, int); int pf_match_tag(struct mbuf *, struct pf_krule *, int *, int); int pf_tag_packet(struct 
pf_pdesc *, int); int pf_addr_cmp(struct pf_addr *, struct pf_addr *, sa_family_t); uint8_t* pf_find_tcpopt(u_int8_t *, u_int8_t *, size_t, u_int8_t, u_int8_t); u_int16_t pf_get_mss(struct pf_pdesc *); u_int8_t pf_get_wscale(struct pf_pdesc *); struct mbuf *pf_build_tcp(const struct pf_krule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, u_int16_t, u_int16_t, u_int, int); void pf_send_tcp(const struct pf_krule *, sa_family_t, const struct pf_addr *, const struct pf_addr *, u_int16_t, u_int16_t, u_int32_t, u_int32_t, u_int8_t, u_int16_t, u_int16_t, u_int8_t, int, u_int16_t, u_int16_t, int); void pf_syncookies_init(void); void pf_syncookies_cleanup(void); int pf_get_syncookies(struct pfioc_nv *); int pf_set_syncookies(struct pfioc_nv *); int pf_synflood_check(struct pf_pdesc *); void pf_syncookie_send(struct pf_pdesc *); bool pf_syncookie_check(struct pf_pdesc *); u_int8_t pf_syncookie_validate(struct pf_pdesc *); struct mbuf * pf_syncookie_recreate_syn(struct pf_pdesc *); VNET_DECLARE(struct pf_kstatus, pf_status); #define V_pf_status VNET(pf_status) struct pf_limit { uma_zone_t zone; u_int limit; }; VNET_DECLARE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); #define V_pf_limits VNET(pf_limits) #endif /* _KERNEL */ #ifdef _KERNEL struct pf_nl_pooladdr { u_int32_t action; u_int32_t ticket; u_int32_t nr; u_int32_t r_num; u_int8_t r_action; u_int8_t r_last; u_int8_t af; char anchor[MAXPATHLEN]; struct pf_pooladdr addr; /* Above this is identical to pfioc_pooladdr */ int which; }; VNET_DECLARE(struct pf_kanchor_global, pf_anchors); #define V_pf_anchors VNET(pf_anchors) VNET_DECLARE(struct pf_kanchor, pf_main_anchor); #define V_pf_main_anchor VNET(pf_main_anchor) VNET_DECLARE(struct pf_keth_anchor_global, pf_keth_anchors); #define V_pf_keth_anchors VNET(pf_keth_anchors) #define pf_main_ruleset V_pf_main_anchor.ruleset VNET_DECLARE(struct pf_keth_anchor, pf_main_keth_anchor); #define V_pf_main_keth_anchor VNET(pf_main_keth_anchor) VNET_DECLARE(struct pf_keth_ruleset*, pf_keth); #define V_pf_keth VNET(pf_keth) void pf_init_kruleset(struct pf_kruleset *); void pf_init_keth(struct pf_keth_ruleset *); int pf_kanchor_setup(struct pf_krule *, const struct pf_kruleset *, const char *); int pf_kanchor_copyout(const struct pf_kruleset *, const struct pf_krule *, char *, size_t); int pf_kanchor_nvcopyout(const struct pf_kruleset *, const struct pf_krule *, nvlist_t *); void pf_remove_kanchor(struct pf_krule *); void pf_remove_if_empty_kruleset(struct pf_kruleset *); struct pf_kruleset *pf_find_kruleset(const char *); struct pf_kruleset *pf_get_leaf_kruleset(char *, char **); struct pf_kruleset *pf_find_or_create_kruleset(const char *); void pf_rs_initialize(void); void pf_rule_tree_free(struct pf_krule_global *); struct pf_krule *pf_krule_alloc(void); void pf_remove_if_empty_keth_ruleset( struct pf_keth_ruleset *); struct pf_keth_ruleset *pf_find_keth_ruleset(const char *); struct pf_keth_anchor *pf_find_keth_anchor(const char *); int pf_keth_anchor_setup(struct pf_keth_rule *, const struct pf_keth_ruleset *, const char *); int pf_keth_anchor_nvcopyout( const struct pf_keth_ruleset *, const struct pf_keth_rule *, nvlist_t *); struct pf_keth_ruleset *pf_find_or_create_keth_ruleset(const char *); void pf_keth_anchor_remove(struct pf_keth_rule *); int pf_ioctl_getrules(struct pfioc_rule *); int pf_ioctl_addrule(struct pf_krule *, uint32_t, uint32_t, const char *, const char *, uid_t uid, pid_t); void 
pf_ioctl_clear_status(void); int pf_ioctl_get_timeout(int, int *); int pf_ioctl_set_timeout(int, int, int *); int pf_ioctl_get_limit(int, unsigned int *); int pf_ioctl_set_limit(int, unsigned int, unsigned int *); int pf_ioctl_begin_addrs(uint32_t *); int pf_ioctl_add_addr(struct pf_nl_pooladdr *); int pf_ioctl_get_addrs(struct pf_nl_pooladdr *); int pf_ioctl_get_addr(struct pf_nl_pooladdr *); int pf_ioctl_get_rulesets(struct pfioc_ruleset *); int pf_ioctl_get_ruleset(struct pfioc_ruleset *); int pf_ioctl_natlook(struct pfioc_natlook *); void pf_krule_free(struct pf_krule *); void pf_krule_clear_counters(struct pf_krule *); void pf_addr_copyout(struct pf_addr_wrap *); #endif /* The fingerprint functions can be linked into userland programs (tcpdump) */ int pf_osfp_add(struct pf_osfp_ioctl *); #ifdef _KERNEL struct pf_osfp_enlist * pf_osfp_fingerprint(struct pf_pdesc *, const struct tcphdr *); #endif /* _KERNEL */ void pf_osfp_flush(void); int pf_osfp_get(struct pf_osfp_ioctl *); int pf_osfp_match(struct pf_osfp_enlist *, pf_osfp_t); #ifdef _KERNEL void pf_print_host(struct pf_addr *, u_int16_t, sa_family_t); enum pf_test_status pf_step_into_anchor(struct pf_test_ctx *, struct pf_krule *); enum pf_test_status pf_match_rule(struct pf_test_ctx *, struct pf_kruleset *); void pf_step_into_keth_anchor(struct pf_keth_anchor_stackframe *, int *, struct pf_keth_ruleset **, struct pf_keth_rule **, struct pf_keth_rule **, int *); int pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *, int *, struct pf_keth_ruleset **, struct pf_keth_rule **, struct pf_keth_rule **, int *); u_short pf_map_addr(sa_family_t, struct pf_krule *, struct pf_addr *, struct pf_addr *, struct pfi_kkif **nkif, sa_family_t *, struct pf_addr *, struct pf_kpool *); u_short pf_map_addr_sn(u_int8_t, struct pf_krule *, struct pf_addr *, struct pf_addr *, sa_family_t *, struct pfi_kkif **, struct pf_addr *, struct pf_kpool *, pf_sn_types_t); int pf_get_transaddr_af(struct pf_krule *, struct pf_pdesc *); u_short pf_get_translation(struct pf_test_ctx *); u_short pf_get_transaddr(struct pf_test_ctx *, struct pf_krule *, u_int8_t, struct pf_kpool *); int pf_translate_compat(struct pf_test_ctx *); int pf_state_key_setup(struct pf_pdesc *, u_int16_t, u_int16_t, struct pf_state_key **sk, struct pf_state_key **nk); struct pf_state_key *pf_state_key_clone(const struct pf_state_key *); void pf_rule_to_actions(struct pf_krule *, struct pf_rule_actions *); int pf_normalize_mss(struct pf_pdesc *pd); #if defined(INET) || defined(INET6) void pf_scrub(struct pf_pdesc *); #endif struct pfi_kkif *pf_kkif_create(int); void pf_kkif_free(struct pfi_kkif *); void pf_kkif_zero(struct pfi_kkif *); /* NAT64 functions. */ int inet_nat64(int, const void *, void *, const void *, u_int8_t); int inet_nat64_inet(const void *, void *, const void *, u_int8_t); int inet_nat64_inet6(const void *, void *, const void *, u_int8_t); int inet_nat46(int, const void *, void *, const void *, u_int8_t); int inet_nat46_inet(const void *, void *, const void *, u_int8_t); int inet_nat46_inet6(const void *, void *, const void *, u_int8_t); #endif /* _KERNEL */ #endif /* _NET_PFVAR_H_ */ diff --git a/sys/netpfil/pf/if_pfsync.c b/sys/netpfil/pf/if_pfsync.c index 7b9405ee1f8d..66bc99df2afa 100644 --- a/sys/netpfil/pf/if_pfsync.c +++ b/sys/netpfil/pf/if_pfsync.c @@ -1,3301 +1,3436 @@ /*- * SPDX-License-Identifier: (BSD-2-Clause AND ISC) * * Copyright (c) 2002 Michael Shalayeff * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2009 David Gwynne * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ /* * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ * * Revisions picked from OpenBSD after revision 1.110 import: * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates * 1.120, 1.175 - use monotonic time_uptime * 1.122 - reduce number of updates for non-TCP sessions * 1.125, 1.127 - rewrite merge or stale processing * 1.128 - cleanups * 1.146 - bzero() mbuf before sparsely filling it with data * 1.170 - SIOCSIFMTU checks * 1.126, 1.142 - deferred packets processing * 1.173 - correct expire time processing */ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_pf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct pfsync_bucket; struct pfsync_softc; union inet_template { struct ip ipv4; struct ip6_hdr ipv6; }; #define PFSYNC_MINPKT ( \ sizeof(union inet_template) + \ sizeof(struct pfsync_header) + \ sizeof(struct pfsync_subheader) ) static int pfsync_upd_tcp(struct pf_kstate *, struct pf_state_peer_export *, struct pf_state_peer_export *); static int pfsync_in_clr(struct mbuf *, int, int, int, int); static int pfsync_in_ins(struct mbuf *, int, int, int, int); static int pfsync_in_iack(struct mbuf *, int, int, int, int); static int pfsync_in_upd(struct mbuf *, int, int, int, int); static int pfsync_in_upd_c(struct mbuf *, int, int, int, int); static int pfsync_in_ureq(struct mbuf *, int, int, int, int); static int pfsync_in_del_c(struct mbuf *, int, int, int, int); static int pfsync_in_bus(struct mbuf *, int, int, int, int); static int pfsync_in_tdb(struct mbuf *, int, int, int, int); static int pfsync_in_eof(struct mbuf *, int, int, int, int); static int pfsync_in_error(struct mbuf *, int, int, int, int); static int (*pfsync_acts[])(struct mbuf *, int, int, int, int) = { pfsync_in_clr, /* PFSYNC_ACT_CLR */ pfsync_in_ins, /* PFSYNC_ACT_INS_1301 */ pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ pfsync_in_upd, /* PFSYNC_ACT_UPD_1301 */ pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ pfsync_in_error, /* PFSYNC_ACT_DEL */ pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ pfsync_in_error, /* PFSYNC_ACT_INS_F */ pfsync_in_error, /* PFSYNC_ACT_DEL_F */ pfsync_in_bus, /* PFSYNC_ACT_BUS */ pfsync_in_tdb, /* PFSYNC_ACT_TDB */ pfsync_in_eof, /* PFSYNC_ACT_EOF */ pfsync_in_ins, /* PFSYNC_ACT_INS_1400 */ pfsync_in_upd, /* PFSYNC_ACT_UPD_1400 */ + pfsync_in_ins, /* PFSYNC_ACT_INS_1500 */ + pfsync_in_upd, /* PFSYNC_ACT_UPD_1500 */ }; struct pfsync_q { void (*write)(struct pf_kstate *, void *); size_t len; u_int8_t action; }; /* We have the following sync queues */ enum pfsync_q_id { PFSYNC_Q_INS_1301, PFSYNC_Q_INS_1400, + PFSYNC_Q_INS_1500, PFSYNC_Q_IACK, PFSYNC_Q_UPD_1301, PFSYNC_Q_UPD_1400, + PFSYNC_Q_UPD_1500, PFSYNC_Q_UPD_C, PFSYNC_Q_DEL_C, PFSYNC_Q_COUNT, }; /* Functions for building messages for given queue */ static void pfsync_out_state_1301(struct pf_kstate *, void *); static void pfsync_out_state_1400(struct pf_kstate *, void *); +static void pfsync_out_state_1500(struct pf_kstate *, void *); static void pfsync_out_iack(struct pf_kstate *, void *); static void pfsync_out_upd_c(struct pf_kstate *, void *); static void pfsync_out_del_c(struct 
pf_kstate *, void *); /* Attach those functions to queue */ static struct pfsync_q pfsync_qs[] = { { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_INS_1301 }, { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_INS_1400 }, + { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_INS_1500 }, { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, { pfsync_out_state_1301, sizeof(struct pfsync_state_1301), PFSYNC_ACT_UPD_1301 }, { pfsync_out_state_1400, sizeof(struct pfsync_state_1400), PFSYNC_ACT_UPD_1400 }, + { pfsync_out_state_1500, sizeof(struct pfsync_state_1500), PFSYNC_ACT_UPD_1500 }, { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, { pfsync_out_del_c, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } }; /* Map queue to pf_kstate->sync_state */ static u_int8_t pfsync_qid_sstate[] = { PFSYNC_S_INS, /* PFSYNC_Q_INS_1301 */ PFSYNC_S_INS, /* PFSYNC_Q_INS_1400 */ + PFSYNC_S_INS, /* PFSYNC_Q_INS_1500 */ PFSYNC_S_IACK, /* PFSYNC_Q_IACK */ PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1301 */ PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1400 */ + PFSYNC_S_UPD, /* PFSYNC_Q_UPD_1500 */ PFSYNC_S_UPD_C, /* PFSYNC_Q_UPD_C */ PFSYNC_S_DEL_C, /* PFSYNC_Q_DEL_C */ }; /* Map pf_kstate->sync_state to queue */ static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t); static void pfsync_q_ins(struct pf_kstate *, int sync_state, bool); static void pfsync_q_del(struct pf_kstate *, bool, struct pfsync_bucket *); static void pfsync_update_state(struct pf_kstate *); static void pfsync_tx(struct pfsync_softc *, struct mbuf *); struct pfsync_upd_req_item { TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; struct pfsync_upd_req ur_msg; }; struct pfsync_deferral { struct pfsync_softc *pd_sc; TAILQ_ENTRY(pfsync_deferral) pd_entry; struct callout pd_tmo; struct pf_kstate *pd_st; struct mbuf *pd_m; }; struct pfsync_bucket { int b_id; struct pfsync_softc *b_sc; struct mtx b_mtx; struct callout b_tmo; int b_flags; #define PFSYNCF_BUCKET_PUSH 0x00000001 size_t b_len; TAILQ_HEAD(, pf_kstate) b_qs[PFSYNC_Q_COUNT]; TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; TAILQ_HEAD(, pfsync_deferral) b_deferrals; u_int b_deferred; uint8_t *b_plus; size_t b_pluslen; struct ifaltq b_snd; }; struct pfsync_softc { /* Configuration */ struct ifnet *sc_ifp; struct ifnet *sc_sync_if; struct ip_moptions sc_imo; struct ip6_moptions sc_im6o; struct sockaddr_storage sc_sync_peer; uint32_t sc_flags; uint8_t sc_maxupdates; union inet_template sc_template; struct mtx sc_mtx; uint32_t sc_version; /* Queued data */ struct pfsync_bucket *sc_buckets; /* Bulk update info */ struct mtx sc_bulk_mtx; uint32_t sc_ureq_sent; int sc_bulk_tries; uint32_t sc_ureq_received; int sc_bulk_hashid; uint64_t sc_bulk_stateid; uint32_t sc_bulk_creatorid; struct callout sc_bulk_tmo; struct callout sc_bulkfail_tmo; }; #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) #define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) #define PFSYNC_BUCKET_LOCK_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) #define PFSYNC_DEFER_TIMEOUT 20 static const char pfsyncname[] = "pfsync"; static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); VNET_DEFINE_STATIC(struct pfsync_softc *, 
pfsyncif) = NULL; #define V_pfsyncif VNET(pfsyncif) VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) VNET_DEFINE_STATIC(struct intr_event *, pfsync_swi_ie); #define V_pfsync_swi_ie VNET(pfsync_swi_ie) VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); #define V_pfsyncstats VNET(pfsyncstats) VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; #define V_pfsync_carp_adj VNET(pfsync_carp_adj) VNET_DEFINE_STATIC(unsigned int, pfsync_defer_timeout) = PFSYNC_DEFER_TIMEOUT; #define V_pfsync_defer_timeout VNET(pfsync_defer_timeout) static void pfsync_timeout(void *); static void pfsync_push(struct pfsync_bucket *); static void pfsync_push_all(struct pfsync_softc *); static void pfsyncintr(void *); static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, struct in_mfilter *, struct in6_mfilter *); static void pfsync_multicast_cleanup(struct pfsync_softc *); static void pfsync_pointers_init(void); static void pfsync_pointers_uninit(void); static int pfsync_init(void); static void pfsync_uninit(void); static unsigned long pfsync_buckets; SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "PFSYNC"); SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsyncstats), pfsyncstats, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, &pfsync_buckets, 0, "Number of pfsync hash buckets"); SYSCTL_UINT(_net_pfsync, OID_AUTO, defer_delay, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsync_defer_timeout), 0, "Deferred packet timeout (in ms)"); static int pfsync_clone_create(struct if_clone *, int, caddr_t); static void pfsync_clone_destroy(struct ifnet *); static int pfsync_alloc_scrub_memory(struct pf_state_peer_export *, struct pf_state_peer *); static int pfsyncoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int pfsyncioctl(struct ifnet *, u_long, caddr_t); static int pfsync_defer(struct pf_kstate *, struct mbuf *); static void pfsync_undefer(struct pfsync_deferral *, int); static void pfsync_undefer_state_locked(struct pf_kstate *, int); static void pfsync_undefer_state(struct pf_kstate *, int); static void pfsync_defer_tmo(void *); static void pfsync_request_update(u_int32_t, u_int64_t); static bool pfsync_update_state_req(struct pf_kstate *); static void pfsync_drop_all(struct pfsync_softc *); static void pfsync_drop(struct pfsync_softc *, int); static void pfsync_sendout(int, int); static void pfsync_send_plus(void *, size_t); static void pfsync_bulk_start(void); static void pfsync_bulk_status(u_int8_t); static void pfsync_bulk_update(void *); static void pfsync_bulk_fail(void *); static void pfsync_detach_ifnet(struct ifnet *); static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *, struct pfsync_kstatus *); static int pfsync_kstatus_to_softc(struct pfsync_kstatus *, struct pfsync_softc *); #ifdef IPSEC static void pfsync_update_net_tdb(struct pfsync_tdb *); #endif static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, struct pf_kstate *); #define PFSYNC_MAX_BULKTRIES 12 VNET_DEFINE(struct if_clone *, pfsync_cloner); #define V_pfsync_cloner VNET(pfsync_cloner) const struct in6_addr in6addr_linklocal_pfsync_group = {{{ 0xff, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0xf0 }}}; static int pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) { struct pfsync_softc *sc; struct ifnet *ifp; struct pfsync_bucket *b; int c; enum pfsync_q_id q; if (unit != 0) return (EINVAL); if (! pfsync_buckets) pfsync_buckets = mp_ncpus * 2; sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); sc->sc_flags |= PFSYNCF_OK; sc->sc_maxupdates = 128; sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), M_PFSYNC, M_ZERO | M_WAITOK); for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; mtx_init(&b->b_mtx, "pfsync bucket", NULL, MTX_DEF); b->b_id = c; b->b_sc = sc; b->b_len = PFSYNC_MINPKT; for (q = 0; q < PFSYNC_Q_COUNT; q++) TAILQ_INIT(&b->b_qs[q]); TAILQ_INIT(&b->b_upd_req_list); TAILQ_INIT(&b->b_deferrals); callout_init(&b->b_tmo, 1); b->b_snd.ifq_maxlen = ifqmaxlen; } ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); if_initname(ifp, pfsyncname, unit); ifp->if_softc = sc; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_hdrlen = sizeof(struct pfsync_header); ifp->if_mtu = ETHERMTU; mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); if_attach(ifp); bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); V_pfsyncif = sc; return (0); } static void pfsync_clone_destroy(struct ifnet *ifp) { struct pfsync_softc *sc = ifp->if_softc; struct pfsync_bucket *b; int c, ret; for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; /* * At this stage, everything should have already been * cleared by pfsync_uninit(), and we have only to * drain callouts. */ PFSYNC_BUCKET_LOCK(b); while (b->b_deferred > 0) { struct pfsync_deferral *pd = TAILQ_FIRST(&b->b_deferrals); ret = callout_stop(&pd->pd_tmo); PFSYNC_BUCKET_UNLOCK(b); if (ret > 0) { pfsync_undefer(pd, 1); } else { callout_drain(&pd->pd_tmo); } PFSYNC_BUCKET_LOCK(b); } MPASS(b->b_deferred == 0); MPASS(TAILQ_EMPTY(&b->b_deferrals)); PFSYNC_BUCKET_UNLOCK(b); free(b->b_plus, M_PFSYNC); b->b_plus = NULL; b->b_pluslen = 0; callout_drain(&b->b_tmo); } callout_drain(&sc->sc_bulkfail_tmo); callout_drain(&sc->sc_bulk_tmo); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); bpfdetach(ifp); if_detach(ifp); pfsync_drop_all(sc); if_free(ifp); pfsync_multicast_cleanup(sc); mtx_destroy(&sc->sc_mtx); mtx_destroy(&sc->sc_bulk_mtx); for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; mtx_destroy(&b->b_mtx); } free(sc->sc_buckets, M_PFSYNC); free(sc, M_PFSYNC); V_pfsyncif = NULL; } static int pfsync_alloc_scrub_memory(struct pf_state_peer_export *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); if (d->scrub == NULL) return (ENOMEM); } return (0); } static int pfsync_state_import(union pfsync_state_union *sp, int flags, int msg_version) { struct pfsync_softc *sc = V_pfsyncif; #ifndef __NO_STRICT_ALIGNMENT struct pfsync_state_key key[2]; #endif struct pfsync_state_key *kw, *ks; struct pf_kstate *st = NULL; struct pf_state_key *skw = NULL, *sks = NULL; struct pf_krule *r = NULL; - struct pfi_kkif *kif; + struct pfi_kkif *kif, *orig_kif; struct pfi_kkif *rt_kif = NULL; struct pf_kpooladdr *rpool_first; int error; + int n = 0; sa_family_t rt_af = 0; uint8_t rt = 0; - int n = 0; + sa_family_t wire_af, stack_af; + 
u_int8_t wire_proto, stack_proto; PF_RULES_RASSERT(); if (sp->pfs_1301.creatorid == 0) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid creator id: %08x\n", __func__, ntohl(sp->pfs_1301.creatorid)); return (EINVAL); } - if ((kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { + /* + * Check interfaces early on. Do it before allocating memory etc., + * because there is a high chance there will be a lot more such states. + */ + if ((kif = orig_kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: unknown interface: %s\n", __func__, sp->pfs_1301.ifname); if (flags & PFSYNC_SI_IOCTL) return (EINVAL); return (0); /* skip this state */ } + /* + * States created with floating interface policy can be synchronized to + * hosts with different interfaces, because they are bound to V_pfi_all. + * But s->orig_kif still points to a real interface. Don't abort + * importing the state if orig_kif does not exist on the importing host, + * as long as the state is not interface-bound. + */ + if (msg_version == PFSYNC_MSG_VERSION_1500) { + orig_kif = pfi_kkif_find(sp->pfs_1500.orig_ifname); + if (orig_kif == NULL) { + if (kif == V_pfi_all) { + orig_kif = kif; + } else { + if (V_pf_status.debug >= PF_DEBUG_MISC) + printf("%s: unknown original interface:" + " %s\n", __func__, + sp->pfs_1500.orig_ifname); + if (flags & PFSYNC_SI_IOCTL) + return (EINVAL); + return (0); /* skip this state */ + } + } + } + /* * If the ruleset checksums match or the state is coming from the ioctl, * it's safe to associate the state with the rule of that number. */ if (sp->pfs_1301.rule != htonl(-1) && sp->pfs_1301.anchor == htonl(-1) && (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->pfs_1301.rule) < pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) { TAILQ_FOREACH(r, pf_main_ruleset.rules[ PF_RULESET_FILTER].active.ptr, entries) if (ntohl(sp->pfs_1301.rule) == n++) break; } else r = &V_pf_default_rule; - /* - * Check routing interface early on. Do it before allocating memory etc. - * because there is a high chance there will be a lot more such states. - */ switch (msg_version) { case PFSYNC_MSG_VERSION_1301: /* * On FreeBSD <= 13 the routing interface and routing operation * are not sent over pfsync. If the ruleset is identical, * though, we might be able to recover the routing information * from the local ruleset. */ if (r != &V_pf_default_rule) { struct pf_kpool *pool = &r->route; /* Backwards compatibility. */ if (TAILQ_EMPTY(&pool->list)) pool = &r->rdr; /* * The ruleset is identical, try to recover. If the rule * has a redirection pool with a single interface, there * is a chance that this interface is identical to the one on * the pfsync peer. If there's more than one interface, * give up, as we can't be sure that we will pick the * same one as the pfsync peer did. */ rpool_first = TAILQ_FIRST(&(pool->list)); if ((rpool_first == NULL) || (TAILQ_NEXT(rpool_first, entries) != NULL)) { DPFPRINTF(PF_DEBUG_MISC, "%s: can't recover routing information " "because of empty or bad redirection pool", __func__); return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); } rt = r->rt; rt_kif = rpool_first->kif; /* * Guess the AF of the route address, FreeBSD 13 does * not support af-to nor prefer-ipv6-nexthop * so it should be safe. */ rt_af = r->af; } else if (!PF_AZERO(&sp->pfs_1301.rt_addr, sp->pfs_1301.af)) { /* * Ruleset different, routing *supposedly* requested, * give up on recovering.
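
The interface checks above fold three policies into one path. As a reading aid, here is a minimal sketch, with a hypothetical helper name, of how pfsync_state_import() resolves the two interface pointers: only v1500 messages carry the original interface separately, and a missing orig_ifname is fatal only for interface-bound states.

static int
resolve_import_kifs(const union pfsync_state_union *sp, int msg_version,
    struct pfi_kkif **kifp, struct pfi_kkif **orig_kifp)
{
	struct pfi_kkif *kif, *orig_kif;

	/* The state's own interface must exist on the importing host. */
	if ((kif = orig_kif = pfi_kkif_find(sp->pfs_1301.ifname)) == NULL)
		return (ENOENT);

	if (msg_version == PFSYNC_MSG_VERSION_1500 &&
	    (orig_kif = pfi_kkif_find(sp->pfs_1500.orig_ifname)) == NULL) {
		if (kif != V_pfi_all)
			return (ENOENT);	/* interface-bound: give up */
		orig_kif = kif;			/* floating: V_pfi_all stands in */
	}

	*kifp = kif;
	*orig_kifp = orig_kif;
	return (0);
}
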
*/ DPFPRINTF(PF_DEBUG_MISC, "%s: can't recover routing information " "because of different ruleset", __func__); return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); } + wire_af = stack_af = sp->pfs_1301.af; + wire_proto = stack_proto = sp->pfs_1301.proto; break; case PFSYNC_MSG_VERSION_1400: /* - * On FreeBSD 14 and above we're not taking any chances. + * On FreeBSD 14 we're not taking any chances. * We use the information synced to us. */ if (sp->pfs_1400.rt) { rt_kif = pfi_kkif_find(sp->pfs_1400.rt_ifname); if (rt_kif == NULL) { DPFPRINTF(PF_DEBUG_MISC, "%s: unknown route interface: %s", __func__, sp->pfs_1400.rt_ifname); return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); } rt = sp->pfs_1400.rt; /* * Guess the AF of the route address, FreeBSD 14 does * not support af-to nor prefer-ipv6-nexthop * so it should be safe. */ rt_af = sp->pfs_1400.af; } + wire_af = stack_af = sp->pfs_1400.af; + wire_proto = stack_proto = sp->pfs_1400.proto; + break; + case PFSYNC_MSG_VERSION_1500: + /* + * On FreeBSD 15 and above we're not taking any chances. + * We use the information synced to us. + */ + if (sp->pfs_1500.rt) { + rt_kif = pfi_kkif_find(sp->pfs_1500.rt_ifname); + if (rt_kif == NULL) { + DPFPRINTF(PF_DEBUG_MISC, + "%s: unknown route interface: %s", + __func__, sp->pfs_1500.rt_ifname); + return ((flags & PFSYNC_SI_IOCTL) ? EINVAL : 0); + } + rt = sp->pfs_1500.rt; + rt_af = sp->pfs_1500.rt_af; + } + wire_af = sp->pfs_1500.wire_af; + stack_af = sp->pfs_1500.stack_af; + wire_proto = sp->pfs_1500.wire_proto; + stack_proto = sp->pfs_1500.stack_proto; break; } if ((r->max_states && counter_u64_fetch(r->states_cur) >= r->max_states)) goto cleanup; /* * XXXGL: consider M_WAITOK in ioctl path after. */ st = pf_alloc_state(M_NOWAIT); if (__predict_false(st == NULL)) goto cleanup; if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) goto cleanup; #ifndef __NO_STRICT_ALIGNMENT bcopy(&sp->pfs_1301.key, key, sizeof(struct pfsync_state_key) * 2); kw = &key[PF_SK_WIRE]; ks = &key[PF_SK_STACK]; #else kw = &sp->pfs_1301.key[PF_SK_WIRE]; ks = &sp->pfs_1301.key[PF_SK_STACK]; #endif - if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->pfs_1301.af) || - PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->pfs_1301.af) || + if (wire_af != stack_af || + PF_ANEQ(&kw->addr[0], &ks->addr[0], wire_af) || + PF_ANEQ(&kw->addr[1], &ks->addr[1], wire_af) || kw->port[0] != ks->port[0] || kw->port[1] != ks->port[1]) { sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); if (sks == NULL) goto cleanup; } else sks = skw; /* allocate memory for scrub info */ if (pfsync_alloc_scrub_memory(&sp->pfs_1301.src, &st->src) || pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst)) goto cleanup; /* Copy to state key(s). 
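
The sks != skw test just above decides whether one state key can serve both sides. A v1500 peculiarity worth calling out: wire_af and stack_af may legitimately differ (af-to translated states, which FreeBSD 13 and 14 could not carry), which is why the address-family comparison now comes first. Illustrative values only:

/* Illustrative af-to style state as v1500 can describe it. */
sa_family_t wire_af = AF_INET6;	/* seen as IPv6 on the wire          */
sa_family_t stack_af = AF_INET;	/* tracked as IPv4 on the stack side */

/* wire_af != stack_af alone forces a second key (sks != skw) in
 * pfsync_state_import(), before any address or port is compared. */
bool two_keys = (wire_af != stack_af);
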
*/ skw->addr[0] = kw->addr[0]; skw->addr[1] = kw->addr[1]; skw->port[0] = kw->port[0]; skw->port[1] = kw->port[1]; - skw->proto = sp->pfs_1301.proto; - skw->af = sp->pfs_1301.af; + skw->proto = wire_proto; + skw->af = wire_af; if (sks != skw) { sks->addr[0] = ks->addr[0]; sks->addr[1] = ks->addr[1]; sks->port[0] = ks->port[0]; sks->port[1] = ks->port[1]; - sks->proto = sp->pfs_1301.proto; - sks->af = sp->pfs_1301.af; + sks->proto = stack_proto; + sks->af = stack_af; } /* copy to state */ - bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr, sizeof(st->act.rt_addr)); st->creation = (time_uptime - ntohl(sp->pfs_1301.creation)) * 1000; - st->expire = pf_get_uptime(); - if (sp->pfs_1301.expire) { - uint32_t timeout; - - timeout = r->timeout[sp->pfs_1301.timeout]; - if (!timeout) - timeout = V_pf_default_rule.timeout[sp->pfs_1301.timeout]; - - /* sp->expire may have been adaptively scaled by export. */ - st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000; - } - - st->direction = sp->pfs_1301.direction; - st->act.log = sp->pfs_1301.log; - st->timeout = sp->pfs_1301.timeout; - st->act.rt = rt; st->act.rt_kif = rt_kif; st->act.rt_af = rt_af; switch (msg_version) { case PFSYNC_MSG_VERSION_1301: st->state_flags = sp->pfs_1301.state_flags; + st->direction = sp->pfs_1301.direction; + st->act.log = sp->pfs_1301.log; + st->timeout = sp->pfs_1301.timeout; + if (rt) + bcopy(&sp->pfs_1301.rt_addr, &st->act.rt_addr, + sizeof(st->act.rt_addr)); /* * In FreeBSD 13 pfsync lacks many attributes. Copy them * from the rule if possible. If the rule can't be matched, * clear any set options, as we can't recover their * parameters. */ if (r == &V_pf_default_rule) { st->state_flags &= ~PFSTATE_SETMASK; } else { /* * Similar to pf_rule_to_actions(). This code * won't set the actions properly if they come * from multiple "match" rules as only the rule * creating the state is sent over pfsync.
*/ st->act.qid = r->qid; st->act.pqid = r->pqid; st->act.rtableid = r->rtableid; if (r->scrub_flags & PFSTATE_SETTOS) st->act.set_tos = r->set_tos; st->act.min_ttl = r->min_ttl; st->act.max_mss = r->max_mss; st->state_flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| PFSTATE_SETTOS|PFSTATE_SCRUB_TCP| PFSTATE_SETPRIO)); if (r->dnpipe || r->dnrpipe) { if (r->free_flags & PFRULE_DN_IS_PIPE) st->state_flags |= PFSTATE_DN_IS_PIPE; else st->state_flags &= ~PFSTATE_DN_IS_PIPE; } st->act.dnpipe = r->dnpipe; st->act.dnrpipe = r->dnrpipe; } break; case PFSYNC_MSG_VERSION_1400: st->state_flags = ntohs(sp->pfs_1400.state_flags); + st->direction = sp->pfs_1400.direction; + st->act.log = sp->pfs_1400.log; + st->timeout = sp->pfs_1400.timeout; st->act.qid = ntohs(sp->pfs_1400.qid); st->act.pqid = ntohs(sp->pfs_1400.pqid); st->act.dnpipe = ntohs(sp->pfs_1400.dnpipe); st->act.dnrpipe = ntohs(sp->pfs_1400.dnrpipe); st->act.rtableid = ntohl(sp->pfs_1400.rtableid); st->act.min_ttl = sp->pfs_1400.min_ttl; st->act.set_tos = sp->pfs_1400.set_tos; st->act.max_mss = ntohs(sp->pfs_1400.max_mss); st->act.set_prio[0] = sp->pfs_1400.set_prio[0]; st->act.set_prio[1] = sp->pfs_1400.set_prio[1]; + if (rt) + bcopy(&sp->pfs_1400.rt_addr, &st->act.rt_addr, + sizeof(st->act.rt_addr)); + break; + case PFSYNC_MSG_VERSION_1500: + st->state_flags = ntohs(sp->pfs_1500.state_flags); + st->direction = sp->pfs_1500.direction; + st->act.log = sp->pfs_1500.log; + st->timeout = sp->pfs_1500.timeout; + st->act.qid = ntohs(sp->pfs_1500.qid); + st->act.pqid = ntohs(sp->pfs_1500.pqid); + st->act.dnpipe = ntohs(sp->pfs_1500.dnpipe); + st->act.dnrpipe = ntohs(sp->pfs_1500.dnrpipe); + st->act.rtableid = ntohl(sp->pfs_1500.rtableid); + st->act.min_ttl = sp->pfs_1500.min_ttl; + st->act.set_tos = sp->pfs_1500.set_tos; + st->act.max_mss = ntohs(sp->pfs_1500.max_mss); + st->act.set_prio[0] = sp->pfs_1500.set_prio[0]; + st->act.set_prio[1] = sp->pfs_1500.set_prio[1]; + if (rt) + bcopy(&sp->pfs_1500.rt_addr, &st->act.rt_addr, + sizeof(st->act.rt_addr)); + if (sp->pfs_1500.tagname[0] != 0) + st->tag = pf_tagname2tag(sp->pfs_1500.tagname); break; default: panic("%s: Unsupported pfsync_msg_version %d", __func__, msg_version); } + st->expire = pf_get_uptime(); + if (sp->pfs_1301.expire) { + uint32_t timeout; + timeout = r->timeout[st->timeout]; + if (!timeout) + timeout = V_pf_default_rule.timeout[st->timeout]; + + /* sp->expire may have been adaptively scaled by export. */ + st->expire -= (timeout - ntohl(sp->pfs_1301.expire)) * 1000; + } + if (! 
(st->act.rtableid == -1 || (st->act.rtableid >= 0 && st->act.rtableid < rt_numfibs))) goto cleanup; st->id = sp->pfs_1301.id; st->creatorid = sp->pfs_1301.creatorid; pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); st->rule = r; st->nat_rule = NULL; st->anchor = NULL; st->pfsync_time = time_uptime; st->sync_state = PFSYNC_S_NONE; if (!(flags & PFSYNC_SI_IOCTL)) st->state_flags |= PFSTATE_NOSYNC; - if ((error = pf_state_insert(kif, kif, skw, sks, st)) != 0) + if ((error = pf_state_insert(kif, orig_kif, skw, sks, st)) != 0) goto cleanup_state; /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ counter_u64_add(r->states_cur, 1); counter_u64_add(r->states_tot, 1); if (!(flags & PFSYNC_SI_IOCTL)) { st->state_flags &= ~PFSTATE_NOSYNC; if (st->state_flags & PFSTATE_ACK) { struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK(b); pfsync_q_ins(st, PFSYNC_S_IACK, true); PFSYNC_BUCKET_UNLOCK(b); pfsync_push_all(sc); } } st->state_flags &= ~PFSTATE_ACK; PF_STATE_UNLOCK(st); return (0); cleanup: error = ENOMEM; if (skw == sks) sks = NULL; uma_zfree(V_pf_state_key_z, skw); uma_zfree(V_pf_state_key_z, sks); cleanup_state: /* pf_state_insert() frees the state keys. */ if (st) { st->timeout = PFTM_UNLINKED; /* appease an assert */ pf_free_state(st); } return (error); } #ifdef INET static int pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) { struct pfsync_softc *sc = V_pfsyncif; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct pfsync_header *ph; struct pfsync_subheader subh; int offset, len, flags = 0; int rv; uint16_t count; PF_RULES_RLOCK_TRACKER; *mp = NULL; V_pfsyncstats.pfsyncs_ipackets++; /* Verify that we have a sync interface configured. */ if (!sc || !sc->sc_sync_if || !V_pf_status.running || (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) goto done; /* verify that the packet came in on the right interface */ if (sc->sc_sync_if != m->m_pkthdr.rcvif) { V_pfsyncstats.pfsyncs_badif++; goto done; } if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* verify that the IP TTL is 255. */ if (ip->ip_ttl != PFSYNC_DFLTTL) { V_pfsyncstats.pfsyncs_badttl++; goto done; } offset = ip->ip_hl << 2; if (m->m_pkthdr.len < offset + sizeof(*ph)) { V_pfsyncstats.pfsyncs_hdrops++; goto done; } if (offset + sizeof(*ph) > m->m_len) { if (m_pullup(m, offset + sizeof(*ph)) == NULL) { V_pfsyncstats.pfsyncs_hdrops++; return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } ph = (struct pfsync_header *)((char *)ip + offset); /* verify the version */ if (ph->version != PFSYNC_VERSION) { V_pfsyncstats.pfsyncs_badver++; goto done; } len = ntohs(ph->len) + offset; if (m->m_pkthdr.len < len) { V_pfsyncstats.pfsyncs_badlen++; goto done; } /* * Trusting pf_chksum during packet processing, as well as seeking * in interface name tree, require holding PF_RULES_RLOCK(). 
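
One detail of the import path above that is easy to misread: the peer exports expire as seconds remaining, so the importer back-dates st->expire rather than assuming the two clocks agree. A worked example with invented numbers:

/* Invented numbers: rule timeout 86400 s (e.g. tcp.established),
 * peer reports 86100 s still remaining, i.e. 300 s already consumed. */
uint32_t timeout = 86400;
uint32_t expire = 86100;		/* ntohl(sp->pfs_1301.expire)   */
uint64_t st_expire = pf_get_uptime();	/* now, in milliseconds         */

st_expire -= (uint64_t)(timeout - expire) * 1000;
/* The state looks 300 s old locally too, so both peers expire it at
 * (roughly) the same moment, even if 'expire' was adaptively scaled
 * down by the exporting side. */
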
*/ PF_RULES_RLOCK(); if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) flags = PFSYNC_SI_CKSUM; offset += sizeof(*ph); while (offset <= len - sizeof(subh)) { m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); offset += sizeof(subh); if (subh.action >= PFSYNC_ACT_MAX) { V_pfsyncstats.pfsyncs_badact++; PF_RULES_RUNLOCK(); goto done; } count = ntohs(subh.count); V_pfsyncstats.pfsyncs_iacts[subh.action] += count; rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); if (rv == -1) { PF_RULES_RUNLOCK(); return (IPPROTO_DONE); } offset += rv; } PF_RULES_RUNLOCK(); done: m_freem(m); return (IPPROTO_DONE); } #endif #ifdef INET6 static int pfsync6_input(struct mbuf **mp, int *offp __unused, int proto __unused) { struct pfsync_softc *sc = V_pfsyncif; struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct pfsync_header *ph; struct pfsync_subheader subh; int offset, len, flags = 0; int rv; uint16_t count; PF_RULES_RLOCK_TRACKER; *mp = NULL; V_pfsyncstats.pfsyncs_ipackets++; /* Verify that we have a sync interface configured. */ if (!sc || !sc->sc_sync_if || !V_pf_status.running || (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) goto done; /* verify that the packet came in on the right interface */ if (sc->sc_sync_if != m->m_pkthdr.rcvif) { V_pfsyncstats.pfsyncs_badif++; goto done; } if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* verify that the IP TTL is 255. */ if (ip6->ip6_hlim != PFSYNC_DFLTTL) { V_pfsyncstats.pfsyncs_badttl++; goto done; } offset = sizeof(*ip6); if (m->m_pkthdr.len < offset + sizeof(*ph)) { V_pfsyncstats.pfsyncs_hdrops++; goto done; } if (offset + sizeof(*ph) > m->m_len) { if (m_pullup(m, offset + sizeof(*ph)) == NULL) { V_pfsyncstats.pfsyncs_hdrops++; return (IPPROTO_DONE); } ip6 = mtod(m, struct ip6_hdr *); } ph = (struct pfsync_header *)((char *)ip6 + offset); /* verify the version */ if (ph->version != PFSYNC_VERSION) { V_pfsyncstats.pfsyncs_badver++; goto done; } len = ntohs(ph->len) + offset; if (m->m_pkthdr.len < len) { V_pfsyncstats.pfsyncs_badlen++; goto done; } /* * Trusting pf_chksum during packet processing, as well as seeking * in interface name tree, require holding PF_RULES_RLOCK(). 
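
Both input paths apply the same admission checks before parsing; the TTL comparison above is effectively an on-link filter, in the spirit of GTSM:

/* PFSYNC_DFLTTL is the maximum TTL/hop limit. A datagram that crossed
 * even one router cannot still carry it, so only directly connected
 * peers pass; pfsync6_input() tests ip6->ip6_hlim the same way. */
if (ip->ip_ttl != PFSYNC_DFLTTL) {
	V_pfsyncstats.pfsyncs_badttl++;
	goto done;
}
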
*/ PF_RULES_RLOCK(); if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) flags = PFSYNC_SI_CKSUM; offset += sizeof(*ph); while (offset <= len - sizeof(subh)) { m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); offset += sizeof(subh); if (subh.action >= PFSYNC_ACT_MAX) { V_pfsyncstats.pfsyncs_badact++; PF_RULES_RUNLOCK(); goto done; } count = ntohs(subh.count); V_pfsyncstats.pfsyncs_iacts[subh.action] += count; rv = (*pfsync_acts[subh.action])(m, offset, count, flags, subh.action); if (rv == -1) { PF_RULES_RUNLOCK(); return (IPPROTO_DONE); } offset += rv; } PF_RULES_RUNLOCK(); done: m_freem(m); return (IPPROTO_DONE); } #endif static int pfsync_in_clr(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_clr *clr; struct mbuf *mp; int len = sizeof(*clr) * count; int i, offp; u_int32_t creatorid; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } clr = (struct pfsync_clr *)(mp->m_data + offp); for (i = 0; i < count; i++) { creatorid = clr[i].creatorid; if (clr[i].ifname[0] != '\0' && pfi_kkif_find(clr[i].ifname) == NULL) continue; for (int i = 0; i <= V_pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_kstate *s; relock: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { if (s->creatorid == creatorid) { s->state_flags |= PFSTATE_NOSYNC; pf_remove_state(s); goto relock; } } PF_HASHROW_UNLOCK(ih); } } return (len); } static int pfsync_in_ins(struct mbuf *m, int offset, int count, int flags, int action) { struct mbuf *mp; union pfsync_state_union *sa, *sp; int i, offp, total_len, msg_version, msg_len; + u_int8_t timeout, direction; + sa_family_t af; switch (action) { case PFSYNC_ACT_INS_1301: msg_len = sizeof(struct pfsync_state_1301); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1301; break; case PFSYNC_ACT_INS_1400: msg_len = sizeof(struct pfsync_state_1400); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1400; break; + case PFSYNC_ACT_INS_1500: + msg_len = sizeof(struct pfsync_state_1500); + msg_version = PFSYNC_MSG_VERSION_1500; + break; default: V_pfsyncstats.pfsyncs_badver++; return (-1); } + total_len = msg_len * count; + mp = m_pulldown(m, offset, total_len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (union pfsync_state_union *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = (union pfsync_state_union *)((char *)sa + msg_len * i); + switch (msg_version) { + case PFSYNC_MSG_VERSION_1301: + case PFSYNC_MSG_VERSION_1400: + af = sp->pfs_1301.af; + timeout = sp->pfs_1301.timeout; + direction = sp->pfs_1301.direction; + break; + case PFSYNC_MSG_VERSION_1500: + af = sp->pfs_1500.wire_af; + timeout = sp->pfs_1500.timeout; + direction = sp->pfs_1500.direction; + break; + } + /* Check for invalid values. 
*/ - if (sp->pfs_1301.timeout >= PFTM_MAX || + if (timeout >= PFTM_MAX || sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST || - sp->pfs_1301.direction > PF_OUT || - (sp->pfs_1301.af != AF_INET && - sp->pfs_1301.af != AF_INET6)) { + direction > PF_OUT || + (af != AF_INET && af != AF_INET6)) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid value\n", __func__); V_pfsyncstats.pfsyncs_badval++; continue; } if (pfsync_state_import(sp, flags, msg_version) != 0) V_pfsyncstats.pfsyncs_badact++; } return (total_len); } static int pfsync_in_iack(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_ins_ack *ia, *iaa; struct pf_kstate *st; struct mbuf *mp; int len = count * sizeof(*ia); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); for (i = 0; i < count; i++) { ia = &iaa[i]; st = pf_find_state_byid(ia->id, ia->creatorid); if (st == NULL) continue; if (st->state_flags & PFSTATE_ACK) { pfsync_undefer_state(st, 0); } PF_STATE_UNLOCK(st); } /* * XXX this is not yet implemented, but we know the size of the * message so we can skip it. */ return (count * sizeof(struct pfsync_ins_ack)); } static int pfsync_upd_tcp(struct pf_kstate *st, struct pf_state_peer_export *src, struct pf_state_peer_export *dst) { int sync = 0; PF_STATE_LOCK_ASSERT(st); /* * The state should never go backwards except * for syn-proxy states. Neither should the * sequence window slide backwards. */ if ((st->src.state > src->state && (st->src.state < PF_TCPS_PROXY_SRC || src->state >= PF_TCPS_PROXY_SRC)) || (st->src.state == src->state && SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) sync++; else pf_state_peer_ntoh(src, &st->src); if ((st->dst.state > dst->state) || (st->dst.state >= TCPS_SYN_SENT && SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) sync++; else pf_state_peer_ntoh(dst, &st->dst); return (sync); } static int pfsync_in_upd(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_softc *sc = V_pfsyncif; union pfsync_state_union *sa, *sp; struct pf_kstate *st; struct mbuf *mp; int sync, offp, i, total_len, msg_len, msg_version; + u_int8_t timeout; switch (action) { case PFSYNC_ACT_UPD_1301: msg_len = sizeof(struct pfsync_state_1301); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1301; break; case PFSYNC_ACT_UPD_1400: msg_len = sizeof(struct pfsync_state_1400); - total_len = msg_len * count; msg_version = PFSYNC_MSG_VERSION_1400; break; + case PFSYNC_ACT_UPD_1500: + msg_len = sizeof(struct pfsync_state_1500); + msg_version = PFSYNC_MSG_VERSION_1500; + break; default: V_pfsyncstats.pfsyncs_badact++; return (-1); } + total_len = msg_len * count; + mp = m_pulldown(m, offset, total_len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (union pfsync_state_union *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = (union pfsync_state_union *)((char *)sa + msg_len * i); + switch (msg_version) { + case PFSYNC_MSG_VERSION_1301: + case PFSYNC_MSG_VERSION_1400: + timeout = sp->pfs_1301.timeout; + break; + case PFSYNC_MSG_VERSION_1500: + timeout = sp->pfs_1500.timeout; + break; + } + /* check for invalid values */ - if (sp->pfs_1301.timeout >= PFTM_MAX || + if (timeout >= PFTM_MAX || sp->pfs_1301.src.state > PF_TCPS_PROXY_DST || sp->pfs_1301.dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pfsync_input: PFSYNC_ACT_UPD: " "invalid value\n"); } 
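
pfsync_upd_tcp() above refuses updates that would move the TCP state machine or the sequence window backwards. SEQ_GT compares modulo 2^32, so window wraparound is not mistaken for regression; a small worked example with invented sequence numbers:

/* SEQ_GT(a, b) is a signed 32-bit difference, immune to wrap. */
uint32_t ours = 0xfffffff0;	/* our seqlo, just below the wrap      */
uint32_t theirs = 0x00000010;	/* peer's seqlo, 0x20 bytes further on */

/* (int32_t)(ours - theirs) == -0x20, so SEQ_GT(ours, theirs) is false:
 * the peer's window is ahead, pf_state_peer_ntoh() applies the update
 * instead of bumping 'sync' and flagging the message as stale. */
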
V_pfsyncstats.pfsyncs_badval++; continue; } st = pf_find_state_byid(sp->pfs_1301.id, sp->pfs_1301.creatorid); if (st == NULL) { /* insert the update */ if (pfsync_state_import(sp, flags, msg_version)) V_pfsyncstats.pfsyncs_badstate++; continue; } if (st->state_flags & PFSTATE_ACK) { pfsync_undefer_state(st, 1); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) sync = pfsync_upd_tcp(st, &sp->pfs_1301.src, &sp->pfs_1301.dst); else { sync = 0; /* * Non-TCP protocol state machines always go * forward */ if (st->src.state > sp->pfs_1301.src.state) sync++; else pf_state_peer_ntoh(&sp->pfs_1301.src, &st->src); if (st->dst.state > sp->pfs_1301.dst.state) sync++; else pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); } if (sync < 2) { pfsync_alloc_scrub_memory(&sp->pfs_1301.dst, &st->dst); pf_state_peer_ntoh(&sp->pfs_1301.dst, &st->dst); st->expire = pf_get_uptime(); - st->timeout = sp->pfs_1301.timeout; + st->timeout = timeout; } st->pfsync_time = time_uptime; if (sync) { V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); PF_STATE_UNLOCK(st); pfsync_push_all(sc); continue; } PF_STATE_UNLOCK(st); } return (total_len); } static int pfsync_in_upd_c(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_upd_c *ua, *up; struct pf_kstate *st; int len = count * sizeof(*up); int sync; struct mbuf *mp; int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } ua = (struct pfsync_upd_c *)(mp->m_data + offp); for (i = 0; i < count; i++) { up = &ua[i]; /* check for invalid values */ if (up->timeout >= PFTM_MAX || up->src.state > PF_TCPS_PROXY_DST || up->dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pfsync_input: " "PFSYNC_ACT_UPD_C: " "invalid value\n"); } V_pfsyncstats.pfsyncs_badval++; continue; } st = pf_find_state_byid(up->id, up->creatorid); if (st == NULL) { /* We don't have this state. Ask for it.
*/ PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); pfsync_request_update(up->creatorid, up->id); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); continue; } if (st->state_flags & PFSTATE_ACK) { pfsync_undefer_state(st, 1); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) sync = pfsync_upd_tcp(st, &up->src, &up->dst); else { sync = 0; /* * Non-TCP protocol state machines always go * forward */ if (st->src.state > up->src.state) sync++; else pf_state_peer_ntoh(&up->src, &st->src); if (st->dst.state > up->dst.state) sync++; else pf_state_peer_ntoh(&up->dst, &st->dst); } if (sync < 2) { pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->dst, &st->dst); st->expire = pf_get_uptime(); st->timeout = up->timeout; } st->pfsync_time = time_uptime; if (sync) { V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); PF_STATE_UNLOCK(st); pfsync_push_all(sc); continue; } PF_STATE_UNLOCK(st); } return (len); } static int pfsync_in_ureq(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_upd_req *ur, *ura; struct mbuf *mp; int len = count * sizeof(*ur); int i, offp; struct pf_kstate *st; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } ura = (struct pfsync_upd_req *)(mp->m_data + offp); for (i = 0; i < count; i++) { ur = &ura[i]; if (ur->id == 0 && ur->creatorid == 0) pfsync_bulk_start(); else { st = pf_find_state_byid(ur->id, ur->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } if (st->state_flags & PFSTATE_NOSYNC) { PF_STATE_UNLOCK(st); continue; } pfsync_update_state_req(st); PF_STATE_UNLOCK(st); } } return (len); } static int pfsync_in_del_c(struct mbuf *m, int offset, int count, int flags, int action) { struct mbuf *mp; struct pfsync_del_c *sa, *sp; struct pf_kstate *st; int len = count * sizeof(*sp); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_del_c *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } st->state_flags |= PFSTATE_NOSYNC; pf_remove_state(st); } return (len); } static int pfsync_in_bus(struct mbuf *m, int offset, int count, int flags, int action) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bus *bus; struct mbuf *mp; int len = count * sizeof(*bus); int offp; PFSYNC_BLOCK(sc); /* If we're not waiting for a bulk update, who cares.
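
For orientation before the bus handler below: on PFSYNC_BUS_START it arms sc_bulkfail_tmo for roughly the time a complete bulk transfer should need. A rough worked example with invented numbers (the real divisor uses sizeof(union pfsync_state_union)):

/* Invented: hz = 1000, state limit 100000, MTU 1500, ~300 B/state. */
int per_pkt = (1500 - PFSYNC_MINPKT) / 300;	/* ~4 states per packet */
int ticks = 4 * 1000 + 100000 / per_pkt;	/* 4000 + 25000 ticks   */

/* With hz = 1000 that is ~29 s: if no PFSYNC_BUS_END arrives in time,
 * pfsync_bulk_fail() re-requests the bulk or eventually gives up. */
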
*/ if (sc->sc_ureq_sent == 0) { PFSYNC_BUNLOCK(sc); return (len); } mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { PFSYNC_BUNLOCK(sc); V_pfsyncstats.pfsyncs_badlen++; return (-1); } bus = (struct pfsync_bus *)(mp->m_data + offp); switch (bus->status) { case PFSYNC_BUS_START: callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + V_pf_limits[PF_LIMIT_STATES].limit / ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / sizeof(union pfsync_state_union)), pfsync_bulk_fail, sc); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received bulk update start\n"); break; case PFSYNC_BUS_END: if (time_uptime - ntohl(bus->endtime) >= sc->sc_ureq_sent) { /* that's it, we're happy */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; callout_stop(&sc->sc_bulkfail_tmo); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk done"); sc->sc_flags |= PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received valid " "bulk update end\n"); } else { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received invalid " "bulk update end: bad timestamp\n"); } break; } PFSYNC_BUNLOCK(sc); return (len); } static int pfsync_in_tdb(struct mbuf *m, int offset, int count, int flags, int action) { int len = count * sizeof(struct pfsync_tdb); #if defined(IPSEC) struct pfsync_tdb *tp; struct mbuf *mp; int offp; int i; int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } tp = (struct pfsync_tdb *)(mp->m_data + offp); for (i = 0; i < count; i++) pfsync_update_net_tdb(&tp[i]); #endif return (len); } #if defined(IPSEC) /* Update an in-kernel tdb. Silently fail if no tdb is found. */ static void pfsync_update_net_tdb(struct pfsync_tdb *pt) { struct tdb *tdb; int s; /* check for invalid values */ if (ntohl(pt->spi) <= SPI_RESERVED_MAX || (pt->dst.sa.sa_family != AF_INET && pt->dst.sa.sa_family != AF_INET6)) goto bad; tdb = gettdb(pt->spi, &pt->dst, pt->sproto); if (tdb) { pt->rpl = ntohl(pt->rpl); pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); /* Neither replay nor byte counter should ever decrease. */ if (pt->rpl < tdb->tdb_rpl || pt->cur_bytes < tdb->tdb_cur_bytes) { goto bad; } tdb->tdb_rpl = pt->rpl; tdb->tdb_cur_bytes = pt->cur_bytes; } return; bad: if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " "invalid value\n"); V_pfsyncstats.pfsyncs_badstate++; return; } #endif static int pfsync_in_eof(struct mbuf *m, int offset, int count, int flags, int action) { /* check if we are at the right place in the packet */ if (offset != m->m_pkthdr.len) V_pfsyncstats.pfsyncs_badlen++; /* we're done. 
free and let the caller return */ m_freem(m); return (-1); } static int pfsync_in_error(struct mbuf *m, int offset, int count, int flags, int action) { V_pfsyncstats.pfsyncs_badact++; m_freem(m); return (-1); } static int pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *rt) { m_freem(m); return (0); } /* ARGSUSED */ static int pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct pfsync_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct pfsyncreq pfsyncr; size_t nvbuflen; int error; int c; switch (cmd) { case SIOCSIFFLAGS: PFSYNC_LOCK(sc); if (ifp->if_flags & IFF_UP) { ifp->if_drv_flags |= IFF_DRV_RUNNING; PFSYNC_UNLOCK(sc); pfsync_pointers_init(); } else { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; PFSYNC_UNLOCK(sc); pfsync_pointers_uninit(); } break; case SIOCSIFMTU: if (!sc->sc_sync_if || ifr->ifr_mtu <= PFSYNC_MINPKT || ifr->ifr_mtu > sc->sc_sync_if->if_mtu) return (EINVAL); if (ifr->ifr_mtu < ifp->if_mtu) { for (c = 0; c < pfsync_buckets; c++) { PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) pfsync_sendout(1, c); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); } } ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); PFSYNC_LOCK(sc); if (sc->sc_sync_if) { strlcpy(pfsyncr.pfsyncr_syncdev, sc->sc_sync_if->if_xname, IFNAMSIZ); } pfsyncr.pfsyncr_syncpeer = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; pfsyncr.pfsyncr_defer = sc->sc_flags; PFSYNC_UNLOCK(sc); return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), sizeof(pfsyncr))); case SIOCGETPFSYNCNV: { nvlist_t *nvl_syncpeer; nvlist_t *nvl = nvlist_create(0); if (nvl == NULL) return (ENOMEM); if (sc->sc_sync_if) nvlist_add_string(nvl, "syncdev", sc->sc_sync_if->if_xname); nvlist_add_number(nvl, "maxupdates", sc->sc_maxupdates); nvlist_add_number(nvl, "flags", sc->sc_flags); nvlist_add_number(nvl, "version", sc->sc_version); if ((nvl_syncpeer = pfsync_sockaddr_to_syncpeer_nvlist(&sc->sc_sync_peer)) != NULL) nvlist_add_nvlist(nvl, "syncpeer", nvl_syncpeer); void *packed = NULL; packed = nvlist_pack(nvl, &nvbuflen); if (packed == NULL) { free(packed, M_NVLIST); nvlist_destroy(nvl); return (ENOMEM); } if (nvbuflen > ifr->ifr_cap_nv.buf_length) { ifr->ifr_cap_nv.length = nvbuflen; ifr->ifr_cap_nv.buffer = NULL; free(packed, M_NVLIST); nvlist_destroy(nvl); return (EFBIG); } ifr->ifr_cap_nv.length = nvbuflen; error = copyout(packed, ifr->ifr_cap_nv.buffer, nvbuflen); nvlist_destroy(nvl); nvlist_destroy(nvl_syncpeer); free(packed, M_NVLIST); break; } case SIOCSETPFSYNC: { struct pfsync_kstatus status; if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) return (error); if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, sizeof(pfsyncr)))) return (error); memset((char *)&status, 0, sizeof(struct pfsync_kstatus)); pfsync_pfsyncreq_to_kstatus(&pfsyncr, &status); error = pfsync_kstatus_to_softc(&status, sc); return (error); } case SIOCSETPFSYNCNV: { struct pfsync_kstatus status; void *data; nvlist_t *nvl; if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) return (error); if (ifr->ifr_cap_nv.length > IFR_CAP_NV_MAXBUFSIZE) return (EINVAL); data = malloc(ifr->ifr_cap_nv.length, M_PF, M_WAITOK); if ((error = copyin(ifr->ifr_cap_nv.buffer, data, ifr->ifr_cap_nv.length)) != 0) { free(data, M_PF); return (error); } if ((nvl = nvlist_unpack(data, ifr->ifr_cap_nv.length, 0)) == NULL) { free(data, M_PF); return (EINVAL); } memset((char *)&status, 0, 
sizeof(struct pfsync_kstatus)); pfsync_nvstatus_to_kstatus(nvl, &status); nvlist_destroy(nvl); free(data, M_PF); error = pfsync_kstatus_to_softc(&status, sc); return (error); } default: return (ENOTTY); } return (0); } static void pfsync_out_state_1301(struct pf_kstate *st, void *buf) { union pfsync_state_union *sp = buf; pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1301); } static void pfsync_out_state_1400(struct pf_kstate *st, void *buf) { union pfsync_state_union *sp = buf; pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1400); } +static void +pfsync_out_state_1500(struct pf_kstate *st, void *buf) +{ + union pfsync_state_union *sp = buf; + + pfsync_state_export(sp, st, PFSYNC_MSG_VERSION_1500); +} + static void pfsync_out_iack(struct pf_kstate *st, void *buf) { struct pfsync_ins_ack *iack = buf; iack->id = st->id; iack->creatorid = st->creatorid; } static void pfsync_out_upd_c(struct pf_kstate *st, void *buf) { struct pfsync_upd_c *up = buf; bzero(up, sizeof(*up)); up->id = st->id; pf_state_peer_hton(&st->src, &up->src); pf_state_peer_hton(&st->dst, &up->dst); up->creatorid = st->creatorid; up->timeout = st->timeout; } static void pfsync_out_del_c(struct pf_kstate *st, void *buf) { struct pfsync_del_c *dp = buf; dp->id = st->id; dp->creatorid = st->creatorid; st->state_flags |= PFSTATE_NOSYNC; } static void pfsync_drop_all(struct pfsync_softc *sc) { struct pfsync_bucket *b; int c; for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; PFSYNC_BUCKET_LOCK(b); pfsync_drop(sc, c); PFSYNC_BUCKET_UNLOCK(b); } } static void pfsync_drop(struct pfsync_softc *sc, int c) { struct pf_kstate *st, *next; struct pfsync_upd_req_item *ur; struct pfsync_bucket *b; enum pfsync_q_id q; b = &sc->sc_buckets[c]; PFSYNC_BUCKET_LOCK_ASSERT(b); for (q = 0; q < PFSYNC_Q_COUNT; q++) { if (TAILQ_EMPTY(&b->b_qs[q])) continue; TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) { KASSERT(st->sync_state == pfsync_qid_sstate[q], ("%s: st->sync_state %d == q %d", __func__, st->sync_state, q)); st->sync_state = PFSYNC_S_NONE; pf_release_state(st); } TAILQ_INIT(&b->b_qs[q]); } while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); free(ur, M_PFSYNC); } b->b_len = PFSYNC_MINPKT; free(b->b_plus, M_PFSYNC); b->b_plus = NULL; b->b_pluslen = 0; } static void pfsync_sendout(int schedswi, int c) { struct pfsync_softc *sc = V_pfsyncif; struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; struct pfsync_header *ph; struct pfsync_subheader *subh; struct pf_kstate *st, *st_next; struct pfsync_upd_req_item *ur; struct pfsync_bucket *b = &sc->sc_buckets[c]; size_t len; int aflen, offset, count = 0; enum pfsync_q_id q; KASSERT(sc != NULL, ("%s: null sc", __func__)); KASSERT(b->b_len > PFSYNC_MINPKT, ("%s: sc_len %zu", __func__, b->b_len)); PFSYNC_BUCKET_LOCK_ASSERT(b); if (!bpf_peers_present(ifp->if_bpf) && sc->sc_sync_if == NULL) { pfsync_drop(sc, c); return; } m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); V_pfsyncstats.pfsyncs_onomem++; return; } m->m_data += max_linkhdr; bzero(m->m_data, b->b_len); len = b->b_len; /* build the ip header */ switch (sc->sc_sync_peer.ss_family) { #ifdef INET case AF_INET: { struct ip *ip; ip = mtod(m, struct ip *); bcopy(&sc->sc_template.ipv4, ip, sizeof(*ip)); aflen = offset = sizeof(*ip); len -= sizeof(union inet_template) - sizeof(struct ip); ip->ip_len = htons(len); ip_fillid(ip, V_ip_random_id); break; } #endif #ifdef INET6 case AF_INET6: { struct ip6_hdr 
*ip6; ip6 = mtod(m, struct ip6_hdr *); bcopy(&sc->sc_template.ipv6, ip6, sizeof(*ip6)); aflen = offset = sizeof(*ip6); len -= sizeof(union inet_template) - sizeof(struct ip6_hdr); ip6->ip6_plen = htons(len); break; } #endif default: m_freem(m); pfsync_drop(sc, c); return; } m->m_len = m->m_pkthdr.len = len; /* build the pfsync header */ ph = (struct pfsync_header *)(m->m_data + offset); offset += sizeof(*ph); ph->version = PFSYNC_VERSION; ph->len = htons(len - aflen); bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); /* walk the queues */ for (q = 0; q < PFSYNC_Q_COUNT; q++) { if (TAILQ_EMPTY(&b->b_qs[q])) continue; subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) { KASSERT(st->sync_state == pfsync_qid_sstate[q], ("%s: st->sync_state == q", __func__)); /* * XXXGL: some of write methods do unlocked reads * of state data :( */ pfsync_qs[q].write(st, m->m_data + offset); offset += pfsync_qs[q].len; st->sync_state = PFSYNC_S_NONE; pf_release_state(st); count++; } TAILQ_INIT(&b->b_qs[q]); subh->action = pfsync_qs[q].action; subh->count = htons(count); V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; } if (!TAILQ_EMPTY(&b->b_upd_req_list)) { subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) { TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry); bcopy(&ur->ur_msg, m->m_data + offset, sizeof(ur->ur_msg)); offset += sizeof(ur->ur_msg); free(ur, M_PFSYNC); count++; } subh->action = PFSYNC_ACT_UPD_REQ; subh->count = htons(count); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; } /* has someone built a custom region for us to add? */ if (b->b_plus != NULL) { bcopy(b->b_plus, m->m_data + offset, b->b_pluslen); offset += b->b_pluslen; free(b->b_plus, M_PFSYNC); b->b_plus = NULL; b->b_pluslen = 0; } subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); subh->action = PFSYNC_ACT_EOF; subh->count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; /* we're done, let's put it on the wire */ if (bpf_peers_present(ifp->if_bpf)) { m->m_data += aflen; m->m_len = m->m_pkthdr.len = len - aflen; bpf_mtap(ifp->if_bpf, m); m->m_data -= aflen; m->m_len = m->m_pkthdr.len = len; } if (sc->sc_sync_if == NULL) { b->b_len = PFSYNC_MINPKT; m_freem(m); return; } if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); b->b_len = PFSYNC_MINPKT; if (!_IF_QFULL(&b->b_snd)) _IF_ENQUEUE(&b->b_snd, m); else { m_freem(m); if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); } if (schedswi) swi_sched(V_pfsync_swi_cookie, 0); } static void pfsync_insert_state(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); if (st->state_flags & PFSTATE_NOSYNC) return; if ((st->rule->rule_flag & PFRULE_NOSYNC) || st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { st->state_flags |= PFSTATE_NOSYNC; return; } KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); PFSYNC_BUCKET_LOCK(b); if (b->b_len == PFSYNC_MINPKT) callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); pfsync_q_ins(st, PFSYNC_S_INS, true); PFSYNC_BUCKET_UNLOCK(b); st->sync_updates = 0; } static int pfsync_defer(struct pf_kstate *st, struct mbuf *m) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; struct pfsync_bucket *b; if (m->m_flags & (M_BCAST|M_MCAST)) 
return (0); if (sc == NULL) return (0); b = pfsync_get_bucket(sc, st); PFSYNC_LOCK(sc); if (!(sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) || !(sc->sc_flags & PFSYNCF_DEFER)) { PFSYNC_UNLOCK(sc); return (0); } PFSYNC_BUCKET_LOCK(b); PFSYNC_UNLOCK(sc); if (b->b_deferred >= 128) pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); if (pd == NULL) { PFSYNC_BUCKET_UNLOCK(b); return (0); } b->b_deferred++; m->m_flags |= M_SKIP_FIREWALL; st->state_flags |= PFSTATE_ACK; pd->pd_sc = sc; pd->pd_st = st; pf_ref_state(st); pd->pd_m = m; TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); callout_reset(&pd->pd_tmo, (V_pfsync_defer_timeout * hz) / 1000, pfsync_defer_tmo, pd); pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); return (1); } static void pfsync_undefer(struct pfsync_deferral *pd, int drop) { struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_kstate *st = pd->pd_st; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK_ASSERT(b); TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); b->b_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ free(pd, M_PFSYNC); pf_release_state(st); if (drop) m_freem(m); else { _IF_ENQUEUE(&b->b_snd, m); pfsync_push(b); } } static void pfsync_defer_tmo(void *arg) { struct epoch_tracker et; struct pfsync_deferral *pd = arg; struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_kstate *st = pd->pd_st; struct pfsync_bucket *b; CURVNET_SET(sc->sc_ifp->if_vnet); b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK_ASSERT(b); TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); b->b_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ PFSYNC_BUCKET_UNLOCK(b); free(pd, M_PFSYNC); if (sc->sc_sync_if == NULL) { pf_release_state(st); m_freem(m); CURVNET_RESTORE(); return; } NET_EPOCH_ENTER(et); pfsync_tx(sc, m); pf_release_state(st); CURVNET_RESTORE(); NET_EPOCH_EXIT(et); } static void pfsync_undefer_state_locked(struct pf_kstate *st, int drop) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK_ASSERT(b); TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { if (pd->pd_st == st) { if (callout_stop(&pd->pd_tmo) > 0) pfsync_undefer(pd, drop); return; } } panic("%s: unable to find deferred state", __func__); } static void pfsync_undefer_state(struct pf_kstate *st, int drop) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK(b); pfsync_undefer_state_locked(st, drop); PFSYNC_BUCKET_UNLOCK(b); } static struct pfsync_bucket* pfsync_get_bucket(struct pfsync_softc *sc, struct pf_kstate *st) { int c = PF_IDHASH(st) % pfsync_buckets; return &sc->sc_buckets[c]; } static void pfsync_update_state(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; bool sync = false, ref = true; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PF_STATE_LOCK_ASSERT(st); PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state_locked(st, 0); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true, b); PFSYNC_BUCKET_UNLOCK(b); return; } if (b->b_len == PFSYNC_MINPKT) callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_INS: /* we're already handling it */ if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { st->sync_updates++; if 
(st->sync_updates >= sc->sc_maxupdates) sync = true; } break; case PFSYNC_S_IACK: pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); st->sync_updates = 0; break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } if (sync || (time_uptime - st->pfsync_time) < 2) pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_request_update(u_int32_t creatorid, u_int64_t id) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = &sc->sc_buckets[0]; struct pfsync_upd_req_item *item; size_t nlen = sizeof(struct pfsync_upd_req); PFSYNC_BUCKET_LOCK_ASSERT(b); /* * This code does a bit of work to prevent multiple update requests for * the same state from being generated. It searches the current subheader * queue, but it doesn't look into the queue of already packed datagrams. */ TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry) if (item->ur_msg.id == id && item->ur_msg.creatorid == creatorid) return; item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); if (item == NULL) return; /* XXX stats */ item->ur_msg.id = id; item->ur_msg.creatorid = creatorid; if (TAILQ_EMPTY(&b->b_upd_req_list)) nlen += sizeof(struct pfsync_subheader); if (b->b_len + nlen > sc->sc_ifp->if_mtu) { pfsync_sendout(0, 0); nlen = sizeof(struct pfsync_subheader) + sizeof(struct pfsync_upd_req); } TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry); b->b_len += nlen; pfsync_push(b); } static bool pfsync_update_state_req(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; bool ref = true, full = false; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PF_STATE_LOCK_ASSERT(st); PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true, b); PFSYNC_BUCKET_UNLOCK(b); return (full); } switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_IACK: pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD, ref); pfsync_push(b); break; case PFSYNC_S_INS: case PFSYNC_S_UPD: case PFSYNC_S_DEL_C: /* we're already handling it */ break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(union pfsync_state_union)) full = true; PFSYNC_BUCKET_UNLOCK(b); return (full); } static void pfsync_delete_state(struct pf_kstate *st) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); bool ref = true; PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state_locked(st, 1); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true, b); PFSYNC_BUCKET_UNLOCK(b); return; } if (b->b_len == PFSYNC_MINPKT) callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); switch (st->sync_state) { case PFSYNC_S_INS: /* We never got to tell the world so just forget about it.
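
A condensed view of the coalescing policy in pfsync_update_state() above (sketch, not patch code): a state already queued just rides its existing queue entry, and only two conditions force an early packet:

/* Sketch of the push decision; sc_maxupdates defaults to 128 (set in
 * pfsync_clone_create()) and is configurable via SIOCSETPFSYNC. */
bool sync = false;

if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP &&
    ++st->sync_updates >= sc->sc_maxupdates)
	sync = true;			/* hot TCP state: stop batching */
if (sync || (time_uptime - st->pfsync_time) < 2)
	pfsync_push(b);			/* schedule the pfsync swi      */
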
*/ pfsync_q_del(st, true, b); break; case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_IACK: pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_DEL_C, ref); break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_clear_states(u_int32_t creatorid, const char *ifname) { struct { struct pfsync_subheader subh; struct pfsync_clr clr; } __packed r; bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_CLR; r.subh.count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); r.clr.creatorid = creatorid; pfsync_send_plus(&r, sizeof(r)); } static enum pfsync_q_id pfsync_sstate_to_qid(u_int8_t sync_state) { struct pfsync_softc *sc = V_pfsyncif; switch (sync_state) { case PFSYNC_S_INS: switch (sc->sc_version) { case PFSYNC_MSG_VERSION_1301: return PFSYNC_Q_INS_1301; case PFSYNC_MSG_VERSION_1400: return PFSYNC_Q_INS_1400; + case PFSYNC_MSG_VERSION_1500: + return PFSYNC_Q_INS_1500; } break; case PFSYNC_S_IACK: return PFSYNC_Q_IACK; case PFSYNC_S_UPD: switch (sc->sc_version) { case PFSYNC_MSG_VERSION_1301: return PFSYNC_Q_UPD_1301; case PFSYNC_MSG_VERSION_1400: return PFSYNC_Q_UPD_1400; + case PFSYNC_MSG_VERSION_1500: + return PFSYNC_Q_UPD_1500; } break; case PFSYNC_S_UPD_C: return PFSYNC_Q_UPD_C; case PFSYNC_S_DEL_C: return PFSYNC_Q_DEL_C; default: panic("%s: Unsupported st->sync_state 0x%02x", __func__, sync_state); } panic("%s: Unsupported pfsync_msg_version %d", __func__, sc->sc_version); } static void pfsync_q_ins(struct pf_kstate *st, int sync_state, bool ref) { enum pfsync_q_id q = pfsync_sstate_to_qid(sync_state); struct pfsync_softc *sc = V_pfsyncif; size_t nlen = pfsync_qs[q].len; struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PFSYNC_BUCKET_LOCK_ASSERT(b); KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", b->b_len)); if (TAILQ_EMPTY(&b->b_qs[q])) nlen += sizeof(struct pfsync_subheader); if (b->b_len + nlen > sc->sc_ifp->if_mtu) { pfsync_sendout(1, b->b_id); nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; } b->b_len += nlen; st->sync_state = pfsync_qid_sstate[q]; TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); if (ref) pf_ref_state(st); } static void pfsync_q_del(struct pf_kstate *st, bool unref, struct pfsync_bucket *b) { enum pfsync_q_id q; PFSYNC_BUCKET_LOCK_ASSERT(b); KASSERT(st->sync_state != PFSYNC_S_NONE, ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); q = pfsync_sstate_to_qid(st->sync_state); b->b_len -= pfsync_qs[q].len; TAILQ_REMOVE(&b->b_qs[q], st, sync_list); st->sync_state = PFSYNC_S_NONE; if (unref) pf_release_state(st); if (TAILQ_EMPTY(&b->b_qs[q])) b->b_len -= sizeof(struct pfsync_subheader); } static void pfsync_bulk_start(void) { struct pfsync_softc *sc = V_pfsyncif; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received bulk update request\n"); PFSYNC_BLOCK(sc); sc->sc_ureq_received = time_uptime; sc->sc_bulk_hashid = 0; sc->sc_bulk_stateid = 0; pfsync_bulk_status(PFSYNC_BUS_START); callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); PFSYNC_BUNLOCK(sc); } static void pfsync_bulk_update(void *arg) { struct pfsync_softc *sc = arg; struct pf_kstate *s; int i; PFSYNC_BLOCK_ASSERT(sc); CURVNET_SET(sc->sc_ifp->if_vnet); /* * Start with the last state from the previous invocation. * It may have gone; in that case start from the * hash slot.
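
pfsync_bulk_update() below walks the whole state table in chunks, one callout tick at a time; the comment above refers to its two-level resume cursor, sketched here:

/* Two-level resume cursor kept in the softc between callout ticks. */
sc->sc_bulk_stateid = s->id;		/* fine: the exact state   */
sc->sc_bulk_creatorid = s->creatorid;
sc->sc_bulk_hashid = i;			/* coarse: its hash bucket */

/* Next tick, pf_find_state_byid() retries the exact state; if it has
 * been freed meanwhile, the walk restarts at bucket i rather than at
 * the beginning of the table. */
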
*/ s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); if (s != NULL) i = PF_IDHASH(s); else i = sc->sc_bulk_hashid; for (; i <= V_pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; if (s != NULL) PF_HASHROW_ASSERT(ih); else { PF_HASHROW_LOCK(ih); s = LIST_FIRST(&ih->states); } for (; s; s = LIST_NEXT(s, entry)) { if (s->sync_state == PFSYNC_S_NONE && s->timeout < PFTM_MAX && s->pfsync_time <= sc->sc_ureq_received) { if (pfsync_update_state_req(s)) { /* We've filled a packet. */ sc->sc_bulk_hashid = i; sc->sc_bulk_stateid = s->id; sc->sc_bulk_creatorid = s->creatorid; PF_HASHROW_UNLOCK(ih); callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); goto full; } } } PF_HASHROW_UNLOCK(ih); } /* We're done. */ pfsync_bulk_status(PFSYNC_BUS_END); full: CURVNET_RESTORE(); } static void pfsync_bulk_status(u_int8_t status) { struct { struct pfsync_subheader subh; struct pfsync_bus bus; } __packed r; struct pfsync_softc *sc = V_pfsyncif; bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_BUS; r.subh.count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; r.bus.creatorid = V_pf_status.hostid; r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); r.bus.status = status; pfsync_send_plus(&r, sizeof(r)); } static void pfsync_bulk_fail(void *arg) { struct pfsync_softc *sc = arg; struct pfsync_bucket *b = &sc->sc_buckets[0]; CURVNET_SET(sc->sc_ifp->if_vnet); PFSYNC_BLOCK_ASSERT(sc); if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { /* Try again */ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, V_pfsyncif); PFSYNC_BUCKET_LOCK(b); pfsync_request_update(0, 0); PFSYNC_BUCKET_UNLOCK(b); } else { /* Pretend like the transfer was ok. */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; PFSYNC_LOCK(sc); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk fail"); sc->sc_flags |= PFSYNCF_OK; PFSYNC_UNLOCK(sc); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: failed to receive bulk update\n"); } CURVNET_RESTORE(); } static void pfsync_send_plus(void *plus, size_t pluslen) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bucket *b = &sc->sc_buckets[0]; uint8_t *newplus; PFSYNC_BUCKET_LOCK(b); if (b->b_len + pluslen > sc->sc_ifp->if_mtu) pfsync_sendout(1, b->b_id); newplus = malloc(pluslen + b->b_pluslen, M_PFSYNC, M_NOWAIT); if (newplus == NULL) goto out; if (b->b_plus != NULL) { memcpy(newplus, b->b_plus, b->b_pluslen); free(b->b_plus, M_PFSYNC); } else { MPASS(b->b_pluslen == 0); } memcpy(newplus + b->b_pluslen, plus, pluslen); b->b_plus = newplus; b->b_pluslen += pluslen; b->b_len += pluslen; pfsync_sendout(1, b->b_id); out: PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_timeout(void *arg) { struct pfsync_bucket *b = arg; CURVNET_SET(b->b_sc->sc_ifp->if_vnet); PFSYNC_BUCKET_LOCK(b); pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); CURVNET_RESTORE(); } static void pfsync_push(struct pfsync_bucket *b) { PFSYNC_BUCKET_LOCK_ASSERT(b); b->b_flags |= PFSYNCF_BUCKET_PUSH; swi_sched(V_pfsync_swi_cookie, 0); } static void pfsync_push_all(struct pfsync_softc *sc) { int c; struct pfsync_bucket *b; for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; PFSYNC_BUCKET_LOCK(b); pfsync_push(b); PFSYNC_BUCKET_UNLOCK(b); } } static void pfsync_tx(struct pfsync_softc *sc, struct mbuf *m) { struct ip *ip; int af, error = 0; ip = mtod(m, struct ip *); MPASS(ip->ip_v == IPVERSION || ip->ip_v == (IPV6_VERSION >> 4)); af = ip->ip_v == IPVERSION ? 
AF_INET : AF_INET6; /* * We distinguish between a deferral packet and our * own pfsync packet based on M_SKIP_FIREWALL * flag. This is XXX. */ switch (af) { #ifdef INET case AF_INET: if (m->m_flags & M_SKIP_FIREWALL) { error = ip_output(m, NULL, NULL, 0, NULL, NULL); } else { error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL); } break; #endif #ifdef INET6 case AF_INET6: if (m->m_flags & M_SKIP_FIREWALL) { error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } else { error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL); } break; #endif } if (error == 0) V_pfsyncstats.pfsyncs_opackets++; else V_pfsyncstats.pfsyncs_oerrors++; } static void pfsyncintr(void *arg) { struct epoch_tracker et; struct pfsync_softc *sc = arg; struct pfsync_bucket *b; struct mbuf *m, *n; int c; NET_EPOCH_ENTER(et); CURVNET_SET(sc->sc_ifp->if_vnet); for (c = 0; c < pfsync_buckets; c++) { b = &sc->sc_buckets[c]; PFSYNC_BUCKET_LOCK(b); if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { pfsync_sendout(0, b->b_id); b->b_flags &= ~PFSYNCF_BUCKET_PUSH; } _IF_DEQUEUE_ALL(&b->b_snd, m); PFSYNC_BUCKET_UNLOCK(b); for (; m != NULL; m = n) { n = m->m_nextpkt; m->m_nextpkt = NULL; pfsync_tx(sc, m); } } CURVNET_RESTORE(); NET_EPOCH_EXIT(et); } static int pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, struct in_mfilter* imf, struct in6_mfilter* im6f) { #ifdef INET struct ip_moptions *imo = &sc->sc_imo; #endif #ifdef INET6 struct ip6_moptions *im6o = &sc->sc_im6o; struct sockaddr_in6 *syncpeer_sa6 = NULL; #endif if (!(ifp->if_flags & IFF_MULTICAST)) return (EADDRNOTAVAIL); switch (sc->sc_sync_peer.ss_family) { #ifdef INET case AF_INET: { int error; ip_mfilter_init(&imo->imo_head); imo->imo_multicast_vif = -1; if ((error = in_joingroup(ifp, &((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr, NULL, &imf->imf_inm)) != 0) return (error); ip_mfilter_insert(&imo->imo_head, imf); imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; break; } #endif #ifdef INET6 case AF_INET6: { int error; syncpeer_sa6 = (struct sockaddr_in6 *)&sc->sc_sync_peer; if ((error = in6_setscope(&syncpeer_sa6->sin6_addr, ifp, NULL))) return (error); ip6_mfilter_init(&im6o->im6o_head); if ((error = in6_joingroup(ifp, &syncpeer_sa6->sin6_addr, NULL, &(im6f->im6f_in6m), 0)) != 0) return (error); ip6_mfilter_insert(&im6o->im6o_head, im6f); im6o->im6o_multicast_ifp = ifp; im6o->im6o_multicast_hlim = PFSYNC_DFLTTL; im6o->im6o_multicast_loop = 0; break; } #endif } return (0); } static void pfsync_multicast_cleanup(struct pfsync_softc *sc) { #ifdef INET struct ip_moptions *imo = &sc->sc_imo; struct in_mfilter *imf; while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { ip_mfilter_remove(&imo->imo_head, imf); in_leavegroup(imf->imf_inm, NULL); ip_mfilter_free(imf); } imo->imo_multicast_ifp = NULL; #endif #ifdef INET6 struct ip6_moptions *im6o = &sc->sc_im6o; struct in6_mfilter *im6f; while ((im6f = ip6_mfilter_first(&im6o->im6o_head)) != NULL) { ip6_mfilter_remove(&im6o->im6o_head, im6f); in6_leavegroup(im6f->im6f_in6m, NULL); ip6_mfilter_free(im6f); } im6o->im6o_multicast_ifp = NULL; #endif } void pfsync_detach_ifnet(struct ifnet *ifp) { struct pfsync_softc *sc = V_pfsyncif; if (sc == NULL) return; PFSYNC_LOCK(sc); if (sc->sc_sync_if == ifp) { /* We don't need mutlicast cleanup here, because the interface * is going away. We do need to ensure we don't try to do * cleanup later. 
*/ ip_mfilter_init(&sc->sc_imo.imo_head); sc->sc_imo.imo_multicast_ifp = NULL; sc->sc_im6o.im6o_multicast_ifp = NULL; sc->sc_sync_if = NULL; } PFSYNC_UNLOCK(sc); } static int pfsync_pfsyncreq_to_kstatus(struct pfsyncreq *pfsyncr, struct pfsync_kstatus *status) { struct sockaddr_storage sa; status->maxupdates = pfsyncr->pfsyncr_maxupdates; status->flags = pfsyncr->pfsyncr_defer; strlcpy(status->syncdev, pfsyncr->pfsyncr_syncdev, IFNAMSIZ); memset(&sa, 0, sizeof(sa)); if (pfsyncr->pfsyncr_syncpeer.s_addr != 0) { struct sockaddr_in *in = (struct sockaddr_in *)&sa; in->sin_family = AF_INET; in->sin_len = sizeof(*in); in->sin_addr.s_addr = pfsyncr->pfsyncr_syncpeer.s_addr; } status->syncpeer = sa; return 0; } static int pfsync_kstatus_to_softc(struct pfsync_kstatus *status, struct pfsync_softc *sc) { struct ifnet *sifp; struct in_mfilter *imf = NULL; struct in6_mfilter *im6f = NULL; int error; int c; if ((status->maxupdates < 0) || (status->maxupdates > 255)) return (EINVAL); if (status->syncdev[0] == '\0') sifp = NULL; else if ((sifp = ifunit_ref(status->syncdev)) == NULL) return (EINVAL); switch (status->syncpeer.ss_family) { #ifdef INET case AF_UNSPEC: case AF_INET: { struct sockaddr_in *status_sin; status_sin = (struct sockaddr_in *)&(status->syncpeer); if (sifp != NULL) { if (status_sin->sin_addr.s_addr == 0 || status_sin->sin_addr.s_addr == htonl(INADDR_PFSYNC_GROUP)) { status_sin->sin_family = AF_INET; status_sin->sin_len = sizeof(*status_sin); status_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); } if (IN_MULTICAST(ntohl(status_sin->sin_addr.s_addr))) { imf = ip_mfilter_alloc(M_WAITOK, 0, 0); } } break; } #endif #ifdef INET6 case AF_INET6: { struct sockaddr_in6 *status_sin6; status_sin6 = (struct sockaddr_in6*)&(status->syncpeer); if (sifp != NULL) { if (IN6_IS_ADDR_UNSPECIFIED(&status_sin6->sin6_addr) || IN6_ARE_ADDR_EQUAL(&status_sin6->sin6_addr, &in6addr_linklocal_pfsync_group)) { status_sin6->sin6_family = AF_INET6; status_sin6->sin6_len = sizeof(*status_sin6); status_sin6->sin6_addr = in6addr_linklocal_pfsync_group; } if (IN6_IS_ADDR_MULTICAST(&status_sin6->sin6_addr)) { im6f = ip6_mfilter_alloc(M_WAITOK, 0, 0); } } break; } #endif } PFSYNC_LOCK(sc); switch (status->version) { case PFSYNC_MSG_VERSION_UNSPECIFIED: sc->sc_version = PFSYNC_MSG_VERSION_DEFAULT; break; case PFSYNC_MSG_VERSION_1301: case PFSYNC_MSG_VERSION_1400: + case PFSYNC_MSG_VERSION_1500: sc->sc_version = status->version; break; default: PFSYNC_UNLOCK(sc); return (EINVAL); } switch (status->syncpeer.ss_family) { case AF_INET: { struct sockaddr_in *status_sin = (struct sockaddr_in *)&(status->syncpeer); struct sockaddr_in *sc_sin = (struct sockaddr_in *)&sc->sc_sync_peer; sc_sin->sin_family = AF_INET; sc_sin->sin_len = sizeof(*sc_sin); if (status_sin->sin_addr.s_addr == 0) { sc_sin->sin_addr.s_addr = htonl(INADDR_PFSYNC_GROUP); } else { sc_sin->sin_addr.s_addr = status_sin->sin_addr.s_addr; } break; } case AF_INET6: { struct sockaddr_in6 *status_sin = (struct sockaddr_in6 *)&(status->syncpeer); struct sockaddr_in6 *sc_sin = (struct sockaddr_in6 *)&sc->sc_sync_peer; sc_sin->sin6_family = AF_INET6; sc_sin->sin6_len = sizeof(*sc_sin); if(IN6_IS_ADDR_UNSPECIFIED(&status_sin->sin6_addr)) { sc_sin->sin6_addr = in6addr_linklocal_pfsync_group; } else { sc_sin->sin6_addr = status_sin->sin6_addr; } break; } } sc->sc_maxupdates = status->maxupdates; if (status->flags & PFSYNCF_DEFER) { sc->sc_flags |= PFSYNCF_DEFER; V_pfsync_defer_ptr = pfsync_defer; } else { sc->sc_flags &= ~PFSYNCF_DEFER; V_pfsync_defer_ptr = NULL; } if 
(sifp == NULL) { if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = NULL; pfsync_multicast_cleanup(sc); PFSYNC_UNLOCK(sc); return (0); } for (c = 0; c < pfsync_buckets; c++) { PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && (sifp->if_mtu < sc->sc_ifp->if_mtu || (sc->sc_sync_if != NULL && sifp->if_mtu < sc->sc_sync_if->if_mtu) || sifp->if_mtu < MCLBYTES - sizeof(struct ip))) pfsync_sendout(1, c); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); } pfsync_multicast_cleanup(sc); if (((sc->sc_sync_peer.ss_family == AF_INET) && IN_MULTICAST(ntohl(((struct sockaddr_in *) &sc->sc_sync_peer)->sin_addr.s_addr))) || ((sc->sc_sync_peer.ss_family == AF_INET6) && IN6_IS_ADDR_MULTICAST(&((struct sockaddr_in6*) &sc->sc_sync_peer)->sin6_addr))) { error = pfsync_multicast_setup(sc, sifp, imf, im6f); if (error) { if_rele(sifp); PFSYNC_UNLOCK(sc); #ifdef INET if (imf != NULL) ip_mfilter_free(imf); #endif #ifdef INET6 if (im6f != NULL) ip6_mfilter_free(im6f); #endif return (error); } } if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = sifp; switch (sc->sc_sync_peer.ss_family) { #ifdef INET case AF_INET: { struct ip *ip; ip = &sc->sc_template.ipv4; bzero(ip, sizeof(*ip)); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(sc->sc_template.ipv4) >> 2; ip->ip_tos = IPTOS_LOWDELAY; /* len and id are set later. */ ip->ip_off = htons(IP_DF); ip->ip_ttl = PFSYNC_DFLTTL; ip->ip_p = IPPROTO_PFSYNC; ip->ip_src.s_addr = INADDR_ANY; ip->ip_dst = ((struct sockaddr_in *)&sc->sc_sync_peer)->sin_addr; break; } #endif #ifdef INET6 case AF_INET6: { struct ip6_hdr *ip6; ip6 = &sc->sc_template.ipv6; bzero(ip6, sizeof(*ip6)); ip6->ip6_vfc = IPV6_VERSION; ip6->ip6_hlim = PFSYNC_DFLTTL; ip6->ip6_nxt = IPPROTO_PFSYNC; ip6->ip6_dst = ((struct sockaddr_in6 *)&sc->sc_sync_peer)->sin6_addr; struct epoch_tracker et; NET_EPOCH_ENTER(et); in6_selectsrc_addr(if_getfib(sc->sc_sync_if), &ip6->ip6_dst, 0, sc->sc_sync_if, &ip6->ip6_src, NULL); NET_EPOCH_EXIT(et); break; } #endif } /* Request a full state table update. 
*/ if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(V_pfsync_carp_adj, "pfsync bulk start"); sc->sc_flags &= ~PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: requesting bulk update\n"); PFSYNC_UNLOCK(sc); PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); pfsync_request_update(0, 0); PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); PFSYNC_BLOCK(sc); sc->sc_ureq_sent = time_uptime; callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); PFSYNC_BUNLOCK(sc); return (0); } static void pfsync_pointers_init(void) { PF_RULES_WLOCK(); V_pfsync_state_import_ptr = pfsync_state_import; V_pfsync_insert_state_ptr = pfsync_insert_state; V_pfsync_update_state_ptr = pfsync_update_state; V_pfsync_delete_state_ptr = pfsync_delete_state; V_pfsync_clear_states_ptr = pfsync_clear_states; V_pfsync_defer_ptr = pfsync_defer; PF_RULES_WUNLOCK(); } static void pfsync_pointers_uninit(void) { PF_RULES_WLOCK(); V_pfsync_state_import_ptr = NULL; V_pfsync_insert_state_ptr = NULL; V_pfsync_update_state_ptr = NULL; V_pfsync_delete_state_ptr = NULL; V_pfsync_clear_states_ptr = NULL; V_pfsync_defer_ptr = NULL; PF_RULES_WUNLOCK(); } static void vnet_pfsync_init(const void *unused __unused) { int error; V_pfsync_cloner = if_clone_simple(pfsyncname, pfsync_clone_create, pfsync_clone_destroy, 1); error = swi_add(&V_pfsync_swi_ie, pfsyncname, pfsyncintr, V_pfsyncif, SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); if (error) { if_clone_detach(V_pfsync_cloner); log(LOG_INFO, "swi_add() failed in %s\n", __func__); } pfsync_pointers_init(); } VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, vnet_pfsync_init, NULL); static void vnet_pfsync_uninit(const void *unused __unused) { int ret __diagused; pfsync_pointers_uninit(); if_clone_detach(V_pfsync_cloner); ret = swi_remove(V_pfsync_swi_cookie); MPASS(ret == 0); ret = intr_event_destroy(V_pfsync_swi_ie); MPASS(ret == 0); } VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, vnet_pfsync_uninit, NULL); static int pfsync_init(void) { int error; pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; #ifdef INET error = ipproto_register(IPPROTO_PFSYNC, pfsync_input, NULL); if (error) return (error); #endif #ifdef INET6 error = ip6proto_register(IPPROTO_PFSYNC, pfsync6_input, NULL); if (error) { ipproto_unregister(IPPROTO_PFSYNC); return (error); } #endif return (0); } static void pfsync_uninit(void) { pfsync_detach_ifnet_ptr = NULL; #ifdef INET ipproto_unregister(IPPROTO_PFSYNC); #endif #ifdef INET6 ip6proto_unregister(IPPROTO_PFSYNC); #endif } static int pfsync_modevent(module_t mod, int type, void *data) { int error = 0; switch (type) { case MOD_LOAD: error = pfsync_init(); break; case MOD_UNLOAD: pfsync_uninit(); break; default: error = EINVAL; break; } return (error); } static moduledata_t pfsync_mod = { pfsyncname, pfsync_modevent, 0 }; #define PFSYNC_MODVER 1 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); MODULE_VERSION(pfsync, PFSYNC_MODVER); MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c index c7aa3e1822e7..d7bb62977afb 100644 --- a/sys/netpfil/pf/pf_ioctl.c +++ b/sys/netpfil/pf/pf_ioctl.c @@ -1,7034 +1,7150 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2001 Daniel Hartmeier * Copyright (c) 2002,2003 Henning Brauer * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Effort sponsored in part by the Defense Advanced Research Projects * Agency (DARPA) and Air Force Research Laboratory, Air Force * Materiel Command, USAF, under agreement number F30602-01-2-0537. * * $OpenBSD: pf_ioctl.c,v 1.213 2009/02/15 21:46:12 mbalmer Exp $ */ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_bpf.h" #include "opt_pf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif /* INET6 */ #ifdef ALTQ #include #endif SDT_PROBE_DEFINE3(pf, ioctl, ioctl, error, "int", "int", "int"); SDT_PROBE_DEFINE3(pf, ioctl, function, error, "char *", "int", "int"); SDT_PROBE_DEFINE2(pf, ioctl, addrule, error, "int", "int"); SDT_PROBE_DEFINE2(pf, ioctl, nvchk, error, "int", "int"); static struct pf_kpool *pf_get_kpool(const char *, u_int32_t, u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t, int); static void pf_mv_kpool(struct pf_kpalist *, struct pf_kpalist *); static void pf_empty_kpool(struct pf_kpalist *); static int pfioctl(struct cdev *, u_long, caddr_t, int, struct thread *); static int pf_begin_eth(uint32_t *, const char *); static int pf_rollback_eth(uint32_t, const char *); static int pf_commit_eth(uint32_t, const char *); static void pf_free_eth_rule(struct pf_keth_rule *); #ifdef ALTQ static int pf_begin_altq(u_int32_t *); static int pf_rollback_altq(u_int32_t); static int pf_commit_altq(u_int32_t); static int pf_enable_altq(struct pf_altq *); static int pf_disable_altq(struct pf_altq *); -static uint16_t pf_qname2qid(const char *); static void pf_qid_unref(uint16_t); #endif /* ALTQ */ static int pf_begin_rules(u_int32_t *, int, const char *); static int pf_rollback_rules(u_int32_t, int, char *); static int pf_setup_pfsync_matching(struct pf_kruleset *); static void pf_hash_rule_rolling(MD5_CTX *, struct pf_krule *); static void pf_hash_rule(struct pf_krule *); static void pf_hash_rule_addr(MD5_CTX *, struct pf_rule_addr *); static int pf_commit_rules(u_int32_t, int, 
char *);
static int	pf_addr_setup(struct pf_kruleset *, struct pf_addr_wrap *,
		    sa_family_t);
static void	pf_src_node_copy(const struct pf_ksrc_node *,
		    struct pf_src_node *);
#ifdef ALTQ
static int	pf_export_kaltq(struct pf_altq *, struct pfioc_altq_v1 *,
		    size_t);
static int	pf_import_kaltq(struct pfioc_altq_v1 *, struct pf_altq *,
		    size_t);
#endif /* ALTQ */

VNET_DEFINE(struct pf_krule, pf_default_rule);

static __inline int	pf_krule_compare(struct pf_krule *, struct pf_krule *);
RB_GENERATE(pf_krule_global, pf_krule, entry_global, pf_krule_compare);

#ifdef ALTQ
VNET_DEFINE_STATIC(int, pf_altq_running);
#define	V_pf_altq_running	VNET(pf_altq_running)
#endif

#define	TAGID_MAX	50000
struct pf_tagname {
	TAILQ_ENTRY(pf_tagname)	namehash_entries;
	TAILQ_ENTRY(pf_tagname)	taghash_entries;
	char			name[PF_TAG_NAME_SIZE];
	uint16_t		tag;
	int			ref;
};

struct pf_tagset {
	TAILQ_HEAD(, pf_tagname)	*namehash;
	TAILQ_HEAD(, pf_tagname)	*taghash;
	unsigned int			 mask;
	uint32_t			 seed;
	BITSET_DEFINE(, TAGID_MAX)	 avail;
};

VNET_DEFINE(struct pf_tagset, pf_tags);
#define	V_pf_tags	VNET(pf_tags)
static unsigned int	pf_rule_tag_hashsize;
#define	PF_RULE_TAG_HASH_SIZE_DEFAULT	128
SYSCTL_UINT(_net_pf, OID_AUTO, rule_tag_hashsize, CTLFLAG_RDTUN,
    &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT,
    "Size of pf(4) rule tag hashtable");

#ifdef ALTQ
VNET_DEFINE(struct pf_tagset, pf_qids);
#define	V_pf_qids	VNET(pf_qids)
static unsigned int	pf_queue_tag_hashsize;
#define	PF_QUEUE_TAG_HASH_SIZE_DEFAULT	128
SYSCTL_UINT(_net_pf, OID_AUTO, queue_tag_hashsize, CTLFLAG_RDTUN,
    &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT,
    "Size of pf(4) queue tag hashtable");
#endif
VNET_DEFINE(uma_zone_t, pf_tag_z);
#define	V_pf_tag_z	VNET(pf_tag_z)
static MALLOC_DEFINE(M_PFALTQ, "pf_altq", "pf(4) altq configuration db");
static MALLOC_DEFINE(M_PFRULE, "pf_rule", "pf(4) rules");
MALLOC_DEFINE(M_PF, "pf", "pf(4)");

#if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
#error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
#endif

VNET_DEFINE_STATIC(bool, pf_filter_local) = false;
#define V_pf_filter_local	VNET(pf_filter_local)
SYSCTL_BOOL(_net_pf, OID_AUTO, filter_local, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(pf_filter_local), false,
    "Enable filtering for packets delivered to local network stack");

#ifdef PF_DEFAULT_TO_DROP
VNET_DEFINE_STATIC(bool, default_to_drop) = true;
#else
VNET_DEFINE_STATIC(bool, default_to_drop);
#endif
#define	V_default_to_drop VNET(default_to_drop)
SYSCTL_BOOL(_net_pf, OID_AUTO, default_to_drop, CTLFLAG_RDTUN | CTLFLAG_VNET,
    &VNET_NAME(default_to_drop), false,
    "Make the default rule drop all packets.");

static void		 pf_init_tagset(struct pf_tagset *, unsigned int *,
			    unsigned int);
static void		 pf_cleanup_tagset(struct pf_tagset *);
static uint16_t		 tagname2hashindex(const struct pf_tagset *,
			    const char *);
static uint16_t		 tag2hashindex(const struct pf_tagset *, uint16_t);
-static u_int16_t	 tagname2tag(struct pf_tagset *, const char *);
-static u_int16_t	 pf_tagname2tag(const char *);
+static u_int16_t	 tagname2tag(struct pf_tagset *, const char *, bool);
static void		 tag_unref(struct pf_tagset *, u_int16_t);

struct cdev *pf_dev;

/*
 * XXX - These are new and need to be checked when moving to a new version
 */
static void		 pf_clear_all_states(void);
static int		 pf_killstates_row(struct pf_kstate_kill *,
			    struct pf_idhash *);
static int		 pf_killstates_nv(struct pfioc_nv *);
static int		 pf_clearstates_nv(struct pfioc_nv *);
static int		 pf_getstate(struct pfioc_nv *);
static int		 pf_getstatus(struct pfioc_nv *);
static int		 pf_clear_tables(void);
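/*
 * Illustrative aside on struct pf_tagset above: names resolve to 16-bit
 * tags through a hash table keyed by name, the reverse mapping lives in a
 * second table keyed by tag, and free IDs are tracked in the "avail"
 * bitset.  A minimal, hypothetical user-space model of the allocation
 * step (the kernel uses BIT_FFS() over a BITSET_DEFINE vector, as seen in
 * tagname2tag() further down):
 *
 *	uint16_t
 *	first_free_tag(uint64_t avail)
 *	{
 *		for (unsigned int i = 0; i < 64; i++)
 *			if (avail & (1ULL << i))
 *				return (i + 1);	// tags are 1-based
 *		return (0);		// 0 means the tag space is full
 *	}
 *
 * Scanning from the lowest set bit keeps allocated tags dense, which is
 * exactly the "avoid fragmentation" property tagname2tag() relies on.
 */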
static void		 pf_kill_srcnodes(struct pfioc_src_node_kill *);
static int		 pf_keepcounters(struct pfioc_nv *);
static void		 pf_tbladdr_copyout(struct pf_addr_wrap *);

/*
 * Wrapper functions for pfil(9) hooks
 */
static pfil_return_t pf_eth_check_in(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
static pfil_return_t pf_eth_check_out(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
#ifdef INET
static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
#endif
#ifdef INET6
static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp,
    int flags, void *ruleset __unused, struct inpcb *inp);
#endif

static void		hook_pf_eth(void);
static void		hook_pf(void);
static void		dehook_pf_eth(void);
static void		dehook_pf(void);
static int		shutdown_pf(void);
static int		pf_load(void);
static void		pf_unload(void);

static struct cdevsw pf_cdevsw = {
	.d_ioctl =	pfioctl,
	.d_name =	PF_NAME,
	.d_version =	D_VERSION,
};

VNET_DEFINE_STATIC(bool, pf_pfil_hooked);
#define V_pf_pfil_hooked	VNET(pf_pfil_hooked)
VNET_DEFINE_STATIC(bool, pf_pfil_eth_hooked);
#define V_pf_pfil_eth_hooked	VNET(pf_pfil_eth_hooked)

/*
 * We need a flag that is neither hooked nor running to know when
 * the VNET is "valid". We primarily need this to control (global)
 * external event, e.g., eventhandlers.
 */
VNET_DEFINE(int, pf_vnet_active);
#define V_pf_vnet_active	VNET(pf_vnet_active)

int pf_end_threads;
struct proc *pf_purge_proc;

VNET_DEFINE(struct rmlock, pf_rules_lock);
+VNET_DEFINE(struct rmlock, pf_tags_lock);
VNET_DEFINE_STATIC(struct sx, pf_ioctl_lock);
#define	V_pf_ioctl_lock		VNET(pf_ioctl_lock)
struct sx			pf_end_lock;

/* pfsync */
VNET_DEFINE(pfsync_state_import_t *, pfsync_state_import_ptr);
VNET_DEFINE(pfsync_insert_state_t *, pfsync_insert_state_ptr);
VNET_DEFINE(pfsync_update_state_t *, pfsync_update_state_ptr);
VNET_DEFINE(pfsync_delete_state_t *, pfsync_delete_state_ptr);
VNET_DEFINE(pfsync_clear_states_t *, pfsync_clear_states_ptr);
VNET_DEFINE(pfsync_defer_t *, pfsync_defer_ptr);
VNET_DEFINE(pflow_export_state_t *, pflow_export_state_ptr);
pfsync_detach_ifnet_t *pfsync_detach_ifnet_ptr;

/* pflog */
pflog_packet_t *pflog_packet_ptr = NULL;

/*
 * Copy a user-provided string, returning an error if truncation would occur.
 * Avoid scanning past "sz" bytes in the source string since there's no
 * guarantee that it's nul-terminated.
*/ static int pf_user_strcpy(char *dst, const char *src, size_t sz) { if (strnlen(src, sz) == sz) return (EINVAL); (void)strlcpy(dst, src, sz); return (0); } static void pfattach_vnet(void) { u_int32_t *my_timeout = V_pf_default_rule.timeout; bzero(&V_pf_status, sizeof(V_pf_status)); pf_initialize(); pfr_initialize(); pfi_initialize_vnet(); pf_normalize_init(); pf_syncookies_init(); V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; V_pf_limits[PF_LIMIT_SRC_NODES].limit = PFSNODE_HIWAT; V_pf_limits[PF_LIMIT_ANCHORS].limit = PF_ANCHOR_HIWAT; V_pf_limits[PF_LIMIT_ETH_ANCHORS].limit = PF_ANCHOR_HIWAT; RB_INIT(&V_pf_anchors); pf_init_kruleset(&pf_main_ruleset); pf_init_keth(V_pf_keth); /* default rule should never be garbage collected */ V_pf_default_rule.entries.tqe_prev = &V_pf_default_rule.entries.tqe_next; V_pf_default_rule.action = V_default_to_drop ? PF_DROP : PF_PASS; V_pf_default_rule.nr = (uint32_t)-1; V_pf_default_rule.rtableid = -1; pf_counter_u64_init(&V_pf_default_rule.evaluations, M_WAITOK); for (int i = 0; i < 2; i++) { pf_counter_u64_init(&V_pf_default_rule.packets[i], M_WAITOK); pf_counter_u64_init(&V_pf_default_rule.bytes[i], M_WAITOK); } V_pf_default_rule.states_cur = counter_u64_alloc(M_WAITOK); V_pf_default_rule.states_tot = counter_u64_alloc(M_WAITOK); for (pf_sn_types_t sn_type = 0; sn_type= PF_RULESET_MAX) return (NULL); if (active) { if (check_ticket && ticket != ruleset->rules[rs_num].active.ticket) return (NULL); if (r_last) rule = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, pf_krulequeue); else rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); } else { if (check_ticket && ticket != ruleset->rules[rs_num].inactive.ticket) return (NULL); if (r_last) rule = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_krulequeue); else rule = TAILQ_FIRST(ruleset->rules[rs_num].inactive.ptr); } if (!r_last) { while ((rule != NULL) && (rule->nr != rule_number)) rule = TAILQ_NEXT(rule, entries); } if (rule == NULL) return (NULL); switch (which) { case PF_RDR: return (&rule->rdr); case PF_NAT: return (&rule->nat); case PF_RT: return (&rule->route); default: panic("Unknow pool type %d", which); } } static void pf_mv_kpool(struct pf_kpalist *poola, struct pf_kpalist *poolb) { struct pf_kpooladdr *mv_pool_pa; while ((mv_pool_pa = TAILQ_FIRST(poola)) != NULL) { TAILQ_REMOVE(poola, mv_pool_pa, entries); TAILQ_INSERT_TAIL(poolb, mv_pool_pa, entries); } } static void pf_empty_kpool(struct pf_kpalist *poola) { struct pf_kpooladdr *pa; while ((pa = TAILQ_FIRST(poola)) != NULL) { switch (pa->addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(pa->addr.p.dyn); break; case PF_ADDR_TABLE: /* XXX: this could be unfinished pooladdr on pabuf */ if (pa->addr.p.tbl != NULL) pfr_detach_table(pa->addr.p.tbl); break; } if (pa->kif) pfi_kkif_unref(pa->kif); TAILQ_REMOVE(poola, pa, entries); free(pa, M_PFRULE); } } static void pf_unlink_rule_locked(struct pf_krulequeue *rulequeue, struct pf_krule *rule) { PF_RULES_WASSERT(); PF_UNLNKDRULES_ASSERT(); TAILQ_REMOVE(rulequeue, rule, entries); rule->rule_ref |= PFRULE_REFS; TAILQ_INSERT_TAIL(&V_pf_unlinked_rules, rule, entries); } static void pf_unlink_rule(struct pf_krulequeue *rulequeue, struct pf_krule *rule) { PF_RULES_WASSERT(); PF_UNLNKDRULES_LOCK(); pf_unlink_rule_locked(rulequeue, rule); PF_UNLNKDRULES_UNLOCK(); } static void pf_free_eth_rule(struct pf_keth_rule *rule) { PF_RULES_WASSERT(); if (rule == NULL) return; if (rule->tag) tag_unref(&V_pf_tags, rule->tag); if (rule->match_tag) tag_unref(&V_pf_tags, rule->match_tag); #ifdef ALTQ 
pf_qid_unref(rule->qid); #endif if (rule->bridge_to) pfi_kkif_unref(rule->bridge_to); if (rule->kif) pfi_kkif_unref(rule->kif); if (rule->ipsrc.addr.type == PF_ADDR_TABLE) pfr_detach_table(rule->ipsrc.addr.p.tbl); if (rule->ipdst.addr.type == PF_ADDR_TABLE) pfr_detach_table(rule->ipdst.addr.p.tbl); counter_u64_free(rule->evaluations); for (int i = 0; i < 2; i++) { counter_u64_free(rule->packets[i]); counter_u64_free(rule->bytes[i]); } uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp); pf_keth_anchor_remove(rule); free(rule, M_PFRULE); } void pf_free_rule(struct pf_krule *rule) { PF_RULES_WASSERT(); PF_CONFIG_ASSERT(); if (rule->tag) tag_unref(&V_pf_tags, rule->tag); if (rule->match_tag) tag_unref(&V_pf_tags, rule->match_tag); #ifdef ALTQ if (rule->pqid != rule->qid) pf_qid_unref(rule->pqid); pf_qid_unref(rule->qid); #endif switch (rule->src.addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(rule->src.addr.p.dyn); break; case PF_ADDR_TABLE: pfr_detach_table(rule->src.addr.p.tbl); break; } switch (rule->dst.addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(rule->dst.addr.p.dyn); break; case PF_ADDR_TABLE: pfr_detach_table(rule->dst.addr.p.tbl); break; } if (rule->overload_tbl) pfr_detach_table(rule->overload_tbl); if (rule->kif) pfi_kkif_unref(rule->kif); if (rule->rcv_kif) pfi_kkif_unref(rule->rcv_kif); pf_remove_kanchor(rule); pf_empty_kpool(&rule->rdr.list); pf_empty_kpool(&rule->nat.list); pf_empty_kpool(&rule->route.list); pf_krule_free(rule); } static void pf_init_tagset(struct pf_tagset *ts, unsigned int *tunable_size, unsigned int default_size) { unsigned int i; unsigned int hashsize; if (*tunable_size == 0 || !powerof2(*tunable_size)) *tunable_size = default_size; hashsize = *tunable_size; ts->namehash = mallocarray(hashsize, sizeof(*ts->namehash), M_PFHASH, M_WAITOK); ts->taghash = mallocarray(hashsize, sizeof(*ts->taghash), M_PFHASH, M_WAITOK); ts->mask = hashsize - 1; ts->seed = arc4random(); for (i = 0; i < hashsize; i++) { TAILQ_INIT(&ts->namehash[i]); TAILQ_INIT(&ts->taghash[i]); } BIT_FILL(TAGID_MAX, &ts->avail); } static void pf_cleanup_tagset(struct pf_tagset *ts) { unsigned int i; unsigned int hashsize; struct pf_tagname *t, *tmp; /* * Only need to clean up one of the hashes as each tag is hashed * into each table. */ hashsize = ts->mask + 1; for (i = 0; i < hashsize; i++) TAILQ_FOREACH_SAFE(t, &ts->namehash[i], namehash_entries, tmp) uma_zfree(V_pf_tag_z, t); free(ts->namehash, M_PFHASH); free(ts->taghash, M_PFHASH); } static uint16_t tagname2hashindex(const struct pf_tagset *ts, const char *tagname) { size_t len; len = strnlen(tagname, PF_TAG_NAME_SIZE - 1); return (murmur3_32_hash(tagname, len, ts->seed) & ts->mask); } static uint16_t tag2hashindex(const struct pf_tagset *ts, uint16_t tag) { return (tag & ts->mask); } static u_int16_t -tagname2tag(struct pf_tagset *ts, const char *tagname) +tagname2tag(struct pf_tagset *ts, const char *tagname, bool add_new) { struct pf_tagname *tag; u_int32_t index; u_int16_t new_tagid; - PF_RULES_WASSERT(); + PF_TAGS_RLOCK_TRACKER; + + PF_TAGS_RLOCK(); index = tagname2hashindex(ts, tagname); TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries) if (strcmp(tagname, tag->name) == 0) { tag->ref++; - return (tag->tag); + new_tagid = tag->tag; + PF_TAGS_RUNLOCK(); + return (new_tagid); + } + + /* + * When used for pfsync with queues we must not create new entries. + * Pf tags can be created just fine by this function, but queues + * require additional configuration. 
If they are missing on the target
+	 * system we just ignore them.
+	 */
+	if (add_new == false) {
+		printf("%s: Not creating a new tag\n", __func__);
+		PF_TAGS_RUNLOCK();
+		return (0);
+	}
+
+	/*
+	 * If a new entry must be created do it under a write lock.
+	 * But first search again, somebody could have created the tag
+	 * between unlocking the read lock and locking the write lock.
+	 */
+	PF_TAGS_RUNLOCK();
+	PF_TAGS_WLOCK();
+	TAILQ_FOREACH(tag, &ts->namehash[index], namehash_entries)
+		if (strcmp(tagname, tag->name) == 0) {
+			tag->ref++;
+			new_tagid = tag->tag;
+			PF_TAGS_WUNLOCK();
+			return (new_tagid);
		}

	/*
	 * new entry
	 *
	 * to avoid fragmentation, we do a linear search from the beginning
	 * and take the first free slot we find.
	 */
	new_tagid = BIT_FFS(TAGID_MAX, &ts->avail);
	/*
	 * Tags are 1-based, with valid tags in the range [1..TAGID_MAX].
	 * BIT_FFS() returns a 1-based bit number, with 0 indicating no bits
	 * set. It may also return a bit number greater than TAGID_MAX due
	 * to rounding of the number of bits in the vector up to a multiple
	 * of the vector word size at declaration/allocation time.
	 */
-	if ((new_tagid == 0) || (new_tagid > TAGID_MAX))
+	if ((new_tagid == 0) || (new_tagid > TAGID_MAX)) {
+		PF_TAGS_WUNLOCK();
		return (0);
+	}

	/* Mark the tag as in use. Bits are 0-based for BIT_CLR() */
	BIT_CLR(TAGID_MAX, new_tagid - 1, &ts->avail);

	/* allocate and fill new struct pf_tagname */
	tag = uma_zalloc(V_pf_tag_z, M_NOWAIT);
-	if (tag == NULL)
+	if (tag == NULL) {
+		PF_TAGS_WUNLOCK();
		return (0);
+	}
	strlcpy(tag->name, tagname, sizeof(tag->name));
	tag->tag = new_tagid;
	tag->ref = 1;

	/* Insert into namehash */
	TAILQ_INSERT_TAIL(&ts->namehash[index], tag, namehash_entries);

	/* Insert into taghash */
	index = tag2hashindex(ts, new_tagid);
	TAILQ_INSERT_TAIL(&ts->taghash[index], tag, taghash_entries);

-	return (tag->tag);
+	PF_TAGS_WUNLOCK();
+	return (new_tagid);
+}
+
+static char *
+tag2tagname(struct pf_tagset *ts, u_int16_t tag)
+{
+	struct pf_tagname *t;
+	uint16_t index;
+
+	PF_TAGS_RLOCK_TRACKER;
+
+	PF_TAGS_RLOCK();
+
+	index = tag2hashindex(ts, tag);
+	TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries)
+		if (tag == t->tag) {
+			PF_TAGS_RUNLOCK();
+			return (t->name);
+		}
+
+	PF_TAGS_RUNLOCK();
+	return (NULL);
}

static void
tag_unref(struct pf_tagset *ts, u_int16_t tag)
{
	struct pf_tagname *t;
	uint16_t index;

-	PF_RULES_WASSERT();
+	PF_TAGS_WLOCK();

	index = tag2hashindex(ts, tag);
	TAILQ_FOREACH(t, &ts->taghash[index], taghash_entries)
		if (tag == t->tag) {
			if (--t->ref == 0) {
				TAILQ_REMOVE(&ts->taghash[index], t,
				    taghash_entries);
				index = tagname2hashindex(ts, t->name);
				TAILQ_REMOVE(&ts->namehash[index], t,
				    namehash_entries);
				/* Bits are 0-based for BIT_SET() */
				BIT_SET(TAGID_MAX, tag - 1, &ts->avail);
				uma_zfree(V_pf_tag_z, t);
			}
			break;
		}
+
+	PF_TAGS_WUNLOCK();
}

-static uint16_t
+uint16_t
pf_tagname2tag(const char *tagname)
{
-	return (tagname2tag(&V_pf_tags, tagname));
+	return (tagname2tag(&V_pf_tags, tagname, true));
+}
+
+static const char *
+pf_tag2tagname(uint16_t tag)
+{
+	return (tag2tagname(&V_pf_tags, tag));
}

static int
pf_begin_eth(uint32_t *ticket, const char *anchor)
{
	struct pf_keth_rule *rule, *tmp;
	struct pf_keth_ruleset *rs;

	PF_RULES_WASSERT();

	rs = pf_find_or_create_keth_ruleset(anchor);
	if (rs == NULL)
		return (EINVAL);

	/* Purge old inactive rules.
*/ TAILQ_FOREACH_SAFE(rule, rs->inactive.rules, entries, tmp) { TAILQ_REMOVE(rs->inactive.rules, rule, entries); pf_free_eth_rule(rule); } *ticket = ++rs->inactive.ticket; rs->inactive.open = 1; return (0); } static int pf_rollback_eth(uint32_t ticket, const char *anchor) { struct pf_keth_rule *rule, *tmp; struct pf_keth_ruleset *rs; PF_RULES_WASSERT(); rs = pf_find_keth_ruleset(anchor); if (rs == NULL) return (EINVAL); if (!rs->inactive.open || ticket != rs->inactive.ticket) return (0); /* Purge old inactive rules. */ TAILQ_FOREACH_SAFE(rule, rs->inactive.rules, entries, tmp) { TAILQ_REMOVE(rs->inactive.rules, rule, entries); pf_free_eth_rule(rule); } rs->inactive.open = 0; pf_remove_if_empty_keth_ruleset(rs); return (0); } #define PF_SET_SKIP_STEPS(i) \ do { \ while (head[i] != cur) { \ head[i]->skip[i].ptr = cur; \ head[i] = TAILQ_NEXT(head[i], entries); \ } \ } while (0) static void pf_eth_calc_skip_steps(struct pf_keth_ruleq *rules) { struct pf_keth_rule *cur, *prev, *head[PFE_SKIP_COUNT]; int i; cur = TAILQ_FIRST(rules); prev = cur; for (i = 0; i < PFE_SKIP_COUNT; ++i) head[i] = cur; while (cur != NULL) { if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) PF_SET_SKIP_STEPS(PFE_SKIP_IFP); if (cur->direction != prev->direction) PF_SET_SKIP_STEPS(PFE_SKIP_DIR); if (cur->proto != prev->proto) PF_SET_SKIP_STEPS(PFE_SKIP_PROTO); if (memcmp(&cur->src, &prev->src, sizeof(cur->src)) != 0) PF_SET_SKIP_STEPS(PFE_SKIP_SRC_ADDR); if (memcmp(&cur->dst, &prev->dst, sizeof(cur->dst)) != 0) PF_SET_SKIP_STEPS(PFE_SKIP_DST_ADDR); if (cur->ipsrc.neg != prev->ipsrc.neg || pf_addr_wrap_neq(&cur->ipsrc.addr, &prev->ipsrc.addr)) PF_SET_SKIP_STEPS(PFE_SKIP_SRC_IP_ADDR); if (cur->ipdst.neg != prev->ipdst.neg || pf_addr_wrap_neq(&cur->ipdst.addr, &prev->ipdst.addr)) PF_SET_SKIP_STEPS(PFE_SKIP_DST_IP_ADDR); prev = cur; cur = TAILQ_NEXT(cur, entries); } for (i = 0; i < PFE_SKIP_COUNT; ++i) PF_SET_SKIP_STEPS(i); } static int pf_commit_eth(uint32_t ticket, const char *anchor) { struct pf_keth_ruleq *rules; struct pf_keth_ruleset *rs; rs = pf_find_keth_ruleset(anchor); if (rs == NULL) { return (EINVAL); } if (!rs->inactive.open || ticket != rs->inactive.ticket) return (EBUSY); PF_RULES_WASSERT(); pf_eth_calc_skip_steps(rs->inactive.rules); rules = rs->active.rules; atomic_store_ptr(&rs->active.rules, rs->inactive.rules); rs->inactive.rules = rules; rs->inactive.ticket = rs->active.ticket; return (pf_rollback_eth(rs->inactive.ticket, rs->anchor ? 
rs->anchor->path : "")); } #ifdef ALTQ -static uint16_t -pf_qname2qid(const char *qname) +uint16_t +pf_qname2qid(const char *qname, bool add_new) +{ + return (tagname2tag(&V_pf_qids, qname, add_new)); +} + +static const char * +pf_qid2qname(uint16_t qid) { - return (tagname2tag(&V_pf_qids, qname)); + return (tag2tagname(&V_pf_qids, qid)); } static void pf_qid_unref(uint16_t qid) { tag_unref(&V_pf_qids, qid); } static int pf_begin_altq(u_int32_t *ticket) { struct pf_altq *altq, *tmp; int error = 0; PF_RULES_WASSERT(); /* Purge the old altq lists */ TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); } free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altq_ifs_inactive); TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altqs_inactive); if (error) return (error); *ticket = ++V_ticket_altqs_inactive; V_altqs_inactive_open = 1; return (0); } static int pf_rollback_altq(u_int32_t ticket) { struct pf_altq *altq, *tmp; int error = 0; PF_RULES_WASSERT(); if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) return (0); /* Purge the old altq lists */ TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ error = altq_remove(altq); } free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altq_ifs_inactive); TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altqs_inactive); V_altqs_inactive_open = 0; return (error); } static int pf_commit_altq(u_int32_t ticket) { struct pf_altqqueue *old_altqs, *old_altq_ifs; struct pf_altq *altq, *tmp; int err, error = 0; PF_RULES_WASSERT(); if (!V_altqs_inactive_open || ticket != V_ticket_altqs_inactive) return (EBUSY); /* swap altqs, keep the old. 
*/ old_altqs = V_pf_altqs_active; old_altq_ifs = V_pf_altq_ifs_active; V_pf_altqs_active = V_pf_altqs_inactive; V_pf_altq_ifs_active = V_pf_altq_ifs_inactive; V_pf_altqs_inactive = old_altqs; V_pf_altq_ifs_inactive = old_altq_ifs; V_ticket_altqs_active = V_ticket_altqs_inactive; /* Attach new disciplines */ TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* attach the discipline */ error = altq_pfattach(altq); if (error == 0 && V_pf_altq_running) error = pf_enable_altq(altq); if (error != 0) return (error); } } /* Purge the old altq lists */ TAILQ_FOREACH_SAFE(altq, V_pf_altq_ifs_inactive, entries, tmp) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { /* detach and destroy the discipline */ if (V_pf_altq_running) error = pf_disable_altq(altq); err = altq_pfdetach(altq); if (err != 0 && error == 0) error = err; err = altq_remove(altq); if (err != 0 && error == 0) error = err; } free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altq_ifs_inactive); TAILQ_FOREACH_SAFE(altq, V_pf_altqs_inactive, entries, tmp) { pf_qid_unref(altq->qid); free(altq, M_PFALTQ); } TAILQ_INIT(V_pf_altqs_inactive); V_altqs_inactive_open = 0; return (error); } static int pf_enable_altq(struct pf_altq *altq) { struct ifnet *ifp; struct tb_profile tb; int error = 0; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); if (ifp->if_snd.altq_type != ALTQT_NONE) error = altq_enable(&ifp->if_snd); /* set tokenbucket regulator */ if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) { tb.rate = altq->ifbandwidth; tb.depth = altq->tbrsize; error = tbr_set(&ifp->if_snd, &tb); } return (error); } static int pf_disable_altq(struct pf_altq *altq) { struct ifnet *ifp; struct tb_profile tb; int error; if ((ifp = ifunit(altq->ifname)) == NULL) return (EINVAL); /* * when the discipline is no longer referenced, it was overridden * by a new one. if so, just return. */ if (altq->altq_disc != ifp->if_snd.altq_disc) return (0); error = altq_disable(&ifp->if_snd); if (error == 0) { /* clear tokenbucket regulator */ tb.rate = 0; error = tbr_set(&ifp->if_snd, &tb); } return (error); } static int pf_altq_ifnet_event_add(struct ifnet *ifp, int remove, u_int32_t ticket, struct pf_altq *altq) { struct ifnet *ifp1; int error = 0; /* Deactivate the interface in question */ altq->local_flags &= ~PFALTQ_FLAG_IF_REMOVED; if ((ifp1 = ifunit(altq->ifname)) == NULL || (remove && ifp1 == ifp)) { altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; } else { error = altq_add(ifp1, altq); if (ticket != V_ticket_altqs_inactive) error = EBUSY; if (error) free(altq, M_PFALTQ); } return (error); } void pf_altq_ifnet_event(struct ifnet *ifp, int remove) { struct pf_altq *a1, *a2, *a3; u_int32_t ticket; int error = 0; /* * No need to re-evaluate the configuration for events on interfaces * that do not support ALTQ, as it's not possible for such * interfaces to be part of the configuration. 
*/ if (!ALTQ_IS_READY(&ifp->if_snd)) return; /* Interrupt userland queue modifications */ if (V_altqs_inactive_open) pf_rollback_altq(V_ticket_altqs_inactive); /* Start new altq ruleset */ if (pf_begin_altq(&ticket)) return; /* Copy the current active set */ TAILQ_FOREACH(a1, V_pf_altq_ifs_active, entries) { a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); if (a2 == NULL) { error = ENOMEM; break; } bcopy(a1, a2, sizeof(struct pf_altq)); error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2); if (error) break; TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, a2, entries); } if (error) goto out; TAILQ_FOREACH(a1, V_pf_altqs_active, entries) { a2 = malloc(sizeof(*a2), M_PFALTQ, M_NOWAIT); if (a2 == NULL) { error = ENOMEM; break; } bcopy(a1, a2, sizeof(struct pf_altq)); - if ((a2->qid = pf_qname2qid(a2->qname)) == 0) { + if ((a2->qid = pf_qname2qid(a2->qname, true)) == 0) { error = EBUSY; free(a2, M_PFALTQ); break; } a2->altq_disc = NULL; TAILQ_FOREACH(a3, V_pf_altq_ifs_inactive, entries) { if (strncmp(a3->ifname, a2->ifname, IFNAMSIZ) == 0) { a2->altq_disc = a3->altq_disc; break; } } error = pf_altq_ifnet_event_add(ifp, remove, ticket, a2); if (error) break; TAILQ_INSERT_TAIL(V_pf_altqs_inactive, a2, entries); } out: if (error != 0) pf_rollback_altq(ticket); else pf_commit_altq(ticket); } #endif /* ALTQ */ static struct pf_krule_global * pf_rule_tree_alloc(int flags) { struct pf_krule_global *tree; tree = malloc(sizeof(struct pf_krule_global), M_PF, flags); if (tree == NULL) return (NULL); RB_INIT(tree); return (tree); } void pf_rule_tree_free(struct pf_krule_global *tree) { free(tree, M_PF); } static int pf_begin_rules(u_int32_t *ticket, int rs_num, const char *anchor) { struct pf_krule_global *tree; struct pf_kruleset *rs; struct pf_krule *rule; PF_RULES_WASSERT(); if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); tree = pf_rule_tree_alloc(M_NOWAIT); if (tree == NULL) return (ENOMEM); rs = pf_find_or_create_kruleset(anchor); if (rs == NULL) { pf_rule_tree_free(tree); return (EINVAL); } pf_rule_tree_free(rs->rules[rs_num].inactive.tree); rs->rules[rs_num].inactive.tree = tree; while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); rs->rules[rs_num].inactive.rcount--; } *ticket = ++rs->rules[rs_num].inactive.ticket; rs->rules[rs_num].inactive.open = 1; return (0); } static int pf_rollback_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_kruleset *rs; struct pf_krule *rule; PF_RULES_WASSERT(); if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_kruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || rs->rules[rs_num].inactive.ticket != ticket) return (0); while ((rule = TAILQ_FIRST(rs->rules[rs_num].inactive.ptr)) != NULL) { pf_unlink_rule(rs->rules[rs_num].inactive.ptr, rule); rs->rules[rs_num].inactive.rcount--; } rs->rules[rs_num].inactive.open = 0; return (0); } #define PF_MD5_UPD(st, elm) \ MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm)) #define PF_MD5_UPD_STR(st, elm) \ MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm)) #define PF_MD5_UPD_HTONL(st, elm, stor) do { \ (stor) = htonl((st)->elm); \ MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int32_t));\ } while (0) #define PF_MD5_UPD_HTONS(st, elm, stor) do { \ (stor) = htons((st)->elm); \ MD5Update(ctx, (u_int8_t *) &(stor), sizeof(u_int16_t));\ } while (0) static void pf_hash_rule_addr(MD5_CTX *ctx, struct pf_rule_addr *pfr) { PF_MD5_UPD(pfr, addr.type); switch (pfr->addr.type) { case 
PF_ADDR_DYNIFTL: PF_MD5_UPD(pfr, addr.v.ifname); PF_MD5_UPD(pfr, addr.iflags); break; case PF_ADDR_TABLE: if (strncmp(pfr->addr.v.tblname, PF_OPTIMIZER_TABLE_PFX, strlen(PF_OPTIMIZER_TABLE_PFX))) PF_MD5_UPD(pfr, addr.v.tblname); break; case PF_ADDR_ADDRMASK: /* XXX ignore af? */ PF_MD5_UPD(pfr, addr.v.a.addr.addr32); PF_MD5_UPD(pfr, addr.v.a.mask.addr32); break; } PF_MD5_UPD(pfr, port[0]); PF_MD5_UPD(pfr, port[1]); PF_MD5_UPD(pfr, neg); PF_MD5_UPD(pfr, port_op); } static void pf_hash_rule_rolling(MD5_CTX *ctx, struct pf_krule *rule) { u_int16_t x; u_int32_t y; pf_hash_rule_addr(ctx, &rule->src); pf_hash_rule_addr(ctx, &rule->dst); for (int i = 0; i < PF_RULE_MAX_LABEL_COUNT; i++) PF_MD5_UPD_STR(rule, label[i]); PF_MD5_UPD_STR(rule, ifname); PF_MD5_UPD_STR(rule, rcv_ifname); PF_MD5_UPD_STR(rule, match_tagname); PF_MD5_UPD_HTONS(rule, match_tag, x); /* dup? */ PF_MD5_UPD_HTONL(rule, os_fingerprint, y); PF_MD5_UPD_HTONL(rule, prob, y); PF_MD5_UPD_HTONL(rule, uid.uid[0], y); PF_MD5_UPD_HTONL(rule, uid.uid[1], y); PF_MD5_UPD(rule, uid.op); PF_MD5_UPD_HTONL(rule, gid.gid[0], y); PF_MD5_UPD_HTONL(rule, gid.gid[1], y); PF_MD5_UPD(rule, gid.op); PF_MD5_UPD_HTONL(rule, rule_flag, y); PF_MD5_UPD(rule, action); PF_MD5_UPD(rule, direction); PF_MD5_UPD(rule, af); PF_MD5_UPD(rule, quick); PF_MD5_UPD(rule, ifnot); PF_MD5_UPD(rule, rcvifnot); PF_MD5_UPD(rule, match_tag_not); PF_MD5_UPD(rule, natpass); PF_MD5_UPD(rule, keep_state); PF_MD5_UPD(rule, proto); PF_MD5_UPD(rule, type); PF_MD5_UPD(rule, code); PF_MD5_UPD(rule, flags); PF_MD5_UPD(rule, flagset); PF_MD5_UPD(rule, allow_opts); PF_MD5_UPD(rule, rt); PF_MD5_UPD(rule, tos); PF_MD5_UPD(rule, scrub_flags); PF_MD5_UPD(rule, min_ttl); PF_MD5_UPD(rule, set_tos); if (rule->anchor != NULL) PF_MD5_UPD_STR(rule, anchor->path); } static void pf_hash_rule(struct pf_krule *rule) { MD5_CTX ctx; MD5Init(&ctx); pf_hash_rule_rolling(&ctx, rule); MD5Final(rule->md5sum, &ctx); } static int pf_krule_compare(struct pf_krule *a, struct pf_krule *b) { return (memcmp(a->md5sum, b->md5sum, PF_MD5_DIGEST_LENGTH)); } static int pf_commit_rules(u_int32_t ticket, int rs_num, char *anchor) { struct pf_kruleset *rs; struct pf_krule *rule, *old_rule; struct pf_krulequeue *old_rules; struct pf_krule_global *old_tree; int error; u_int32_t old_rcount; PF_RULES_WASSERT(); if (rs_num < 0 || rs_num >= PF_RULESET_MAX) return (EINVAL); rs = pf_find_kruleset(anchor); if (rs == NULL || !rs->rules[rs_num].inactive.open || ticket != rs->rules[rs_num].inactive.ticket) return (EBUSY); /* Calculate checksum for the main ruleset */ if (rs == &pf_main_ruleset) { error = pf_setup_pfsync_matching(rs); if (error != 0) return (error); } /* Swap rules, keep the old. */ old_rules = rs->rules[rs_num].active.ptr; old_rcount = rs->rules[rs_num].active.rcount; old_tree = rs->rules[rs_num].active.tree; rs->rules[rs_num].active.ptr = rs->rules[rs_num].inactive.ptr; rs->rules[rs_num].active.tree = rs->rules[rs_num].inactive.tree; rs->rules[rs_num].active.rcount = rs->rules[rs_num].inactive.rcount; /* Attempt to preserve counter information. 
*/ if (V_pf_status.keep_counters && old_tree != NULL) { TAILQ_FOREACH(rule, rs->rules[rs_num].active.ptr, entries) { old_rule = RB_FIND(pf_krule_global, old_tree, rule); if (old_rule == NULL) { continue; } pf_counter_u64_critical_enter(); pf_counter_u64_rollup_protected(&rule->evaluations, pf_counter_u64_fetch(&old_rule->evaluations)); pf_counter_u64_rollup_protected(&rule->packets[0], pf_counter_u64_fetch(&old_rule->packets[0])); pf_counter_u64_rollup_protected(&rule->packets[1], pf_counter_u64_fetch(&old_rule->packets[1])); pf_counter_u64_rollup_protected(&rule->bytes[0], pf_counter_u64_fetch(&old_rule->bytes[0])); pf_counter_u64_rollup_protected(&rule->bytes[1], pf_counter_u64_fetch(&old_rule->bytes[1])); pf_counter_u64_critical_exit(); } } rs->rules[rs_num].inactive.ptr = old_rules; rs->rules[rs_num].inactive.tree = NULL; /* important for pf_ioctl_addrule */ rs->rules[rs_num].inactive.rcount = old_rcount; rs->rules[rs_num].active.ticket = rs->rules[rs_num].inactive.ticket; pf_calc_skip_steps(rs->rules[rs_num].active.ptr); /* Purge the old rule list. */ PF_UNLNKDRULES_LOCK(); while ((rule = TAILQ_FIRST(old_rules)) != NULL) pf_unlink_rule_locked(old_rules, rule); PF_UNLNKDRULES_UNLOCK(); rs->rules[rs_num].inactive.rcount = 0; rs->rules[rs_num].inactive.open = 0; pf_remove_if_empty_kruleset(rs); pf_rule_tree_free(old_tree); return (0); } static int pf_setup_pfsync_matching(struct pf_kruleset *rs) { MD5_CTX ctx; struct pf_krule *rule; int rs_cnt; u_int8_t digest[PF_MD5_DIGEST_LENGTH]; MD5Init(&ctx); for (rs_cnt = 0; rs_cnt < PF_RULESET_MAX; rs_cnt++) { /* XXX PF_RULESET_SCRUB as well? */ if (rs_cnt == PF_RULESET_SCRUB) continue; if (rs->rules[rs_cnt].inactive.rcount) { TAILQ_FOREACH(rule, rs->rules[rs_cnt].inactive.ptr, entries) { pf_hash_rule_rolling(&ctx, rule); } } } MD5Final(digest, &ctx); memcpy(V_pf_status.pf_chksum, digest, sizeof(V_pf_status.pf_chksum)); return (0); } static int pf_eth_addr_setup(struct pf_keth_ruleset *ruleset, struct pf_addr_wrap *addr) { int error = 0; switch (addr->type) { case PF_ADDR_TABLE: addr->p.tbl = pfr_eth_attach_table(ruleset, addr->v.tblname); if (addr->p.tbl == NULL) error = ENOMEM; break; default: error = EINVAL; } return (error); } static int pf_addr_setup(struct pf_kruleset *ruleset, struct pf_addr_wrap *addr, sa_family_t af) { int error = 0; switch (addr->type) { case PF_ADDR_TABLE: addr->p.tbl = pfr_attach_table(ruleset, addr->v.tblname); if (addr->p.tbl == NULL) error = ENOMEM; break; case PF_ADDR_DYNIFTL: error = pfi_dynaddr_setup(addr, af); break; } return (error); } void pf_addr_copyout(struct pf_addr_wrap *addr) { switch (addr->type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_copyout(addr); break; case PF_ADDR_TABLE: pf_tbladdr_copyout(addr); break; } } static void pf_src_node_copy(const struct pf_ksrc_node *in, struct pf_src_node *out) { int secs = time_uptime; bzero(out, sizeof(struct pf_src_node)); bcopy(&in->addr, &out->addr, sizeof(struct pf_addr)); bcopy(&in->raddr, &out->raddr, sizeof(struct pf_addr)); if (in->rule != NULL) out->rule.nr = in->rule->nr; for (int i = 0; i < 2; i++) { out->bytes[i] = counter_u64_fetch(in->bytes[i]); out->packets[i] = counter_u64_fetch(in->packets[i]); } out->states = in->states; out->conn = in->conn; out->af = in->af; out->ruletype = in->ruletype; out->creation = secs - in->creation; if (out->expire > secs) out->expire -= secs; else out->expire = 0; /* Adjust the connection rate estimate. 
*/ out->conn_rate.limit = in->conn_rate.limit; out->conn_rate.seconds = in->conn_rate.seconds; /* If there's no limit there's no counter_rate. */ if (in->conn_rate.cr != NULL) out->conn_rate.count = counter_rate_get(in->conn_rate.cr); } #ifdef ALTQ /* * Handle export of struct pf_kaltq to user binaries that may be using any * version of struct pf_altq. */ static int pf_export_kaltq(struct pf_altq *q, struct pfioc_altq_v1 *pa, size_t ioc_size) { u_int32_t version; if (ioc_size == sizeof(struct pfioc_altq_v0)) version = 0; else version = pa->version; if (version > PFIOC_ALTQ_VERSION) return (EINVAL); #define ASSIGN(x) exported_q->x = q->x #define COPY(x) \ bcopy(&q->x, &exported_q->x, min(sizeof(q->x), sizeof(exported_q->x))) #define SATU16(x) (u_int32_t)uqmin((x), USHRT_MAX) #define SATU32(x) (u_int32_t)uqmin((x), UINT_MAX) switch (version) { case 0: { struct pf_altq_v0 *exported_q = &((struct pfioc_altq_v0 *)pa)->altq; COPY(ifname); ASSIGN(scheduler); ASSIGN(tbrsize); exported_q->tbrsize = SATU16(q->tbrsize); exported_q->ifbandwidth = SATU32(q->ifbandwidth); COPY(qname); COPY(parent); ASSIGN(parent_qid); exported_q->bandwidth = SATU32(q->bandwidth); ASSIGN(priority); ASSIGN(local_flags); ASSIGN(qlimit); ASSIGN(flags); if (q->scheduler == ALTQT_HFSC) { #define ASSIGN_OPT(x) exported_q->pq_u.hfsc_opts.x = q->pq_u.hfsc_opts.x #define ASSIGN_OPT_SATU32(x) exported_q->pq_u.hfsc_opts.x = \ SATU32(q->pq_u.hfsc_opts.x) - + ASSIGN_OPT_SATU32(rtsc_m1); ASSIGN_OPT(rtsc_d); ASSIGN_OPT_SATU32(rtsc_m2); ASSIGN_OPT_SATU32(lssc_m1); ASSIGN_OPT(lssc_d); ASSIGN_OPT_SATU32(lssc_m2); ASSIGN_OPT_SATU32(ulsc_m1); ASSIGN_OPT(ulsc_d); ASSIGN_OPT_SATU32(ulsc_m2); ASSIGN_OPT(flags); - + #undef ASSIGN_OPT #undef ASSIGN_OPT_SATU32 } else COPY(pq_u); ASSIGN(qid); break; } case 1: { struct pf_altq_v1 *exported_q = &((struct pfioc_altq_v1 *)pa)->altq; COPY(ifname); ASSIGN(scheduler); ASSIGN(tbrsize); ASSIGN(ifbandwidth); COPY(qname); COPY(parent); ASSIGN(parent_qid); ASSIGN(bandwidth); ASSIGN(priority); ASSIGN(local_flags); ASSIGN(qlimit); ASSIGN(flags); COPY(pq_u); ASSIGN(qid); break; } default: panic("%s: unhandled struct pfioc_altq version", __func__); break; } #undef ASSIGN #undef COPY #undef SATU16 #undef SATU32 return (0); } /* * Handle import to struct pf_kaltq of struct pf_altq from user binaries * that may be using any version of it. */ static int pf_import_kaltq(struct pfioc_altq_v1 *pa, struct pf_altq *q, size_t ioc_size) { u_int32_t version; if (ioc_size == sizeof(struct pfioc_altq_v0)) version = 0; else version = pa->version; if (version > PFIOC_ALTQ_VERSION) return (EINVAL); #define ASSIGN(x) q->x = imported_q->x #define COPY(x) \ bcopy(&imported_q->x, &q->x, min(sizeof(imported_q->x), sizeof(q->x))) switch (version) { case 0: { struct pf_altq_v0 *imported_q = &((struct pfioc_altq_v0 *)pa)->altq; COPY(ifname); ASSIGN(scheduler); ASSIGN(tbrsize); /* 16-bit -> 32-bit */ ASSIGN(ifbandwidth); /* 32-bit -> 64-bit */ COPY(qname); COPY(parent); ASSIGN(parent_qid); ASSIGN(bandwidth); /* 32-bit -> 64-bit */ ASSIGN(priority); ASSIGN(local_flags); ASSIGN(qlimit); ASSIGN(flags); if (imported_q->scheduler == ALTQT_HFSC) { #define ASSIGN_OPT(x) q->pq_u.hfsc_opts.x = imported_q->pq_u.hfsc_opts.x /* * The m1 and m2 parameters are being copied from * 32-bit to 64-bit. 
*/ ASSIGN_OPT(rtsc_m1); ASSIGN_OPT(rtsc_d); ASSIGN_OPT(rtsc_m2); ASSIGN_OPT(lssc_m1); ASSIGN_OPT(lssc_d); ASSIGN_OPT(lssc_m2); ASSIGN_OPT(ulsc_m1); ASSIGN_OPT(ulsc_d); ASSIGN_OPT(ulsc_m2); ASSIGN_OPT(flags); - + #undef ASSIGN_OPT } else COPY(pq_u); ASSIGN(qid); break; } case 1: { struct pf_altq_v1 *imported_q = &((struct pfioc_altq_v1 *)pa)->altq; COPY(ifname); ASSIGN(scheduler); ASSIGN(tbrsize); ASSIGN(ifbandwidth); COPY(qname); COPY(parent); ASSIGN(parent_qid); ASSIGN(bandwidth); ASSIGN(priority); ASSIGN(local_flags); ASSIGN(qlimit); ASSIGN(flags); COPY(pq_u); ASSIGN(qid); break; } - default: + default: panic("%s: unhandled struct pfioc_altq version", __func__); break; } #undef ASSIGN #undef COPY return (0); } static struct pf_altq * pf_altq_get_nth_active(u_int32_t n) { struct pf_altq *altq; u_int32_t nr; nr = 0; TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { if (nr == n) return (altq); nr++; } TAILQ_FOREACH(altq, V_pf_altqs_active, entries) { if (nr == n) return (altq); nr++; } return (NULL); } #endif /* ALTQ */ struct pf_krule * pf_krule_alloc(void) { struct pf_krule *rule; rule = malloc(sizeof(struct pf_krule), M_PFRULE, M_WAITOK | M_ZERO); mtx_init(&rule->nat.mtx, "pf_krule_nat_pool", NULL, MTX_DEF); mtx_init(&rule->rdr.mtx, "pf_krule_rdr_pool", NULL, MTX_DEF); mtx_init(&rule->route.mtx, "pf_krule_route_pool", NULL, MTX_DEF); rule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone, M_WAITOK | M_ZERO); return (rule); } void pf_krule_free(struct pf_krule *rule) { #ifdef PF_WANT_32_TO_64_COUNTER bool wowned; #endif if (rule == NULL) return; #ifdef PF_WANT_32_TO_64_COUNTER if (rule->allrulelinked) { wowned = PF_RULES_WOWNED(); if (!wowned) PF_RULES_WLOCK(); LIST_REMOVE(rule, allrulelist); V_pf_allrulecount--; if (!wowned) PF_RULES_WUNLOCK(); } #endif pf_counter_u64_deinit(&rule->evaluations); for (int i = 0; i < 2; i++) { pf_counter_u64_deinit(&rule->packets[i]); pf_counter_u64_deinit(&rule->bytes[i]); } counter_u64_free(rule->states_cur); counter_u64_free(rule->states_tot); for (pf_sn_types_t sn_type=0; sn_typesrc_nodes[sn_type]); uma_zfree_pcpu(pf_timestamp_pcpu_zone, rule->timestamp); mtx_destroy(&rule->nat.mtx); mtx_destroy(&rule->rdr.mtx); mtx_destroy(&rule->route.mtx); free(rule, M_PFRULE); } void pf_krule_clear_counters(struct pf_krule *rule) { pf_counter_u64_zero(&rule->evaluations); for (int i = 0; i < 2; i++) { pf_counter_u64_zero(&rule->packets[i]); pf_counter_u64_zero(&rule->bytes[i]); } counter_u64_zero(rule->states_tot); } static void pf_kpooladdr_to_pooladdr(const struct pf_kpooladdr *kpool, struct pf_pooladdr *pool) { bzero(pool, sizeof(*pool)); bcopy(&kpool->addr, &pool->addr, sizeof(pool->addr)); strlcpy(pool->ifname, kpool->ifname, sizeof(pool->ifname)); } static int pf_pooladdr_to_kpooladdr(const struct pf_pooladdr *pool, struct pf_kpooladdr *kpool) { int ret; bzero(kpool, sizeof(*kpool)); bcopy(&pool->addr, &kpool->addr, sizeof(kpool->addr)); ret = pf_user_strcpy(kpool->ifname, pool->ifname, sizeof(kpool->ifname)); return (ret); } static void pf_pool_to_kpool(const struct pf_pool *pool, struct pf_kpool *kpool) { _Static_assert(sizeof(pool->key) == sizeof(kpool->key), ""); _Static_assert(sizeof(pool->counter) == sizeof(kpool->counter), ""); bcopy(&pool->key, &kpool->key, sizeof(kpool->key)); bcopy(&pool->counter, &kpool->counter, sizeof(kpool->counter)); kpool->tblidx = pool->tblidx; kpool->proxy_port[0] = pool->proxy_port[0]; kpool->proxy_port[1] = pool->proxy_port[1]; kpool->opts = pool->opts; } static int pf_rule_to_krule(const struct pf_rule *rule, struct 
pf_krule *krule) { int ret; #ifndef INET if (rule->af == AF_INET) { return (EAFNOSUPPORT); } #endif /* INET */ #ifndef INET6 if (rule->af == AF_INET6) { return (EAFNOSUPPORT); } #endif /* INET6 */ ret = pf_check_rule_addr(&rule->src); if (ret != 0) return (ret); ret = pf_check_rule_addr(&rule->dst); if (ret != 0) return (ret); bcopy(&rule->src, &krule->src, sizeof(rule->src)); bcopy(&rule->dst, &krule->dst, sizeof(rule->dst)); ret = pf_user_strcpy(krule->label[0], rule->label, sizeof(rule->label)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->ifname, rule->ifname, sizeof(rule->ifname)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->qname, rule->qname, sizeof(rule->qname)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->pqname, rule->pqname, sizeof(rule->pqname)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->tagname, rule->tagname, sizeof(rule->tagname)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->match_tagname, rule->match_tagname, sizeof(rule->match_tagname)); if (ret != 0) return (ret); ret = pf_user_strcpy(krule->overload_tblname, rule->overload_tblname, sizeof(rule->overload_tblname)); if (ret != 0) return (ret); pf_pool_to_kpool(&rule->rpool, &krule->rdr); /* Don't allow userspace to set evaluations, packets or bytes. */ /* kif, anchor, overload_tbl are not copied over. */ krule->os_fingerprint = rule->os_fingerprint; krule->rtableid = rule->rtableid; /* pf_rule->timeout is smaller than pf_krule->timeout */ bcopy(rule->timeout, krule->timeout, sizeof(rule->timeout)); krule->max_states = rule->max_states; krule->max_src_nodes = rule->max_src_nodes; krule->max_src_states = rule->max_src_states; krule->max_src_conn = rule->max_src_conn; krule->max_src_conn_rate.limit = rule->max_src_conn_rate.limit; krule->max_src_conn_rate.seconds = rule->max_src_conn_rate.seconds; krule->qid = rule->qid; krule->pqid = rule->pqid; krule->nr = rule->nr; krule->prob = rule->prob; krule->cuid = rule->cuid; krule->cpid = rule->cpid; krule->return_icmp = rule->return_icmp; krule->return_icmp6 = rule->return_icmp6; krule->max_mss = rule->max_mss; krule->tag = rule->tag; krule->match_tag = rule->match_tag; krule->scrub_flags = rule->scrub_flags; bcopy(&rule->uid, &krule->uid, sizeof(krule->uid)); bcopy(&rule->gid, &krule->gid, sizeof(krule->gid)); krule->rule_flag = rule->rule_flag; krule->action = rule->action; krule->direction = rule->direction; krule->log = rule->log; krule->logif = rule->logif; krule->quick = rule->quick; krule->ifnot = rule->ifnot; krule->match_tag_not = rule->match_tag_not; krule->natpass = rule->natpass; krule->keep_state = rule->keep_state; krule->af = rule->af; krule->proto = rule->proto; krule->type = rule->type; krule->code = rule->code; krule->flags = rule->flags; krule->flagset = rule->flagset; krule->min_ttl = rule->min_ttl; krule->allow_opts = rule->allow_opts; krule->rt = rule->rt; krule->return_ttl = rule->return_ttl; krule->tos = rule->tos; krule->set_tos = rule->set_tos; krule->flush = rule->flush; krule->prio = rule->prio; krule->set_prio[0] = rule->set_prio[0]; krule->set_prio[1] = rule->set_prio[1]; bcopy(&rule->divert, &krule->divert, sizeof(krule->divert)); return (0); } int pf_ioctl_getrules(struct pfioc_rule *pr) { struct pf_kruleset *ruleset; struct pf_krule *tail; int rs_num; PF_RULES_WLOCK(); ruleset = pf_find_kruleset(pr->anchor); if (ruleset == NULL) { PF_RULES_WUNLOCK(); return (EINVAL); } rs_num = pf_get_ruleset_number(pr->rule.action); if (rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); return (EINVAL); } 
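/* * Reading rules is a two-step protocol: DIOCGETRULES reports the rule * count and the ticket of the active generation, and userspace then * fetches each rule with that ticket, e.g. via DIOCGETRULENV with the * "anchor", "ruleset", "ticket" and "nr" nvlist keys (see its handler * below), which lets the kernel detect a concurrent ruleset swap. */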
tail = TAILQ_LAST(ruleset->rules[rs_num].active.ptr, pf_krulequeue); if (tail) pr->nr = tail->nr + 1; else pr->nr = 0; pr->ticket = ruleset->rules[rs_num].active.ticket; PF_RULES_WUNLOCK(); return (0); } static int pf_rule_checkaf(struct pf_krule *r) { switch (r->af) { case 0: if (r->rule_flag & PFRULE_AFTO) return (EPFNOSUPPORT); break; case AF_INET: if ((r->rule_flag & PFRULE_AFTO) && r->naf != AF_INET6) return (EPFNOSUPPORT); break; #ifdef INET6 case AF_INET6: if ((r->rule_flag & PFRULE_AFTO) && r->naf != AF_INET) return (EPFNOSUPPORT); break; #endif /* INET6 */ default: return (EPFNOSUPPORT); } if ((r->rule_flag & PFRULE_AFTO) == 0 && r->naf != 0) return (EPFNOSUPPORT); return (0); } static int pf_validate_range(uint8_t op, uint16_t port[2]) { uint16_t a = ntohs(port[0]); uint16_t b = ntohs(port[1]); if ((op == PF_OP_RRG && a > b) || /* 34:12, i.e. none */ (op == PF_OP_IRG && a >= b) || /* 34><12, i.e. none */ (op == PF_OP_XRG && a > b)) /* 34<>22, i.e. all */ return 1; return 0; } int pf_ioctl_addrule(struct pf_krule *rule, uint32_t ticket, uint32_t pool_ticket, const char *anchor, const char *anchor_call, uid_t uid, pid_t pid) { struct pf_kruleset *ruleset; struct pf_krule *tail; struct pf_kpooladdr *pa; struct pfi_kkif *kif = NULL, *rcv_kif = NULL; int rs_num; int error = 0; #define ERROUT(x) ERROUT_FUNCTION(errout, x) #define ERROUT_UNLOCKED(x) ERROUT_FUNCTION(errout_unlocked, x) if ((rule->return_icmp >> 8) > ICMP_MAXTYPE) ERROUT_UNLOCKED(EINVAL); if ((error = pf_rule_checkaf(rule))) ERROUT_UNLOCKED(error); if (pf_validate_range(rule->src.port_op, rule->src.port)) ERROUT_UNLOCKED(EINVAL); if (pf_validate_range(rule->dst.port_op, rule->dst.port)) ERROUT_UNLOCKED(EINVAL); if (rule->ifname[0]) kif = pf_kkif_create(M_WAITOK); if (rule->rcv_ifname[0]) rcv_kif = pf_kkif_create(M_WAITOK); pf_counter_u64_init(&rule->evaluations, M_WAITOK); for (int i = 0; i < 2; i++) { pf_counter_u64_init(&rule->packets[i], M_WAITOK); pf_counter_u64_init(&rule->bytes[i], M_WAITOK); } rule->states_cur = counter_u64_alloc(M_WAITOK); rule->states_tot = counter_u64_alloc(M_WAITOK); for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) rule->src_nodes[sn_type] = counter_u64_alloc(M_WAITOK); rule->cuid = uid; rule->cpid = pid; TAILQ_INIT(&rule->rdr.list); TAILQ_INIT(&rule->nat.list); TAILQ_INIT(&rule->route.list); PF_CONFIG_LOCK(); PF_RULES_WLOCK(); #ifdef PF_WANT_32_TO_64_COUNTER LIST_INSERT_HEAD(&V_pf_allrulelist, rule, allrulelist); MPASS(!rule->allrulelinked); rule->allrulelinked = true; V_pf_allrulecount++; #endif ruleset = pf_find_kruleset(anchor); if (ruleset == NULL) ERROUT(EINVAL); rs_num = pf_get_ruleset_number(rule->action); if (rs_num >= PF_RULESET_MAX) ERROUT(EINVAL); if (ticket != ruleset->rules[rs_num].inactive.ticket) { DPFPRINTF(PF_DEBUG_MISC, "ticket: %d != [%d]%d", ticket, rs_num, ruleset->rules[rs_num].inactive.ticket); ERROUT(EBUSY); } if (pool_ticket != V_ticket_pabuf) { DPFPRINTF(PF_DEBUG_MISC, "pool_ticket: %d != %d", pool_ticket, V_ticket_pabuf); ERROUT(EBUSY); } /* * XXXMJG hack: there is no mechanism to ensure the caller started the * transaction. The ticket checked above may happen to match by accident, * even if nobody called DIOCXBEGIN, let alone this process. * Partially work around it by checking if the RB tree got allocated, * see pf_begin_rules.
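* A NULL inactive tree therefore means no transaction was ever opened * and any ticket match must be accidental.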
*/ if (ruleset->rules[rs_num].inactive.tree == NULL) { ERROUT(EINVAL); } tail = TAILQ_LAST(ruleset->rules[rs_num].inactive.ptr, pf_krulequeue); if (tail) rule->nr = tail->nr + 1; else rule->nr = 0; if (rule->ifname[0]) { rule->kif = pfi_kkif_attach(kif, rule->ifname); kif = NULL; pfi_kkif_ref(rule->kif); } else rule->kif = NULL; if (rule->rcv_ifname[0]) { rule->rcv_kif = pfi_kkif_attach(rcv_kif, rule->rcv_ifname); rcv_kif = NULL; pfi_kkif_ref(rule->rcv_kif); } else rule->rcv_kif = NULL; if (rule->rtableid > 0 && rule->rtableid >= rt_numfibs) ERROUT(EBUSY); #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { - if ((rule->qid = pf_qname2qid(rule->qname)) == 0) + if ((rule->qid = pf_qname2qid(rule->qname, true)) == 0) ERROUT(EBUSY); else if (rule->pqname[0] != 0) { if ((rule->pqid = - pf_qname2qid(rule->pqname)) == 0) + pf_qname2qid(rule->pqname, true)) == 0) ERROUT(EBUSY); } else rule->pqid = rule->qid; } #endif if (rule->tagname[0]) if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) ERROUT(EBUSY); if (rule->match_tagname[0]) if ((rule->match_tag = pf_tagname2tag(rule->match_tagname)) == 0) ERROUT(EBUSY); if (rule->rt && !rule->direction) ERROUT(EINVAL); if (!rule->log) rule->logif = 0; if (! pf_init_threshold(&rule->pktrate, rule->pktrate.limit, rule->pktrate.seconds)) ERROUT(ENOMEM); if (pf_addr_setup(ruleset, &rule->src.addr, rule->af)) ERROUT(ENOMEM); if (pf_addr_setup(ruleset, &rule->dst.addr, rule->af)) ERROUT(ENOMEM); if (pf_kanchor_setup(rule, ruleset, anchor_call)) ERROUT(EINVAL); if (rule->scrub_flags & PFSTATE_SETPRIO && (rule->set_prio[0] > PF_PRIO_MAX || rule->set_prio[1] > PF_PRIO_MAX)) ERROUT(EINVAL); for (int i = 0; i < 3; i++) { TAILQ_FOREACH(pa, &V_pf_pabuf[i], entries) if (pa->addr.type == PF_ADDR_TABLE) { pa->addr.p.tbl = pfr_attach_table(ruleset, pa->addr.v.tblname); if (pa->addr.p.tbl == NULL) ERROUT(ENOMEM); } } rule->overload_tbl = NULL; if (rule->overload_tblname[0]) { if ((rule->overload_tbl = pfr_attach_table(ruleset, rule->overload_tblname)) == NULL) ERROUT(EINVAL); else rule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } pf_mv_kpool(&V_pf_pabuf[0], &rule->nat.list); /* * Old versions of pfctl provided route redirection pools in the single * common redirection pool rdr. New versions use rdr only for * rdr-to rules.
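* That is: when a route option is set (rt > PF_NOPFROUTE) and no * separate route pool was supplied, the shared pool is treated as the * route pool; otherwise pabuf[1] is rdr and pabuf[2] is route.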
*/ if (rule->rt > PF_NOPFROUTE && TAILQ_EMPTY(&V_pf_pabuf[2])) { pf_mv_kpool(&V_pf_pabuf[1], &rule->route.list); } else { pf_mv_kpool(&V_pf_pabuf[1], &rule->rdr.list); pf_mv_kpool(&V_pf_pabuf[2], &rule->route.list); } if (((rule->action == PF_NAT) || (rule->action == PF_RDR) || (rule->action == PF_BINAT)) && rule->anchor == NULL && TAILQ_FIRST(&rule->rdr.list) == NULL) { ERROUT(EINVAL); } if (rule->rt > PF_NOPFROUTE && (TAILQ_FIRST(&rule->route.list) == NULL)) { ERROUT(EINVAL); } if (rule->action == PF_PASS && (rule->rdr.opts & PF_POOL_STICKYADDR || rule->nat.opts & PF_POOL_STICKYADDR) && !rule->keep_state) { ERROUT(EINVAL); } MPASS(error == 0); rule->nat.cur = TAILQ_FIRST(&rule->nat.list); rule->rdr.cur = TAILQ_FIRST(&rule->rdr.list); rule->route.cur = TAILQ_FIRST(&rule->route.list); rule->route.ipv6_nexthop_af = AF_INET6; TAILQ_INSERT_TAIL(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount++; PF_RULES_WUNLOCK(); pf_hash_rule(rule); if (RB_INSERT(pf_krule_global, ruleset->rules[rs_num].inactive.tree, rule) != NULL) { PF_RULES_WLOCK(); TAILQ_REMOVE(ruleset->rules[rs_num].inactive.ptr, rule, entries); ruleset->rules[rs_num].inactive.rcount--; pf_free_rule(rule); rule = NULL; ERROUT(EEXIST); } PF_CONFIG_UNLOCK(); return (0); #undef ERROUT #undef ERROUT_UNLOCKED errout: PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); errout_unlocked: pf_kkif_free(rcv_kif); pf_kkif_free(kif); pf_krule_free(rule); return (error); } static bool pf_label_match(const struct pf_krule *rule, const char *label) { int i = 0; while (*rule->label[i]) { if (strcmp(rule->label[i], label) == 0) return (true); i++; } return (false); } static unsigned int pf_kill_matching_state(struct pf_state_key_cmp *key, int dir) { struct pf_kstate *s; int more = 0; s = pf_find_state_all(key, dir, &more); if (s == NULL) return (0); if (more) { PF_STATE_UNLOCK(s); return (0); } pf_remove_state(s); return (1); } static int pf_killstates_row(struct pf_kstate_kill *psk, struct pf_idhash *ih) { struct pf_kstate *s; struct pf_state_key *sk; struct pf_addr *srcaddr, *dstaddr; struct pf_state_key_cmp match_key; int idx, killed = 0; unsigned int dir; u_int16_t srcport, dstport; struct pfi_kkif *kif; relock_DIOCKILLSTATES: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { /* For floating states look at the original kif. */ kif = s->kif == V_pfi_all ? s->orig_kif : s->kif; sk = s->key[psk->psk_nat ? PF_SK_STACK : PF_SK_WIRE]; if (s->direction == PF_OUT) { srcaddr = &sk->addr[1]; dstaddr = &sk->addr[0]; srcport = sk->port[1]; dstport = sk->port[0]; } else { srcaddr = &sk->addr[0]; dstaddr = &sk->addr[1]; srcport = sk->port[0]; dstport = sk->port[1]; } if (psk->psk_af && sk->af != psk->psk_af) continue; if (psk->psk_proto && psk->psk_proto != sk->proto) continue; if (! pf_match_addr(psk->psk_src.neg, &psk->psk_src.addr.v.a.addr, &psk->psk_src.addr.v.a.mask, srcaddr, sk->af)) continue; if (! pf_match_addr(psk->psk_dst.neg, &psk->psk_dst.addr.v.a.addr, &psk->psk_dst.addr.v.a.mask, dstaddr, sk->af)) continue; if (! pf_match_addr(psk->psk_rt_addr.neg, &psk->psk_rt_addr.addr.v.a.addr, &psk->psk_rt_addr.addr.v.a.mask, &s->act.rt_addr, sk->af)) continue; if (psk->psk_src.port_op != 0 && ! pf_match_port(psk->psk_src.port_op, psk->psk_src.port[0], psk->psk_src.port[1], srcport)) continue; if (psk->psk_dst.port_op != 0 && ! pf_match_port(psk->psk_dst.port_op, psk->psk_dst.port[0], psk->psk_dst.port[1], dstport)) continue; if (psk->psk_label[0] && ! 
pf_label_match(s->rule, psk->psk_label)) continue; if (psk->psk_ifname[0] && strcmp(psk->psk_ifname, kif->pfik_name)) continue; if (psk->psk_kill_match) { /* Create the key to find matching states, with lock * held. */ bzero(&match_key, sizeof(match_key)); if (s->direction == PF_OUT) { dir = PF_IN; idx = psk->psk_nat ? PF_SK_WIRE : PF_SK_STACK; } else { dir = PF_OUT; idx = psk->psk_nat ? PF_SK_STACK : PF_SK_WIRE; } match_key.af = s->key[idx]->af; match_key.proto = s->key[idx]->proto; pf_addrcpy(&match_key.addr[0], &s->key[idx]->addr[1], match_key.af); match_key.port[0] = s->key[idx]->port[1]; pf_addrcpy(&match_key.addr[1], &s->key[idx]->addr[0], match_key.af); match_key.port[1] = s->key[idx]->port[0]; } pf_remove_state(s); killed++; if (psk->psk_kill_match) killed += pf_kill_matching_state(&match_key, dir); goto relock_DIOCKILLSTATES; } PF_HASHROW_UNLOCK(ih); return (killed); } void unhandled_af(int af) { panic("unhandled af %d", af); } int pf_start(void) { int error = 0; sx_xlock(&V_pf_ioctl_lock); if (V_pf_status.running) error = EEXIST; else { hook_pf(); if (! TAILQ_EMPTY(V_pf_keth->active.rules)) hook_pf_eth(); V_pf_status.running = 1; V_pf_status.since = time_uptime; new_unrhdr64(&V_pf_stateid, time_second); DPFPRINTF(PF_DEBUG_MISC, "pf: started"); } sx_xunlock(&V_pf_ioctl_lock); return (error); } int pf_stop(void) { int error = 0; sx_xlock(&V_pf_ioctl_lock); if (!V_pf_status.running) error = ENOENT; else { V_pf_status.running = 0; dehook_pf(); dehook_pf_eth(); V_pf_status.since = time_uptime; DPFPRINTF(PF_DEBUG_MISC, "pf: stopped"); } sx_xunlock(&V_pf_ioctl_lock); return (error); } void pf_ioctl_clear_status(void) { PF_RULES_WLOCK(); for (int i = 0; i < PFRES_MAX; i++) counter_u64_zero(V_pf_status.counters[i]); for (int i = 0; i < FCNT_MAX; i++) pf_counter_u64_zero(&V_pf_status.fcounters[i]); for (int i = 0; i < SCNT_MAX; i++) counter_u64_zero(V_pf_status.scounters[i]); for (int i = 0; i < KLCNT_MAX; i++) counter_u64_zero(V_pf_status.lcounters[i]); V_pf_status.since = time_uptime; if (*V_pf_status.ifname) pfi_update_status(V_pf_status.ifname, NULL); PF_RULES_WUNLOCK(); } int pf_ioctl_set_timeout(int timeout, int seconds, int *prev_seconds) { uint32_t old; if (timeout < 0 || timeout >= PFTM_MAX || seconds < 0) return (EINVAL); PF_RULES_WLOCK(); old = V_pf_default_rule.timeout[timeout]; if (timeout == PFTM_INTERVAL && seconds == 0) seconds = 1; V_pf_default_rule.timeout[timeout] = seconds; if (timeout == PFTM_INTERVAL && seconds < old) wakeup(pf_purge_thread); if (prev_seconds != NULL) *prev_seconds = old; PF_RULES_WUNLOCK(); return (0); } int pf_ioctl_get_timeout(int timeout, int *seconds) { PF_RULES_RLOCK_TRACKER; if (timeout < 0 || timeout >= PFTM_MAX) return (EINVAL); PF_RULES_RLOCK(); *seconds = V_pf_default_rule.timeout[timeout]; PF_RULES_RUNLOCK(); return (0); } int pf_ioctl_set_limit(int index, unsigned int limit, unsigned int *old_limit) { PF_RULES_WLOCK(); if (index < 0 || index >= PF_LIMIT_MAX || V_pf_limits[index].zone == NULL) { PF_RULES_WUNLOCK(); return (EINVAL); } uma_zone_set_max(V_pf_limits[index].zone, limit == 0 ? 
INT_MAX : limit); if (old_limit != NULL) *old_limit = V_pf_limits[index].limit; V_pf_limits[index].limit = limit; PF_RULES_WUNLOCK(); return (0); } int pf_ioctl_get_limit(int index, unsigned int *limit) { PF_RULES_RLOCK_TRACKER; if (index < 0 || index >= PF_LIMIT_MAX) return (EINVAL); PF_RULES_RLOCK(); *limit = V_pf_limits[index].limit; PF_RULES_RUNLOCK(); return (0); } int pf_ioctl_begin_addrs(uint32_t *ticket) { PF_RULES_WLOCK(); pf_empty_kpool(&V_pf_pabuf[0]); pf_empty_kpool(&V_pf_pabuf[1]); pf_empty_kpool(&V_pf_pabuf[2]); *ticket = ++V_ticket_pabuf; PF_RULES_WUNLOCK(); return (0); } int pf_ioctl_add_addr(struct pf_nl_pooladdr *pp) { struct pf_kpooladdr *pa = NULL; struct pfi_kkif *kif = NULL; int error; if (pp->which != PF_RDR && pp->which != PF_NAT && pp->which != PF_RT) return (EINVAL); switch (pp->af) { #ifdef INET case AF_INET: /* FALLTHROUGH */ #endif /* INET */ #ifdef INET6 case AF_INET6: /* FALLTHROUGH */ #endif /* INET6 */ case AF_UNSPEC: break; default: return (EAFNOSUPPORT); } if (pp->addr.addr.type != PF_ADDR_ADDRMASK && pp->addr.addr.type != PF_ADDR_DYNIFTL && pp->addr.addr.type != PF_ADDR_TABLE) return (EINVAL); if (pp->addr.addr.p.dyn != NULL) return (EINVAL); pa = malloc(sizeof(*pa), M_PFRULE, M_WAITOK); error = pf_pooladdr_to_kpooladdr(&pp->addr, pa); if (error != 0) goto out; if (pa->ifname[0]) kif = pf_kkif_create(M_WAITOK); PF_RULES_WLOCK(); if (pp->ticket != V_ticket_pabuf) { PF_RULES_WUNLOCK(); if (pa->ifname[0]) pf_kkif_free(kif); error = EBUSY; goto out; } if (pa->ifname[0]) { pa->kif = pfi_kkif_attach(kif, pa->ifname); kif = NULL; pfi_kkif_ref(pa->kif); } else pa->kif = NULL; if (pa->addr.type == PF_ADDR_DYNIFTL && ((error = pfi_dynaddr_setup(&pa->addr, pp->af)) != 0)) { if (pa->ifname[0]) pfi_kkif_unref(pa->kif); PF_RULES_WUNLOCK(); goto out; } pa->af = pp->af; switch (pp->which) { case PF_NAT: TAILQ_INSERT_TAIL(&V_pf_pabuf[0], pa, entries); break; case PF_RDR: TAILQ_INSERT_TAIL(&V_pf_pabuf[1], pa, entries); break; case PF_RT: TAILQ_INSERT_TAIL(&V_pf_pabuf[2], pa, entries); break; } PF_RULES_WUNLOCK(); return (0); out: free(pa, M_PFRULE); return (error); } int pf_ioctl_get_addrs(struct pf_nl_pooladdr *pp) { struct pf_kpool *pool; struct pf_kpooladdr *pa; PF_RULES_RLOCK_TRACKER; if (pp->which != PF_RDR && pp->which != PF_NAT && pp->which != PF_RT) return (EINVAL); pp->anchor[sizeof(pp->anchor) - 1] = 0; pp->nr = 0; PF_RULES_RLOCK(); pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 0, pp->which); if (pool == NULL) { PF_RULES_RUNLOCK(); return (EBUSY); } TAILQ_FOREACH(pa, &pool->list, entries) pp->nr++; PF_RULES_RUNLOCK(); return (0); } int pf_ioctl_get_addr(struct pf_nl_pooladdr *pp) { struct pf_kpool *pool; struct pf_kpooladdr *pa; u_int32_t nr = 0; if (pp->which != PF_RDR && pp->which != PF_NAT && pp->which != PF_RT) return (EINVAL); PF_RULES_RLOCK_TRACKER; pp->anchor[sizeof(pp->anchor) - 1] = '\0'; PF_RULES_RLOCK(); pool = pf_get_kpool(pp->anchor, pp->ticket, pp->r_action, pp->r_num, 0, 1, 1, pp->which); if (pool == NULL) { PF_RULES_RUNLOCK(); return (EBUSY); } pa = TAILQ_FIRST(&pool->list); while ((pa != NULL) && (nr < pp->nr)) { pa = TAILQ_NEXT(pa, entries); nr++; } if (pa == NULL) { PF_RULES_RUNLOCK(); return (EBUSY); } pf_kpooladdr_to_pooladdr(pa, &pp->addr); pp->af = pa->af; pf_addr_copyout(&pp->addr.addr); PF_RULES_RUNLOCK(); return (0); } int pf_ioctl_get_rulesets(struct pfioc_ruleset *pr) { struct pf_kruleset *ruleset; struct pf_kanchor *anchor; PF_RULES_RLOCK_TRACKER; pr->path[sizeof(pr->path) - 1] = '\0'; PF_RULES_RLOCK(); 
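/* * Count the immediate child anchors of the given path; the main * ruleset has no anchor object of its own, so its children are the * parentless entries in the global anchor tree. */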
if ((ruleset = pf_find_kruleset(pr->path)) == NULL) { PF_RULES_RUNLOCK(); return (ENOENT); } pr->nr = 0; if (ruleset == &pf_main_ruleset) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) if (anchor->parent == NULL) pr->nr++; } else { RB_FOREACH(anchor, pf_kanchor_node, &ruleset->anchor->children) pr->nr++; } PF_RULES_RUNLOCK(); return (0); } int pf_ioctl_get_ruleset(struct pfioc_ruleset *pr) { struct pf_kruleset *ruleset; struct pf_kanchor *anchor; u_int32_t nr = 0; int error = 0; PF_RULES_RLOCK_TRACKER; PF_RULES_RLOCK(); if ((ruleset = pf_find_kruleset(pr->path)) == NULL) { PF_RULES_RUNLOCK(); return (ENOENT); } pr->name[0] = '\0'; if (ruleset == &pf_main_ruleset) { /* XXX kludge for pf_main_ruleset */ RB_FOREACH(anchor, pf_kanchor_global, &V_pf_anchors) if (anchor->parent == NULL && nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); break; } } else { RB_FOREACH(anchor, pf_kanchor_node, &ruleset->anchor->children) if (nr++ == pr->nr) { strlcpy(pr->name, anchor->name, sizeof(pr->name)); break; } } if (!pr->name[0]) error = EBUSY; PF_RULES_RUNLOCK(); return (error); } int pf_ioctl_natlook(struct pfioc_natlook *pnl) { struct pf_state_key *sk; struct pf_kstate *state; struct pf_state_key_cmp key; int m = 0, direction = pnl->direction; int sidx, didx; /* NATLOOK src and dst are reversed, so reverse sidx/didx */ sidx = (direction == PF_IN) ? 1 : 0; didx = (direction == PF_IN) ? 0 : 1; if (!pnl->proto || PF_AZERO(&pnl->saddr, pnl->af) || PF_AZERO(&pnl->daddr, pnl->af) || ((pnl->proto == IPPROTO_TCP || pnl->proto == IPPROTO_UDP) && (!pnl->dport || !pnl->sport))) return (EINVAL); switch (pnl->direction) { case PF_IN: case PF_OUT: case PF_INOUT: break; default: return (EINVAL); } switch (pnl->af) { #ifdef INET case AF_INET: break; #endif /* INET */ #ifdef INET6 case AF_INET6: break; #endif /* INET6 */ default: return (EAFNOSUPPORT); } bzero(&key, sizeof(key)); key.af = pnl->af; key.proto = pnl->proto; pf_addrcpy(&key.addr[sidx], &pnl->saddr, pnl->af); key.port[sidx] = pnl->sport; pf_addrcpy(&key.addr[didx], &pnl->daddr, pnl->af); key.port[didx] = pnl->dport; state = pf_find_state_all(&key, direction, &m); if (state == NULL) return (ENOENT); if (m > 1) { PF_STATE_UNLOCK(state); return (E2BIG); /* more than one state */ } sk = state->key[sidx]; pf_addrcpy(&pnl->rsaddr, &sk->addr[sidx], sk->af); pnl->rsport = sk->port[sidx]; pf_addrcpy(&pnl->rdaddr, &sk->addr[didx], sk->af); pnl->rdport = sk->port[didx]; PF_STATE_UNLOCK(state); return (0); } static int pfioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { int error = 0; PF_RULES_RLOCK_TRACKER; #define ERROUT_IOCTL(target, x) \ do { \ error = (x); \ SDT_PROBE3(pf, ioctl, ioctl, error, cmd, error, __LINE__); \ goto target; \ } while (0) /* XXX keep in sync with switch() below */ if (securelevel_gt(td->td_ucred, 2)) switch (cmd) { case DIOCGETRULES: case DIOCGETRULENV: case DIOCGETADDRS: case DIOCGETADDR: case DIOCGETSTATE: case DIOCGETSTATENV: case DIOCSETSTATUSIF: case DIOCGETSTATUSNV: case DIOCCLRSTATUS: case DIOCNATLOOK: case DIOCSETDEBUG: #ifdef COMPAT_FREEBSD14 case DIOCGETSTATES: case DIOCGETSTATESV2: #endif case DIOCGETTIMEOUT: case DIOCCLRRULECTRS: case DIOCGETLIMIT: case DIOCGETALTQSV0: case DIOCGETALTQSV1: case DIOCGETALTQV0: case DIOCGETALTQV1: case DIOCGETQSTATSV0: case DIOCGETQSTATSV1: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCRGETTABLES: case DIOCRGETTSTATS: case DIOCRCLRTSTATS: case DIOCRCLRADDRS: case DIOCRADDADDRS: case DIOCRDELADDRS: 
case DIOCRSETADDRS: case DIOCRGETADDRS: case DIOCRGETASTATS: case DIOCRCLRASTATS: case DIOCRTSTADDRS: case DIOCOSFPGET: case DIOCGETSRCNODES: case DIOCCLRSRCNODES: case DIOCGETSYNCOOKIES: case DIOCIGETIFACES: case DIOCGIFSPEEDV0: case DIOCGIFSPEEDV1: case DIOCSETIFFLAG: case DIOCCLRIFFLAG: case DIOCGETETHRULES: case DIOCGETETHRULE: case DIOCGETETHRULESETS: case DIOCGETETHRULESET: break; case DIOCRCLRTABLES: case DIOCRADDTABLES: case DIOCRDELTABLES: case DIOCRSETTFLAGS: if (((struct pfioc_table *)addr)->pfrio_flags & PFR_FLAG_DUMMY) break; /* dummy operation ok */ return (EPERM); default: return (EPERM); } if (!(flags & FWRITE)) switch (cmd) { case DIOCGETRULES: case DIOCGETADDRS: case DIOCGETADDR: case DIOCGETSTATE: case DIOCGETSTATENV: case DIOCGETSTATUSNV: #ifdef COMPAT_FREEBSD14 case DIOCGETSTATES: case DIOCGETSTATESV2: #endif case DIOCGETTIMEOUT: case DIOCGETLIMIT: case DIOCGETALTQSV0: case DIOCGETALTQSV1: case DIOCGETALTQV0: case DIOCGETALTQV1: case DIOCGETQSTATSV0: case DIOCGETQSTATSV1: case DIOCGETRULESETS: case DIOCGETRULESET: case DIOCNATLOOK: case DIOCRGETTABLES: case DIOCRGETTSTATS: case DIOCRGETADDRS: case DIOCRGETASTATS: case DIOCRTSTADDRS: case DIOCOSFPGET: case DIOCGETSRCNODES: case DIOCGETSYNCOOKIES: case DIOCIGETIFACES: case DIOCGIFSPEEDV1: case DIOCGIFSPEEDV0: case DIOCGETRULENV: case DIOCGETETHRULES: case DIOCGETETHRULE: case DIOCGETETHRULESETS: case DIOCGETETHRULESET: break; case DIOCRCLRTABLES: case DIOCRADDTABLES: case DIOCRDELTABLES: case DIOCRCLRTSTATS: case DIOCRCLRADDRS: case DIOCRADDADDRS: case DIOCRDELADDRS: case DIOCRSETADDRS: case DIOCRSETTFLAGS: if (((struct pfioc_table *)addr)->pfrio_flags & PFR_FLAG_DUMMY) { flags |= FWRITE; /* need write lock for dummy */ break; /* dummy operation ok */ } return (EACCES); default: return (EACCES); } CURVNET_SET(TD_TO_VNET(td)); switch (cmd) { #ifdef COMPAT_FREEBSD14 case DIOCSTART: error = pf_start(); break; case DIOCSTOP: error = pf_stop(); break; #endif case DIOCGETETHRULES: { struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl; void *packed; struct pf_keth_rule *tail; struct pf_keth_ruleset *rs; u_int32_t ticket, nr; const char *anchor = ""; nvl = NULL; packed = NULL; #define ERROUT(x) ERROUT_IOCTL(DIOCGETETHRULES_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); /* Copy the request in */ packed = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, packed, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(packed, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! 
nvlist_exists_string(nvl, "anchor")) ERROUT(EBADMSG); anchor = nvlist_get_string(nvl, "anchor"); rs = pf_find_keth_ruleset(anchor); nvlist_destroy(nvl); nvl = NULL; free(packed, M_NVLIST); packed = NULL; if (rs == NULL) ERROUT(ENOENT); /* Reply */ nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); PF_RULES_RLOCK(); ticket = rs->active.ticket; tail = TAILQ_LAST(rs->active.rules, pf_keth_ruleq); if (tail) nr = tail->nr + 1; else nr = 0; PF_RULES_RUNLOCK(); nvlist_add_number(nvl, "ticket", ticket); nvlist_add_number(nvl, "nr", nr); packed = nvlist_pack(nvl, &nv->len); if (packed == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(packed, nv->data, nv->len); #undef ERROUT DIOCGETETHRULES_error: free(packed, M_NVLIST); nvlist_destroy(nvl); break; } case DIOCGETETHRULE: { struct epoch_tracker et; struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl = NULL; void *nvlpacked = NULL; struct pf_keth_rule *rule = NULL; struct pf_keth_ruleset *rs; u_int32_t ticket, nr; bool clear = false; const char *anchor; #define ERROUT(x) ERROUT_IOCTL(DIOCGETETHRULE_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "ticket")) ERROUT(EBADMSG); ticket = nvlist_get_number(nvl, "ticket"); if (! nvlist_exists_string(nvl, "anchor")) ERROUT(EBADMSG); anchor = nvlist_get_string(nvl, "anchor"); if (nvlist_exists_bool(nvl, "clear")) clear = nvlist_get_bool(nvl, "clear"); if (clear && !(flags & FWRITE)) ERROUT(EACCES); if (! nvlist_exists_number(nvl, "nr")) ERROUT(EBADMSG); nr = nvlist_get_number(nvl, "nr"); PF_RULES_RLOCK(); rs = pf_find_keth_ruleset(anchor); if (rs == NULL) { PF_RULES_RUNLOCK(); ERROUT(ENOENT); } if (ticket != rs->active.ticket) { PF_RULES_RUNLOCK(); ERROUT(EBUSY); } nvlist_destroy(nvl); nvl = NULL; free(nvlpacked, M_NVLIST); nvlpacked = NULL; rule = TAILQ_FIRST(rs->active.rules); while ((rule != NULL) && (rule->nr != nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { PF_RULES_RUNLOCK(); ERROUT(ENOENT); } /* Make sure rule can't go away. 
*/ NET_EPOCH_ENTER(et); PF_RULES_RUNLOCK(); nvl = pf_keth_rule_to_nveth_rule(rule); if (pf_keth_anchor_nvcopyout(rs, rule, nvl)) { NET_EPOCH_EXIT(et); ERROUT(EBUSY); } NET_EPOCH_EXIT(et); if (nvl == NULL) ERROUT(ENOMEM); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); if (error == 0 && clear) { counter_u64_zero(rule->evaluations); for (int i = 0; i < 2; i++) { counter_u64_zero(rule->packets[i]); counter_u64_zero(rule->bytes[i]); } } #undef ERROUT DIOCGETETHRULE_error: free(nvlpacked, M_NVLIST); nvlist_destroy(nvl); break; } case DIOCADDETHRULE: { struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl = NULL; void *nvlpacked = NULL; struct pf_keth_rule *rule = NULL, *tail = NULL; struct pf_keth_ruleset *ruleset = NULL; struct pfi_kkif *kif = NULL, *bridge_to_kif = NULL; const char *anchor = "", *anchor_call = ""; #define ERROUT(x) ERROUT_IOCTL(DIOCADDETHRULE_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "ticket")) ERROUT(EBADMSG); if (nvlist_exists_string(nvl, "anchor")) anchor = nvlist_get_string(nvl, "anchor"); if (nvlist_exists_string(nvl, "anchor_call")) anchor_call = nvlist_get_string(nvl, "anchor_call"); ruleset = pf_find_keth_ruleset(anchor); if (ruleset == NULL) ERROUT(EINVAL); if (nvlist_get_number(nvl, "ticket") != ruleset->inactive.ticket) { DPFPRINTF(PF_DEBUG_MISC, "ticket: %d != %d", (u_int32_t)nvlist_get_number(nvl, "ticket"), ruleset->inactive.ticket); ERROUT(EBUSY); } rule = malloc(sizeof(*rule), M_PFRULE, M_WAITOK); rule->timestamp = NULL; error = pf_nveth_rule_to_keth_rule(nvl, rule); if (error != 0) ERROUT(error); if (rule->ifname[0]) kif = pf_kkif_create(M_WAITOK); if (rule->bridge_to_name[0]) bridge_to_kif = pf_kkif_create(M_WAITOK); rule->evaluations = counter_u64_alloc(M_WAITOK); for (int i = 0; i < 2; i++) { rule->packets[i] = counter_u64_alloc(M_WAITOK); rule->bytes[i] = counter_u64_alloc(M_WAITOK); } rule->timestamp = uma_zalloc_pcpu(pf_timestamp_pcpu_zone, M_WAITOK | M_ZERO); PF_RULES_WLOCK(); if (rule->ifname[0]) { rule->kif = pfi_kkif_attach(kif, rule->ifname); pfi_kkif_ref(rule->kif); } else rule->kif = NULL; if (rule->bridge_to_name[0]) { rule->bridge_to = pfi_kkif_attach(bridge_to_kif, rule->bridge_to_name); pfi_kkif_ref(rule->bridge_to); } else rule->bridge_to = NULL; #ifdef ALTQ /* set queue IDs */ if (rule->qname[0] != 0) { - if ((rule->qid = pf_qname2qid(rule->qname)) == 0) + if ((rule->qid = pf_qname2qid(rule->qname, true)) == 0) error = EBUSY; else rule->qid = rule->qid; } #endif if (rule->tagname[0]) if ((rule->tag = pf_tagname2tag(rule->tagname)) == 0) error = EBUSY; if (rule->match_tagname[0]) if ((rule->match_tag = pf_tagname2tag( rule->match_tagname)) == 0) error = EBUSY; if (error == 0 && rule->ipdst.addr.type == PF_ADDR_TABLE) error = pf_eth_addr_setup(ruleset, &rule->ipdst.addr); if (error == 0 && rule->ipsrc.addr.type == PF_ADDR_TABLE) error = pf_eth_addr_setup(ruleset, &rule->ipsrc.addr); if (error) { pf_free_eth_rule(rule); PF_RULES_WUNLOCK(); ERROUT(error); } if (pf_keth_anchor_setup(rule, ruleset, anchor_call)) { pf_free_eth_rule(rule); PF_RULES_WUNLOCK(); ERROUT(EINVAL); } tail = TAILQ_LAST(ruleset->inactive.rules, pf_keth_ruleq); if (tail) 
rule->nr = tail->nr + 1; else rule->nr = 0; TAILQ_INSERT_TAIL(ruleset->inactive.rules, rule, entries); PF_RULES_WUNLOCK(); #undef ERROUT DIOCADDETHRULE_error: nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); break; } case DIOCGETETHRULESETS: { struct epoch_tracker et; struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl = NULL; void *nvlpacked = NULL; struct pf_keth_ruleset *ruleset; struct pf_keth_anchor *anchor; int nr = 0; #define ERROUT(x) ERROUT_IOCTL(DIOCGETETHRULESETS_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_string(nvl, "path")) ERROUT(EBADMSG); NET_EPOCH_ENTER(et); if ((ruleset = pf_find_keth_ruleset( nvlist_get_string(nvl, "path"))) == NULL) { NET_EPOCH_EXIT(et); ERROUT(ENOENT); } if (ruleset->anchor == NULL) { RB_FOREACH(anchor, pf_keth_anchor_global, &V_pf_keth_anchors) if (anchor->parent == NULL) nr++; } else { RB_FOREACH(anchor, pf_keth_anchor_node, &ruleset->anchor->children) nr++; } NET_EPOCH_EXIT(et); nvlist_destroy(nvl); nvl = NULL; free(nvlpacked, M_NVLIST); nvlpacked = NULL; nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); nvlist_add_number(nvl, "nr", nr); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); #undef ERROUT DIOCGETETHRULESETS_error: free(nvlpacked, M_NVLIST); nvlist_destroy(nvl); break; } case DIOCGETETHRULESET: { struct epoch_tracker et; struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl = NULL; void *nvlpacked = NULL; struct pf_keth_ruleset *ruleset; struct pf_keth_anchor *anchor; int nr = 0, req_nr = 0; bool found = false; #define ERROUT(x) ERROUT_IOCTL(DIOCGETETHRULESET_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_string(nvl, "path")) ERROUT(EBADMSG); if (! 
nvlist_exists_number(nvl, "nr")) ERROUT(EBADMSG); req_nr = nvlist_get_number(nvl, "nr"); NET_EPOCH_ENTER(et); if ((ruleset = pf_find_keth_ruleset( nvlist_get_string(nvl, "path"))) == NULL) { NET_EPOCH_EXIT(et); ERROUT(ENOENT); } nvlist_destroy(nvl); nvl = NULL; free(nvlpacked, M_NVLIST); nvlpacked = NULL; nvl = nvlist_create(0); if (nvl == NULL) { NET_EPOCH_EXIT(et); ERROUT(ENOMEM); } if (ruleset->anchor == NULL) { RB_FOREACH(anchor, pf_keth_anchor_global, &V_pf_keth_anchors) { if (anchor->parent == NULL && nr++ == req_nr) { found = true; break; } } } else { RB_FOREACH(anchor, pf_keth_anchor_node, &ruleset->anchor->children) { if (nr++ == req_nr) { found = true; break; } } } NET_EPOCH_EXIT(et); if (found) { nvlist_add_number(nvl, "nr", nr); nvlist_add_string(nvl, "name", anchor->name); if (ruleset->anchor) nvlist_add_string(nvl, "path", ruleset->anchor->path); else nvlist_add_string(nvl, "path", ""); } else { ERROUT(EBUSY); } nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); #undef ERROUT DIOCGETETHRULESET_error: free(nvlpacked, M_NVLIST); nvlist_destroy(nvl); break; } case DIOCADDRULENV: { struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvl = NULL; void *nvlpacked = NULL; struct pf_krule *rule = NULL; const char *anchor = "", *anchor_call = ""; uint32_t ticket = 0, pool_ticket = 0; #define ERROUT(x) ERROUT_IOCTL(DIOCADDRULENV_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "ticket")) ERROUT(EINVAL); ticket = nvlist_get_number(nvl, "ticket"); if (! nvlist_exists_number(nvl, "pool_ticket")) ERROUT(EINVAL); pool_ticket = nvlist_get_number(nvl, "pool_ticket"); if (! nvlist_exists_nvlist(nvl, "rule")) ERROUT(EINVAL); rule = pf_krule_alloc(); error = pf_nvrule_to_krule(nvlist_get_nvlist(nvl, "rule"), rule); if (error) ERROUT(error); if (nvlist_exists_string(nvl, "anchor")) anchor = nvlist_get_string(nvl, "anchor"); if (nvlist_exists_string(nvl, "anchor_call")) anchor_call = nvlist_get_string(nvl, "anchor_call"); if ((error = nvlist_error(nvl))) ERROUT(error); /* Frees rule on error */ error = pf_ioctl_addrule(rule, ticket, pool_ticket, anchor, anchor_call, td->td_ucred->cr_ruid, td->td_proc ? td->td_proc->p_pid : 0); nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); break; #undef ERROUT DIOCADDRULENV_error: pf_krule_free(rule); nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); break; } case DIOCADDRULE: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; struct pf_krule *rule; rule = pf_krule_alloc(); error = pf_rule_to_krule(&pr->rule, rule); if (error != 0) { pf_krule_free(rule); goto fail; } pr->anchor[sizeof(pr->anchor) - 1] = '\0'; /* Frees rule on error */ error = pf_ioctl_addrule(rule, pr->ticket, pr->pool_ticket, pr->anchor, pr->anchor_call, td->td_ucred->cr_ruid, td->td_proc ? 
td->td_proc->p_pid : 0); break; } case DIOCGETRULES: { struct pfioc_rule *pr = (struct pfioc_rule *)addr; pr->anchor[sizeof(pr->anchor) - 1] = '\0'; error = pf_ioctl_getrules(pr); break; } case DIOCGETRULENV: { struct pfioc_nv *nv = (struct pfioc_nv *)addr; nvlist_t *nvrule = NULL; nvlist_t *nvl = NULL; struct pf_kruleset *ruleset; struct pf_krule *rule; void *nvlpacked = NULL; int rs_num, nr; bool clear_counter = false; #define ERROUT(x) ERROUT_IOCTL(DIOCGETRULENV_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); /* Copy the request in */ nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_string(nvl, "anchor")) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "ruleset")) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "ticket")) ERROUT(EBADMSG); if (! nvlist_exists_number(nvl, "nr")) ERROUT(EBADMSG); if (nvlist_exists_bool(nvl, "clear_counter")) clear_counter = nvlist_get_bool(nvl, "clear_counter"); if (clear_counter && !(flags & FWRITE)) ERROUT(EACCES); nr = nvlist_get_number(nvl, "nr"); PF_RULES_WLOCK(); ruleset = pf_find_kruleset(nvlist_get_string(nvl, "anchor")); if (ruleset == NULL) { PF_RULES_WUNLOCK(); ERROUT(ENOENT); } rs_num = pf_get_ruleset_number(nvlist_get_number(nvl, "ruleset")); if (rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); ERROUT(EINVAL); } if (nvlist_get_number(nvl, "ticket") != ruleset->rules[rs_num].active.ticket) { PF_RULES_WUNLOCK(); ERROUT(EBUSY); } if ((error = nvlist_error(nvl))) { PF_RULES_WUNLOCK(); ERROUT(error); } rule = TAILQ_FIRST(ruleset->rules[rs_num].active.ptr); while ((rule != NULL) && (rule->nr != nr)) rule = TAILQ_NEXT(rule, entries); if (rule == NULL) { PF_RULES_WUNLOCK(); ERROUT(EBUSY); } nvrule = pf_krule_to_nvrule(rule); nvlist_destroy(nvl); nvl = nvlist_create(0); if (nvl == NULL) { PF_RULES_WUNLOCK(); ERROUT(ENOMEM); } nvlist_add_number(nvl, "nr", nr); nvlist_add_nvlist(nvl, "rule", nvrule); nvlist_destroy(nvrule); nvrule = NULL; if (pf_kanchor_nvcopyout(ruleset, rule, nvl)) { PF_RULES_WUNLOCK(); ERROUT(EBUSY); } free(nvlpacked, M_NVLIST); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) { PF_RULES_WUNLOCK(); ERROUT(ENOMEM); } if (nv->size == 0) { PF_RULES_WUNLOCK(); ERROUT(0); } else if (nv->size < nv->len) { PF_RULES_WUNLOCK(); ERROUT(ENOSPC); } if (clear_counter) pf_krule_clear_counters(rule); PF_RULES_WUNLOCK(); error = copyout(nvlpacked, nv->data, nv->len); #undef ERROUT DIOCGETRULENV_error: free(nvlpacked, M_NVLIST); nvlist_destroy(nvrule); nvlist_destroy(nvl); break; } case DIOCCHANGERULE: { struct pfioc_rule *pcr = (struct pfioc_rule *)addr; struct pf_kruleset *ruleset; struct pf_krule *oldrule = NULL, *newrule = NULL; struct pfi_kkif *kif = NULL; struct pf_kpooladdr *pa; u_int32_t nr = 0; int rs_num; pcr->anchor[sizeof(pcr->anchor) - 1] = '\0'; if (pcr->action < PF_CHANGE_ADD_HEAD || pcr->action > PF_CHANGE_GET_TICKET) { error = EINVAL; goto fail; } if (pcr->rule.return_icmp >> 8 > ICMP_MAXTYPE) { error = EINVAL; goto fail; } if (pcr->action != PF_CHANGE_REMOVE) { newrule = pf_krule_alloc(); error = pf_rule_to_krule(&pcr->rule, newrule); if (error != 0) { pf_krule_free(newrule); goto fail; } if ((error = pf_rule_checkaf(newrule))) { pf_krule_free(newrule); goto fail; } if (newrule->ifname[0]) kif = pf_kkif_create(M_WAITOK); pf_counter_u64_init(&newrule->evaluations, M_WAITOK); for (int i = 0; i < 2; i++) { 
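/* one packet and one byte counter per direction */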
pf_counter_u64_init(&newrule->packets[i], M_WAITOK); pf_counter_u64_init(&newrule->bytes[i], M_WAITOK); } newrule->states_cur = counter_u64_alloc(M_WAITOK); newrule->states_tot = counter_u64_alloc(M_WAITOK); for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) newrule->src_nodes[sn_type] = counter_u64_alloc(M_WAITOK); newrule->cuid = td->td_ucred->cr_ruid; newrule->cpid = td->td_proc ? td->td_proc->p_pid : 0; TAILQ_INIT(&newrule->nat.list); TAILQ_INIT(&newrule->rdr.list); TAILQ_INIT(&newrule->route.list); } #define ERROUT(x) ERROUT_IOCTL(DIOCCHANGERULE_error, x) PF_CONFIG_LOCK(); PF_RULES_WLOCK(); #ifdef PF_WANT_32_TO_64_COUNTER if (newrule != NULL) { LIST_INSERT_HEAD(&V_pf_allrulelist, newrule, allrulelist); newrule->allrulelinked = true; V_pf_allrulecount++; } #endif if (!(pcr->action == PF_CHANGE_REMOVE || pcr->action == PF_CHANGE_GET_TICKET) && pcr->pool_ticket != V_ticket_pabuf) ERROUT(EBUSY); ruleset = pf_find_kruleset(pcr->anchor); if (ruleset == NULL) ERROUT(EINVAL); rs_num = pf_get_ruleset_number(pcr->rule.action); if (rs_num >= PF_RULESET_MAX) ERROUT(EINVAL); /* * XXXMJG: there is no guarantee that the ruleset was * created by the usual route of calling DIOCXBEGIN. * As a result it is possible the rule tree will not * be allocated yet. Hack around it by doing it here. * Note it is fine to let the tree persist in case of * error as it will be freed down the road on future * updates (if need be). */ if (ruleset->rules[rs_num].active.tree == NULL) { ruleset->rules[rs_num].active.tree = pf_rule_tree_alloc(M_NOWAIT); if (ruleset->rules[rs_num].active.tree == NULL) { ERROUT(ENOMEM); } } if (pcr->action == PF_CHANGE_GET_TICKET) { pcr->ticket = ++ruleset->rules[rs_num].active.ticket; ERROUT(0); } else if (pcr->ticket != ruleset->rules[rs_num].active.ticket) ERROUT(EINVAL); if (pcr->action != PF_CHANGE_REMOVE) { if (newrule->ifname[0]) { newrule->kif = pfi_kkif_attach(kif, newrule->ifname); kif = NULL; pfi_kkif_ref(newrule->kif); } else newrule->kif = NULL; if (newrule->rtableid > 0 && newrule->rtableid >= rt_numfibs) error = EBUSY; #ifdef ALTQ /* set queue IDs */ if (newrule->qname[0] != 0) { if ((newrule->qid = - pf_qname2qid(newrule->qname)) == 0) + pf_qname2qid(newrule->qname, true)) == 0) error = EBUSY; else if (newrule->pqname[0] != 0) { if ((newrule->pqid = - pf_qname2qid(newrule->pqname)) == 0) + pf_qname2qid(newrule->pqname, true)) == 0) error = EBUSY; } else newrule->pqid = newrule->qid; } #endif /* ALTQ */ if (newrule->tagname[0]) if ((newrule->tag = pf_tagname2tag(newrule->tagname)) == 0) error = EBUSY; if (newrule->match_tagname[0]) if ((newrule->match_tag = pf_tagname2tag( newrule->match_tagname)) == 0) error = EBUSY; if (newrule->rt && !newrule->direction) error = EINVAL; if (!newrule->log) newrule->logif = 0; if (pf_addr_setup(ruleset, &newrule->src.addr, newrule->af)) error = ENOMEM; if (pf_addr_setup(ruleset, &newrule->dst.addr, newrule->af)) error = ENOMEM; if (pf_kanchor_setup(newrule, ruleset, pcr->anchor_call)) error = EINVAL; for (int i = 0; i < 3; i++) { TAILQ_FOREACH(pa, &V_pf_pabuf[i], entries) if (pa->addr.type == PF_ADDR_TABLE) { pa->addr.p.tbl = pfr_attach_table(ruleset, pa->addr.v.tblname); if (pa->addr.p.tbl == NULL) error = ENOMEM; } } newrule->overload_tbl = NULL; if (newrule->overload_tblname[0]) { if ((newrule->overload_tbl = pfr_attach_table( ruleset, newrule->overload_tblname)) == NULL) error = EINVAL; else newrule->overload_tbl->pfrkt_flags |= PFR_TFLAG_ACTIVE; } pf_mv_kpool(&V_pf_pabuf[0], &newrule->nat.list); pf_mv_kpool(&V_pf_pabuf[1], &newrule->rdr.list);
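/* * pabuf[0..2] hold the nat, rdr and route pool addresses queued up * since DIOCBEGINADDRS; the route pool is handed over below. */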
pf_mv_kpool(&V_pf_pabuf[2], &newrule->route.list); if (((((newrule->action == PF_NAT) || (newrule->action == PF_RDR) || (newrule->action == PF_BINAT) || (newrule->rt > PF_NOPFROUTE)) && !newrule->anchor)) && (TAILQ_FIRST(&newrule->rdr.list) == NULL)) error = EINVAL; if (error) { pf_free_rule(newrule); PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); goto fail; } newrule->nat.cur = TAILQ_FIRST(&newrule->nat.list); newrule->rdr.cur = TAILQ_FIRST(&newrule->rdr.list); } pf_empty_kpool(&V_pf_pabuf[0]); pf_empty_kpool(&V_pf_pabuf[1]); pf_empty_kpool(&V_pf_pabuf[2]); if (pcr->action == PF_CHANGE_ADD_HEAD) oldrule = TAILQ_FIRST( ruleset->rules[rs_num].active.ptr); else if (pcr->action == PF_CHANGE_ADD_TAIL) oldrule = TAILQ_LAST( ruleset->rules[rs_num].active.ptr, pf_krulequeue); else { oldrule = TAILQ_FIRST( ruleset->rules[rs_num].active.ptr); while ((oldrule != NULL) && (oldrule->nr != pcr->nr)) oldrule = TAILQ_NEXT(oldrule, entries); if (oldrule == NULL) { if (newrule != NULL) pf_free_rule(newrule); PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); error = EINVAL; goto fail; } } if (pcr->action == PF_CHANGE_REMOVE) { pf_unlink_rule(ruleset->rules[rs_num].active.ptr, oldrule); RB_REMOVE(pf_krule_global, ruleset->rules[rs_num].active.tree, oldrule); ruleset->rules[rs_num].active.rcount--; } else { pf_hash_rule(newrule); if (RB_INSERT(pf_krule_global, ruleset->rules[rs_num].active.tree, newrule) != NULL) { pf_free_rule(newrule); PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); error = EEXIST; goto fail; } if (oldrule == NULL) TAILQ_INSERT_TAIL( ruleset->rules[rs_num].active.ptr, newrule, entries); else if (pcr->action == PF_CHANGE_ADD_HEAD || pcr->action == PF_CHANGE_ADD_BEFORE) TAILQ_INSERT_BEFORE(oldrule, newrule, entries); else TAILQ_INSERT_AFTER( ruleset->rules[rs_num].active.ptr, oldrule, newrule, entries); ruleset->rules[rs_num].active.rcount++; } nr = 0; TAILQ_FOREACH(oldrule, ruleset->rules[rs_num].active.ptr, entries) oldrule->nr = nr++; ruleset->rules[rs_num].active.ticket++; pf_calc_skip_steps(ruleset->rules[rs_num].active.ptr); pf_remove_if_empty_kruleset(ruleset); PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); break; #undef ERROUT DIOCCHANGERULE_error: PF_RULES_WUNLOCK(); PF_CONFIG_UNLOCK(); pf_krule_free(newrule); pf_kkif_free(kif); break; } case DIOCCLRSTATESNV: { error = pf_clearstates_nv((struct pfioc_nv *)addr); break; } case DIOCKILLSTATESNV: { error = pf_killstates_nv((struct pfioc_nv *)addr); break; } case DIOCADDSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pfsync_state_1301 *sp = &ps->state; if (sp->timeout >= PFTM_MAX) { error = EINVAL; goto fail; } if (V_pfsync_state_import_ptr != NULL) { PF_RULES_RLOCK(); error = V_pfsync_state_import_ptr( (union pfsync_state_union *)sp, PFSYNC_SI_IOCTL, PFSYNC_MSG_VERSION_1301); PF_RULES_RUNLOCK(); } else error = EOPNOTSUPP; break; } case DIOCGETSTATE: { struct pfioc_state *ps = (struct pfioc_state *)addr; struct pf_kstate *s; s = pf_find_state_byid(ps->state.id, ps->state.creatorid); if (s == NULL) { error = ENOENT; goto fail; } pfsync_state_export((union pfsync_state_union*)&ps->state, s, PFSYNC_MSG_VERSION_1301); PF_STATE_UNLOCK(s); break; } case DIOCGETSTATENV: { error = pf_getstate((struct pfioc_nv *)addr); break; } #ifdef COMPAT_FREEBSD14 case DIOCGETSTATES: { struct pfioc_states *ps = (struct pfioc_states *)addr; struct pf_kstate *s; struct pfsync_state_1301 *pstore, *p; int i, nr; size_t slice_count = 16, count; void *out; if (ps->ps_len <= 0) { nr = uma_zone_get_cur(V_pf_state_z); ps->ps_len = sizeof(struct pfsync_state_1301) * nr; break; } 
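/* * Callers are expected to probe first: an ioctl with ps_len == 0 * reports the space needed for the current state count; the caller * then allocates (ideally with headroom, states keep being created) * and calls again to copy the states out. Typical usage sketch: * ps.ps_len = 0; ioctl(dev, DIOCGETSTATES, &ps); * ps.ps_states = malloc(ps.ps_len); ioctl(dev, DIOCGETSTATES, &ps); */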
out = ps->ps_states; pstore = mallocarray(slice_count, sizeof(struct pfsync_state_1301), M_PF, M_WAITOK | M_ZERO); nr = 0; for (i = 0; i <= V_pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; DIOCGETSTATES_retry: p = pstore; if (LIST_EMPTY(&ih->states)) continue; PF_HASHROW_LOCK(ih); count = 0; LIST_FOREACH(s, &ih->states, entry) { if (s->timeout == PFTM_UNLINKED) continue; count++; } if (count > slice_count) { PF_HASHROW_UNLOCK(ih); free(pstore, M_PF); slice_count = count * 2; pstore = mallocarray(slice_count, sizeof(struct pfsync_state_1301), M_PF, M_WAITOK | M_ZERO); goto DIOCGETSTATES_retry; } if ((nr+count) * sizeof(*p) > ps->ps_len) { PF_HASHROW_UNLOCK(ih); goto DIOCGETSTATES_full; } LIST_FOREACH(s, &ih->states, entry) { if (s->timeout == PFTM_UNLINKED) continue; pfsync_state_export((union pfsync_state_union*)p, s, PFSYNC_MSG_VERSION_1301); p++; nr++; } PF_HASHROW_UNLOCK(ih); error = copyout(pstore, out, sizeof(struct pfsync_state_1301) * count); if (error) { free(pstore, M_PF); goto fail; } out = ps->ps_states + nr; } DIOCGETSTATES_full: ps->ps_len = sizeof(struct pfsync_state_1301) * nr; free(pstore, M_PF); break; } case DIOCGETSTATESV2: { struct pfioc_states_v2 *ps = (struct pfioc_states_v2 *)addr; struct pf_kstate *s; struct pf_state_export *pstore, *p; int i, nr; size_t slice_count = 16, count; void *out; if (ps->ps_req_version > PF_STATE_VERSION) { error = ENOTSUP; goto fail; } if (ps->ps_len <= 0) { nr = uma_zone_get_cur(V_pf_state_z); ps->ps_len = sizeof(struct pf_state_export) * nr; break; } out = ps->ps_states; pstore = mallocarray(slice_count, sizeof(struct pf_state_export), M_PF, M_WAITOK | M_ZERO); nr = 0; for (i = 0; i <= V_pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; DIOCGETSTATESV2_retry: p = pstore; if (LIST_EMPTY(&ih->states)) continue; PF_HASHROW_LOCK(ih); count = 0; LIST_FOREACH(s, &ih->states, entry) { if (s->timeout == PFTM_UNLINKED) continue; count++; } if (count > slice_count) { PF_HASHROW_UNLOCK(ih); free(pstore, M_PF); slice_count = count * 2; pstore = mallocarray(slice_count, sizeof(struct pf_state_export), M_PF, M_WAITOK | M_ZERO); goto DIOCGETSTATESV2_retry; } if ((nr+count) * sizeof(*p) > ps->ps_len) { PF_HASHROW_UNLOCK(ih); goto DIOCGETSTATESV2_full; } LIST_FOREACH(s, &ih->states, entry) { if (s->timeout == PFTM_UNLINKED) continue; pf_state_export(p, s); p++; nr++; } PF_HASHROW_UNLOCK(ih); error = copyout(pstore, out, sizeof(struct pf_state_export) * count); if (error) { free(pstore, M_PF); goto fail; } out = ps->ps_states + nr; } DIOCGETSTATESV2_full: ps->ps_len = nr * sizeof(struct pf_state_export); free(pstore, M_PF); break; } #endif case DIOCGETSTATUSNV: { error = pf_getstatus((struct pfioc_nv *)addr); break; } case DIOCSETSTATUSIF: { struct pfioc_if *pi = (struct pfioc_if *)addr; if (pi->ifname[0] == 0) { bzero(V_pf_status.ifname, IFNAMSIZ); break; } PF_RULES_WLOCK(); error = pf_user_strcpy(V_pf_status.ifname, pi->ifname, IFNAMSIZ); PF_RULES_WUNLOCK(); break; } case DIOCCLRSTATUS: { pf_ioctl_clear_status(); break; } case DIOCNATLOOK: { struct pfioc_natlook *pnl = (struct pfioc_natlook *)addr; error = pf_ioctl_natlook(pnl); break; } case DIOCSETTIMEOUT: { struct pfioc_tm *pt = (struct pfioc_tm *)addr; error = pf_ioctl_set_timeout(pt->timeout, pt->seconds, &pt->seconds); break; } case DIOCGETTIMEOUT: { struct pfioc_tm *pt = (struct pfioc_tm *)addr; error = pf_ioctl_get_timeout(pt->timeout, &pt->seconds); break; } case DIOCGETLIMIT: { struct pfioc_limit *pl = (struct pfioc_limit *)addr; error = pf_ioctl_get_limit(pl->index, 
&pl->limit); break; } case DIOCSETLIMIT: { struct pfioc_limit *pl = (struct pfioc_limit *)addr; unsigned int old_limit; error = pf_ioctl_set_limit(pl->index, pl->limit, &old_limit); pl->limit = old_limit; break; } case DIOCSETDEBUG: { u_int32_t *level = (u_int32_t *)addr; PF_RULES_WLOCK(); V_pf_status.debug = *level; PF_RULES_WUNLOCK(); break; } case DIOCCLRRULECTRS: { /* obsoleted by DIOCGETRULE with action=PF_GET_CLR_CNTR */ struct pf_kruleset *ruleset = &pf_main_ruleset; struct pf_krule *rule; PF_RULES_WLOCK(); TAILQ_FOREACH(rule, ruleset->rules[PF_RULESET_FILTER].active.ptr, entries) { pf_counter_u64_zero(&rule->evaluations); for (int i = 0; i < 2; i++) { pf_counter_u64_zero(&rule->packets[i]); pf_counter_u64_zero(&rule->bytes[i]); } } PF_RULES_WUNLOCK(); break; } case DIOCGIFSPEEDV0: case DIOCGIFSPEEDV1: { struct pf_ifspeed_v1 *psp = (struct pf_ifspeed_v1 *)addr; struct pf_ifspeed_v1 ps; struct ifnet *ifp; if (psp->ifname[0] == '\0') { error = EINVAL; goto fail; } error = pf_user_strcpy(ps.ifname, psp->ifname, IFNAMSIZ); if (error != 0) goto fail; ifp = ifunit(ps.ifname); if (ifp != NULL) { psp->baudrate32 = (u_int32_t)uqmin(ifp->if_baudrate, UINT_MAX); if (cmd == DIOCGIFSPEEDV1) psp->baudrate = ifp->if_baudrate; } else { error = EINVAL; } break; } #ifdef ALTQ case DIOCSTARTALTQ: { struct pf_altq *altq; PF_RULES_WLOCK(); /* enable all altq interfaces on active list */ TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { error = pf_enable_altq(altq); if (error != 0) break; } } if (error == 0) V_pf_altq_running = 1; PF_RULES_WUNLOCK(); DPFPRINTF(PF_DEBUG_MISC, "altq: started"); break; } case DIOCSTOPALTQ: { struct pf_altq *altq; PF_RULES_WLOCK(); /* disable all altq interfaces on active list */ TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) { if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) == 0) { error = pf_disable_altq(altq); if (error != 0) break; } } if (error == 0) V_pf_altq_running = 0; PF_RULES_WUNLOCK(); DPFPRINTF(PF_DEBUG_MISC, "altq: stopped"); break; } case DIOCADDALTQV0: case DIOCADDALTQV1: { struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr; struct pf_altq *altq, *a; struct ifnet *ifp; altq = malloc(sizeof(*altq), M_PFALTQ, M_WAITOK | M_ZERO); error = pf_import_kaltq(pa, altq, IOCPARM_LEN(cmd)); if (error) goto fail; altq->local_flags = 0; PF_RULES_WLOCK(); if (pa->ticket != V_ticket_altqs_inactive) { PF_RULES_WUNLOCK(); free(altq, M_PFALTQ); error = EBUSY; goto fail; } /* * if this is for a queue, find the discipline and * copy the necessary fields */ if (altq->qname[0] != 0) { - if ((altq->qid = pf_qname2qid(altq->qname)) == 0) { + if ((altq->qid = pf_qname2qid(altq->qname, true)) == 0) { PF_RULES_WUNLOCK(); error = EBUSY; free(altq, M_PFALTQ); goto fail; } altq->altq_disc = NULL; TAILQ_FOREACH(a, V_pf_altq_ifs_inactive, entries) { if (strncmp(a->ifname, altq->ifname, IFNAMSIZ) == 0) { altq->altq_disc = a->altq_disc; break; } } } if ((ifp = ifunit(altq->ifname)) == NULL) altq->local_flags |= PFALTQ_FLAG_IF_REMOVED; else error = altq_add(ifp, altq); if (error) { PF_RULES_WUNLOCK(); free(altq, M_PFALTQ); goto fail; } if (altq->qname[0] != 0) TAILQ_INSERT_TAIL(V_pf_altqs_inactive, altq, entries); else TAILQ_INSERT_TAIL(V_pf_altq_ifs_inactive, altq, entries); /* version error check done on import above */ pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd)); PF_RULES_WUNLOCK(); break; } case DIOCGETALTQSV0: case DIOCGETALTQSV1: { struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr; struct pf_altq *altq; 
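/* * Report how many altqs are active (interface entries followed by * queue entries, the same order pf_altq_get_nth_active() walks them) * together with the ticket that subsequent DIOCGETALTQV* calls must * present. */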
PF_RULES_RLOCK(); pa->nr = 0; TAILQ_FOREACH(altq, V_pf_altq_ifs_active, entries) pa->nr++; TAILQ_FOREACH(altq, V_pf_altqs_active, entries) pa->nr++; pa->ticket = V_ticket_altqs_active; PF_RULES_RUNLOCK(); break; } case DIOCGETALTQV0: case DIOCGETALTQV1: { struct pfioc_altq_v1 *pa = (struct pfioc_altq_v1 *)addr; struct pf_altq *altq; PF_RULES_RLOCK(); if (pa->ticket != V_ticket_altqs_active) { PF_RULES_RUNLOCK(); error = EBUSY; goto fail; } altq = pf_altq_get_nth_active(pa->nr); if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; goto fail; } pf_export_kaltq(altq, pa, IOCPARM_LEN(cmd)); PF_RULES_RUNLOCK(); break; } case DIOCCHANGEALTQV0: case DIOCCHANGEALTQV1: /* CHANGEALTQ not supported yet! */ error = ENODEV; break; case DIOCGETQSTATSV0: case DIOCGETQSTATSV1: { struct pfioc_qstats_v1 *pq = (struct pfioc_qstats_v1 *)addr; struct pf_altq *altq; int nbytes; u_int32_t version; PF_RULES_RLOCK(); if (pq->ticket != V_ticket_altqs_active) { PF_RULES_RUNLOCK(); error = EBUSY; goto fail; } nbytes = pq->nbytes; altq = pf_altq_get_nth_active(pq->nr); if (altq == NULL) { PF_RULES_RUNLOCK(); error = EBUSY; goto fail; } if ((altq->local_flags & PFALTQ_FLAG_IF_REMOVED) != 0) { PF_RULES_RUNLOCK(); error = ENXIO; goto fail; } PF_RULES_RUNLOCK(); if (cmd == DIOCGETQSTATSV0) version = 0; /* DIOCGETQSTATSV0 means stats struct v0 */ else version = pq->version; error = altq_getqstats(altq, pq->buf, &nbytes, version); if (error == 0) { pq->scheduler = altq->scheduler; pq->nbytes = nbytes; } break; } #endif /* ALTQ */ case DIOCBEGINADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; error = pf_ioctl_begin_addrs(&pp->ticket); break; } case DIOCADDADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; struct pf_nl_pooladdr npp = {}; npp.which = PF_RDR; memcpy(&npp, pp, sizeof(*pp)); error = pf_ioctl_add_addr(&npp); break; } case DIOCGETADDRS: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; struct pf_nl_pooladdr npp = {}; npp.which = PF_RDR; memcpy(&npp, pp, sizeof(*pp)); error = pf_ioctl_get_addrs(&npp); memcpy(pp, &npp, sizeof(*pp)); break; } case DIOCGETADDR: { struct pfioc_pooladdr *pp = (struct pfioc_pooladdr *)addr; struct pf_nl_pooladdr npp = {}; npp.which = PF_RDR; memcpy(&npp, pp, sizeof(*pp)); error = pf_ioctl_get_addr(&npp); memcpy(pp, &npp, sizeof(*pp)); break; } case DIOCCHANGEADDR: { struct pfioc_pooladdr *pca = (struct pfioc_pooladdr *)addr; struct pf_kpool *pool; struct pf_kpooladdr *oldpa = NULL, *newpa = NULL; struct pf_kruleset *ruleset; struct pfi_kkif *kif = NULL; pca->anchor[sizeof(pca->anchor) - 1] = '\0'; if (pca->action < PF_CHANGE_ADD_HEAD || pca->action > PF_CHANGE_REMOVE) { error = EINVAL; goto fail; } if (pca->addr.addr.type != PF_ADDR_ADDRMASK && pca->addr.addr.type != PF_ADDR_DYNIFTL && pca->addr.addr.type != PF_ADDR_TABLE) { error = EINVAL; goto fail; } if (pca->addr.addr.p.dyn != NULL) { error = EINVAL; goto fail; } if (pca->action != PF_CHANGE_REMOVE) { #ifndef INET if (pca->af == AF_INET) { error = EAFNOSUPPORT; goto fail; } #endif /* INET */ #ifndef INET6 if (pca->af == AF_INET6) { error = EAFNOSUPPORT; goto fail; } #endif /* INET6 */ newpa = malloc(sizeof(*newpa), M_PFRULE, M_WAITOK); bcopy(&pca->addr, newpa, sizeof(struct pf_pooladdr)); if (newpa->ifname[0]) kif = pf_kkif_create(M_WAITOK); newpa->kif = NULL; } #define ERROUT(x) ERROUT_IOCTL(DIOCCHANGEADDR_error, x) PF_RULES_WLOCK(); ruleset = pf_find_kruleset(pca->anchor); if (ruleset == NULL) ERROUT(EBUSY); pool = pf_get_kpool(pca->anchor, pca->ticket, pca->r_action, pca->r_num, 
pca->r_last, 1, 1, PF_RDR); if (pool == NULL) ERROUT(EBUSY); if (pca->action != PF_CHANGE_REMOVE) { if (newpa->ifname[0]) { newpa->kif = pfi_kkif_attach(kif, newpa->ifname); pfi_kkif_ref(newpa->kif); kif = NULL; } switch (newpa->addr.type) { case PF_ADDR_DYNIFTL: error = pfi_dynaddr_setup(&newpa->addr, pca->af); break; case PF_ADDR_TABLE: newpa->addr.p.tbl = pfr_attach_table(ruleset, newpa->addr.v.tblname); if (newpa->addr.p.tbl == NULL) error = ENOMEM; break; } if (error) goto DIOCCHANGEADDR_error; } switch (pca->action) { case PF_CHANGE_ADD_HEAD: oldpa = TAILQ_FIRST(&pool->list); break; case PF_CHANGE_ADD_TAIL: oldpa = TAILQ_LAST(&pool->list, pf_kpalist); break; default: oldpa = TAILQ_FIRST(&pool->list); for (int i = 0; oldpa && i < pca->nr; i++) oldpa = TAILQ_NEXT(oldpa, entries); if (oldpa == NULL) ERROUT(EINVAL); } if (pca->action == PF_CHANGE_REMOVE) { TAILQ_REMOVE(&pool->list, oldpa, entries); switch (oldpa->addr.type) { case PF_ADDR_DYNIFTL: pfi_dynaddr_remove(oldpa->addr.p.dyn); break; case PF_ADDR_TABLE: pfr_detach_table(oldpa->addr.p.tbl); break; } if (oldpa->kif) pfi_kkif_unref(oldpa->kif); free(oldpa, M_PFRULE); } else { if (oldpa == NULL) TAILQ_INSERT_TAIL(&pool->list, newpa, entries); else if (pca->action == PF_CHANGE_ADD_HEAD || pca->action == PF_CHANGE_ADD_BEFORE) TAILQ_INSERT_BEFORE(oldpa, newpa, entries); else TAILQ_INSERT_AFTER(&pool->list, oldpa, newpa, entries); } pool->cur = TAILQ_FIRST(&pool->list); pf_addrcpy(&pool->counter, &pool->cur->addr.v.a.addr, pca->af); PF_RULES_WUNLOCK(); break; #undef ERROUT DIOCCHANGEADDR_error: if (newpa != NULL) { if (newpa->kif) pfi_kkif_unref(newpa->kif); free(newpa, M_PFRULE); } PF_RULES_WUNLOCK(); pf_kkif_free(kif); break; } case DIOCGETRULESETS: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; pr->path[sizeof(pr->path) - 1] = '\0'; error = pf_ioctl_get_rulesets(pr); break; } case DIOCGETRULESET: { struct pfioc_ruleset *pr = (struct pfioc_ruleset *)addr; pr->path[sizeof(pr->path) - 1] = '\0'; error = pf_ioctl_get_ruleset(pr); break; } case DIOCRCLRTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != 0) { error = ENODEV; goto fail; } PF_RULES_WLOCK(); error = pfr_clr_tables(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); break; } case DIOCRADDTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) { error = ENOMEM; goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_PF); goto fail; } PF_RULES_WLOCK(); error = pfr_add_tables(pfrts, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfrts, M_PF); break; } case DIOCRDELTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) { error = ENOMEM; goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_PF, M_WAITOK); error = 
copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_PF); goto fail; } PF_RULES_WLOCK(); error = pfr_del_tables(pfrts, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfrts, M_PF); break; } case DIOCRGETTABLES: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; int n; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; goto fail; } PF_RULES_RLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); if (n < 0) { PF_RULES_RUNLOCK(); error = EINVAL; goto fail; } io->pfrio_size = min(io->pfrio_size, n); totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_PF, M_NOWAIT | M_ZERO); if (pfrts == NULL) { error = ENOMEM; PF_RULES_RUNLOCK(); goto fail; } error = pfr_get_tables(&io->pfrio_table, pfrts, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfrts, io->pfrio_buffer, totlen); free(pfrts, M_PF); break; } case DIOCRGETTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_tstats *pfrtstats; size_t totlen; int n; if (io->pfrio_esize != sizeof(struct pfr_tstats)) { error = ENODEV; goto fail; } PF_TABLE_STATS_LOCK(); PF_RULES_RLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); if (n < 0) { PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); error = EINVAL; goto fail; } io->pfrio_size = min(io->pfrio_size, n); totlen = io->pfrio_size * sizeof(struct pfr_tstats); pfrtstats = mallocarray(io->pfrio_size, sizeof(struct pfr_tstats), M_PF, M_NOWAIT | M_ZERO); if (pfrtstats == NULL) { error = ENOMEM; PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); goto fail; } error = pfr_get_tstats(&io->pfrio_table, pfrtstats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); if (error == 0) error = copyout(pfrtstats, io->pfrio_buffer, totlen); free(pfrtstats, M_PF); break; } case DIOCRCLRTSTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_table))) { /* We used to count tables and use the minimum required * size, so we didn't fail on overly large requests. * Keep doing so. 
*/ io->pfrio_size = pf_ioctl_maxcount; goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_PF); goto fail; } PF_TABLE_STATS_LOCK(); PF_RULES_RLOCK(); error = pfr_clr_tstats(pfrts, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); PF_TABLE_STATS_UNLOCK(); free(pfrts, M_PF); break; } case DIOCRSETTFLAGS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_table *pfrts; size_t totlen; int n; if (io->pfrio_esize != sizeof(struct pfr_table)) { error = ENODEV; goto fail; } PF_RULES_RLOCK(); n = pfr_table_count(&io->pfrio_table, io->pfrio_flags); if (n < 0) { PF_RULES_RUNLOCK(); error = EINVAL; goto fail; } io->pfrio_size = min(io->pfrio_size, n); PF_RULES_RUNLOCK(); totlen = io->pfrio_size * sizeof(struct pfr_table); pfrts = mallocarray(io->pfrio_size, sizeof(struct pfr_table), M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfrts, totlen); if (error) { free(pfrts, M_PF); goto fail; } PF_RULES_WLOCK(); error = pfr_set_tflags(pfrts, io->pfrio_size, io->pfrio_setflag, io->pfrio_clrflag, &io->pfrio_nchange, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfrts, M_PF); break; } case DIOCRCLRADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; if (io->pfrio_esize != 0) { error = ENODEV; goto fail; } PF_RULES_WLOCK(); error = pfr_clr_addrs(&io->pfrio_table, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); break; } case DIOCRADDADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); io->pfrio_nadd = 0; error = pfr_add_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_PF); break; } case DIOCRDELADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); error = pfr_del_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_ndel, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_PF); break; } case DIOCRSETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen, count; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; goto fail; 
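/*
 * Userland sketch (not part of this diff, error handling trimmed) of
 * what the DIOCRADDADDRS validation above expects from a caller:
 * pfrio_esize must equal sizeof(struct pfr_addr) or the handler
 * returns ENODEV, and pfrio_size is bounded by pf_ioctl_maxcount.
 * The table name "badhosts" is a placeholder, and dev is assumed to
 * be an open /dev/pf descriptor as in the previous sketch:
 *
 *	struct pfr_addr a = { .pfra_af = AF_INET, .pfra_net = 32 };
 *	struct pfioc_table io;
 *
 *	memset(&io, 0, sizeof(io));
 *	inet_pton(AF_INET, "192.0.2.7", &a.pfra_ip4addr);
 *	strlcpy(io.pfrio_table.pfrt_name, "badhosts",
 *	    sizeof(io.pfrio_table.pfrt_name));
 *	io.pfrio_buffer = &a;
 *	io.pfrio_esize = sizeof(a);	// checked against pfr_addr
 *	io.pfrio_size = 1;		// bounded by pf_ioctl_maxcount
 *	if (ioctl(dev, DIOCRADDADDRS, &io) == -1)
 *		err(1, "DIOCRADDADDRS");
 *	printf("%d address(es) added\n", io.pfrio_nadd);
 */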
} if (io->pfrio_size < 0 || io->pfrio_size2 < 0) { error = EINVAL; goto fail; } count = max(io->pfrio_size, io->pfrio_size2); if (count > pf_ioctl_maxcount || WOULD_OVERFLOW(count, sizeof(struct pfr_addr))) { error = EINVAL; goto fail; } totlen = count * sizeof(struct pfr_addr); pfras = mallocarray(count, sizeof(struct pfr_addr), M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); error = pfr_set_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_size2, &io->pfrio_nadd, &io->pfrio_ndel, &io->pfrio_nchange, io->pfrio_flags | PFR_FLAG_USERIOCTL, 0); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_PF); break; } case DIOCRGETADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); error = pfr_get_addrs(&io->pfrio_table, pfras, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_PF); break; } case DIOCRGETASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_astats *pfrastats; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_astats)) { error = ENODEV; goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_astats))) { error = EINVAL; goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_astats); pfrastats = mallocarray(io->pfrio_size, sizeof(struct pfr_astats), M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); error = pfr_get_astats(&io->pfrio_table, pfrastats, &io->pfrio_size, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfrastats, io->pfrio_buffer, totlen); free(pfrastats, M_PF); break; } case DIOCRCLRASTATS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); error = pfr_clr_astats(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nzero, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); if (error == 0 && io->pfrio_flags & PFR_FLAG_FEEDBACK) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_PF); break; } case DIOCRTSTADDRS: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, 
sizeof(struct pfr_addr), M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_PF); goto fail; } PF_RULES_RLOCK(); error = pfr_tst_addrs(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nmatch, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_RUNLOCK(); if (error == 0) error = copyout(pfras, io->pfrio_buffer, totlen); free(pfras, M_PF); break; } case DIOCRINADEFINE: { struct pfioc_table *io = (struct pfioc_table *)addr; struct pfr_addr *pfras; size_t totlen; if (io->pfrio_esize != sizeof(struct pfr_addr)) { error = ENODEV; goto fail; } if (io->pfrio_size < 0 || io->pfrio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfrio_size, sizeof(struct pfr_addr))) { error = EINVAL; goto fail; } totlen = io->pfrio_size * sizeof(struct pfr_addr); pfras = mallocarray(io->pfrio_size, sizeof(struct pfr_addr), M_PF, M_WAITOK); error = copyin(io->pfrio_buffer, pfras, totlen); if (error) { free(pfras, M_PF); goto fail; } PF_RULES_WLOCK(); error = pfr_ina_define(&io->pfrio_table, pfras, io->pfrio_size, &io->pfrio_nadd, &io->pfrio_naddr, io->pfrio_ticket, io->pfrio_flags | PFR_FLAG_USERIOCTL); PF_RULES_WUNLOCK(); free(pfras, M_PF); break; } case DIOCOSFPADD: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; PF_RULES_WLOCK(); error = pf_osfp_add(io); PF_RULES_WUNLOCK(); break; } case DIOCOSFPGET: { struct pf_osfp_ioctl *io = (struct pf_osfp_ioctl *)addr; PF_RULES_RLOCK(); error = pf_osfp_get(io); PF_RULES_RUNLOCK(); break; } case DIOCXBEGIN: { struct pfioc_trans *io = (struct pfioc_trans *)addr; struct pfioc_trans_e *ioes, *ioe; size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; goto fail; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { free(ioes, M_PF); goto fail; } PF_RULES_WLOCK(); for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { ioe->anchor[sizeof(ioe->anchor) - 1] = '\0'; switch (ioe->rs_num) { case PF_RULESET_ETH: if ((error = pf_begin_eth(&ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; } break; #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); free(ioes, M_PF); error = EINVAL; goto fail; } if ((error = pf_begin_altq(&ioe->ticket))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: { struct pfr_table table; bzero(&table, sizeof(table)); strlcpy(table.pfrt_anchor, ioe->anchor, sizeof(table.pfrt_anchor)); if ((error = pfr_ina_begin(&table, &ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; } break; } default: if ((error = pf_begin_rules(&ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; } break; } } PF_RULES_WUNLOCK(); error = copyout(ioes, io->array, totlen); free(ioes, M_PF); break; } case DIOCXROLLBACK: { struct pfioc_trans *io = (struct pfioc_trans *)addr; struct pfioc_trans_e *ioe, *ioes; size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; goto fail; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { 
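/*
 * Sketch (not part of this diff, error handling trimmed) of the ticket
 * handshake behind the DIOCXBEGIN handler above and the rollback and
 * commit handlers that follow: begin hands out one ticket per ruleset
 * element, and the later calls must present the same ticket or fail
 * with EBUSY, which serializes concurrent rule loaders. Assumes dev is
 * an open /dev/pf descriptor:
 *
 *	struct pfioc_trans_e es[1] = { { .rs_num = PF_RULESET_FILTER } };
 *	struct pfioc_trans t = {
 *		.size = 1,
 *		.esize = sizeof(es[0]),
 *		.array = es,
 *	};
 *
 *	if (ioctl(dev, DIOCXBEGIN, &t) == -1)	// fills es[0].ticket
 *		err(1, "DIOCXBEGIN");
 *	// rules would be loaded here, quoting es[0].ticket
 *	if (ioctl(dev, DIOCXCOMMIT, &t) == -1)	// or DIOCXROLLBACK
 *		err(1, "DIOCXCOMMIT");
 */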
free(ioes, M_PF); goto fail; } PF_RULES_WLOCK(); for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { ioe->anchor[sizeof(ioe->anchor) - 1] = '\0'; switch (ioe->rs_num) { case PF_RULESET_ETH: if ((error = pf_rollback_eth(ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; /* really bad */ } break; #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); free(ioes, M_PF); error = EINVAL; goto fail; } if ((error = pf_rollback_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; /* really bad */ } break; #endif /* ALTQ */ case PF_RULESET_TABLE: { struct pfr_table table; bzero(&table, sizeof(table)); strlcpy(table.pfrt_anchor, ioe->anchor, sizeof(table.pfrt_anchor)); if ((error = pfr_ina_rollback(&table, ioe->ticket, NULL, 0))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; /* really bad */ } break; } default: if ((error = pf_rollback_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; /* really bad */ } break; } } PF_RULES_WUNLOCK(); free(ioes, M_PF); break; } case DIOCXCOMMIT: { struct pfioc_trans *io = (struct pfioc_trans *)addr; struct pfioc_trans_e *ioe, *ioes; struct pf_kruleset *rs; struct pf_keth_ruleset *ers; size_t totlen; int i; if (io->esize != sizeof(*ioe)) { error = ENODEV; goto fail; } if (io->size < 0 || io->size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->size, sizeof(struct pfioc_trans_e))) { error = EINVAL; goto fail; } totlen = sizeof(struct pfioc_trans_e) * io->size; ioes = mallocarray(io->size, sizeof(struct pfioc_trans_e), M_PF, M_WAITOK); error = copyin(io->array, ioes, totlen); if (error) { free(ioes, M_PF); goto fail; } PF_RULES_WLOCK(); /* First make sure everything will succeed. */ for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { ioe->anchor[sizeof(ioe->anchor) - 1] = '\0'; switch (ioe->rs_num) { case PF_RULESET_ETH: ers = pf_find_keth_ruleset(ioe->anchor); if (ers == NULL || ioe->ticket == 0 || ioe->ticket != ers->inactive.ticket) { PF_RULES_WUNLOCK(); free(ioes, M_PF); error = EINVAL; goto fail; } break; #ifdef ALTQ case PF_RULESET_ALTQ: if (ioe->anchor[0]) { PF_RULES_WUNLOCK(); free(ioes, M_PF); error = EINVAL; goto fail; } if (!V_altqs_inactive_open || ioe->ticket != V_ticket_altqs_inactive) { PF_RULES_WUNLOCK(); free(ioes, M_PF); error = EBUSY; goto fail; } break; #endif /* ALTQ */ case PF_RULESET_TABLE: rs = pf_find_kruleset(ioe->anchor); if (rs == NULL || !rs->topen || ioe->ticket != rs->tticket) { PF_RULES_WUNLOCK(); free(ioes, M_PF); error = EBUSY; goto fail; } break; default: if (ioe->rs_num < 0 || ioe->rs_num >= PF_RULESET_MAX) { PF_RULES_WUNLOCK(); free(ioes, M_PF); error = EINVAL; goto fail; } rs = pf_find_kruleset(ioe->anchor); if (rs == NULL || !rs->rules[ioe->rs_num].inactive.open || rs->rules[ioe->rs_num].inactive.ticket != ioe->ticket) { PF_RULES_WUNLOCK(); free(ioes, M_PF); error = EBUSY; goto fail; } break; } } /* Now do the commit - no errors should happen here.
*/ for (i = 0, ioe = ioes; i < io->size; i++, ioe++) { switch (ioe->rs_num) { case PF_RULESET_ETH: if ((error = pf_commit_eth(ioe->ticket, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; /* really bad */ } break; #ifdef ALTQ case PF_RULESET_ALTQ: if ((error = pf_commit_altq(ioe->ticket))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; /* really bad */ } break; #endif /* ALTQ */ case PF_RULESET_TABLE: { struct pfr_table table; bzero(&table, sizeof(table)); (void)strlcpy(table.pfrt_anchor, ioe->anchor, sizeof(table.pfrt_anchor)); if ((error = pfr_ina_commit(&table, ioe->ticket, NULL, NULL, 0))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; /* really bad */ } break; } default: if ((error = pf_commit_rules(ioe->ticket, ioe->rs_num, ioe->anchor))) { PF_RULES_WUNLOCK(); free(ioes, M_PF); goto fail; /* really bad */ } break; } } PF_RULES_WUNLOCK(); /* Only hook into Ethernet traffic if we've got rules for it. */ if (! TAILQ_EMPTY(V_pf_keth->active.rules)) hook_pf_eth(); else dehook_pf_eth(); free(ioes, M_PF); break; } case DIOCGETSRCNODES: { struct pfioc_src_nodes *psn = (struct pfioc_src_nodes *)addr; struct pf_srchash *sh; struct pf_ksrc_node *n; struct pf_src_node *p, *pstore; uint32_t i, nr = 0; for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH(n, &sh->nodes, entry) nr++; PF_HASHROW_UNLOCK(sh); } psn->psn_len = min(psn->psn_len, sizeof(struct pf_src_node) * nr); if (psn->psn_len == 0) { psn->psn_len = sizeof(struct pf_src_node) * nr; goto fail; } nr = 0; p = pstore = malloc(psn->psn_len, M_PF, M_WAITOK | M_ZERO); for (i = 0, sh = V_pf_srchash; i <= V_pf_srchashmask; i++, sh++) { PF_HASHROW_LOCK(sh); LIST_FOREACH(n, &sh->nodes, entry) { if ((nr + 1) * sizeof(*p) > (unsigned)psn->psn_len) break; pf_src_node_copy(n, p); p++; nr++; } PF_HASHROW_UNLOCK(sh); } error = copyout(pstore, psn->psn_src_nodes, sizeof(struct pf_src_node) * nr); if (error) { free(pstore, M_PF); goto fail; } psn->psn_len = sizeof(struct pf_src_node) * nr; free(pstore, M_PF); break; } case DIOCCLRSRCNODES: { pf_kill_srcnodes(NULL); break; } case DIOCKILLSRCNODES: pf_kill_srcnodes((struct pfioc_src_node_kill *)addr); break; #ifdef COMPAT_FREEBSD13 case DIOCKEEPCOUNTERS_FREEBSD13: #endif case DIOCKEEPCOUNTERS: error = pf_keepcounters((struct pfioc_nv *)addr); break; case DIOCGETSYNCOOKIES: error = pf_get_syncookies((struct pfioc_nv *)addr); break; case DIOCSETSYNCOOKIES: error = pf_set_syncookies((struct pfioc_nv *)addr); break; case DIOCSETHOSTID: { u_int32_t *hostid = (u_int32_t *)addr; PF_RULES_WLOCK(); if (*hostid == 0) V_pf_status.hostid = arc4random(); else V_pf_status.hostid = *hostid; PF_RULES_WUNLOCK(); break; } case DIOCOSFPFLUSH: PF_RULES_WLOCK(); pf_osfp_flush(); PF_RULES_WUNLOCK(); break; case DIOCIGETIFACES: { struct pfioc_iface *io = (struct pfioc_iface *)addr; struct pfi_kif *ifstore; size_t bufsiz; if (io->pfiio_esize != sizeof(struct pfi_kif)) { error = ENODEV; goto fail; } if (io->pfiio_size < 0 || io->pfiio_size > pf_ioctl_maxcount || WOULD_OVERFLOW(io->pfiio_size, sizeof(struct pfi_kif))) { error = EINVAL; goto fail; } io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0'; bufsiz = io->pfiio_size * sizeof(struct pfi_kif); ifstore = mallocarray(io->pfiio_size, sizeof(struct pfi_kif), M_PF, M_WAITOK | M_ZERO); PF_RULES_RLOCK(); pfi_get_ifaces(io->pfiio_name, ifstore, &io->pfiio_size); PF_RULES_RUNLOCK(); error = copyout(ifstore, io->pfiio_buffer, bufsiz); free(ifstore, M_PF); break; } case DIOCSETIFFLAG: { struct pfioc_iface *io = 
(struct pfioc_iface *)addr; io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0'; PF_RULES_WLOCK(); error = pfi_set_flags(io->pfiio_name, io->pfiio_flags); PF_RULES_WUNLOCK(); break; } case DIOCCLRIFFLAG: { struct pfioc_iface *io = (struct pfioc_iface *)addr; io->pfiio_name[sizeof(io->pfiio_name) - 1] = '\0'; PF_RULES_WLOCK(); error = pfi_clear_flags(io->pfiio_name, io->pfiio_flags); PF_RULES_WUNLOCK(); break; } case DIOCSETREASS: { u_int32_t *reass = (u_int32_t *)addr; V_pf_status.reass = *reass & (PF_REASS_ENABLED|PF_REASS_NODF); /* Removal of DF flag without reassembly enabled is not a * valid combination. Disable reassembly in such case. */ if (!(V_pf_status.reass & PF_REASS_ENABLED)) V_pf_status.reass = 0; break; } default: error = ENODEV; break; } fail: CURVNET_RESTORE(); #undef ERROUT_IOCTL return (error); } void pfsync_state_export(union pfsync_state_union *sp, struct pf_kstate *st, int msg_version) { + const char *tagname; bzero(sp, sizeof(union pfsync_state_union)); /* copy from state key */ sp->pfs_1301.key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; sp->pfs_1301.key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; sp->pfs_1301.key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; sp->pfs_1301.key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; sp->pfs_1301.key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; sp->pfs_1301.key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; sp->pfs_1301.key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; sp->pfs_1301.key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; - sp->pfs_1301.proto = st->key[PF_SK_WIRE]->proto; - sp->pfs_1301.af = st->key[PF_SK_WIRE]->af; /* copy from state */ strlcpy(sp->pfs_1301.ifname, st->kif->pfik_name, sizeof(sp->pfs_1301.ifname)); bcopy(&st->act.rt_addr, &sp->pfs_1301.rt_addr, sizeof(sp->pfs_1301.rt_addr)); sp->pfs_1301.creation = htonl(time_uptime - (st->creation / 1000)); sp->pfs_1301.expire = pf_state_expires(st); if (sp->pfs_1301.expire <= time_uptime) sp->pfs_1301.expire = htonl(0); else sp->pfs_1301.expire = htonl(sp->pfs_1301.expire - time_uptime); - sp->pfs_1301.direction = st->direction; - sp->pfs_1301.log = st->act.log; - sp->pfs_1301.timeout = st->timeout; - switch (msg_version) { case PFSYNC_MSG_VERSION_1301: sp->pfs_1301.state_flags = st->state_flags; + sp->pfs_1301.direction = st->direction; + sp->pfs_1301.log = st->act.log; + sp->pfs_1301.timeout = st->timeout; + sp->pfs_1301.proto = st->key[PF_SK_WIRE]->proto; + sp->pfs_1301.af = st->key[PF_SK_WIRE]->af; + /* + * XXX Why do we bother pfsyncing source node information if source + * nodes are not synced? Showing users that there is source tracking + * when there is none seems useless. 
+ */ + if (st->sns[PF_SN_LIMIT] != NULL) + sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE]) + sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE; break; case PFSYNC_MSG_VERSION_1400: sp->pfs_1400.state_flags = htons(st->state_flags); + sp->pfs_1400.direction = st->direction; + sp->pfs_1400.log = st->act.log; + sp->pfs_1400.timeout = st->timeout; + sp->pfs_1400.proto = st->key[PF_SK_WIRE]->proto; + sp->pfs_1400.af = st->key[PF_SK_WIRE]->af; sp->pfs_1400.qid = htons(st->act.qid); sp->pfs_1400.pqid = htons(st->act.pqid); sp->pfs_1400.dnpipe = htons(st->act.dnpipe); sp->pfs_1400.dnrpipe = htons(st->act.dnrpipe); sp->pfs_1400.rtableid = htonl(st->act.rtableid); sp->pfs_1400.min_ttl = st->act.min_ttl; sp->pfs_1400.set_tos = st->act.set_tos; sp->pfs_1400.max_mss = htons(st->act.max_mss); sp->pfs_1400.set_prio[0] = st->act.set_prio[0]; sp->pfs_1400.set_prio[1] = st->act.set_prio[1]; sp->pfs_1400.rt = st->act.rt; if (st->act.rt_kif) strlcpy(sp->pfs_1400.rt_ifname, st->act.rt_kif->pfik_name, sizeof(sp->pfs_1400.rt_ifname)); + /* + * XXX Why do we bother pfsyncing source node information if source + * nodes are not synced? Showing users that there is source tracking + * when there is none seems useless. + */ + if (st->sns[PF_SN_LIMIT] != NULL) + sp->pfs_1400.sync_flags |= PFSYNC_FLAG_SRCNODE; + if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE]) + sp->pfs_1400.sync_flags |= PFSYNC_FLAG_NATSRCNODE; + break; + case PFSYNC_MSG_VERSION_1500: + sp->pfs_1500.state_flags = htons(st->state_flags); + sp->pfs_1500.direction = st->direction; + sp->pfs_1500.log = st->act.log; + sp->pfs_1500.timeout = st->timeout; + sp->pfs_1500.wire_proto = st->key[PF_SK_WIRE]->proto; + sp->pfs_1500.wire_af = st->key[PF_SK_WIRE]->af; + sp->pfs_1500.stack_proto = st->key[PF_SK_STACK]->proto; + sp->pfs_1500.stack_af = st->key[PF_SK_STACK]->af; + sp->pfs_1500.qid = htons(st->act.qid); + sp->pfs_1500.pqid = htons(st->act.pqid); + sp->pfs_1500.dnpipe = htons(st->act.dnpipe); + sp->pfs_1500.dnrpipe = htons(st->act.dnrpipe); + sp->pfs_1500.rtableid = htonl(st->act.rtableid); + sp->pfs_1500.min_ttl = st->act.min_ttl; + sp->pfs_1500.set_tos = st->act.set_tos; + sp->pfs_1500.max_mss = htons(st->act.max_mss); + sp->pfs_1500.set_prio[0] = st->act.set_prio[0]; + sp->pfs_1500.set_prio[1] = st->act.set_prio[1]; + sp->pfs_1500.rt = st->act.rt; + sp->pfs_1500.rt_af = st->act.rt_af; + if (st->act.rt_kif) + strlcpy(sp->pfs_1500.rt_ifname, + st->act.rt_kif->pfik_name, + sizeof(sp->pfs_1500.rt_ifname)); + strlcpy(sp->pfs_1500.orig_ifname, + st->orig_kif->pfik_name, + sizeof(sp->pfs_1500.orig_ifname)); + if ((tagname = pf_tag2tagname(st->tag)) != NULL) + strlcpy(sp->pfs_1500.tagname, tagname, + sizeof(sp->pfs_1500.tagname)); break; default: panic("%s: Unsupported pfsync_msg_version %d", __func__, msg_version); } - /* - * XXX Why do we bother pfsyncing source node information if source - * nodes are not synced? Showing users that there is source tracking - * when there is none seems useless. 
- */ - if (st->sns[PF_SN_LIMIT] != NULL) - sp->pfs_1301.sync_flags |= PFSYNC_FLAG_SRCNODE; - if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE]) - sp->pfs_1301.sync_flags |= PFSYNC_FLAG_NATSRCNODE; - sp->pfs_1301.id = st->id; sp->pfs_1301.creatorid = st->creatorid; pf_state_peer_hton(&st->src, &sp->pfs_1301.src); pf_state_peer_hton(&st->dst, &sp->pfs_1301.dst); if (st->rule == NULL) sp->pfs_1301.rule = htonl(-1); else sp->pfs_1301.rule = htonl(st->rule->nr); if (st->anchor == NULL) sp->pfs_1301.anchor = htonl(-1); else sp->pfs_1301.anchor = htonl(st->anchor->nr); if (st->nat_rule == NULL) sp->pfs_1301.nat_rule = htonl(-1); else sp->pfs_1301.nat_rule = htonl(st->nat_rule->nr); pf_state_counter_hton(st->packets[0], sp->pfs_1301.packets[0]); pf_state_counter_hton(st->packets[1], sp->pfs_1301.packets[1]); pf_state_counter_hton(st->bytes[0], sp->pfs_1301.bytes[0]); pf_state_counter_hton(st->bytes[1], sp->pfs_1301.bytes[1]); } void pf_state_export(struct pf_state_export *sp, struct pf_kstate *st) { bzero(sp, sizeof(*sp)); sp->version = PF_STATE_VERSION; /* copy from state key */ sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; sp->proto = st->key[PF_SK_WIRE]->proto; sp->af = st->key[PF_SK_WIRE]->af; /* copy from state */ strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); strlcpy(sp->orig_ifname, st->orig_kif->pfik_name, sizeof(sp->orig_ifname)); memcpy(&sp->rt_addr, &st->act.rt_addr, sizeof(sp->rt_addr)); sp->creation = htonl(time_uptime - (st->creation / 1000)); sp->expire = pf_state_expires(st); if (sp->expire <= time_uptime) sp->expire = htonl(0); else sp->expire = htonl(sp->expire - time_uptime); sp->direction = st->direction; sp->log = st->act.log; sp->timeout = st->timeout; /* 8 bits for the old libpfctl, 16 bits for the new libpfctl */ sp->state_flags_compat = st->state_flags; sp->state_flags = htons(st->state_flags); if (st->sns[PF_SN_LIMIT] != NULL) sp->sync_flags |= PFSYNC_FLAG_SRCNODE; if (st->sns[PF_SN_NAT] != NULL || st->sns[PF_SN_ROUTE] != NULL) sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; sp->id = st->id; sp->creatorid = st->creatorid; pf_state_peer_hton(&st->src, &sp->src); pf_state_peer_hton(&st->dst, &sp->dst); if (st->rule == NULL) sp->rule = htonl(-1); else sp->rule = htonl(st->rule->nr); if (st->anchor == NULL) sp->anchor = htonl(-1); else sp->anchor = htonl(st->anchor->nr); if (st->nat_rule == NULL) sp->nat_rule = htonl(-1); else sp->nat_rule = htonl(st->nat_rule->nr); sp->packets[0] = st->packets[0]; sp->packets[1] = st->packets[1]; sp->bytes[0] = st->bytes[0]; sp->bytes[1] = st->bytes[1]; sp->qid = htons(st->act.qid); sp->pqid = htons(st->act.pqid); sp->dnpipe = htons(st->act.dnpipe); sp->dnrpipe = htons(st->act.dnrpipe); sp->rtableid = htonl(st->act.rtableid); sp->min_ttl = st->act.min_ttl; sp->set_tos = st->act.set_tos; sp->max_mss = htons(st->act.max_mss); sp->rt = st->act.rt; if (st->act.rt_kif) strlcpy(sp->rt_ifname, st->act.rt_kif->pfik_name, sizeof(sp->rt_ifname)); sp->set_prio[0] = st->act.set_prio[0]; sp->set_prio[1] = st->act.set_prio[1]; } static void pf_tbladdr_copyout(struct pf_addr_wrap *aw) { struct pfr_ktable *kt; 
KASSERT(aw->type == PF_ADDR_TABLE, ("%s: type %u", __func__, aw->type)); kt = aw->p.tbl; if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) kt = kt->pfrkt_root; aw->p.tbl = NULL; aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? kt->pfrkt_cnt : -1; } static int pf_add_status_counters(nvlist_t *nvl, const char *name, counter_u64_t *counters, size_t number, char **names) { nvlist_t *nvc; nvc = nvlist_create(0); if (nvc == NULL) return (ENOMEM); for (int i = 0; i < number; i++) { nvlist_append_number_array(nvc, "counters", counter_u64_fetch(counters[i])); nvlist_append_string_array(nvc, "names", names[i]); nvlist_append_number_array(nvc, "ids", i); } nvlist_add_nvlist(nvl, name, nvc); nvlist_destroy(nvc); return (0); } static int pf_getstatus(struct pfioc_nv *nv) { nvlist_t *nvl = NULL, *nvc = NULL; void *nvlpacked = NULL; int error; struct pf_status s; char *pf_reasons[PFRES_MAX+1] = PFRES_NAMES; char *pf_lcounter[KLCNT_MAX+1] = KLCNT_NAMES; char *pf_fcounter[FCNT_MAX+1] = FCNT_NAMES; time_t since; PF_RULES_RLOCK_TRACKER; #define ERROUT(x) ERROUT_FUNCTION(errout, x) PF_RULES_RLOCK(); nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); since = time_second - (time_uptime - V_pf_status.since); nvlist_add_bool(nvl, "running", V_pf_status.running); nvlist_add_number(nvl, "since", since); nvlist_add_number(nvl, "debug", V_pf_status.debug); nvlist_add_number(nvl, "hostid", V_pf_status.hostid); nvlist_add_number(nvl, "states", V_pf_status.states); nvlist_add_number(nvl, "src_nodes", V_pf_status.src_nodes); nvlist_add_number(nvl, "reass", V_pf_status.reass); nvlist_add_bool(nvl, "syncookies_active", V_pf_status.syncookies_active); nvlist_add_number(nvl, "halfopen_states", V_pf_status.states_halfopen); /* counters */ error = pf_add_status_counters(nvl, "counters", V_pf_status.counters, PFRES_MAX, pf_reasons); if (error != 0) ERROUT(error); /* lcounters */ error = pf_add_status_counters(nvl, "lcounters", V_pf_status.lcounters, KLCNT_MAX, pf_lcounter); if (error != 0) ERROUT(error); /* fcounters */ nvc = nvlist_create(0); if (nvc == NULL) ERROUT(ENOMEM); for (int i = 0; i < FCNT_MAX; i++) { nvlist_append_number_array(nvc, "counters", pf_counter_u64_fetch(&V_pf_status.fcounters[i])); nvlist_append_string_array(nvc, "names", pf_fcounter[i]); nvlist_append_number_array(nvc, "ids", i); } nvlist_add_nvlist(nvl, "fcounters", nvc); nvlist_destroy(nvc); nvc = NULL; /* scounters */ error = pf_add_status_counters(nvl, "scounters", V_pf_status.scounters, SCNT_MAX, pf_fcounter); if (error != 0) ERROUT(error); nvlist_add_string(nvl, "ifname", V_pf_status.ifname); nvlist_add_binary(nvl, "chksum", V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH); pfi_update_status(V_pf_status.ifname, &s); /* pcounters / bcounters */ for (int i = 0; i < 2; i++) { for (int j = 0; j < 2; j++) { for (int k = 0; k < 2; k++) { nvlist_append_number_array(nvl, "pcounters", s.pcounters[i][j][k]); } nvlist_append_number_array(nvl, "bcounters", s.bcounters[i][j]); } } nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); PF_RULES_RUNLOCK(); error = copyout(nvlpacked, nv->data, nv->len); goto done; #undef ERROUT errout: PF_RULES_RUNLOCK(); done: free(nvlpacked, M_NVLIST); nvlist_destroy(nvc); nvlist_destroy(nvl); return (error); } /* * XXX - Check for version mismatch!!! 
*/ static void pf_clear_all_states(void) { struct epoch_tracker et; struct pf_kstate *s; u_int i; NET_EPOCH_ENTER(et); for (i = 0; i <= V_pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; relock: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { s->timeout = PFTM_PURGE; /* Don't send out individual delete messages. */ s->state_flags |= PFSTATE_NOSYNC; pf_remove_state(s); goto relock; } PF_HASHROW_UNLOCK(ih); } NET_EPOCH_EXIT(et); } static int pf_clear_tables(void) { struct pfioc_table io; int error; bzero(&io, sizeof(io)); io.pfrio_flags |= PFR_FLAG_ALLRSETS; error = pfr_clr_tables(&io.pfrio_table, &io.pfrio_ndel, io.pfrio_flags); return (error); } static void pf_kill_srcnodes(struct pfioc_src_node_kill *psnk) { struct pf_ksrc_node_list kill; u_int killed; LIST_INIT(&kill); for (int i = 0; i <= V_pf_srchashmask; i++) { struct pf_srchash *sh = &V_pf_srchash[i]; struct pf_ksrc_node *sn, *tmp; PF_HASHROW_LOCK(sh); LIST_FOREACH_SAFE(sn, &sh->nodes, entry, tmp) if (psnk == NULL || (pf_match_addr(psnk->psnk_src.neg, &psnk->psnk_src.addr.v.a.addr, &psnk->psnk_src.addr.v.a.mask, &sn->addr, sn->af) && pf_match_addr(psnk->psnk_dst.neg, &psnk->psnk_dst.addr.v.a.addr, &psnk->psnk_dst.addr.v.a.mask, &sn->raddr, sn->af))) { pf_unlink_src_node(sn); LIST_INSERT_HEAD(&kill, sn, entry); sn->expire = 1; } PF_HASHROW_UNLOCK(sh); } for (int i = 0; i <= V_pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_kstate *s; PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { for(pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) { if (s->sns[sn_type] && s->sns[sn_type]->expire == 1) { s->sns[sn_type] = NULL; } } } PF_HASHROW_UNLOCK(ih); } killed = pf_free_src_nodes(&kill); if (psnk != NULL) psnk->psnk_killed = killed; } static int pf_keepcounters(struct pfioc_nv *nv) { nvlist_t *nvl = NULL; void *nvlpacked = NULL; int error = 0; #define ERROUT(x) ERROUT_FUNCTION(on_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); if (! nvlist_exists_bool(nvl, "keep_counters")) ERROUT(EBADMSG); V_pf_status.keep_counters = nvlist_get_bool(nvl, "keep_counters"); on_error: nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); return (error); } unsigned int pf_clear_states(const struct pf_kstate_kill *kill) { struct pf_state_key_cmp match_key; struct pf_kstate *s; struct pfi_kkif *kif; int idx; unsigned int killed = 0, dir; NET_EPOCH_ASSERT(); for (unsigned int i = 0; i <= V_pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; relock_DIOCCLRSTATES: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { /* For floating states look at the original kif. */ kif = s->kif == V_pfi_all ? s->orig_kif : s->kif; if (kill->psk_ifname[0] && strcmp(kill->psk_ifname, kif->pfik_name)) continue; if (kill->psk_kill_match) { bzero(&match_key, sizeof(match_key)); if (s->direction == PF_OUT) { dir = PF_IN; idx = PF_SK_STACK; } else { dir = PF_OUT; idx = PF_SK_WIRE; } match_key.af = s->key[idx]->af; match_key.proto = s->key[idx]->proto; pf_addrcpy(&match_key.addr[0], &s->key[idx]->addr[1], match_key.af); match_key.port[0] = s->key[idx]->port[1]; pf_addrcpy(&match_key.addr[1], &s->key[idx]->addr[0], match_key.af); match_key.port[1] = s->key[idx]->port[0]; } /* * Don't send out individual * delete messages.
*/ s->state_flags |= PFSTATE_NOSYNC; pf_remove_state(s); killed++; if (kill->psk_kill_match) killed += pf_kill_matching_state(&match_key, dir); goto relock_DIOCCLRSTATES; } PF_HASHROW_UNLOCK(ih); } if (V_pfsync_clear_states_ptr != NULL) V_pfsync_clear_states_ptr(V_pf_status.hostid, kill->psk_ifname); return (killed); } void pf_killstates(struct pf_kstate_kill *kill, unsigned int *killed) { struct pf_kstate *s; NET_EPOCH_ASSERT(); if (kill->psk_pfcmp.id) { if (kill->psk_pfcmp.creatorid == 0) kill->psk_pfcmp.creatorid = V_pf_status.hostid; if ((s = pf_find_state_byid(kill->psk_pfcmp.id, kill->psk_pfcmp.creatorid))) { pf_remove_state(s); *killed = 1; } return; } for (unsigned int i = 0; i <= V_pf_hashmask; i++) *killed += pf_killstates_row(kill, &V_pf_idhash[i]); } static int pf_killstates_nv(struct pfioc_nv *nv) { struct pf_kstate_kill kill; struct epoch_tracker et; nvlist_t *nvl = NULL; void *nvlpacked = NULL; int error = 0; unsigned int killed = 0; #define ERROUT(x) ERROUT_FUNCTION(on_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); error = pf_nvstate_kill_to_kstate_kill(nvl, &kill); if (error) ERROUT(error); NET_EPOCH_ENTER(et); pf_killstates(&kill, &killed); NET_EPOCH_EXIT(et); free(nvlpacked, M_NVLIST); nvlpacked = NULL; nvlist_destroy(nvl); nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); nvlist_add_number(nvl, "killed", killed); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); on_error: nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); return (error); } static int pf_clearstates_nv(struct pfioc_nv *nv) { struct pf_kstate_kill kill; struct epoch_tracker et; nvlist_t *nvl = NULL; void *nvlpacked = NULL; int error = 0; unsigned int killed; #define ERROUT(x) ERROUT_FUNCTION(on_error, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); error = pf_nvstate_kill_to_kstate_kill(nvl, &kill); if (error) ERROUT(error); NET_EPOCH_ENTER(et); killed = pf_clear_states(&kill); NET_EPOCH_EXIT(et); free(nvlpacked, M_NVLIST); nvlpacked = NULL; nvlist_destroy(nvl); nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); nvlist_add_number(nvl, "killed", killed); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); #undef ERROUT on_error: nvlist_destroy(nvl); free(nvlpacked, M_NVLIST); return (error); } static int pf_getstate(struct pfioc_nv *nv) { nvlist_t *nvl = NULL, *nvls; void *nvlpacked = NULL; struct pf_kstate *s = NULL; int error = 0; uint64_t id, creatorid; #define ERROUT(x) ERROUT_FUNCTION(errout, x) if (nv->len > pf_ioctl_maxcount) ERROUT(ENOMEM); nvlpacked = malloc(nv->len, M_NVLIST, M_WAITOK); error = copyin(nv->data, nvlpacked, nv->len); if (error) ERROUT(error); nvl = nvlist_unpack(nvlpacked, nv->len, 0); if (nvl == NULL) ERROUT(EBADMSG); PFNV_CHK(pf_nvuint64(nvl, "id", &id)); PFNV_CHK(pf_nvuint64(nvl, "creatorid", &creatorid)); s = pf_find_state_byid(id, creatorid); if (s == NULL) 
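/*
 * The nvlist-based handlers above (and pf_getstatus()) share one
 * transport contract on struct pfioc_nv: the caller passes a packed
 * request through data/len, the kernel packs its reply into the same
 * buffer, reports the required length when size == 0, and returns
 * ENOSPC when size is too small. A userland sketch of fetching one
 * state via the DIOCGETSTATENV command that pf_getstate() serves (not
 * part of this diff; buffer sizing and retry policy are the caller's
 * choice, error handling trimmed):
 *
 *	nvlist_t *nvl = nvlist_create(0);
 *	struct pfioc_nv nv;
 *	size_t buflen = 4096;
 *	void *packed;
 *
 *	nvlist_add_number(nvl, "id", id);	// keys as parsed above
 *	nvlist_add_number(nvl, "creatorid", creatorid);
 *	packed = nvlist_pack(nvl, &nv.len);
 *	nvlist_destroy(nvl);
 *	nv.data = malloc(buflen);
 *	memcpy(nv.data, packed, nv.len);
 *	free(packed);
 *	nv.size = buflen;	// reply capacity; 0 only probes nv.len
 *	if (ioctl(dev, DIOCGETSTATENV, &nv) == -1)
 *		err(1, "DIOCGETSTATENV");	// ENOSPC: grow, retry
 *	nvl = nvlist_unpack(nv.data, nv.len, 0);	// packed reply
 */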
ERROUT(ENOENT); free(nvlpacked, M_NVLIST); nvlpacked = NULL; nvlist_destroy(nvl); nvl = nvlist_create(0); if (nvl == NULL) ERROUT(ENOMEM); nvls = pf_state_to_nvstate(s); if (nvls == NULL) ERROUT(ENOMEM); nvlist_add_nvlist(nvl, "state", nvls); nvlist_destroy(nvls); nvlpacked = nvlist_pack(nvl, &nv->len); if (nvlpacked == NULL) ERROUT(ENOMEM); if (nv->size == 0) ERROUT(0); else if (nv->size < nv->len) ERROUT(ENOSPC); error = copyout(nvlpacked, nv->data, nv->len); #undef ERROUT errout: if (s != NULL) PF_STATE_UNLOCK(s); free(nvlpacked, M_NVLIST); nvlist_destroy(nvl); return (error); } /* * XXX - Check for version mismatch!!! */ /* * Duplicate pfctl -Fa operation to get rid of as much as we can. */ static int shutdown_pf(void) { int error = 0; u_int32_t t[5]; char nn = '\0'; struct pf_kanchor *anchor, *tmp_anchor; struct pf_keth_anchor *eth_anchor, *tmp_eth_anchor; int rs_num; do { /* Unlink rules of all user defined anchors */ RB_FOREACH_SAFE(anchor, pf_kanchor_global, &V_pf_anchors, tmp_anchor) { for (rs_num = 0; rs_num < PF_RULESET_MAX; ++rs_num) { if ((error = pf_begin_rules(&t[rs_num], rs_num, anchor->path)) != 0) { DPFPRINTF(PF_DEBUG_MISC, "%s: " "anchor.path=%s rs_num=%d", __func__, anchor->path, rs_num); goto error; /* XXX: rollback? */ } } for (rs_num = 0; rs_num < PF_RULESET_MAX; ++rs_num) { error = pf_commit_rules(t[rs_num], rs_num, anchor->path); MPASS(error == 0); } } /* Unlink rules of all user defined ether anchors */ RB_FOREACH_SAFE(eth_anchor, pf_keth_anchor_global, &V_pf_keth_anchors, tmp_eth_anchor) { if ((error = pf_begin_eth(&t[0], eth_anchor->path)) != 0) { DPFPRINTF(PF_DEBUG_MISC, "%s: eth " "anchor.path=%s", __func__, eth_anchor->path); goto error; } error = pf_commit_eth(t[0], eth_anchor->path); MPASS(error == 0); } if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, "%s: SCRUB", __func__); break; } if ((error = pf_begin_rules(&t[1], PF_RULESET_FILTER, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, "%s: FILTER", __func__); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[2], PF_RULESET_NAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, "%s: NAT", __func__); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[3], PF_RULESET_BINAT, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, "%s: BINAT", __func__); break; /* XXX: rollback? */ } if ((error = pf_begin_rules(&t[4], PF_RULESET_RDR, &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, "%s: RDR", __func__); break; /* XXX: rollback? 
*/ } error = pf_commit_rules(t[0], PF_RULESET_SCRUB, &nn); MPASS(error == 0); error = pf_commit_rules(t[1], PF_RULESET_FILTER, &nn); MPASS(error == 0); error = pf_commit_rules(t[2], PF_RULESET_NAT, &nn); MPASS(error == 0); error = pf_commit_rules(t[3], PF_RULESET_BINAT, &nn); MPASS(error == 0); error = pf_commit_rules(t[4], PF_RULESET_RDR, &nn); MPASS(error == 0); if ((error = pf_clear_tables()) != 0) break; if ((error = pf_begin_eth(&t[0], &nn)) != 0) { DPFPRINTF(PF_DEBUG_MISC, "%s: eth", __func__); break; } error = pf_commit_eth(t[0], &nn); MPASS(error == 0); #ifdef ALTQ if ((error = pf_begin_altq(&t[0])) != 0) { DPFPRINTF(PF_DEBUG_MISC, "%s: ALTQ", __func__); break; } pf_commit_altq(t[0]); #endif pf_clear_all_states(); pf_kill_srcnodes(NULL); for (int i = 0; i < PF_RULESET_MAX; i++) { pf_rule_tree_free(pf_main_ruleset.rules[i].active.tree); pf_rule_tree_free(pf_main_ruleset.rules[i].inactive.tree); } /* status does not use malloced mem so no need to cleanup */ /* fingerprints and interfaces have their own cleanup code */ } while(0); error: return (error); } static pfil_return_t pf_check_return(int chk, struct mbuf **m) { switch (chk) { case PF_PASS: if (*m == NULL) return (PFIL_CONSUMED); else return (PFIL_PASS); break; default: if (*m != NULL) { m_freem(*m); *m = NULL; } return (PFIL_DROPPED); } } static pfil_return_t pf_eth_check_in(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; CURVNET_ASSERT_SET(); chk = pf_test_eth(PF_IN, flags, ifp, m, inp); return (pf_check_return(chk, m)); } static pfil_return_t pf_eth_check_out(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; CURVNET_ASSERT_SET(); chk = pf_test_eth(PF_OUT, flags, ifp, m, inp); return (pf_check_return(chk, m)); } #ifdef INET static pfil_return_t pf_check_in(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; CURVNET_ASSERT_SET(); chk = pf_test(AF_INET, PF_IN, flags, ifp, m, inp, NULL); return (pf_check_return(chk, m)); } static pfil_return_t pf_check_out(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; CURVNET_ASSERT_SET(); chk = pf_test(AF_INET, PF_OUT, flags, ifp, m, inp, NULL); return (pf_check_return(chk, m)); } #endif #ifdef INET6 static pfil_return_t pf_check6_in(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; CURVNET_ASSERT_SET(); /* * In case of loopback traffic IPv6 uses the real interface in * order to support scoped addresses. In order to support stateful * filtering we change this to lo0, as is the case in IPv4. */ chk = pf_test(AF_INET6, PF_IN, flags, (*m)->m_flags & M_LOOP ?
V_loif : ifp, m, inp, NULL); return (pf_check_return(chk, m)); } static pfil_return_t pf_check6_out(struct mbuf **m, struct ifnet *ifp, int flags, void *ruleset __unused, struct inpcb *inp) { int chk; CURVNET_ASSERT_SET(); chk = pf_test(AF_INET6, PF_OUT, flags, ifp, m, inp, NULL); return (pf_check_return(chk, m)); } #endif /* INET6 */ VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_in_hook); VNET_DEFINE_STATIC(pfil_hook_t, pf_eth_out_hook); #define V_pf_eth_in_hook VNET(pf_eth_in_hook) #define V_pf_eth_out_hook VNET(pf_eth_out_hook) #ifdef INET VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_in_hook); VNET_DEFINE_STATIC(pfil_hook_t, pf_ip4_out_hook); #define V_pf_ip4_in_hook VNET(pf_ip4_in_hook) #define V_pf_ip4_out_hook VNET(pf_ip4_out_hook) #endif #ifdef INET6 VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_in_hook); VNET_DEFINE_STATIC(pfil_hook_t, pf_ip6_out_hook); #define V_pf_ip6_in_hook VNET(pf_ip6_in_hook) #define V_pf_ip6_out_hook VNET(pf_ip6_out_hook) #endif static void hook_pf_eth(void) { struct pfil_hook_args pha = { .pa_version = PFIL_VERSION, .pa_modname = "pf", .pa_type = PFIL_TYPE_ETHERNET, }; struct pfil_link_args pla = { .pa_version = PFIL_VERSION, }; int ret __diagused; if (atomic_load_bool(&V_pf_pfil_eth_hooked)) return; pha.pa_mbuf_chk = pf_eth_check_in; pha.pa_flags = PFIL_IN; pha.pa_rulname = "eth-in"; V_pf_eth_in_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_link_pfil_head; pla.pa_hook = V_pf_eth_in_hook; ret = pfil_link(&pla); MPASS(ret == 0); pha.pa_mbuf_chk = pf_eth_check_out; pha.pa_flags = PFIL_OUT; pha.pa_rulname = "eth-out"; V_pf_eth_out_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_link_pfil_head; pla.pa_hook = V_pf_eth_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); atomic_store_bool(&V_pf_pfil_eth_hooked, true); } static void hook_pf(void) { struct pfil_hook_args pha = { .pa_version = PFIL_VERSION, .pa_modname = "pf", }; struct pfil_link_args pla = { .pa_version = PFIL_VERSION, }; int ret __diagused; if (atomic_load_bool(&V_pf_pfil_hooked)) return; #ifdef INET pha.pa_type = PFIL_TYPE_IP4; pha.pa_mbuf_chk = pf_check_in; pha.pa_flags = PFIL_IN; pha.pa_rulname = "default-in"; V_pf_ip4_in_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet_pfil_head; pla.pa_hook = V_pf_ip4_in_hook; ret = pfil_link(&pla); MPASS(ret == 0); pha.pa_mbuf_chk = pf_check_out; pha.pa_flags = PFIL_OUT; pha.pa_rulname = "default-out"; V_pf_ip4_out_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet_pfil_head; pla.pa_hook = V_pf_ip4_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); if (V_pf_filter_local) { pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet_local_pfil_head; pla.pa_hook = V_pf_ip4_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); } #endif #ifdef INET6 pha.pa_type = PFIL_TYPE_IP6; pha.pa_mbuf_chk = pf_check6_in; pha.pa_flags = PFIL_IN; pha.pa_rulname = "default-in6"; V_pf_ip6_in_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_IN | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet6_pfil_head; pla.pa_hook = V_pf_ip6_in_hook; ret = pfil_link(&pla); MPASS(ret == 0); pha.pa_mbuf_chk = pf_check6_out; pha.pa_rulname = "default-out6"; pha.pa_flags = PFIL_OUT; V_pf_ip6_out_hook = pfil_add_hook(&pha); pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet6_pfil_head; pla.pa_hook = V_pf_ip6_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); if 
(V_pf_filter_local) { pla.pa_flags = PFIL_OUT | PFIL_HEADPTR | PFIL_HOOKPTR; pla.pa_head = V_inet6_local_pfil_head; pla.pa_hook = V_pf_ip6_out_hook; ret = pfil_link(&pla); MPASS(ret == 0); } #endif atomic_store_bool(&V_pf_pfil_hooked, true); } static void dehook_pf_eth(void) { if (!atomic_load_bool(&V_pf_pfil_eth_hooked)) return; pfil_remove_hook(V_pf_eth_in_hook); pfil_remove_hook(V_pf_eth_out_hook); atomic_store_bool(&V_pf_pfil_eth_hooked, false); } static void dehook_pf(void) { if (!atomic_load_bool(&V_pf_pfil_hooked)) return; #ifdef INET pfil_remove_hook(V_pf_ip4_in_hook); pfil_remove_hook(V_pf_ip4_out_hook); #endif #ifdef INET6 pfil_remove_hook(V_pf_ip6_in_hook); pfil_remove_hook(V_pf_ip6_out_hook); #endif atomic_store_bool(&V_pf_pfil_hooked, false); } static void pf_load_vnet(void) { V_pf_tag_z = uma_zcreate("pf tags", sizeof(struct pf_tagname), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); rm_init_flags(&V_pf_rules_lock, "pf rulesets", RM_RECURSE); + rm_init_flags(&V_pf_tags_lock, "pf tags and queues", RM_RECURSE); sx_init(&V_pf_ioctl_lock, "pf ioctl"); pf_init_tagset(&V_pf_tags, &pf_rule_tag_hashsize, PF_RULE_TAG_HASH_SIZE_DEFAULT); #ifdef ALTQ pf_init_tagset(&V_pf_qids, &pf_queue_tag_hashsize, PF_QUEUE_TAG_HASH_SIZE_DEFAULT); #endif V_pf_keth = &V_pf_main_keth_anchor.ruleset; pfattach_vnet(); V_pf_vnet_active = 1; } static int pf_load(void) { int error; sx_init(&pf_end_lock, "pf end thread"); pf_mtag_initialize(); pf_dev = make_dev(&pf_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, PF_NAME); if (pf_dev == NULL) return (ENOMEM); pf_end_threads = 0; error = kproc_create(pf_purge_thread, NULL, &pf_purge_proc, 0, 0, "pf purge"); if (error != 0) return (error); pfi_initialize(); return (0); } static void pf_unload_vnet(void) { int ret __diagused; V_pf_vnet_active = 0; V_pf_status.running = 0; dehook_pf(); dehook_pf_eth(); PF_RULES_WLOCK(); pf_syncookies_cleanup(); shutdown_pf(); PF_RULES_WUNLOCK(); ret = swi_remove(V_pf_swi_cookie); MPASS(ret == 0); ret = intr_event_destroy(V_pf_swi_ie); MPASS(ret == 0); pf_unload_vnet_purge(); pf_normalize_cleanup(); PF_RULES_WLOCK(); pfi_cleanup_vnet(); PF_RULES_WUNLOCK(); pfr_cleanup(); pf_osfp_flush(); pf_cleanup(); if (IS_DEFAULT_VNET(curvnet)) pf_mtag_cleanup(); pf_cleanup_tagset(&V_pf_tags); #ifdef ALTQ pf_cleanup_tagset(&V_pf_qids); #endif uma_zdestroy(V_pf_tag_z); #ifdef PF_WANT_32_TO_64_COUNTER PF_RULES_WLOCK(); LIST_REMOVE(V_pf_kifmarker, pfik_allkiflist); MPASS(LIST_EMPTY(&V_pf_allkiflist)); MPASS(V_pf_allkifcount == 0); LIST_REMOVE(&V_pf_default_rule, allrulelist); V_pf_allrulecount--; LIST_REMOVE(V_pf_rulemarker, allrulelist); MPASS(LIST_EMPTY(&V_pf_allrulelist)); MPASS(V_pf_allrulecount == 0); PF_RULES_WUNLOCK(); free(V_pf_kifmarker, PFI_MTYPE); free(V_pf_rulemarker, M_PFRULE); #endif /* Free counters last as we updated them during shutdown. */ pf_counter_u64_deinit(&V_pf_default_rule.evaluations); for (int i = 0; i < 2; i++) { pf_counter_u64_deinit(&V_pf_default_rule.packets[i]); pf_counter_u64_deinit(&V_pf_default_rule.bytes[i]); } counter_u64_free(V_pf_default_rule.states_cur); counter_u64_free(V_pf_default_rule.states_tot); for (pf_sn_types_t sn_type=0; sn_type<PF_SN_MAX; sn_type++) counter_u64_free(V_pf_default_rule.src_nodes[sn_type]);
>ping.stdout) & if [ "$1" = "backup_promotion" ] then sleep 1 jexec gw_route_to_backup ifconfig if_br0 vhid 10 advskew 0 jexec gw_route_to_backup ifconfig if_br1 vhid 11 advskew 0 jexec gw_reply_to_backup ifconfig if_br1 vhid 21 advskew 0 jexec gw_reply_to_backup ifconfig if_br2 vhid 22 advskew 0 fi while !
grep -q -e 'packet loss' ping.stdout do sleep 1 done atf_check -s exit:0 -e ignore -o ignore grep ', 0.0% packet loss' ping.stdout } pbr_common_cleanup() { pft_cleanup } atf_test_case "ipsec" "cleanup" ipsec_head() { atf_set descr 'Transport pfsync over IPSec' atf_set require.user root } ipsec_body() { if ! sysctl -q kern.features.ipsec >/dev/null ; then atf_skip "This test requires ipsec" fi # Run the common test, to set up pfsync common_body # But we want unicast pfsync jexec one ifconfig pfsync0 syncpeer 192.0.2.2 jexec two ifconfig pfsync0 syncpeer 192.0.2.1 # Flush existing states jexec one pfctl -Fs jexec two pfctl -Fs # Now define an ipsec policy to run over the epair_sync interfaces echo "flush; spdflush; spdadd 192.0.2.1/32 192.0.2.2/32 any -P out ipsec esp/transport//require; spdadd 192.0.2.2/32 192.0.2.1/32 any -P in ipsec esp/transport//require; add 192.0.2.1 192.0.2.2 esp 0x1000 -E aes-gcm-16 \"12345678901234567890\"; add 192.0.2.2 192.0.2.1 esp 0x1001 -E aes-gcm-16 \"12345678901234567890\";" \ | jexec one setkey -c echo "flush; spdflush; spdadd 192.0.2.2/32 192.0.2.1/32 any -P out ipsec esp/transport//require; spdadd 192.0.2.1/32 192.0.2.2/32 any -P in ipsec esp/transport//require; add 192.0.2.1 192.0.2.2 esp 0x1000 -E aes-gcm-16 \"12345678901234567891\"; add 192.0.2.2 192.0.2.1 esp 0x1001 -E aes-gcm-16 \"12345678901234567891\";" \ | jexec two setkey -c # We've set incompatible keys, so pfsync will be broken. ping -c 1 -S 198.51.100.254 198.51.100.1 # Give pfsync time to do its thing sleep 2 if jexec two pfctl -s states | grep icmp | grep 198.51.100.1 | \ grep 198.51.100.2 ; then atf_fail "state synced although IPSec should have prevented it" fi # Flush existing states jexec one pfctl -Fs jexec two pfctl -Fs # Fix the IPSec key to match echo "flush; spdflush; spdadd 192.0.2.2/32 192.0.2.1/32 any -P out ipsec esp/transport//require; spdadd 192.0.2.1/32 192.0.2.2/32 any -P in ipsec esp/transport//require; add 192.0.2.1 192.0.2.2 esp 0x1000 -E aes-gcm-16 \"12345678901234567890\"; add 192.0.2.2 192.0.2.1 esp 0x1001 -E aes-gcm-16 \"12345678901234567890\";" \ | jexec two setkey -c ping -c 1 -S 198.51.100.254 198.51.100.1 # Give pfsync time to do its thing sleep 2 if ! 
jexec two pfctl -s states | grep icmp | grep 198.51.100.1 | \ grep 198.51.100.2 ; then atf_fail "state not found on synced host" fi } ipsec_cleanup() { pft_cleanup } atf_test_case "timeout" "cleanup" timeout_head() { atf_set descr 'Trigger pfsync_timeout()' atf_set require.user root } timeout_body() { pft_init vnet_mkjail one jexec one ifconfig lo0 127.0.0.1/8 up jexec one ifconfig lo0 inet6 ::1/128 up pft_set_rules one \ "pass all" jexec one pfctl -e jexec one ifconfig pfsync0 defer up jexec one ping -c 1 ::1 jexec one ping -c 1 127.0.0.1 # Give pfsync_timeout() time to fire (a callout on a 1 second delay) sleep 2 } timeout_cleanup() { pft_cleanup } atf_test_case "basic_ipv6_unicast" "cleanup" basic_ipv6_unicast_head() { atf_set descr 'Basic pfsync test (IPv6 unicast)' atf_set require.user root } basic_ipv6_unicast_body() { pfsynct_init epair_sync=$(vnet_mkepair) epair_one=$(vnet_mkepair) epair_two=$(vnet_mkepair) vnet_mkjail one ${epair_one}a ${epair_sync}a vnet_mkjail two ${epair_two}a ${epair_sync}b # pfsync interface jexec one ifconfig ${epair_sync}a inet6 fd2c::1/64 no_dad up jexec one ifconfig ${epair_one}a inet6 fd2b::1/64 no_dad up jexec one ifconfig pfsync0 \ syncdev ${epair_sync}a \ syncpeer fd2c::2 \ maxupd 1 \ up jexec two ifconfig ${epair_two}a inet6 fd2b::2/64 no_dad up jexec two ifconfig ${epair_sync}b inet6 fd2c::2/64 no_dad up jexec two ifconfig pfsync0 \ syncdev ${epair_sync}b \ syncpeer fd2c::1 \ maxupd 1 \ up # Enable pf! jexec one pfctl -e pft_set_rules one \ "block on ${epair_sync}a inet" \ "pass out keep state" jexec two pfctl -e pft_set_rules two \ "block on ${epair_sync}b inet" \ "pass out keep state" ifconfig ${epair_one}b inet6 fd2b::f0/64 no_dad up ping6 -c 1 -S fd2b::f0 fd2b::1 # Give pfsync time to do its thing sleep 2 if ! jexec two pfctl -s states | grep icmp | grep fd2b::1 | \ grep fd2b::f0 ; then atf_fail "state not found on synced host" fi } basic_ipv6_unicast_cleanup() { pfsynct_cleanup } atf_test_case "basic_ipv6" "cleanup" basic_ipv6_head() { atf_set descr 'Basic pfsync test (IPv6)' atf_set require.user root } basic_ipv6_body() { pfsynct_init epair_sync=$(vnet_mkepair) epair_one=$(vnet_mkepair) epair_two=$(vnet_mkepair) vnet_mkjail one ${epair_one}a ${epair_sync}a vnet_mkjail two ${epair_two}a ${epair_sync}b # pfsync interface jexec one ifconfig ${epair_sync}a inet6 fd2c::1/64 no_dad up jexec one ifconfig ${epair_one}a inet6 fd2b::1/64 no_dad up jexec one ifconfig pfsync0 \ syncdev ${epair_sync}a \ syncpeer ff12::f0 \ maxupd 1 \ up jexec two ifconfig ${epair_two}a inet6 fd2b::2/64 no_dad up jexec two ifconfig ${epair_sync}b inet6 fd2c::2/64 no_dad up jexec two ifconfig pfsync0 \ syncdev ${epair_sync}b \ syncpeer ff12::f0 \ maxupd 1 \ up # Enable pf! jexec one pfctl -e pft_set_rules one \ "block on ${epair_sync}a inet" \ "pass out keep state" jexec two pfctl -e pft_set_rules two \ "block on ${epair_sync}b inet" \ "pass out keep state" ifconfig ${epair_one}b inet6 fd2b::f0/64 no_dad up ping6 -c 1 -S fd2b::f0 fd2b::1 # Give pfsync time to do its thing sleep 2 if !
jexec two pfctl -s states | grep icmp | grep fd2b::1 | \ grep fd2b::f0 ; then atf_fail "state not found on synced host" fi } basic_ipv6_cleanup() { pfsynct_cleanup } atf_test_case "rtable" "cleanup" rtable_head() { atf_set descr 'Test handling of invalid rtableid' atf_set require.user root } rtable_body() { pfsynct_init epair_sync=$(vnet_mkepair) epair_one=$(vnet_mkepair) epair_two=$(vnet_mkepair) vnet_mkjail one ${epair_one}a ${epair_sync}a vnet_mkjail two ${epair_two}a ${epair_sync}b # pfsync interface jexec one ifconfig ${epair_sync}a 192.0.2.1/24 up jexec one ifconfig ${epair_one}a 198.51.100.1/24 up jexec one ifconfig pfsync0 \ syncdev ${epair_sync}a \ maxupd 1 \ up jexec two ifconfig ${epair_two}a 198.51.100.1/24 up jexec two ifconfig ${epair_sync}b 192.0.2.2/24 up jexec two ifconfig pfsync0 \ syncdev ${epair_sync}b \ maxupd 1 \ up # Make life easy, give ${epair_two}a the same mac address as ${epair_one}a mac=$(jexec one ifconfig ${epair_one}a | awk '/ether/ { print($2); }') jexec two ifconfig ${epair_two}a ether ${mac} # Enable pf! jexec one /sbin/sysctl net.fibs=8 jexec one pfctl -e pft_set_rules one \ "set skip on ${epair_sync}a" \ "pass rtable 3 keep state" # No extra fibs in two jexec two pfctl -e pft_set_rules two \ "set skip on ${epair_sync}b" \ "pass keep state" ifconfig ${epair_one}b 198.51.100.254/24 up ifconfig ${epair_two}b 198.51.100.253/24 up # Create a new state env PYTHONPATH=${common_dir} \ ${common_dir}/pft_ping.py \ --sendif ${epair_one}b \ --fromaddr 198.51.100.254 \ --to 198.51.100.1 \ --recvif ${epair_one}b # Now jexec one pfctl -ss -vv sleep 2 # Now try to use that state on jail two env PYTHONPATH=${common_dir} \ ${common_dir}/pft_ping.py \ --sendif ${epair_two}b \ --fromaddr 198.51.100.254 \ --to 198.51.100.1 \ --recvif ${epair_two}b echo one jexec one pfctl -ss -vv jexec one pfctl -sr -vv echo two jexec two pfctl -ss -vv jexec two pfctl -sr -vv } rtable_cleanup() { pfsynct_cleanup } route_to_common_head() { + # TODO: Extend setup_router_server_nat64 to create a 2nd router + pfsync_version=$1 shift pfsynct_init epair_sync=$(vnet_mkepair) epair_one=$(vnet_mkepair) epair_two=$(vnet_mkepair) epair_out_one=$(vnet_mkepair) epair_out_two=$(vnet_mkepair) vnet_mkjail one ${epair_one}a ${epair_sync}a ${epair_out_one}a vnet_mkjail two ${epair_two}a ${epair_sync}b ${epair_out_two}a # pfsync interface jexec one ifconfig ${epair_sync}a 192.0.2.1/24 up - jexec one ifconfig ${epair_one}a 198.51.100.1/24 up + jexec one ifconfig ${epair_one}a 198.51.100.1/28 up + jexec one ifconfig ${epair_one}a inet6 2001:db8:4211::1/64 no_dad + jexec one ifconfig ${epair_one}a name inif jexec one ifconfig ${epair_out_one}a 203.0.113.1/24 up + jexec one ifconfig ${epair_out_one}a inet6 2001:db8:4200::1/64 no_dad jexec one ifconfig ${epair_out_one}a name outif jexec one sysctl net.inet.ip.forwarding=1 - jexec one arp -s 203.0.113.254 00:01:02:03:04:05 + jexec one sysctl net.inet6.ip6.forwarding=1 + jexec one arp -s 203.0.113.254 00:01:02:00:00:04 + jexec one ndp -s 2001:db8:4200::fe 00:01:02:00:00:06 jexec one ifconfig pfsync0 \ syncdev ${epair_sync}a \ maxupd 1 \ version $pfsync_version \ up jexec two ifconfig ${epair_sync}b 192.0.2.2/24 up - jexec two ifconfig ${epair_two}a 198.51.100.2/24 up - jexec two ifconfig ${epair_out_two}a 203.0.113.2/24 up + jexec two ifconfig ${epair_two}a 198.51.100.17/28 up + jexec two ifconfig ${epair_two}a inet6 2001:db8:4212::1/64 no_dad + jexec two ifconfig ${epair_two}a name inif + jexec two ifconfig ${epair_out_two}a 203.0.113.1/24 up + jexec two ifconfig
${epair_out_two}a inet6 2001:db8:4200::2/64 no_dad jexec two ifconfig ${epair_out_two}a name outif jexec two sysctl net.inet.ip.forwarding=1 - jexec two arp -s 203.0.113.254 00:01:02:03:04:05 + jexec two sysctl net.inet6.ip6.forwarding=1 + jexec two arp -s 203.0.113.254 00:01:02:00:00:04 + jexec two ndp -s 2001:db8:4200::fe 00:01:02:00:00:06 jexec two ifconfig pfsync0 \ syncdev ${epair_sync}b \ maxupd 1 \ version $pfsync_version \ up - ifconfig ${epair_one}b 198.51.100.254/24 up - ifconfig ${epair_two}b 198.51.100.253/24 up + ifconfig ${epair_one}b 198.51.100.2/28 up + ifconfig ${epair_one}b inet6 2001:db8:4211::2/64 no_dad + ifconfig ${epair_two}b 198.51.100.18/28 up + ifconfig ${epair_two}b inet6 2001:db8:4212::2/64 no_dad + # Target is behind router "one" route add -net 203.0.113.0/24 198.51.100.1 + route add -inet6 -net 64:ff9b::/96 2001:db8:4211::1 + ifconfig ${epair_two}b up ifconfig ${epair_out_one}b up ifconfig ${epair_out_two}b up } route_to_common_tail() { atf_check -s exit:0 env PYTHONPATH=${common_dir} \ ${common_dir}/pft_ping.py \ --sendif ${epair_one}b \ --fromaddr 198.51.100.254 \ --to 203.0.113.254 \ --recvif ${epair_out_one}b # Allow time for sync sleep 2 states_one=$(mktemp) states_two=$(mktemp) jexec one pfctl -qvvss | normalize_pfctl_s > $states_one jexec two pfctl -qvvss | normalize_pfctl_s > $states_two } atf_test_case "route_to_1301" "cleanup" route_to_1301_head() { atf_set descr 'Test route-to with pfsync version 13.1' atf_set require.user root atf_set require.progs python3 scapy } route_to_1301_body() { route_to_common_head 1301 jexec one pfctl -e pft_set_rules one \ "set skip on ${epair_sync}a" \ "pass out route-to (outif 203.0.113.254)" jexec two pfctl -e pft_set_rules two \ "set skip on ${epair_sync}b" \ "pass out route-to (outif 203.0.113.254)" route_to_common_tail # Sanity check grep -qE 'all icmp 198.51.100.254 -> 203.0.113.254:8 .*, rule 0 .* route-to: 203.0.113.254@outif origif: outif' $states_one || atf_fail "State missing on router one" # With an identical ruleset the routing information is recovered from the matching rule. grep -qE 'all icmp 198.51.100.254 -> 203.0.113.254:8 .*, rule 0 .* route-to: 203.0.113.254@outif' $states_two || atf_fail "State missing on router two" true } route_to_1301_cleanup() { pfsynct_cleanup } atf_test_case "route_to_1301_bad_ruleset" "cleanup" route_to_1301_bad_ruleset_head() { atf_set descr 'Test route-to with pfsync version 13.1 and incompatible ruleset' atf_set require.user root atf_set require.progs python3 scapy } route_to_1301_bad_ruleset_body() { route_to_common_head 1301 jexec one pfctl -e pft_set_rules one \ "set skip on ${epair_sync}a" \ "pass out route-to (outif 203.0.113.254)" jexec two pfctl -e pft_set_rules two \ "set debug loud" \ "set skip on ${epair_sync}b" \ "pass out route-to (outif 203.0.113.254)" \ "pass out proto tcp" atf_check -s exit:0 env PYTHONPATH=${common_dir} \ ${common_dir}/pft_ping.py \ --sendif ${epair_one}b \ --fromaddr 198.51.100.254 \ --to 203.0.113.254 \ --recvif ${epair_out_one}b route_to_common_tail # Sanity check grep -qE 'all icmp 198.51.100.254 -> 203.0.113.254:8 .*, rule 0 .* route-to: 203.0.113.254@outif origif: outif' $states_one || atf_fail "State missing on router one" # A different ruleset on each router means the routing information # cannot be recovered from the rule. The state is not synced.
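# (A pfsync 13.1 state message carries no route-to information; the receiving router can only reconstruct it by looking up the rule that created the state, which is why both rulesets must match.)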
grep -qE 'all icmp 198.51.100.254 -> 203.0.113.254:8 .*' $states_two && atf_fail "State present on router two" true } route_to_1301_bad_ruleset_cleanup() { pfsynct_cleanup } atf_test_case "route_to_1301_bad_rpool" "cleanup" route_to_1301_bad_rpool_head() { atf_set descr 'Test route-to with pfsync version 13.1 and a redirection pool with multiple interfaces' atf_set require.user root atf_set require.progs python3 scapy } route_to_1301_bad_rpool_body() { route_to_common_head 1301 jexec one pfctl -e pft_set_rules one \ "set skip on ${epair_sync}a" \ "pass out route-to { (outif 203.0.113.254) (outif 203.0.113.254) }" jexec two pfctl -e pft_set_rules two \ "set skip on ${epair_sync}b" \ "pass out route-to { (outif 203.0.113.254) (outif 203.0.113.254) }" atf_check -s exit:0 env PYTHONPATH=${common_dir} \ ${common_dir}/pft_ping.py \ --sendif ${epair_one}b \ --fromaddr 198.51.100.254 \ --to 203.0.113.254 \ --recvif ${epair_out_one}b route_to_common_tail # Sanity check grep -qE 'all icmp 198.51.100.254 -> 203.0.113.254:8 .*, rule 0 .* route-to: 203.0.113.254@outif origif: outif' $states_one || atf_fail "State missing on router one" # The ruleset is identical, but since the redirection pool contains multiple interfaces # pfsync will not attempt to recover the routing information from the rule. grep -qE 'all icmp 198.51.100.254 -> 203.0.113.254:8 .*' $states_two && atf_fail "State present on router two" true } route_to_1301_bad_rpool_cleanup() { pfsynct_cleanup } atf_test_case "route_to_1400_bad_ruleset" "cleanup" route_to_1400_bad_ruleset_head() { atf_set descr 'Test route-to with pfsync version 14.0 and incompatible ruleset' atf_set require.user root atf_set require.progs python3 scapy } route_to_1400_bad_ruleset_body() { route_to_common_head 1400 jexec one pfctl -e pft_set_rules one \ "set skip on ${epair_sync}a" \ "pass out route-to (outif 203.0.113.254)" jexec two pfctl -e pft_set_rules two \ "set skip on ${epair_sync}b" route_to_common_tail # Sanity check grep -qE 'all icmp 198.51.100.254 -> 203.0.113.254:8 .*, rule 0 .* route-to: 203.0.113.254@outif origif: outif' $states_one || atf_fail "State missing on router one" # Even with a different ruleset FreeBSD 14 syncs the state just fine. # There's no recovery involved; the pfsync packet contains the routing information. grep -qE 'all icmp 198.51.100.254 -> 203.0.113.254:8 .* route-to: 203.0.113.254@outif' $states_two || atf_fail "State missing on router two" true } route_to_1400_bad_ruleset_cleanup() { pfsynct_cleanup } atf_test_case "route_to_1400_bad_ifname" "cleanup" route_to_1400_bad_ifname_head() { atf_set descr 'Test route-to with pfsync version 14.0 and a different interface name' atf_set require.user root atf_set require.progs python3 scapy } route_to_1400_bad_ifname_body() { route_to_common_head 1400 jexec one pfctl -e pft_set_rules one \ "set skip on ${epair_sync}a" \ "pass out route-to (outif 203.0.113.254)" jexec two pfctl -e jexec two ifconfig outif name outif_new pft_set_rules two \ "set skip on ${epair_sync}b" \ "pass out route-to (outif_new 203.0.113.254)" route_to_common_tail # Sanity check grep -qE 'all icmp 198.51.100.254 -> 203.0.113.254:8 .*, rule 0 .* route-to: 203.0.113.254@outif origif: outif' $states_one || atf_fail "State missing on router one" # Since FreeBSD 14 never attempts recovery of missing routing information, # a state synced to a router with a different interface name is dropped.
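# (The 14.0 state message carries the route-to address and interface name verbatim; with outif renamed on router two the interface lookup fails there, so the state cannot be installed.)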
grep -qE 'all icmp 198.51.100.254 -> 203.0.113.254:8 .*' $states_two && atf_fail "State present on router two" true } route_to_1400_bad_ifname_cleanup() { pfsynct_cleanup } +atf_test_case "af_to_in_floating" "cleanup" +af_to_in_floating_head() +{ + atf_set descr 'Test syncing of states created by inbound af-to rules with floating states' + atf_set require.user root + atf_set require.progs python3 scapy +} + +af_to_in_floating_body() +{ + route_to_common_head 1500 + + jexec one pfctl -e + pft_set_rules one \ + "set state-policy floating" \ + "set skip on ${epair_sync}a" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" \ + "pass in on inif to 64:ff9b::/96 af-to inet from (outif) keep state" + + jexec two pfctl -e + pft_set_rules two \ + "set skip on ${epair_sync}b" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" + + # pft_ping can't deal with nat64, so this test will fail but still generate states + atf_check -s exit:1 env PYTHONPATH=${common_dir} \ + ${common_dir}/pft_ping.py \ + --sendif ${epair_one}b \ + --fromaddr 2001:db8:4201::fe \ + --to 64:ff9b::203.0.113.254 \ + --recvif ${epair_out_one}b + + # Allow time for sync + sleep 2 + + states_one=$(mktemp) + states_two=$(mktemp) + jexec one pfctl -qvvss | normalize_pfctl_s > $states_one + jexec two pfctl -qvvss | normalize_pfctl_s > $states_two + + # Sanity check + grep -qE 'all ipv6-icmp 203.0.113.1 \(2001:db8:4201::fe\) -> 203.0.113.254:8 \(64:ff9b::cb00:71fe\) .* rule 3 .* origif: inif' $states_one || + atf_fail "State missing on router one" + + grep -qE 'all ipv6-icmp 203.0.113.1 \(2001:db8:4201::fe\) -> 203.0.113.254:8 \(64:ff9b::cb00:71fe\) .* origif: inif' $states_two || + atf_fail "State missing on router two" +} + +af_to_in_floating_cleanup() +{ + pfsynct_cleanup +} + +atf_test_case "af_to_in_if_bound" "cleanup" +af_to_in_if_bound_head() +{ + atf_set descr 'Test syncing of states created by inbound af-to rules with if-bound states' + atf_set require.user root + atf_set require.progs python3 scapy +} + +af_to_in_if_bound_body() +{ + route_to_common_head 1500 + + jexec one pfctl -e + pft_set_rules one \ + "set state-policy if-bound" \ + "set skip on ${epair_sync}a" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" \ + "pass in on inif to 64:ff9b::/96 af-to inet from (outif) keep state" + + jexec two pfctl -e + pft_set_rules two \ + "set skip on ${epair_sync}b" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" + + # pft_ping can't deal with nat64, so this test will fail but still generate states + atf_check -s exit:1 env PYTHONPATH=${common_dir} \ + ${common_dir}/pft_ping.py \ + --sendif ${epair_one}b \ + --fromaddr 2001:db8:4201::fe \ + --to 64:ff9b::203.0.113.254 \ + --recvif ${epair_out_one}b + + # Allow time for sync + sleep 2 + + states_one=$(mktemp) + states_two=$(mktemp) + jexec one pfctl -qvvss | normalize_pfctl_s > $states_one + jexec two pfctl -qvvss | normalize_pfctl_s > $states_two + + # Sanity check + grep -qE 'outif ipv6-icmp 203.0.113.1 \(2001:db8:4201::fe\) -> 203.0.113.254:8 \(64:ff9b::cb00:71fe\) .* rule 3 .* origif: inif' $states_one || + atf_fail "State missing on router one" + + grep -qE 'outif ipv6-icmp 203.0.113.1 \(2001:db8:4201::fe\) -> 203.0.113.254:8 \(64:ff9b::cb00:71fe\) .* origif: inif' $states_two || + atf_fail "State missing on router two" +} + +af_to_in_if_bound_cleanup() +{ + pfsynct_cleanup +} + +atf_test_case
"af_to_out_if_bound" "cleanup" +af_to_out_if_bound_head() +{ + atf_set descr 'Test syncing of states created by outbound af-to rules with if-bound states' + atf_set require.user root + atf_set require.progs python3 scapy +} + +af_to_out_if_bound_body() +{ + route_to_common_head 1500 + + jexec one route add -inet6 -net 64:ff9b::/96 -iface outif + jexec one sysctl net.inet6.ip6.forwarding=1 + + jexec one pfctl -e + pft_set_rules one \ + "set state-policy if-bound" \ + "set skip on ${epair_sync}a" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" \ + "pass in on inif to 64:ff9b::/96 keep state" \ + "pass out on outif to 64:ff9b::/96 af-to inet from (outif) keep state" + + jexec two pfctl -e + pft_set_rules two \ + "set skip on ${epair_sync}b" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" + + # ptf_ping can't deal with nat64, this test will fail but generate states + atf_check -s exit:1 env PYTHONPATH=${common_dir} \ + ${common_dir}/pft_ping.py \ + --sendif ${epair_one}b \ + --fromaddr 2001:db8:4201::fe \ + --to 64:ff9b::203.0.113.254 \ + --recvif ${epair_out_one}b + + # Allow time for sync + sleep 2 + + states_one=$(mktemp) + states_two=$(mktemp) + jexec one pfctl -qvvss | normalize_pfctl_s > $states_one + jexec two pfctl -qvvss | normalize_pfctl_s > $states_two + + # Sanity check + # st->orig_kif is the same as st->kif, so st->orig_kif is not printed. + for state_regexp in \ + "inif ipv6-icmp 64:ff9b::cb00:71fe\[128\] <- 2001:db8:4201::fe .* rule 3 .* creatorid: [0-9a-f]+" \ + "outif icmp 203.0.113.1 \(64:ff9b::cb00:71fe\[8\]\) -> 203.0.113.254:8 \(2001:db8:4201::fe\) .* rule 4 .* creatorid: [0-9a-f]+" \ + ; do + grep -qE "${state_regexp}" $states_one || atf_fail "State not found for '${state_regexp}'" + done + + for state_regexp in \ + "inif ipv6-icmp 64:ff9b::cb00:71fe\[128\] <- 2001:db8:4201::fe .* creatorid: [0-9a-f]+" \ + "outif icmp 203.0.113.1 \(64:ff9b::cb00:71fe\[8\]\) -> 203.0.113.254:8 \(2001:db8:4201::fe\) .* creatorid: [0-9a-f]+" \ + ; do + grep -qE "${state_regexp}" $states_two || atf_fail "State not found for '${state_regexp}'" + done +} + +af_to_out_if_bound_cleanup() +{ + pfsynct_cleanup +} + +atf_test_case "tag" "cleanup" +tag_head() +{ + atf_set descr 'Test if the pf tag is synced' + atf_set require.user root + atf_set require.progs python3 scapy +} + +tag_body() +{ + route_to_common_head 1500 + + jexec one pfctl -e + pft_set_rules one \ + "set skip on ${epair_sync}a" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" \ + "pass in on inif inet proto udp tag sometag keep state" \ + "pass out on outif tagged sometag keep state (no-sync)" + + jexec two pfctl -e + pft_set_rules two \ + "set debug loud" \ + "set skip on ${epair_sync}b" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" \ + "block tagged othertag" \ + "pass out on outif tagged sometag keep state (no-sync)" + + atf_check -s exit:0 env PYTHONPATH=${common_dir} \ + ${common_dir}/pft_ping.py \ + --ping-type=udp \ + --sendif ${epair_one}b \ + --fromaddr 198.51.100.254 \ + --to 203.0.113.254 \ + --recvif ${epair_out_one}b + + # Allow time for sync + sleep 2 + + # Force the next request to go through the 2nd router + route change -net 203.0.113.0/24 198.51.100.17 + + atf_check -s exit:0 env PYTHONPATH=${common_dir} \ + ${common_dir}/pft_ping.py \ + --ping-type=udp \ + --sendif ${epair_two}b \ + --fromaddr 198.51.100.254 
\ + --to 203.0.113.254 \ + --recvif ${epair_out_two}b +} + +tag_cleanup() +{ + pfsynct_cleanup +} + +atf_test_case "altq_queues" "cleanup" +altq_queues_head() +{ + atf_set descr 'Test if the altq queues are synced' + atf_set require.user root + atf_set require.progs python3 scapy +} + +altq_queues_body() +{ + route_to_common_head 1500 + altq_init + is_altq_supported hfsc + + jexec one pfctl -e + pft_set_rules one \ + "set skip on ${epair_sync}a" \ + "altq on outif bandwidth 30000b hfsc queue { default other1 other2 }" \ + "queue default hfsc(linkshare 10000b default)" \ + "queue other1 hfsc(linkshare 10000b)" \ + "queue other2 hfsc(linkshare 10000b)" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" \ + "pass in on inif inet proto udp queue other1 keep state" \ + "pass out on outif inet proto udp keep state" + + jexec two pfctl -e + pft_set_rules two \ + "set debug loud" \ + "set skip on ${epair_sync}b" \ + "altq on outif bandwidth 30000b hfsc queue { default other2 other1 }" \ + "queue default hfsc(linkshare 10000b default)" \ + "queue other2 hfsc(linkshare 10000b)" \ + "queue other1 hfsc(linkshare 10000b)" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" \ + "pass out on outif inet proto udp keep state" + + atf_check -s exit:0 env PYTHONPATH=${common_dir} \ + ${common_dir}/pft_ping.py \ + --ping-type=udp \ + --sendif ${epair_one}b \ + --fromaddr 198.51.100.254 \ + --to 203.0.113.254 \ + --recvif ${epair_out_one}b + + queues_one=$(mktemp) + jexec one pfctl -qvsq | normalize_pfctl_s > $queues_one + echo " === queues one === " + cat $queues_one + grep -qE 'queue other1 on outif .* pkts: 1 ' $queues_one || atf_fail 'Packets not sent through queue "other1"' + + # Allow time for sync + sleep 2 + + # Force the next request to go through the 2nd router + route change -net 203.0.113.0/24 198.51.100.17 + + # Send a packet through router "two". It lacks the inbound rule + # but the inbound state should have been pfsynced from router "one", + # including altq queuing information. However, the queues are created + # on router "two" in a different order and only the queue index is + # synced, so the packet ends up in a different queue. The queue sets + # must be identical on both routers! + atf_check -s exit:0 env PYTHONPATH=${common_dir} \ + ${common_dir}/pft_ping.py \ + --ping-type=udp \ + --sendif ${epair_two}b \ + --fromaddr 198.51.100.254 \ + --to 203.0.113.254 \ + --recvif ${epair_out_two}b + + queues_two=$(mktemp) + jexec two pfctl -qvsq | normalize_pfctl_s > $queues_two + echo " === queues two === " + cat $queues_two + grep -qE 'queue other2 on outif .* pkts: 1 ' $queues_two || atf_fail 'Packets not sent through queue "other2"' +} + +altq_queues_cleanup() +{ + # Interface detaching seems badly broken in altq. If interfaces are + # destroyed when shutting down the vnet and then pf is unloaded, it will + # cause a kernel crash.
Work around the issue by first flushing the + pf rulesets + jexec one pfctl -F all + jexec two pfctl -F all + pfsynct_cleanup +} + +atf_test_case "rt_af" "cleanup" +rt_af_head() +{ + atf_set descr 'Test if the rt_af is synced' + atf_set require.user root + atf_set require.progs python3 scapy +} + +rt_af_body() +{ + route_to_common_head 1500 + + jexec one pfctl -e + pft_set_rules one \ + "set skip on ${epair_sync}a" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" \ + "pass in on inif \ route-to (outif 203.0.113.254) prefer-ipv6-nexthop \ inet proto udp \ to 203.0.113.241 \ keep state" \ + "pass in on inif \ route-to (outif 2001:db8:4200::fe) prefer-ipv6-nexthop \ inet proto udp \ to 203.0.113.242 \ keep state" \ + "pass in on inif \ route-to (outif 2001:db8:4200::fe) prefer-ipv6-nexthop \ inet6 proto udp \ to 2001:db8:4200::f3 \ keep state" \ + "pass out on outif inet proto udp keep state (no-sync)" \ + "pass out on outif inet6 proto udp keep state (no-sync)" + + jexec two pfctl -e + pft_set_rules two \ + "set debug loud" \ + "set skip on ${epair_sync}b" \ + "block" \ + "pass inet6 proto icmp6 icmp6-type { neighbrsol, neighbradv } keep state (no-sync)" + + # IPv4 packet over IPv4 gateway + atf_check -s exit:0 env PYTHONPATH=${common_dir} \ + ${common_dir}/pft_ping.py \ + --ping-type=udp \ + --sendif ${epair_one}b \ + --fromaddr 198.51.100.254 \ + --to 203.0.113.241 \ + --recvif ${epair_out_one}b + + # FIXME: Routing IPv4 packets over an IPv6 gateway that was added + # with `ndp -s` causes the static NDP entry to expire. + # Pfsync tests don't use "servers" which could reply to ARP and NDP; + # they rely on such static entries for the gateway and only check + # whether a stateless ICMP or UDP packet is forwarded through.
+ # + # IPv4 packet over IPv6 gateway + #atf_check -s exit:0 env PYTHONPATH=${common_dir} \ + # ${common_dir}/pft_ping.py \ + # --ping-type=udp \ + # --sendif ${epair_one}b \ + # --fromaddr 198.51.100.254 \ + # --to 203.0.113.242 \ + # --recvif ${epair_out_one}b + + # IPv6 packet over IPv6 gateway + atf_check -s exit:0 env PYTHONPATH=${common_dir} \ + ${common_dir}/pft_ping.py \ + --ping-type=udp \ + --sendif ${epair_one}b \ + --fromaddr 2001:db8:4211::fe \ + --to 2001:db8:4200::f3 \ + --recvif ${epair_out_one}b + + sleep 5 # Wait for pfsync + + states_one=$(mktemp) + states_two=$(mktemp) + jexec one pfctl -qvvss | normalize_pfctl_s > $states_one + jexec two pfctl -qvvss | normalize_pfctl_s > $states_two + + echo " === states one === " + cat $states_one + echo " === states two === " + cat $states_two + + for state_regexp in \ + "all udp 203.0.113.241:9 <- 198.51.100.254 .* route-to: 203.0.113.254@outif origif: inif" \ + "all udp 2001:db8:4200::f3\[9\] <- 2001:db8:4211::fe .* route-to: 2001:db8:4200::fe@outif origif: inif" \ + ; do + grep -qE "${state_regexp}" $states_two || atf_fail "State not found for '${state_regexp}' on router two" + done +} + +rt_af_cleanup() +{ + jexec one pfctl -qvvsr + jexec one pfctl -qvvss + jexec one arp -an + jexec one ndp -an + pfsynct_cleanup +} + atf_init_test_cases() { atf_add_test_case "basic" atf_add_test_case "basic_defer" atf_add_test_case "defer" atf_add_test_case "bulk" atf_add_test_case "pbr" atf_add_test_case "pfsync_pbr" atf_add_test_case "ipsec" atf_add_test_case "timeout" atf_add_test_case "basic_ipv6_unicast" atf_add_test_case "basic_ipv6" atf_add_test_case "rtable" atf_add_test_case "route_to_1301" atf_add_test_case "route_to_1301_bad_ruleset" atf_add_test_case "route_to_1301_bad_rpool" atf_add_test_case "route_to_1400_bad_ruleset" atf_add_test_case "route_to_1400_bad_ifname" + atf_add_test_case "af_to_in_floating" + atf_add_test_case "af_to_in_if_bound" + atf_add_test_case "af_to_out_if_bound" + atf_add_test_case "tag" + atf_add_test_case "altq_queues" + atf_add_test_case "rt_af" } diff --git a/usr.bin/netstat/if.c b/usr.bin/netstat/if.c index 1603c7662bbd..622141e4ff69 100644 --- a/usr.bin/netstat/if.c +++ b/usr.bin/netstat/if.c @@ -1,740 +1,744 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2013 Gleb Smirnoff * Copyright (c) 1983, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PF #include #include #include #endif #include #include #include #ifdef INET6 #include #endif #include #include #include #include #include #include #include #include #include #include "netstat.h" static void sidewaysintpr(void); #ifdef PF static const char* pfsyncacts[] = { /* PFSYNC_ACT_CLR */ "clear all request", /* PFSYNC_ACT_INS_1301 */ "13.1 state insert", /* PFSYNC_ACT_INS_ACK */ "state inserted ack", /* PFSYNC_ACT_UPD_1301 */ "13.1 state update", /* PFSYNC_ACT_UPD_C */ "compressed state update", /* PFSYNC_ACT_UPD_REQ */ "uncompressed state request", /* PFSYNC_ACT_DEL */ "state delete", /* PFSYNC_ACT_DEL_C */ "compressed state delete", /* PFSYNC_ACT_INS_F */ "fragment insert", /* PFSYNC_ACT_DEL_F */ "fragment delete", /* PFSYNC_ACT_BUS */ "bulk update mark", /* PFSYNC_ACT_TDB */ "TDB replay counter update", /* PFSYNC_ACT_EOF */ "end of frame mark", - /* PFSYNC_ACT_INS_1400 */ "state insert", - /* PFSYNC_ACT_UPD_1400 */ "state update", + /* PFSYNC_ACT_INS_1400 */ "14.0 state insert", + /* PFSYNC_ACT_UPD_1400 */ "14.0 state update", + /* PFSYNC_ACT_INS_1500 */ "state insert", + /* PFSYNC_ACT_UPD_1500 */ "state update", }; static const char* pfsyncacts_name[] = { /* PFSYNC_ACT_CLR */ "clear-all-request", /* PFSYNC_ACT_INS_1301 */ "state-insert-1301", /* PFSYNC_ACT_INS_ACK */ "state-inserted-ack", /* PFSYNC_ACT_UPD_1301 */ "state-update-1301", /* PFSYNC_ACT_UPD_C */ "compressed-state-update", /* PFSYNC_ACT_UPD_REQ */ "uncompressed-state-request", /* PFSYNC_ACT_DEL */ "state-delete", /* PFSYNC_ACT_DEL_C */ "compressed-state-delete", /* PFSYNC_ACT_INS_F */ "fragment-insert", /* PFSYNC_ACT_DEL_F */ "fragment-delete", /* PFSYNC_ACT_BUS */ "bulk-update-mark", /* PFSYNC_ACT_TDB */ "TDB-replay-counter-update", /* PFSYNC_ACT_EOF */ "end-of-frame-mark", - /* PFSYNC_ACT_INS_1400 */ "state-insert", - /* PFSYNC_ACT_UPD_1400 */ "state-update", + /* PFSYNC_ACT_INS_1400 */ "state-insert-1400", + /* PFSYNC_ACT_UPD_1400 */ "state-update-1400", + /* PFSYNC_ACT_INS_1500 */ "state-insert", + /* PFSYNC_ACT_UPD_1500 */ "state-update", }; static void pfsync_acts_stats(const char *list, const char *desc, uint64_t *a) { int i; xo_open_list(list); for (i = 0; i < PFSYNC_ACT_MAX; i++, a++) { if (*a || sflag <= 1) { xo_open_instance(list); xo_emit("\t\t{e:name}{:count/%ju} {N:/%s%s %s}\n", pfsyncacts_name[i], (uintmax_t)(*a), pfsyncacts[i], plural(*a), desc); xo_close_instance(list); } } xo_close_list(list); } /* * Dump pfsync statistics structure. 
*/ void pfsync_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct pfsyncstats pfsyncstat; if (fetch_stats("net.pfsync.stats", off, &pfsyncstat, sizeof(pfsyncstat), kread) != 0) return; xo_emit("{T:/%s}:\n", name); xo_open_container(name); #define p(f, m) if (pfsyncstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pfsyncstat.f, plural(pfsyncstat.f)) p(pfsyncs_ipackets, "\t{:received-inet-packets/%ju} " "{N:/packet%s received (IPv4)}\n"); p(pfsyncs_ipackets6, "\t{:received-inet6-packets/%ju} " "{N:/packet%s received (IPv6)}\n"); pfsync_acts_stats("input-histogram", "received", &pfsyncstat.pfsyncs_iacts[0]); p(pfsyncs_badif, "\t\t{:dropped-bad-interface/%ju} " "{N:/packet%s discarded for bad interface}\n"); p(pfsyncs_badttl, "\t\t{:dropped-bad-ttl/%ju} " "{N:/packet%s discarded for bad ttl}\n"); p(pfsyncs_hdrops, "\t\t{:dropped-short-header/%ju} " "{N:/packet%s shorter than header}\n"); p(pfsyncs_badver, "\t\t{:dropped-bad-version/%ju} " "{N:/packet%s discarded for bad version}\n"); p(pfsyncs_badauth, "\t\t{:dropped-bad-auth/%ju} " "{N:/packet%s discarded for bad HMAC}\n"); p(pfsyncs_badact, "\t\t{:dropped-bad-action/%ju} " "{N:/packet%s discarded for bad action}\n"); p(pfsyncs_badlen, "\t\t{:dropped-short/%ju} " "{N:/packet%s discarded for short packet}\n"); p(pfsyncs_badval, "\t\t{:dropped-bad-values/%ju} " "{N:/state%s discarded for bad values}\n"); p(pfsyncs_stale, "\t\t{:dropped-stale-state/%ju} " "{N:/stale state%s}\n"); p(pfsyncs_badstate, "\t\t{:dropped-failed-lookup/%ju} " "{N:/failed state lookup\\/insert%s}\n"); p(pfsyncs_opackets, "\t{:sent-inet-packets/%ju} " "{N:/packet%s sent (IPv4)}\n"); p(pfsyncs_opackets6, "\t{:send-inet6-packets/%ju} " "{N:/packet%s sent (IPv6)}\n"); pfsync_acts_stats("output-histogram", "sent", &pfsyncstat.pfsyncs_oacts[0]); p(pfsyncs_onomem, "\t\t{:discarded-no-memory/%ju} " "{N:/failure%s due to mbuf memory error}\n"); p(pfsyncs_oerrors, "\t\t{:send-errors/%ju} " "{N:/send error%s}\n"); #undef p xo_close_container(name); } void pflow_stats(u_long off, const char *name, int af1 __unused, int proto __unused) { struct pflowstats pflowstat; if (fetch_stats("net.pflow.stats", off, &pflowstat, sizeof(pflowstat), kread) != 0) return; xo_emit("{T:/%s}:\n", name); xo_open_container(name); #define p(f, m) if (pflowstat.f || sflag <= 1) \ xo_emit(m, (uintmax_t)pflowstat.f, plural(pflowstat.f)) p(pflow_flows, "\t{:flows/%ju} {N:/flow%s sent}\n"); p(pflow_packets, "\t{:packets/%ju} {N:/packet%s sent}\n"); p(pflow_onomem, "\t{:nomem/%ju} " "{N:/send failed due to mbuf memory error}\n"); p(pflow_oerrors, "\t{:send-error/%ju} {N:/send error}\n"); #undef p xo_close_container(name); } #endif /* PF */ /* * Display a formatted value, or a '-' in the same space. */ static void show_stat(const char *fmt, int width, const char *name, u_long value, short showvalue, int div1000) { const char *lsep, *rsep; char newfmt[64]; lsep = ""; if (strncmp(fmt, "LS", 2) == 0) { lsep = " "; fmt += 2; } rsep = " "; if (strncmp(fmt, "NRS", 3) == 0) { rsep = ""; fmt += 3; } if (showvalue == 0) { /* Print just dash. */ xo_emit("{P:/%s}{D:/%*s}{P:/%s}", lsep, width, "-", rsep); return; } /* * XXX: workaround {P:} modifier can't be empty and doesn't seem to * take args... so we need to conditionally include it in the format. */ #define maybe_pad(pad) do { \ if (strlen(pad)) { \ snprintf(newfmt, sizeof(newfmt), "{P:%s}", pad); \ xo_emit(newfmt); \ } \ } while (0) if (hflag) { char buf[5]; /* Format in human readable form.
*/ humanize_number(buf, sizeof(buf), (int64_t)value, "", HN_AUTOSCALE, HN_NOSPACE | HN_DECIMAL | \ ((div1000) ? HN_DIVISOR_1000 : 0)); maybe_pad(lsep); snprintf(newfmt, sizeof(newfmt), "{:%s/%%%ds}", name, width); xo_emit(newfmt, buf); maybe_pad(rsep); } else { /* Construct the format string. */ maybe_pad(lsep); snprintf(newfmt, sizeof(newfmt), "{:%s/%%%d%s}", name, width, fmt); xo_emit(newfmt, value); maybe_pad(rsep); } } /* * Find next multiaddr for a given interface name. */ static struct ifmaddrs * next_ifma(struct ifmaddrs *ifma, const char *name, const sa_family_t family) { for(; ifma != NULL; ifma = ifma->ifma_next) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)ifma->ifma_name; if (ifma->ifma_addr->sa_family == family && strcmp(sdl->sdl_data, name) == 0) break; } return (ifma); } enum process_op { MEASURE, EMIT }; static void process_ifa_addr(enum process_op op, struct ifaddrs *ifa, int *max_net_len, int *max_addr_len, bool *network, bool *link) { int net_len, addr_len; const char *nn, *rn; if (op == EMIT) { net_len = *max_net_len; addr_len = *max_addr_len; } switch (ifa->ifa_addr->sa_family) { case AF_UNSPEC: if (op == MEASURE) { net_len = strlen("none"); addr_len = strlen("none"); } else { xo_emit("{:network/%-*.*s} ", net_len, net_len, "none"); xo_emit("{:address/%-*.*s} ", addr_len, addr_len, "none"); } break; case AF_INET: #ifdef INET6 case AF_INET6: #endif /* INET6 */ nn = netname(ifa->ifa_addr, ifa->ifa_netmask); rn = routename(ifa->ifa_addr, numeric_addr); if (op == MEASURE) { net_len = strlen(nn); addr_len = strlen(rn); } else { xo_emit("{t:network/%-*s} ", net_len, nn); xo_emit("{t:address/%-*s} ", addr_len, rn); } if (network != NULL) *network = true; break; case AF_LINK: { struct sockaddr_dl *sdl; char linknum[sizeof("<Link#32767>")]; sdl = (struct sockaddr_dl *)ifa->ifa_addr; snprintf(linknum, sizeof(linknum), "<Link#%d>", sdl->sdl_index); if (op == MEASURE) { net_len = strlen(linknum); if (sdl->sdl_nlen == 0 && sdl->sdl_alen == 0 && sdl->sdl_slen == 0) addr_len = 1; else addr_len = strlen(routename(ifa->ifa_addr, 1)); } else { xo_emit("{t:network/%-*.*s} ", net_len, net_len, linknum); if (sdl->sdl_nlen == 0 && sdl->sdl_alen == 0 && sdl->sdl_slen == 0) xo_emit("{P:/%*s} ", addr_len, ""); else xo_emit("{t:address/%-*.*s} ", addr_len, addr_len, routename(ifa->ifa_addr, 1)); } if (link != NULL) *link = true; break; } } if (op == MEASURE) { if (net_len > *max_net_len) *max_net_len = net_len; if (addr_len > *max_addr_len) *max_addr_len = addr_len; } } static int max_num_len(int max_len, u_long num) { int len = 2; /* include space */ for (; num > 10; len++) num /= 10; return (MAX(max_len, len)); } /* * Print a description of the network interfaces.
*/ void intpr(void (*pfunc)(char *), int af) { struct ifaddrs *ifap, *ifa; struct ifmaddrs *ifmap, *ifma; u_int ifn_len_max = 5, ifn_len; u_int net_len = strlen("Network "), addr_len = strlen("Address "); u_int npkt_len = 8, nbyte_len = 10, nerr_len = 5; if (interval) return sidewaysintpr(); if (getifaddrs(&ifap) != 0) xo_err(EX_OSERR, "getifaddrs"); if (aflag && getifmaddrs(&ifmap) != 0) xo_err(EX_OSERR, "getifmaddrs"); for (ifa = ifap; ifa; ifa = ifa->ifa_next) { if (interface != NULL && strcmp(ifa->ifa_name, interface) != 0) continue; if (af != AF_UNSPEC && ifa->ifa_addr->sa_family != af) continue; ifn_len = strlen(ifa->ifa_name); if ((ifa->ifa_flags & IFF_UP) == 0) ++ifn_len; ifn_len_max = MAX(ifn_len_max, ifn_len); process_ifa_addr(MEASURE, ifa, &net_len, &addr_len, NULL, NULL); #define IFA_STAT(s) (((struct if_data *)ifa->ifa_data)->ifi_ ## s) if (!hflag) { npkt_len = max_num_len(npkt_len, IFA_STAT(ipackets)); npkt_len = max_num_len(npkt_len, IFA_STAT(opackets)); nerr_len = max_num_len(nerr_len, IFA_STAT(ierrors)); nerr_len = max_num_len(nerr_len, IFA_STAT(iqdrops)); nerr_len = max_num_len(nerr_len, IFA_STAT(collisions)); if (dflag) nerr_len = max_num_len(nerr_len, IFA_STAT(oqdrops)); if (bflag) { nbyte_len = max_num_len(nbyte_len, IFA_STAT(ibytes)); nbyte_len = max_num_len(nbyte_len, IFA_STAT(obytes)); } } } xo_open_list("interface"); if (!pfunc) { xo_emit("{T:/%-*.*s}", ifn_len_max, ifn_len_max, "Name"); xo_emit(" {T:/%5.5s} {T:/%-*.*s} {T:/%-*.*s} {T:/%*.*s} " "{T:/%*.*s} {T:/%*.*s}", "Mtu", net_len, net_len, "Network", addr_len, addr_len, "Address", npkt_len, npkt_len, "Ipkts", nerr_len, nerr_len, "Ierrs", nerr_len, nerr_len, "Idrop"); if (bflag) xo_emit(" {T:/%*.*s}", nbyte_len, nbyte_len, "Ibytes"); xo_emit(" {T:/%*.*s} {T:/%*.*s}", npkt_len, npkt_len, "Opkts", nerr_len, nerr_len, "Oerrs"); if (bflag) xo_emit(" {T:/%*.*s}", nbyte_len, nbyte_len, "Obytes"); xo_emit(" {T:/%*s}", nerr_len, "Coll"); if (dflag) xo_emit(" {T:/%*.*s}", nerr_len, nerr_len, "Drop"); xo_emit("\n"); } for (ifa = ifap; ifa; ifa = ifa->ifa_next) { bool network = false, link = false; char *name, *xname, buf[IFNAMSIZ+1]; if (interface != NULL && strcmp(ifa->ifa_name, interface) != 0) continue; name = ifa->ifa_name; if (pfunc) { (*pfunc)(name); /* * Skip all ifaddrs belonging to same interface. 
*/ while(ifa->ifa_next != NULL && (strcmp(ifa->ifa_next->ifa_name, name) == 0)) { ifa = ifa->ifa_next; } continue; } if (af != AF_UNSPEC && ifa->ifa_addr->sa_family != af) continue; xo_open_instance("interface"); if ((ifa->ifa_flags & IFF_UP) == 0) { xname = stpcpy(buf, name); *xname++ = '*'; *xname = '\0'; xname = buf; } else xname = name; xo_emit("{d:/%-*.*s}{etk:name}{eq:flags/0x%x}", ifn_len_max, ifn_len_max, xname, name, ifa->ifa_flags); #define IFA_MTU(ifa) (((struct if_data *)(ifa)->ifa_data)->ifi_mtu) show_stat("lu", 6, "mtu", IFA_MTU(ifa), IFA_MTU(ifa), 0); #undef IFA_MTU process_ifa_addr(EMIT, ifa, &net_len, &addr_len, &network, &link); show_stat("lu", npkt_len, "received-packets", IFA_STAT(ipackets), link|network, 1); show_stat("lu", nerr_len, "received-errors", IFA_STAT(ierrors), link, 1); show_stat("lu", nerr_len, "dropped-packets", IFA_STAT(iqdrops), link, 1); if (bflag) show_stat("lu", nbyte_len, "received-bytes", IFA_STAT(ibytes), link|network, 0); show_stat("lu", npkt_len, "sent-packets", IFA_STAT(opackets), link|network, 1); show_stat("lu", nerr_len, "send-errors", IFA_STAT(oerrors), link, 1); if (bflag) show_stat("lu", nbyte_len, "sent-bytes", IFA_STAT(obytes), link|network, 0); show_stat("NRSlu", nerr_len, "collisions", IFA_STAT(collisions), link, 1); if (dflag) show_stat("LSlu", nerr_len, "dropped-packets", IFA_STAT(oqdrops), link, 1); xo_emit("\n"); if (!aflag) { xo_close_instance("interface"); continue; } /* * Print family's multicast addresses. */ xo_open_list("multicast-address"); for (ifma = next_ifma(ifmap, ifa->ifa_name, ifa->ifa_addr->sa_family); ifma != NULL; ifma = next_ifma(ifma, ifa->ifa_name, ifa->ifa_addr->sa_family)) { const char *fmt = NULL; xo_open_instance("multicast-address"); switch (ifma->ifma_addr->sa_family) { case AF_LINK: { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)ifma->ifma_addr; if (sdl->sdl_type != IFT_ETHER && sdl->sdl_type != IFT_FDDI) break; } /* FALLTHROUGH */ case AF_INET: #ifdef INET6 case AF_INET6: #endif /* INET6 */ fmt = routename(ifma->ifma_addr, numeric_addr); break; } if (fmt) { if (Wflag) xo_emit("{P:/%27s }" "{t:address/%-17s/}", "", fmt); else xo_emit("{P:/%25s }" "{t:address/%-17.17s/}", "", fmt); if (ifma->ifma_addr->sa_family == AF_LINK) { xo_emit(" {:received-packets/%8lu}", IFA_STAT(imcasts)); xo_emit("{P:/%*s}", bflag? 17 : 6, ""); xo_emit(" {:sent-packets/%8lu}", IFA_STAT(omcasts)); } xo_emit("\n"); } xo_close_instance("multicast-address"); ifma = ifma->ifma_next; } xo_close_list("multicast-address"); xo_close_instance("interface"); } xo_close_list("interface"); freeifaddrs(ifap); if (aflag) freeifmaddrs(ifmap); } struct iftot { u_long ift_ip; /* input packets */ u_long ift_ie; /* input errors */ u_long ift_id; /* input drops */ u_long ift_op; /* output packets */ u_long ift_oe; /* output errors */ u_long ift_od; /* output drops */ u_long ift_co; /* collisions */ u_long ift_ib; /* input bytes */ u_long ift_ob; /* output bytes */ }; /* * Obtain stats for interface(s). 
*/ static void fill_iftot(struct iftot *st) { struct ifaddrs *ifap, *ifa; bool found = false; if (getifaddrs(&ifap) != 0) xo_err(EX_OSERR, "getifaddrs"); bzero(st, sizeof(*st)); for (ifa = ifap; ifa; ifa = ifa->ifa_next) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; if (interface) { if (strcmp(ifa->ifa_name, interface) == 0) found = true; else continue; } st->ift_ip += IFA_STAT(ipackets); st->ift_ie += IFA_STAT(ierrors); st->ift_id += IFA_STAT(iqdrops); st->ift_ib += IFA_STAT(ibytes); st->ift_op += IFA_STAT(opackets); st->ift_oe += IFA_STAT(oerrors); st->ift_od += IFA_STAT(oqdrops); st->ift_ob += IFA_STAT(obytes); st->ift_co += IFA_STAT(collisions); } if (interface && found == false) xo_err(EX_DATAERR, "interface %s not found", interface); freeifaddrs(ifap); } /* * Set a flag to indicate that a signal from the periodic itimer has been * caught. */ static sig_atomic_t signalled; static void catchalarm(int signo __unused) { signalled = true; } /* * Print a running summary of interface statistics. * Repeat display every interval seconds, showing statistics * collected over that interval. Assumes that interval is non-zero. * First line printed at top of screen is always cumulative. */ static void sidewaysintpr(void) { struct iftot ift[2], *new, *old; struct itimerval interval_it; int oldmask, line; new = &ift[0]; old = &ift[1]; fill_iftot(old); (void)signal(SIGALRM, catchalarm); signalled = false; interval_it.it_interval.tv_sec = interval; interval_it.it_interval.tv_usec = 0; interval_it.it_value = interval_it.it_interval; setitimer(ITIMER_REAL, &interval_it, NULL); xo_open_list("interface-statistics"); banner: xo_emit("{T:/%17s} {T:/%14s} {T:/%16s}\n", "input", interface != NULL ? interface : "(Total)", "output"); xo_emit("{T:/%10s} {T:/%5s} {T:/%5s} {T:/%10s} {T:/%10s} {T:/%5s} " "{T:/%10s} {T:/%5s}", "packets", "errs", "idrops", "bytes", "packets", "errs", "bytes", "colls"); if (dflag) xo_emit(" {T:/%5.5s}", "drops"); xo_emit("\n"); xo_flush(); line = 0; loop: if ((noutputs != 0) && (--noutputs == 0)) { xo_close_list("interface-statistics"); return; } oldmask = sigblock(sigmask(SIGALRM)); while (!signalled) sigpause(0); signalled = false; sigsetmask(oldmask); line++; fill_iftot(new); xo_open_instance("stats"); show_stat("lu", 10, "received-packets", new->ift_ip - old->ift_ip, 1, 1); show_stat("lu", 5, "received-errors", new->ift_ie - old->ift_ie, 1, 1); show_stat("lu", 5, "dropped-packets", new->ift_id - old->ift_id, 1, 1); show_stat("lu", 10, "received-bytes", new->ift_ib - old->ift_ib, 1, 0); show_stat("lu", 10, "sent-packets", new->ift_op - old->ift_op, 1, 1); show_stat("lu", 5, "send-errors", new->ift_oe - old->ift_oe, 1, 1); show_stat("lu", 10, "sent-bytes", new->ift_ob - old->ift_ob, 1, 0); show_stat("NRSlu", 5, "collisions", new->ift_co - old->ift_co, 1, 1); if (dflag) show_stat("LSlu", 5, "dropped-packets", new->ift_od - old->ift_od, 1, 1); xo_close_instance("stats"); xo_emit("\n"); xo_flush(); if (new == &ift[0]) { new = &ift[1]; old = &ift[0]; } else { new = &ift[0]; old = &ift[1]; } if (line == 21) goto banner; else goto loop; /* NOTREACHED */ }
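The sigblock()/sigpause() pair used by sidewaysintpr() above is the legacy BSD signal API. A minimal standalone sketch of the same alarm-driven sampling loop, written against the modern sigprocmask()/sigsuspend() equivalents, might look as follows; sample_counter() is a hypothetical stand-in for fill_iftot() and the five-iteration limit is arbitrary, neither being part of the patch itself.

#include <sys/time.h>

#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t signalled;

static void
catchalarm(int signo)
{
	(void)signo;
	signalled = 1;
}

/* Hypothetical stand-in for fill_iftot(): returns a growing counter. */
static unsigned long
sample_counter(void)
{
	static unsigned long fake;

	return (fake += 42);
}

int
main(void)
{
	struct itimerval it;
	sigset_t alarmset, oldset;
	unsigned long new, old;

	(void)signal(SIGALRM, catchalarm);

	/* Fire SIGALRM once per second, starting one second from now. */
	it.it_interval.tv_sec = 1;
	it.it_interval.tv_usec = 0;
	it.it_value = it.it_interval;
	(void)setitimer(ITIMER_REAL, &it, NULL);

	sigemptyset(&alarmset);
	sigaddset(&alarmset, SIGALRM);

	old = sample_counter();
	for (int line = 0; line < 5; line++) {
		/*
		 * Block SIGALRM while testing the flag, then atomically
		 * unblock and wait; this closes the same race that the
		 * sigblock()/sigpause() pair closes in sidewaysintpr().
		 */
		sigprocmask(SIG_BLOCK, &alarmset, &oldset);
		while (!signalled)
			sigsuspend(&oldset);
		signalled = 0;
		sigprocmask(SIG_SETMASK, &oldset, NULL);

		/* Print the delta accumulated over the last interval. */
		new = sample_counter();
		printf("%lu\n", new - old);
		old = new;
	}
	return (0);
}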